@economic/agents 0.0.1-alpha.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +87 -0
- package/dist/index.d.mts +561 -0
- package/dist/index.mjs +683 -0
- package/package.json +50 -0
package/README.md
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
# @economic/agents
|
|
2
|
+
|
|
3
|
+
Base classes and utilities for building LLM agents on Cloudflare's Agents SDK with lazy tool loading.
|
|
4
|
+
|
|
5
|
+
## Exports
|
|
6
|
+
|
|
7
|
+
- **`AIChatAgent`** — base class that owns the full `onChatMessage` lifecycle. Implement `getModel()`, `getTools()`, `getSkills()`, and `getSystemPrompt()`. Compaction is **enabled by default** (uses `getModel()` for summarisation).
|
|
8
|
+
- **`AIChatAgentBase`** — base class for when you need full control over `streamText`. Implement `getTools()`, `getSkills()`, and your own `onChatMessage` decorated with `@withSkills`. Compaction is **disabled by default**.
|
|
9
|
+
- **`withSkills`** — method decorator used with `AIChatAgentBase`.
|
|
10
|
+
- **`createSkills`** — lower-level factory for wiring lazy skill loading into any agent subclass yourself.
|
|
11
|
+
- **`filterEphemeralMessages`**, **`injectGuidance`** — utilities used internally, exported for custom wiring.
|
|
12
|
+
- **`compactIfNeeded`**, **`compactMessages`**, **`estimateMessagesTokens`**, **`COMPACT_TOKEN_THRESHOLD`** — compaction utilities, exported for use with `AIChatAgentBase` or fully custom agents.
|
|
13
|
+
- Types: `Tool`, `Skill`, `SkillsConfig`, `SkillsResult`, `SkillContext`.
|
|
14
|
+
|
|
15
|
+
See [COMPARISON.md](./COMPARISON.md) for a side-by-side code example of both base classes.
|
|
16
|
+
|
|
17
|
+
See [src/features/skills/README.md](./src/features/skills/README.md) for full `createSkills` documentation.
|
|
18
|
+
|
|
19
|
+
## Development
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
vp install # install dependencies
|
|
23
|
+
vp test # run tests
|
|
24
|
+
vp pack # build
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
---
|
|
28
|
+
|
|
29
|
+
## Implementing your own agent
|
|
30
|
+
|
|
31
|
+
Extend `AIChatAgent` and implement the four required methods:
|
|
32
|
+
|
|
33
|
+
```typescript
|
|
34
|
+
import { AIChatAgent } from "@economic/agents";
|
|
35
|
+
|
|
36
|
+
export class MyAgent extends AIChatAgent {
|
|
37
|
+
getModel() {
|
|
38
|
+
return openai("gpt-4o");
|
|
39
|
+
}
|
|
40
|
+
getTools() {
|
|
41
|
+
return [myAlwaysOnTool];
|
|
42
|
+
}
|
|
43
|
+
getSkills() {
|
|
44
|
+
return [searchSkill, codeSkill];
|
|
45
|
+
}
|
|
46
|
+
getSystemPrompt() {
|
|
47
|
+
return "You are a helpful assistant.";
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
// Return the D1 binding — typed in Cloudflare.Env after `wrangler types`
|
|
51
|
+
protected getDB() {
|
|
52
|
+
return this.env.AGENT_DB;
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
If you need control over the response — custom model options, middleware, varying the model per request — use `AIChatAgentBase` with the `@withSkills` decorator instead. See [COMPARISON.md](./COMPARISON.md) for a side-by-side example and `src/features/skills/README.md` for full `createSkills` documentation.
|
|
58
|
+
|
|
59
|
+
### Message compaction
|
|
60
|
+
|
|
61
|
+
`AIChatAgent` automatically compacts the conversation history when it approaches the token limit (140k tokens). Older messages are summarised by the LLM into a single system message; the most recent messages are kept verbatim. The verbatim tail size is `maxPersistedMessages - 1` (default: 49 messages + 1 summary message).
|
|
62
|
+
|
|
63
|
+
The compaction model defaults to `getModel()`. To use a cheaper model for summarisation, override `getCompactionModel()`:
|
|
64
|
+
|
|
65
|
+
```typescript
|
|
66
|
+
protected override getCompactionModel(): LanguageModel {
|
|
67
|
+
return openai("gpt-4o-mini"); // cheaper model for summarisation
|
|
68
|
+
}
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
To disable compaction entirely, override `getCompactionModel()` to return `undefined`:
|
|
72
|
+
|
|
73
|
+
```typescript
|
|
74
|
+
protected override getCompactionModel(): LanguageModel | undefined {
|
|
75
|
+
return undefined; // no compaction — older messages are dropped at maxPersistedMessages
|
|
76
|
+
}
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
`AIChatAgentBase` does not enable compaction by default. To add it, override `getCompactionModel()` to return a model — the `persistMessages` override will pick it up automatically:
|
|
80
|
+
|
|
81
|
+
```typescript
|
|
82
|
+
protected override getCompactionModel(): LanguageModel {
|
|
83
|
+
return openai("gpt-4o-mini");
|
|
84
|
+
}
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
Alternatively, import `compactIfNeeded` and `COMPACT_TOKEN_THRESHOLD` from `@economic/agents` and call them yourself inside a custom `persistMessages` override for full control over the compaction logic.
|
package/dist/index.d.mts
ADDED
|
@@ -0,0 +1,561 @@
|
|
|
1
|
+
import * as ai from "ai";
|
|
2
|
+
import { LanguageModel, ModelMessage, PrepareStepFunction, StreamTextOnFinishCallback, ToolSet, UIMessage } from "ai";
|
|
3
|
+
import { AIChatAgent as AIChatAgent$1, OnChatMessageOptions } from "@cloudflare/ai-chat";
|
|
4
|
+
|
|
5
|
+
//#region ../../node_modules/partyserver/dist/index.d.ts
|
|
6
|
+
//#region src/types.d.ts
|
|
7
|
+
type ImmutablePrimitive = undefined | null | boolean | string | number;
|
|
8
|
+
type Immutable<T> = T extends ImmutablePrimitive ? T : T extends Array<infer U> ? ImmutableArray<U> : T extends Map<infer K, infer V> ? ImmutableMap<K, V> : T extends Set<infer M> ? ImmutableSet<M> : ImmutableObject<T>;
|
|
9
|
+
type ImmutableArray<T> = ReadonlyArray<Immutable<T>>;
|
|
10
|
+
type ImmutableMap<K, V> = ReadonlyMap<Immutable<K>, Immutable<V>>;
|
|
11
|
+
type ImmutableSet<T> = ReadonlySet<Immutable<T>>;
|
|
12
|
+
type ImmutableObject<T> = { readonly [K in keyof T]: Immutable<T[K]> };
|
|
13
|
+
type ConnectionState<T> = ImmutableObject<T> | null;
|
|
14
|
+
type ConnectionSetStateFn<T> = (prevState: ConnectionState<T>) => T;
|
|
15
|
+
type ConnectionContext = {
|
|
16
|
+
request: Request;
|
|
17
|
+
};
|
|
18
|
+
/** A WebSocket connected to the Server */
|
|
19
|
+
type Connection<TState = unknown> = WebSocket & {
|
|
20
|
+
/** Connection identifier */id: string;
|
|
21
|
+
/**
|
|
22
|
+
* Arbitrary state associated with this connection.
|
|
23
|
+
* Read-only — use {@link Connection.setState} to update.
|
|
24
|
+
*
|
|
25
|
+
* This property is configurable, meaning it can be redefined via
|
|
26
|
+
* `Object.defineProperty` by downstream consumers (e.g. the Cloudflare
|
|
27
|
+
* Agents SDK) to namespace or wrap internal state storage.
|
|
28
|
+
*/
|
|
29
|
+
state: ConnectionState<TState>;
|
|
30
|
+
/**
|
|
31
|
+
* Update the state associated with this connection.
|
|
32
|
+
*
|
|
33
|
+
* Accepts either a new state value or an updater function that receives
|
|
34
|
+
* the previous state and returns the next state.
|
|
35
|
+
*
|
|
36
|
+
* This property is configurable, meaning it can be redefined via
|
|
37
|
+
* `Object.defineProperty` by downstream consumers. If you redefine
|
|
38
|
+
* `state` and `setState`, you are responsible for calling
|
|
39
|
+
* `serializeAttachment` / `deserializeAttachment` yourself if you need
|
|
40
|
+
* the state to survive hibernation.
|
|
41
|
+
*/
|
|
42
|
+
setState(state: TState | ConnectionSetStateFn<TState> | null): ConnectionState<TState>;
|
|
43
|
+
/**
|
|
44
|
+
* @deprecated use {@link Connection.setState} instead.
|
|
45
|
+
*
|
|
46
|
+
* Low-level method to persist data in the connection's attachment storage.
|
|
47
|
+
* This property is configurable and can be redefined by downstream
|
|
48
|
+
* consumers that need to wrap or namespace the underlying storage.
|
|
49
|
+
*/
|
|
50
|
+
serializeAttachment<T = unknown>(attachment: T): void;
|
|
51
|
+
/**
|
|
52
|
+
* @deprecated use {@link Connection.state} instead.
|
|
53
|
+
*
|
|
54
|
+
* Low-level method to read data from the connection's attachment storage.
|
|
55
|
+
* This property is configurable and can be redefined by downstream
|
|
56
|
+
* consumers that need to wrap or namespace the underlying storage.
|
|
57
|
+
*/
|
|
58
|
+
deserializeAttachment<T = unknown>(): T | null;
|
|
59
|
+
/**
|
|
60
|
+
* Tags assigned to this connection via {@link Server.getConnectionTags}.
|
|
61
|
+
* Always includes the connection id as the first tag.
|
|
62
|
+
*/
|
|
63
|
+
tags: readonly string[];
|
|
64
|
+
/**
|
|
65
|
+
* @deprecated Use `this.name` on the Server instead.
|
|
66
|
+
* The server name. Populated from `Server.name` after initialization.
|
|
67
|
+
*/
|
|
68
|
+
server: string;
|
|
69
|
+
}; //#endregion
|
|
70
|
+
//#region src/index.d.ts
|
|
71
|
+
//#endregion
|
|
72
|
+
//#region ../../node_modules/zod/v4/core/schemas.d.cts
|
|
73
|
+
declare global {
|
|
74
|
+
interface File {}
|
|
75
|
+
}
|
|
76
|
+
//#endregion
|
|
77
|
+
//#region src/features/skills/types.d.ts
|
|
78
|
+
/**
|
|
79
|
+
* A single tool with a name, JSON Schema parameters, and an execute function.
|
|
80
|
+
*
|
|
81
|
+
* Tools are defined in this SDK-agnostic format and converted to the
|
|
82
|
+
* target SDK's format by the adapter layer (createSkills).
|
|
83
|
+
*/
|
|
84
|
+
interface Tool {
|
|
85
|
+
name: string;
|
|
86
|
+
description: string;
|
|
87
|
+
/** JSON Schema object describing the tool's input parameters */
|
|
88
|
+
parameters: Record<string, unknown>;
|
|
89
|
+
execute(args: Record<string, unknown>, options: {
|
|
90
|
+
toolCallId: string;
|
|
91
|
+
}): Promise<string>;
|
|
92
|
+
}
|
|
93
|
+
/**
|
|
94
|
+
* A named group of related tools that can be loaded together on demand.
|
|
95
|
+
*
|
|
96
|
+
* The agent starts with only its always-on tools active. When the LLM calls
|
|
97
|
+
* activate_skill with a skill name, that skill's tools become available for
|
|
98
|
+
* the rest of the conversation.
|
|
99
|
+
*/
|
|
100
|
+
interface Skill {
|
|
101
|
+
name: string;
|
|
102
|
+
/** One-line description shown in the activate_skill tool schema */
|
|
103
|
+
description: string;
|
|
104
|
+
/**
|
|
105
|
+
* Guidance text for this skill — e.g. rate limits, preferred patterns,
|
|
106
|
+
* when to use each tool. Injected as a system message each turn for any
|
|
107
|
+
* skill that is loaded, keeping the `system` prompt static and cacheable.
|
|
108
|
+
*/
|
|
109
|
+
guidance?: string;
|
|
110
|
+
tools: Tool[];
|
|
111
|
+
}
|
|
112
|
+
/**
|
|
113
|
+
* Configuration passed to createSkills().
|
|
114
|
+
*/
|
|
115
|
+
interface SkillsConfig {
|
|
116
|
+
/** Tools that are always active regardless of loaded skills */
|
|
117
|
+
tools: Tool[];
|
|
118
|
+
/** All available skills that can be loaded on demand */
|
|
119
|
+
skills: Skill[];
|
|
120
|
+
/**
|
|
121
|
+
* Skill names that were loaded in previous turns, read from D1 at turn
|
|
122
|
+
* start. Seeds the in-memory loadedSkills set so prior state is restored
|
|
123
|
+
* before the first LLM step.
|
|
124
|
+
*/
|
|
125
|
+
initialLoadedSkills?: string[];
|
|
126
|
+
/**
|
|
127
|
+
* Called after activate_skill successfully loads new skills.
|
|
128
|
+
* Receives the complete current set of loaded skill names. The agent
|
|
129
|
+
* buffers this value and writes it to D1 at turn end (in persistMessages),
|
|
130
|
+
* keeping the write co-located with message persistence — aligned with
|
|
131
|
+
* how slack-bot saves loaded_categories alongside messages.
|
|
132
|
+
*/
|
|
133
|
+
onSkillsChanged?: (skills: string[]) => Promise<void>;
|
|
134
|
+
/**
|
|
135
|
+
* Optional permission hook. Return false to deny access to a skill.
|
|
136
|
+
* Defaults to allow-all if not provided.
|
|
137
|
+
*/
|
|
138
|
+
filterSkill?: (skillName: string) => Promise<boolean> | boolean;
|
|
139
|
+
}
|
|
140
|
+
/**
|
|
141
|
+
* Skill context injected by the @withSkills decorator.
|
|
142
|
+
*
|
|
143
|
+
* Spread the context fields directly into streamText — messages already has
|
|
144
|
+
* guidance injected at the correct position:
|
|
145
|
+
*
|
|
146
|
+
* ```typescript
|
|
147
|
+
* const { messages, ...skillArgs } = ctx;
|
|
148
|
+
* return streamText({
|
|
149
|
+
* model: this.getModel(),
|
|
150
|
+
* system: "Your base prompt — static, never includes guidance",
|
|
151
|
+
* messages,
|
|
152
|
+
* ...skillArgs,
|
|
153
|
+
* onFinish,
|
|
154
|
+
* stopWhen: stepCountIs(20),
|
|
155
|
+
* }).toUIMessageStreamResponse();
|
|
156
|
+
* ```
|
|
157
|
+
*/
|
|
158
|
+
interface SkillContext {
|
|
159
|
+
/** All registered tools — spread into streamText */
|
|
160
|
+
tools: ai.ToolSet;
|
|
161
|
+
/** Currently active tool names — spread into streamText */
|
|
162
|
+
activeTools: string[];
|
|
163
|
+
/**
|
|
164
|
+
* Updates active tools and the guidance system message before each LLM step.
|
|
165
|
+
* Spread into streamText.
|
|
166
|
+
*/
|
|
167
|
+
prepareStep: ai.PrepareStepFunction;
|
|
168
|
+
/**
|
|
169
|
+
* Conversation messages read from D1 with current skill guidance already
|
|
170
|
+
* injected just before the last message (the current user turn). Pass
|
|
171
|
+
* directly as the `messages` param of streamText.
|
|
172
|
+
*/
|
|
173
|
+
messages: ai.ModelMessage[];
|
|
174
|
+
}
|
|
175
|
+
/**
|
|
176
|
+
* The object returned by createSkills().
|
|
177
|
+
* Spread the SDK-specific fields into your streamText call.
|
|
178
|
+
*/
|
|
179
|
+
interface SkillsResult {
|
|
180
|
+
/** Guidance text for all currently-loaded skills */
|
|
181
|
+
getLoadedGuidance(): string;
|
|
182
|
+
/** Current loaded skill names */
|
|
183
|
+
getLoadedSkills(): string[];
|
|
184
|
+
}
|
|
185
|
+
//#endregion
|
|
186
|
+
//#region src/agents/chat/AIChatAgentBase.d.ts
|
|
187
|
+
/**
|
|
188
|
+
* Base class for chat agents with lazy skill loading.
|
|
189
|
+
*
|
|
190
|
+
* Owns:
|
|
191
|
+
* - D1 persistence for loaded skill state (skill names survive DO eviction)
|
|
192
|
+
* - Ephemeral message filtering (list_capabilities, no-op activate_skill calls)
|
|
193
|
+
* - Message compaction (LLM summarisation when history exceeds token threshold)
|
|
194
|
+
* - History replay to newly connected clients (onConnect override)
|
|
195
|
+
* - Skill context preparation for use with the @withSkills decorator
|
|
196
|
+
*
|
|
197
|
+
* Conversation messages are stored in Durable Object SQLite, managed
|
|
198
|
+
* automatically by the Cloudflare AIChatAgent — no D1 write needed for messages.
|
|
199
|
+
*
|
|
200
|
+
* D1 is written only when skills change (activate_skill was called this turn),
|
|
201
|
+
* not on every turn.
|
|
202
|
+
*
|
|
203
|
+
* ## Usage
|
|
204
|
+
*
|
|
205
|
+
* Extend this class when you want full control over `streamText`. Implement
|
|
206
|
+
* `getTools()`, `getSkills()`, and your own `onChatMessage` decorated with
|
|
207
|
+
* `@withSkills`:
|
|
208
|
+
*
|
|
209
|
+
* ```typescript
|
|
210
|
+
* export class MyAgent extends AIChatAgentBase {
|
|
211
|
+
* getTools() { return []; }
|
|
212
|
+
* getSkills() { return [searchSkill, codeSkill]; }
|
|
213
|
+
* getDB() { return this.env.AGENT_DB; }
|
|
214
|
+
*
|
|
215
|
+
* @withSkills
|
|
216
|
+
* async onChatMessage(onFinish, ctx: SkillContext, options?) {
|
|
217
|
+
* const { messages, ...skillArgs } = ctx;
|
|
218
|
+
* return streamText({
|
|
219
|
+
* model: openai("gpt-4o"),
|
|
220
|
+
* system: "You are a helpful assistant.",
|
|
221
|
+
* messages,
|
|
222
|
+
* ...skillArgs,
|
|
223
|
+
* onFinish,
|
|
224
|
+
* stopWhen: stepCountIs(20),
|
|
225
|
+
* }).toUIMessageStreamResponse();
|
|
226
|
+
* }
|
|
227
|
+
* }
|
|
228
|
+
* ```
|
|
229
|
+
*
|
|
230
|
+
* For a batteries-included experience where the base class owns `onChatMessage`,
|
|
231
|
+
* extend `AIChatAgent` instead.
|
|
232
|
+
*/
|
|
233
|
+
declare abstract class AIChatAgentBase<Env extends Cloudflare.Env = Cloudflare.Env> extends AIChatAgent$1<Env> {
|
|
234
|
+
/**
|
|
235
|
+
* Maximum number of messages stored in DO SQLite.
|
|
236
|
+
*
|
|
237
|
+
* Lowered from the Cloudflare AIChatAgent default of 200. When compaction
|
|
238
|
+
* is enabled, one slot is reserved for the summary message so the verbatim
|
|
239
|
+
* tail is maxPersistedMessages - 1 recent messages. Raise or lower per agent.
|
|
240
|
+
*/
|
|
241
|
+
maxPersistedMessages: number;
|
|
242
|
+
/** Tools that are always active regardless of loaded skills */
|
|
243
|
+
abstract getTools(): Tool[];
|
|
244
|
+
/** All skills available for on-demand loading */
|
|
245
|
+
abstract getSkills(): Skill[];
|
|
246
|
+
/**
|
|
247
|
+
* Return a LanguageModel to use for compaction summarisation.
|
|
248
|
+
*
|
|
249
|
+
* Return undefined (default) to disable compaction — messages are kept up
|
|
250
|
+
* to maxPersistedMessages and older ones are dropped by the Cloudflare
|
|
251
|
+
* AIChatAgent's built-in hard cap.
|
|
252
|
+
*
|
|
253
|
+
* Override to use a cheaper or faster model for summarisation, or to enable
|
|
254
|
+
* compaction in subclasses that do not override it automatically.
|
|
255
|
+
*/
|
|
256
|
+
protected getCompactionModel(): LanguageModel | undefined;
|
|
257
|
+
/**
|
|
258
|
+
* Return the D1 database binding for persisting loaded skill state.
|
|
259
|
+
*
|
|
260
|
+
* Override in your subclass to return the binding from env:
|
|
261
|
+
* ```typescript
|
|
262
|
+
* protected getDB() { return this.env.AGENT_DB; }
|
|
263
|
+
* ```
|
|
264
|
+
*
|
|
265
|
+
* Defaults to undefined — when undefined, loaded skills reset on every new
|
|
266
|
+
* conversation (skills still work within a turn, just not across turns).
|
|
267
|
+
*/
|
|
268
|
+
protected getDB(): D1Database | undefined;
|
|
269
|
+
/**
|
|
270
|
+
* Optional permission hook. Return false to deny the agent access to a
|
|
271
|
+
* skill when activate_skill is called. Defaults to allow-all.
|
|
272
|
+
*/
|
|
273
|
+
protected filterSkill(_skillName: string): Promise<boolean>;
|
|
274
|
+
/**
|
|
275
|
+
* Buffered skill state from the current turn.
|
|
276
|
+
*
|
|
277
|
+
* Set by the onSkillsChanged callback when activate_skill loads new skills
|
|
278
|
+
* mid-turn. Flushed to D1 in persistMessages at turn end — only written
|
|
279
|
+
* when this value is set, so D1 is not touched on turns where no new skills
|
|
280
|
+
* are loaded.
|
|
281
|
+
*/
|
|
282
|
+
protected _pendingSkills: string[] | undefined;
|
|
283
|
+
/**
|
|
284
|
+
* Reads loaded skill names from D1 for this agent.
|
|
285
|
+
*
|
|
286
|
+
* Returns an empty array if no record exists (first turn, or no skills
|
|
287
|
+
* loaded yet). Conversation messages are not read here — the Cloudflare
|
|
288
|
+
* AIChatAgent provides those via this.messages from DO SQLite.
|
|
289
|
+
*/
|
|
290
|
+
protected _readSkillState(): Promise<string[]>;
|
|
291
|
+
/**
|
|
292
|
+
* Writes loaded skill names to D1 for this agent.
|
|
293
|
+
*
|
|
294
|
+
* Uses INSERT OR REPLACE so the first skill load creates the row and
|
|
295
|
+
* subsequent loads update it. Only called when skills actually changed
|
|
296
|
+
* this turn (_pendingSkills is set).
|
|
297
|
+
*/
|
|
298
|
+
protected _writeSkillState(skills: string[]): Promise<void>;
|
|
299
|
+
/**
|
|
300
|
+
* Flush persisted message history to a newly connected client.
|
|
301
|
+
*
|
|
302
|
+
* The Cloudflare AIChatAgent broadcasts message updates to existing
|
|
303
|
+
* connections via persistMessages, but does nothing for connections that
|
|
304
|
+
* arrive after a conversation has ended. Without this override, a page
|
|
305
|
+
* refresh produces an empty UI even though the history is intact in DO SQLite.
|
|
306
|
+
*
|
|
307
|
+
* Skips replay when a stream is active — CF_AGENT_STREAM_RESUMING handles
|
|
308
|
+
* that case and replays in-progress chunks via its own protocol.
|
|
309
|
+
*/
|
|
310
|
+
onConnect(connection: Connection, ctx: ConnectionContext): Promise<void>;
|
|
311
|
+
/**
|
|
312
|
+
* Strips ephemeral content, conditionally saves skill state to D1, then
|
|
313
|
+
* delegates to super for DO SQLite persistence and WebSocket broadcast.
|
|
314
|
+
*
|
|
315
|
+
* The Cloudflare AIChatAgent calls persistMessages once per turn after all
|
|
316
|
+
* steps complete, so overriding here is the correct place to act — it runs
|
|
317
|
+
* after the full assistant message (including all tool results) is assembled.
|
|
318
|
+
*
|
|
319
|
+
* Two things happen here:
|
|
320
|
+
*
|
|
321
|
+
* 1. Ephemeral tool calls are stripped — list_capabilities (always) and
|
|
322
|
+
* activate_skill when nothing was newly loaded (no state change).
|
|
323
|
+
*
|
|
324
|
+
* 2. If skills changed this turn (_pendingSkills is set), the updated list
|
|
325
|
+
* is written to D1. Turns where no skills were loaded do not touch D1.
|
|
326
|
+
*
|
|
327
|
+
* Message persistence itself is handled by super.persistMessages, which
|
|
328
|
+
* writes to DO SQLite — no D1 write needed for messages.
|
|
329
|
+
*/
|
|
330
|
+
persistMessages(messages: UIMessage[], excludeBroadcastIds?: string[], options?: {
|
|
331
|
+
_deleteStaleRows?: boolean;
|
|
332
|
+
}): Promise<void>;
|
|
333
|
+
/**
|
|
334
|
+
* Widened onChatMessage signature that accommodates the @withSkills decorator.
|
|
335
|
+
*
|
|
336
|
+
* The decorator transforms the consumer's 3-arg form (onFinish, ctx, options) into
|
|
337
|
+
* a 2-arg wrapper at runtime. This declaration widens the base class signature so
|
|
338
|
+
* that TypeScript accepts the consumer's 3-arg override without errors.
|
|
339
|
+
*
|
|
340
|
+
* @ts-ignore — intentional: widens the Cloudflare AIChatAgent's (onFinish, options?) signature.
|
|
341
|
+
*/
|
|
342
|
+
onChatMessage(onFinish: StreamTextOnFinishCallback<ToolSet>, ctxOrOptions?: SkillContext | OnChatMessageOptions): Promise<Response | undefined>;
|
|
343
|
+
/**
|
|
344
|
+
* Called by the @withSkills decorator at the start of each turn.
|
|
345
|
+
*
|
|
346
|
+
* Reads loaded skill state from D1, seeds createSkills, injects guidance,
|
|
347
|
+
* and returns a SkillContext ready to use in a streamText call.
|
|
348
|
+
*
|
|
349
|
+
* The returned `messages` already has guidance injected just before the
|
|
350
|
+
* current user turn — pass it directly as the `messages` param of streamText.
|
|
351
|
+
* Guidance is never stored in DO SQLite, so loaded_skills in D1 is the
|
|
352
|
+
* single source of truth for which skills are active.
|
|
353
|
+
*/
|
|
354
|
+
protected _prepareSkillContext(): Promise<SkillContext>;
|
|
355
|
+
}
|
|
356
|
+
/**
|
|
357
|
+
* Method decorator for use with AIChatAgentBase.
|
|
358
|
+
*
|
|
359
|
+
* Apply to `onChatMessage` to receive a pre-built SkillContext as the second
|
|
360
|
+
* argument. The decorator reads loaded skill state from D1, seeds createSkills,
|
|
361
|
+
* and injects guidance into the conversation history from DO SQLite. Skill state
|
|
362
|
+
* changes are buffered for D1 write at turn end (only when skills actually change).
|
|
363
|
+
* Ephemeral cleanup is handled automatically via the persistMessages override —
|
|
364
|
+
* no wiring needed.
|
|
365
|
+
*
|
|
366
|
+
* ```typescript
|
|
367
|
+
* @withSkills
|
|
368
|
+
* async onChatMessage(
|
|
369
|
+
* onFinish: StreamTextOnFinishCallback<ToolSet>,
|
|
370
|
+
* ctx: SkillContext,
|
|
371
|
+
* options?: OnChatMessageOptions,
|
|
372
|
+
* ) {
|
|
373
|
+
* const { messages, ...skillArgs } = ctx;
|
|
374
|
+
* return streamText({
|
|
375
|
+
* model: this.getModel(),
|
|
376
|
+
* system: "Your base prompt — static, never includes guidance",
|
|
377
|
+
* messages,
|
|
378
|
+
* ...skillArgs,
|
|
379
|
+
* onFinish,
|
|
380
|
+
* stopWhen: stepCountIs(20),
|
|
381
|
+
* }).toUIMessageStreamResponse();
|
|
382
|
+
* }
|
|
383
|
+
* ```
|
|
384
|
+
*/
|
|
385
|
+
type WithSkillsFn = (this: AIChatAgentBase, onFinish: StreamTextOnFinishCallback<ToolSet>, ctx: SkillContext, options?: OnChatMessageOptions) => Promise<Response | undefined>;
|
|
386
|
+
declare function withSkills(fn: WithSkillsFn, _context: ClassMethodDecoratorContext): WithSkillsFn;
|
|
387
|
+
//#endregion
|
|
388
|
+
//#region src/agents/chat/AIChatAgent.d.ts
|
|
389
|
+
/**
|
|
390
|
+
* Batteries-included base class for chat agents with lazy skill loading.
|
|
391
|
+
*
|
|
392
|
+
* Owns the full `onChatMessage` lifecycle. Implement four abstract methods and
|
|
393
|
+
* get lazy skill loading, cross-turn skill persistence, guidance injection,
|
|
394
|
+
* ephemeral message cleanup, and message compaction for free.
|
|
395
|
+
*
|
|
396
|
+
* Conversation messages are stored in Durable Object SQLite by the Cloudflare
|
|
397
|
+
* AIChatAgent automatically — available as this.messages at the start of each
|
|
398
|
+
* turn. Loaded skill state is stored in D1 (via getDB()) and read at turn start.
|
|
399
|
+
* Guidance is injected as a system message just before the current user turn,
|
|
400
|
+
* keeping the `system` param static and cacheable across all turns.
|
|
401
|
+
*
|
|
402
|
+
* ```typescript
|
|
403
|
+
* export class MyAgent extends AIChatAgent {
|
|
404
|
+
* getModel() { return openai("gpt-4o"); }
|
|
405
|
+
* getTools() { return []; }
|
|
406
|
+
* getSkills() { return [searchSkill, codeSkill]; }
|
|
407
|
+
* getSystemPrompt() { return "You are a helpful assistant."; }
|
|
408
|
+
* getDB() { return this.env.AGENT_DB; }
|
|
409
|
+
* }
|
|
410
|
+
* ```
|
|
411
|
+
*
|
|
412
|
+
* If you need full control over the `streamText` call (custom model options,
|
|
413
|
+
* streaming transforms, varying the model per request, etc.) use
|
|
414
|
+
* `AIChatAgentBase` with the `@withSkills` decorator instead.
|
|
415
|
+
*/
|
|
416
|
+
declare abstract class AIChatAgent<Env extends Cloudflare.Env = Cloudflare.Env> extends AIChatAgentBase<Env> {
|
|
417
|
+
/** Return the Vercel AI SDK LanguageModel to use for this agent */
|
|
418
|
+
abstract getModel(): LanguageModel;
|
|
419
|
+
/** Tools that are always active regardless of loaded skills */
|
|
420
|
+
abstract getTools(): Tool[];
|
|
421
|
+
/** All skills available for on-demand loading */
|
|
422
|
+
abstract getSkills(): Skill[];
|
|
423
|
+
/**
|
|
424
|
+
* Build the base system prompt. This string is passed to streamText as-is
|
|
425
|
+
* and never modified — skill guidance is injected as a separate system
|
|
426
|
+
* message so this value stays static and cacheable.
|
|
427
|
+
*/
|
|
428
|
+
abstract getSystemPrompt(): string;
|
|
429
|
+
/**
|
|
430
|
+
* Return the model used for compaction summarisation.
|
|
431
|
+
*
|
|
432
|
+
* Defaults to getModel() — the agent's primary model — so compaction is
|
|
433
|
+
* enabled automatically. Override to substitute a cheaper or faster model
|
|
434
|
+
* for summarisation (e.g. a smaller model when the primary is expensive).
|
|
435
|
+
*
|
|
436
|
+
* To opt out of message compaction: override and return undefined.
|
|
437
|
+
*/
|
|
438
|
+
protected getCompactionModel(): LanguageModel;
|
|
439
|
+
onChatMessage(onFinish: StreamTextOnFinishCallback<ToolSet>, options?: OnChatMessageOptions): Promise<Response | undefined>;
|
|
440
|
+
}
|
|
441
|
+
//#endregion
|
|
442
|
+
//#region src/features/skills/index.d.ts
|
|
443
|
+
/**
|
|
444
|
+
* Creates a skill loading system for use with the Vercel AI SDK.
|
|
445
|
+
*
|
|
446
|
+
* The agent starts with only its always-on tools active. The LLM can call
|
|
447
|
+
* activate_skill to load skill tools on demand. Which skills are loaded is
|
|
448
|
+
* persisted to D1 across turns — no message-history parsing required.
|
|
449
|
+
*
|
|
450
|
+
* Guidance from loaded skills is injected as a system message just before
|
|
451
|
+
* the current user turn, keeping the `system` prompt static and cacheable.
|
|
452
|
+
* prepareStep keeps the guidance message updated if new skills load mid-turn.
|
|
453
|
+
*
|
|
454
|
+
* Usage with streamText (ai v6):
|
|
455
|
+
* ```typescript
|
|
456
|
+
* import { streamText, convertToModelMessages, stepCountIs } from "ai";
|
|
457
|
+
*
|
|
458
|
+
* // initialLoadedSkills comes from D1 (read at turn start by the agent).
|
|
459
|
+
* // onSkillsChanged is called when new skills are loaded; the agent
|
|
460
|
+
* // buffers the value and writes it to D1 at turn end in persistMessages.
|
|
461
|
+
* const lt = createSkills({ tools, skills, initialLoadedSkills, onSkillsChanged });
|
|
462
|
+
* const messages = injectGuidance(modelMessages, lt.getLoadedGuidance());
|
|
463
|
+
*
|
|
464
|
+
* const result = streamText({
|
|
465
|
+
* model,
|
|
466
|
+
* system: baseSystemPrompt, // static — never contains guidance, stays cacheable
|
|
467
|
+
* messages,
|
|
468
|
+
* tools: lt.tools,
|
|
469
|
+
* activeTools: lt.activeTools,
|
|
470
|
+
* prepareStep: lt.prepareStep, // keeps guidance message updated mid-turn
|
|
471
|
+
* stopWhen: stepCountIs(20),
|
|
472
|
+
* });
|
|
473
|
+
* ```
|
|
474
|
+
*/
|
|
475
|
+
declare function createSkills(config: SkillsConfig): SkillsResult & {
|
|
476
|
+
tools: ToolSet;
|
|
477
|
+
activeTools: string[];
|
|
478
|
+
prepareStep: PrepareStepFunction;
|
|
479
|
+
};
|
|
480
|
+
/**
|
|
481
|
+
* Removes ephemeral messages from the conversation before it is saved to D1.
|
|
482
|
+
*
|
|
483
|
+
* Three kinds of messages are stripped:
|
|
484
|
+
*
|
|
485
|
+
* 1. list_capabilities tool calls — always stripped. Capability discovery is
|
|
486
|
+
* only relevant within the current turn; it adds no useful context for
|
|
487
|
+
* future turns.
|
|
488
|
+
*
|
|
489
|
+
* 2. activate_skill calls when nothing was newly loaded — stripped when all
|
|
490
|
+
* requested skills were already active, or when all were denied. In both
|
|
491
|
+
* cases nothing changed, so persisting the call would only add noise.
|
|
492
|
+
*
|
|
493
|
+
* 3. Guidance system messages — stripped by exact content match against the
|
|
494
|
+
* provided guidance string. Guidance is always recomputed from loaded skill
|
|
495
|
+
* definitions at turn start, so persisting it would create a redundant
|
|
496
|
+
* second source of truth alongside the loaded_skills D1 column.
|
|
497
|
+
*
|
|
498
|
+
* When skills ARE successfully loaded, the short "Loaded: X" result is kept
|
|
499
|
+
* in history for model context — so the model can see what was loaded in
|
|
500
|
+
* prior turns. Skill state is restored from D1 loaded_skills, not from these
|
|
501
|
+
* strings.
|
|
502
|
+
*
|
|
503
|
+
* If stripping leaves an assistant message with no parts, the entire message
|
|
504
|
+
* is dropped (e.g. a step that did nothing but call list_capabilities).
|
|
505
|
+
*/
|
|
506
|
+
declare function filterEphemeralMessages(messages: UIMessage[], guidanceToStrip?: string): UIMessage[];
|
|
507
|
+
/**
|
|
508
|
+
* Injects loaded skill guidance as a system message just before the last
|
|
509
|
+
* message in the array (typically the current user turn).
|
|
510
|
+
*
|
|
511
|
+
* Guidance is kept separate from the static `system` prompt so that the
|
|
512
|
+
* system prompt stays identical on every turn and can be prompt-cached.
|
|
513
|
+
* Positioning just before the last message means guidance survives any
|
|
514
|
+
* compaction strategy that preserves recent context.
|
|
515
|
+
*
|
|
516
|
+
* Pass `previousGuidance` (the string injected on the prior call) to remove
|
|
517
|
+
* the stale guidance message before inserting the updated one. Removal is by
|
|
518
|
+
* exact content match — not by role — so other system messages (memories,
|
|
519
|
+
* user preferences, etc.) are left untouched.
|
|
520
|
+
*
|
|
521
|
+
* At turn start, omit `previousGuidance` — guidance is never persisted to D1
|
|
522
|
+
* (it is stripped by filterEphemeralMessages before saving), so there is
|
|
523
|
+
* nothing to remove. prepareStep uses previousGuidance within a turn to
|
|
524
|
+
* handle guidance updates when new skills are loaded mid-turn.
|
|
525
|
+
*
|
|
526
|
+
* ```typescript
|
|
527
|
+
* // Turn start — just inject
|
|
528
|
+
* const messages = injectGuidance(modelMessages, skills.getLoadedGuidance());
|
|
529
|
+
*
|
|
530
|
+
* // prepareStep — remove stale guidance then re-inject updated guidance
|
|
531
|
+
* const messages = injectGuidance(stepMessages, newGuidance, previousGuidance);
|
|
532
|
+
* ```
|
|
533
|
+
*/
|
|
534
|
+
declare function injectGuidance(messages: ModelMessage[], guidance: string, previousGuidance?: string): ModelMessage[];
|
|
535
|
+
//#endregion
|
|
536
|
+
//#region src/agents/chat/compaction/index.d.ts
|
|
537
|
+
declare const COMPACT_TOKEN_THRESHOLD = 140000;
|
|
538
|
+
/**
|
|
539
|
+
* Estimates token count for a message array using a 3.5 chars/token
|
|
540
|
+
* approximation — the same heuristic used by slack-bot. Counts text from
|
|
541
|
+
* text parts, tool inputs/outputs, and reasoning parts.
|
|
542
|
+
*/
|
|
543
|
+
declare function estimateMessagesTokens(messages: UIMessage[]): number;
|
|
544
|
+
/**
|
|
545
|
+
* Summarizes older messages into a single system message and appends the
|
|
546
|
+
* recent verbatim tail. Returns messages unchanged if the history is already
|
|
547
|
+
* short enough to fit within tailSize.
|
|
548
|
+
*/
|
|
549
|
+
declare function compactMessages(messages: UIMessage[], model: LanguageModel, tailSize: number): Promise<UIMessage[]>;
|
|
550
|
+
/**
|
|
551
|
+
* Entry point called from persistMessages once per turn.
|
|
552
|
+
*
|
|
553
|
+
* Returns messages unchanged when:
|
|
554
|
+
* - model is undefined (compaction disabled on this agent)
|
|
555
|
+
* - estimated token count is under COMPACT_TOKEN_THRESHOLD
|
|
556
|
+
*
|
|
557
|
+
* Otherwise delegates to compactMessages.
|
|
558
|
+
*/
|
|
559
|
+
declare function compactIfNeeded(messages: UIMessage[], model: LanguageModel | undefined, tailSize: number): Promise<UIMessage[]>;
|
|
560
|
+
//#endregion
|
|
561
|
+
export { AIChatAgent, AIChatAgentBase, COMPACT_TOKEN_THRESHOLD, type Skill, type SkillContext, type SkillsConfig, type SkillsResult, type Tool, compactIfNeeded, compactMessages, createSkills, estimateMessagesTokens, filterEphemeralMessages, injectGuidance, withSkills };
|
package/dist/index.mjs
ADDED
|
@@ -0,0 +1,683 @@
|
|
|
1
|
+
import { convertToModelMessages, generateText, jsonSchema, stepCountIs, streamText, tool } from "ai";
|
|
2
|
+
import { AIChatAgent as AIChatAgent$1 } from "@cloudflare/ai-chat";
|
|
3
|
+
//#region src/features/skills/meta-tools.ts
|
|
4
|
+
/**
|
|
5
|
+
* Names and descriptions for the built-in meta tools.
|
|
6
|
+
*
|
|
7
|
+
* The execute logic for these lives in createSkills() where it has
|
|
8
|
+
* access to the closure state (loadedSkills).
|
|
9
|
+
*/
|
|
10
|
+
const ACTIVATE_SKILL = "activate_skill";
|
|
11
|
+
const LIST_CAPABILITIES = "list_capabilities";
|
|
12
|
+
/**
 * Builds the tool description shown to the model for activate_skill.
 *
 * The description embeds the full catalogue of available skills (name plus
 * one-line description) so the model can decide what to load without a
 * separate discovery call.
 *
 * @param skills Skill definitions with `name` and `description` fields.
 * @returns A multi-line description string ending with the skill catalogue.
 */
function buildActivateSkillDescription(skills) {
	const catalogue = skills.map((s) => `• ${s.name} — ${s.description}`).join("\n");
	const header = [
		"Load additional skills to help with the user's request.",
		"Call this BEFORE attempting actions that need tools from unloaded skills.",
		"",
		"Available skills:"
	];
	return header.concat(catalogue).join("\n");
}
|
|
25
|
+
const LIST_CAPABILITIES_DESCRIPTION = "List all tools currently available to you, which skills are loaded, and which can still be loaded. Call this when the user asks about your capabilities or what you can do.";
|
|
26
|
+
//#endregion
|
|
27
|
+
//#region src/features/skills/index.ts
|
|
28
|
+
/**
|
|
29
|
+
* Creates a skill loading system for use with the Vercel AI SDK.
|
|
30
|
+
*
|
|
31
|
+
* The agent starts with only its always-on tools active. The LLM can call
|
|
32
|
+
* activate_skill to load skill tools on demand. Which skills are loaded is
|
|
33
|
+
* persisted to D1 across turns — no message-history parsing required.
|
|
34
|
+
*
|
|
35
|
+
* Guidance from loaded skills is injected as a system message just before
|
|
36
|
+
* the current user turn, keeping the `system` prompt static and cacheable.
|
|
37
|
+
* prepareStep keeps the guidance message updated if new skills load mid-turn.
|
|
38
|
+
*
|
|
39
|
+
* Usage with streamText (ai v6):
|
|
40
|
+
* ```typescript
|
|
41
|
+
* import { streamText, convertToModelMessages, stepCountIs } from "ai";
|
|
42
|
+
*
|
|
43
|
+
* // initialLoadedSkills comes from D1 (read at turn start by the agent).
|
|
44
|
+
* // onSkillsChanged is called when new skills are loaded; the agent
|
|
45
|
+
* // buffers the value and writes it to D1 at turn end in persistMessages.
|
|
46
|
+
* const lt = createSkills({ tools, skills, initialLoadedSkills, onSkillsChanged });
|
|
47
|
+
* const messages = injectGuidance(modelMessages, lt.getLoadedGuidance());
|
|
48
|
+
*
|
|
49
|
+
* const result = streamText({
|
|
50
|
+
* model,
|
|
51
|
+
* system: baseSystemPrompt, // static — never contains guidance, stays cacheable
|
|
52
|
+
* messages,
|
|
53
|
+
* tools: lt.tools,
|
|
54
|
+
* activeTools: lt.activeTools,
|
|
55
|
+
* prepareStep: lt.prepareStep, // keeps guidance message updated mid-turn
|
|
56
|
+
* stopWhen: stepCountIs(20),
|
|
57
|
+
* });
|
|
58
|
+
* ```
|
|
59
|
+
*/
|
|
60
|
+
function createSkills(config) {
	const { tools: alwaysOnTools, skills, filterSkill, onSkillsChanged } = config;
	// Mutable closure state: which skills are currently active. Seeded from
	// persisted state so activation can survive across turns.
	const loadedSkills = new Set(config.initialLoadedSkills ?? []);
	const skillMap = new Map(skills.map((s) => [s.name, s]));
	// Every tool (always-on + all skill tools + the two meta tools below) is
	// registered up front; per-step visibility is controlled via activeTools.
	const allTools = {};
	const registeredNames = /* @__PURE__ */ new Set();
	// Wraps a plain tool definition into an ai-SDK tool. First registration
	// wins on name collisions (always-on tools are registered before skill tools).
	function registerTool(t) {
		if (registeredNames.has(t.name)) return;
		allTools[t.name] = tool({
			description: t.description,
			inputSchema: jsonSchema(t.parameters),
			// Only toolCallId from the SDK options is forwarded to the tool impl.
			execute: async (args, options) => t.execute(args, { toolCallId: options.toolCallId })
		});
		registeredNames.add(t.name);
	}
	for (const t of alwaysOnTools) registerTool(t);
	for (const skill of skills) for (const t of skill.tools) registerTool(t);
	// Names visible to the model this step: both meta tools, all always-on
	// tools, plus the tools of every currently loaded skill (deduplicated).
	function getActiveToolNames() {
		const names = [
			ACTIVATE_SKILL,
			LIST_CAPABILITIES,
			...alwaysOnTools.map((t) => t.name)
		];
		for (const skillName of loadedSkills) {
			const skill = skillMap.get(skillName);
			if (!skill) continue;
			for (const t of skill.tools) if (!names.includes(t.name)) names.push(t.name);
		}
		return names;
	}
	// Concatenates the guidance text of all loaded skills; skills without
	// guidance are skipped. "" when nothing is loaded.
	function getLoadedGuidance() {
		return [...loadedSkills].map((name) => skillMap.get(name)?.guidance).filter((g) => Boolean(g)).join("\n\n");
	}
	// Tracks the guidance string injected on the previous step so prepareStep
	// can replace (not duplicate) the guidance system message mid-turn.
	let previousGuidance = getLoadedGuidance();
	// Meta tool: load skills on demand. Registered after the loops above, so it
	// deliberately overrides any skill tool that happens to share the name.
	allTools[ACTIVATE_SKILL] = tool({
		description: buildActivateSkillDescription(skills),
		inputSchema: jsonSchema({
			type: "object",
			properties: { skills: {
				type: "array",
				items: {
					type: "string",
					// Constrain the model to known skill names.
					enum: skills.map((s) => s.name)
				},
				description: "Skills to load"
			} },
			required: ["skills"]
		}),
		execute: async ({ skills: requested }) => {
			const newlyLoaded = [];
			const denied = [];
			for (const skillName of requested) {
				// NOTE(review): unknown skill names are silently skipped — they are
				// neither loaded nor reported back to the model.
				if (!skillMap.get(skillName)) continue;
				if (loadedSkills.has(skillName)) continue;
				if (!(filterSkill ? await filterSkill(skillName) : true)) {
					denied.push(skillName);
					continue;
				}
				loadedSkills.add(skillName);
				newlyLoaded.push(skillName);
			}
			// Notify the host only when state actually changed, so it can buffer
			// a persistence write for turn end.
			if (newlyLoaded.length > 0 && onSkillsChanged) await onSkillsChanged([...loadedSkills]);
			if (newlyLoaded.length > 0) {
				let result = `Loaded: ${newlyLoaded.join(", ")}.`;
				if (denied.length > 0) result += ` Access denied for: ${denied.join(", ")}.`;
				return result;
			}
			// These exact strings are matched by filterEphemeralMessages to strip
			// no-op activate_skill calls from persisted history — keep in sync
			// with ALREADY_LOADED_OUTPUT / DENIED_OUTPUT_PREFIX.
			if (denied.length > 0) return `Access denied for: ${denied.join(", ")}.`;
			return ALREADY_LOADED_OUTPUT;
		}
	});
	// Meta tool: report currently active tools and loadable skills.
	allTools[LIST_CAPABILITIES] = tool({
		description: LIST_CAPABILITIES_DESCRIPTION,
		inputSchema: jsonSchema({
			type: "object",
			properties: {},
			required: []
		}),
		execute: async () => {
			const activeNames = getActiveToolNames();
			const loadedNames = [...loadedSkills];
			const unloaded = skills.filter((s) => !loadedSkills.has(s.name)).map((s) => s.name);
			return [
				`Active tools (${activeNames.length}): ${activeNames.join(", ")}`,
				`Loaded skills: ${loadedNames.length > 0 ? loadedNames.join(", ") : "none"}`,
				`Available to load: ${unloaded.length > 0 ? unloaded.join(", ") : "none"}`
			].join("\n");
		}
	});
	// Per-step hook for streamText: refresh activeTools and swap the stale
	// guidance system message for the current one when skills loaded mid-turn.
	const prepareStep = async ({ messages }) => {
		const guidance = getLoadedGuidance();
		const updatedMessages = injectGuidance(messages, guidance, previousGuidance);
		previousGuidance = guidance;
		return {
			activeTools: getActiveToolNames(),
			messages: updatedMessages
		};
	};
	return {
		tools: allTools,
		activeTools: getActiveToolNames(),
		prepareStep,
		getLoadedGuidance,
		// Snapshot copy — callers cannot mutate internal state through it.
		getLoadedSkills() {
			return [...loadedSkills];
		}
	};
}
|
|
168
|
+
// Sentinel outputs produced by the activate_skill tool; matched exactly (or by
// prefix) when deciding whether a call was a no-op worth stripping.
const ALREADY_LOADED_OUTPUT = "All requested skills were already loaded.";
const DENIED_OUTPUT_PREFIX = "Access denied for:";
/**
 * Removes ephemeral messages from the conversation before it is saved.
 *
 * Three kinds of content are stripped:
 * 1. list_capabilities tool calls — always; capability discovery is only
 *    relevant within the current turn.
 * 2. activate_skill calls that changed nothing — all requested skills were
 *    already active, or all were denied.
 * 3. Guidance system messages — matched by exact text against
 *    `guidanceToStrip`; guidance is recomputed from loaded skill state each
 *    turn, so persisting it would duplicate the source of truth.
 *
 * Successful "Loaded: X" results ARE kept so the model retains cross-turn
 * context of what it loaded. An assistant message left with zero parts is
 * dropped entirely.
 *
 * @param messages UI messages about to be persisted.
 * @param guidanceToStrip Exact guidance text to remove, if any.
 * @returns A new array; untouched messages keep their original identity.
 */
function filterEphemeralMessages(messages, guidanceToStrip) {
	const isEphemeralToolPart = (part) => {
		if (!("toolCallId" in part)) return false;
		if (part.type === "tool-list_capabilities") return true;
		if (part.type !== "tool-activate_skill") return false;
		const { output } = part;
		if (typeof output !== "string") return false;
		return output === ALREADY_LOADED_OUTPUT || output.startsWith(DENIED_OUTPUT_PREFIX);
	};
	const kept = [];
	for (const msg of messages) {
		const isGuidance = msg.role === "system" && guidanceToStrip && msg.parts?.some((p) => "text" in p && p.text === guidanceToStrip);
		if (isGuidance) continue;
		if (msg.role !== "assistant" || !msg.parts?.length) {
			kept.push(msg);
			continue;
		}
		const remaining = msg.parts.filter((part) => !isEphemeralToolPart(part));
		if (remaining.length === 0) continue;
		kept.push(remaining.length === msg.parts.length ? msg : { ...msg, parts: remaining });
	}
	return kept;
}
|
|
220
|
+
/**
 * Injects loaded skill guidance as a system message just before the last
 * message in the array (typically the current user turn).
 *
 * Guidance is kept separate from the static `system` prompt so the system
 * prompt stays identical on every turn and can be prompt-cached. Positioning
 * just before the last message means guidance survives any compaction
 * strategy that preserves recent context.
 *
 * Pass `previousGuidance` (the string injected on the prior call) to remove
 * the stale guidance message before inserting the updated one. Removal is by
 * exact content match — not by role — so other system messages (memories,
 * user preferences, etc.) are left untouched.
 *
 * At turn start, omit `previousGuidance` — guidance is never persisted (it is
 * stripped by filterEphemeralMessages before saving), so there is nothing to
 * remove. prepareStep passes previousGuidance within a turn to handle
 * guidance updates when new skills are loaded mid-turn.
 *
 * @param messages Model messages for the current step.
 * @param guidance Concatenated guidance of all loaded skills; "" disables injection.
 * @param previousGuidance Exact guidance string injected by the prior call, if any.
 * @returns A new array with the guidance system message in place; the input
 *          array is returned as-is when `guidance` is empty.
 */
function injectGuidance(messages, guidance, previousGuidance) {
	if (!guidance) return messages;
	const base = previousGuidance ? messages.filter((m) => !(m.role === "system" && m.content === previousGuidance)) : messages;
	const guidanceMessage = {
		role: "system",
		content: guidance
	};
	// Bug fix: when `base` is empty (empty input, or previousGuidance filtering
	// removed the only message), `base.at(-1)` is undefined and the old code
	// returned [guidanceMessage, undefined] — an array with a hole. Guard so
	// the result never contains undefined entries.
	if (base.length === 0) return [guidanceMessage];
	return [
		...base.slice(0, -1),
		guidanceMessage,
		base.at(-1)
	];
}
|
|
259
|
+
//#endregion
|
|
260
|
+
//#region src/agents/chat/compaction/index.ts
|
|
261
|
+
/**
|
|
262
|
+
* Message compaction for long-running conversations.
|
|
263
|
+
*
|
|
264
|
+
* When the stored conversation history exceeds COMPACT_TOKEN_THRESHOLD, older
|
|
265
|
+
* messages are summarised via an LLM call and replaced with a single system
|
|
266
|
+
* message containing the summary, followed by the recent verbatim tail.
|
|
267
|
+
*
|
|
268
|
+
* Entry point: compactIfNeeded() — called once per turn from persistMessages.
|
|
269
|
+
*
|
|
270
|
+
* To remove compaction entirely: delete this directory, remove the import in
|
|
271
|
+
* AIChatAgentBase, and change `toSave` back to `filtered`.
|
|
272
|
+
*/
|
|
273
|
+
// Estimated-token count above which compactIfNeeded triggers summarisation (140 000).
const COMPACT_TOKEN_THRESHOLD = 14e4;
// Max characters of a single tool result quoted in the summary prompt before "..." truncation.
const TOOL_RESULT_PREVIEW_CHARS = 200;
// maxOutputTokens cap for the summarisation LLM call (4 000).
const SUMMARY_MAX_TOKENS = 4e3;
|
|
276
|
+
/**
 * Estimates token count for a message array using a 3.5 chars/token
 * approximation. Counts text from text and reasoning parts, plus the
 * JSON-serialised length of tool inputs and outputs.
 *
 * @param messages UI messages to measure.
 * @returns Estimated token count (ceiling of chars / 3.5).
 */
function estimateMessagesTokens(messages) {
	const CHARS_PER_TOKEN = 3.5;
	let chars = 0;
	for (const msg of messages) {
		for (const part of msg.parts ?? []) {
			if ((part.type === "text" || part.type === "reasoning") && "text" in part) {
				chars += part.text.length;
			} else if ("toolCallId" in part) {
				if (part.input) chars += JSON.stringify(part.input).length;
				if (part.output !== undefined) {
					const serialized = typeof part.output === "string" ? part.output : JSON.stringify(part.output);
					chars += serialized.length;
				}
			}
		}
	}
	return Math.ceil(chars / CHARS_PER_TOKEN);
}
|
|
302
|
+
/**
 * Renders messages as human-readable text for the compaction summary prompt.
 * Text and reasoning parts are included verbatim (trimmed); tool calls show
 * the tool name and a truncated result. step-start parts and messages that
 * end up with no renderable parts are omitted.
 *
 * @param messages UI messages to render.
 * @returns Blank-line-separated "Role: content" lines.
 */
function formatMessagesForSummary(messages) {
	const renderToolPart = (part) => {
		const toolName = part.type.startsWith("tool-") ? part.type.slice(5) : part.type;
		let rendered;
		if (part.output === undefined) rendered = "no result";
		else rendered = typeof part.output === "string" ? part.output : JSON.stringify(part.output);
		if (rendered.length > TOOL_RESULT_PREVIEW_CHARS) {
			rendered = `${rendered.slice(0, TOOL_RESULT_PREVIEW_CHARS)}...`;
		}
		return `[Tool: ${toolName}, result: ${rendered}]`;
	};
	const lines = [];
	for (const msg of messages) {
		const roleLabel = msg.role.charAt(0).toUpperCase() + msg.role.slice(1);
		const pieces = [];
		for (const part of msg.parts ?? []) {
			if (part.type === "step-start") continue;
			if ((part.type === "text" || part.type === "reasoning") && "text" in part) {
				const trimmed = part.text.trim();
				if (trimmed) pieces.push(trimmed);
			} else if ("toolCallId" in part) {
				pieces.push(renderToolPart(part));
			}
		}
		if (pieces.length > 0) lines.push(`${roleLabel}: ${pieces.join(" ")}`);
	}
	return lines.join("\n\n");
}
|
|
332
|
+
/**
|
|
333
|
+
* Calls the LLM to produce a concise summary of old + recent message windows.
|
|
334
|
+
* Weights the prompt toward recent exchanges, matching slack-bot's approach.
|
|
335
|
+
*/
|
|
336
|
+
// Calls the LLM once to produce a concise summary of the old + recent message
// windows. The prompt deliberately weights the recent window more heavily.
// Never throws: any summarisation failure is logged and degrades to a fixed
// fallback string so compaction cannot break message persistence.
async function generateCompactionSummary(oldMessages, recentMessages, model) {
	const prompt = `Summarize this conversation history concisely for an AI assistant to continue the conversation.
Focus MORE on recent exchanges (what the user was working on, what tools were used, what was found).
Include key facts, decisions, and context needed to continue the conversation.
Keep entity names, numbers, file paths, and specific details that might be referenced later.
Do NOT include pleasantries or meta-commentary - just the essential context.

OLDER MESSAGES (summarize briefly):
${formatMessagesForSummary(oldMessages)}

RECENT MESSAGES (summarize with more detail - this is where the user currently is):
${formatMessagesForSummary(recentMessages)}

Write a concise summary:`;
	try {
		const { text } = await generateText({
			model,
			messages: [{
				role: "user",
				content: prompt
			}],
			// Bound the summary size; SUMMARY_MAX_TOKENS caps output tokens only.
			maxOutputTokens: SUMMARY_MAX_TOKENS
		});
		// Guard against an empty completion as well as errors.
		return text || "Unable to summarize conversation history.";
	} catch (error) {
		console.error("Compaction summarization error:", error);
		return "Unable to summarize conversation history.";
	}
}
|
|
365
|
+
/**
 * Summarizes older messages into a single system message and appends the
 * recent verbatim tail. Returns the input array unchanged if the history
 * already fits within tailSize.
 *
 * @param messages Full UI message history.
 * @param model LanguageModel used for the summarisation call.
 * @param tailSize Number of most-recent messages to keep verbatim.
 * @returns [summary system message, ...recent tail] or the original array.
 */
async function compactMessages(messages, model, tailSize) {
	const overflow = messages.length - tailSize;
	if (overflow <= 0) return messages;
	const olderWindow = messages.slice(0, overflow);
	const recentWindow = messages.slice(overflow);
	const summary = await generateCompactionSummary(olderWindow, recentWindow, model);
	const summaryMessage = {
		// Unique id so the summary never collides with real message ids.
		id: `compact_${Date.now()}_${Math.random().toString(36).slice(2, 9)}`,
		role: "system",
		parts: [{
			type: "text",
			text: `[Conversation summary - older context was compacted]\n${summary}`,
			state: "done"
		}]
	};
	return [summaryMessage, ...recentWindow];
}
|
|
386
|
+
/**
 * Entry point called from persistMessages once per turn.
 *
 * Returns messages unchanged when:
 * - model is undefined (compaction disabled on this agent)
 * - estimated token count is under the compaction threshold
 *
 * Otherwise delegates to compactMessages.
 *
 * @param messages UI messages about to be persisted.
 * @param model Summarisation model, or undefined to disable compaction.
 * @param tailSize Number of recent messages kept verbatim after compaction.
 */
async function compactIfNeeded(messages, model, tailSize) {
	if (!model) return messages;
	// TODO(review): this literal duplicates COMPACT_TOKEN_THRESHOLD (14e4) —
	// keep in sync, or reference the constant directly.
	if (estimateMessagesTokens(messages) <= 140000) return messages;
	return compactMessages(messages, model, tailSize);
}
|
|
399
|
+
//#endregion
|
|
400
|
+
//#region src/agents/chat/AIChatAgentBase.ts
|
|
401
|
+
/**
|
|
402
|
+
* Base class for chat agents with lazy skill loading.
|
|
403
|
+
*
|
|
404
|
+
* Owns:
|
|
405
|
+
* - D1 persistence for loaded skill state (skill names survive DO eviction)
|
|
406
|
+
* - Ephemeral message filtering (list_capabilities, no-op activate_skill calls)
|
|
407
|
+
* - Message compaction (LLM summarisation when history exceeds token threshold)
|
|
408
|
+
* - History replay to newly connected clients (onConnect override)
|
|
409
|
+
* - Skill context preparation for use with the @withSkills decorator
|
|
410
|
+
*
|
|
411
|
+
* Conversation messages are stored in Durable Object SQLite, managed
|
|
412
|
+
* automatically by the Cloudflare AIChatAgent — no D1 write needed for messages.
|
|
413
|
+
*
|
|
414
|
+
* D1 is written only when skills change (activate_skill was called this turn),
|
|
415
|
+
* not on every turn.
|
|
416
|
+
*
|
|
417
|
+
* ## Usage
|
|
418
|
+
*
|
|
419
|
+
* Extend this class when you want full control over `streamText`. Implement
|
|
420
|
+
* `getTools()`, `getSkills()`, and your own `onChatMessage` decorated with
|
|
421
|
+
* `@withSkills`:
|
|
422
|
+
*
|
|
423
|
+
* ```typescript
|
|
424
|
+
* export class MyAgent extends AIChatAgentBase {
|
|
425
|
+
* getTools() { return []; }
|
|
426
|
+
* getSkills() { return [searchSkill, codeSkill]; }
|
|
427
|
+
* getDB() { return this.env.AGENT_DB; }
|
|
428
|
+
*
|
|
429
|
+
* @withSkills
|
|
430
|
+
* async onChatMessage(onFinish, ctx: SkillContext, options?) {
|
|
431
|
+
* const { messages, ...skillArgs } = ctx;
|
|
432
|
+
* return streamText({
|
|
433
|
+
* model: openai("gpt-4o"),
|
|
434
|
+
* system: "You are a helpful assistant.",
|
|
435
|
+
* messages,
|
|
436
|
+
* ...skillArgs,
|
|
437
|
+
* onFinish,
|
|
438
|
+
* stopWhen: stepCountIs(20),
|
|
439
|
+
* }).toUIMessageStreamResponse();
|
|
440
|
+
* }
|
|
441
|
+
* }
|
|
442
|
+
* ```
|
|
443
|
+
*
|
|
444
|
+
* For a batteries-included experience where the base class owns `onChatMessage`,
|
|
445
|
+
* extend `AIChatAgent` instead.
|
|
446
|
+
*/
|
|
447
|
+
var AIChatAgentBase = class extends AIChatAgent$1 {
	/**
	 * Maximum number of messages stored in DO SQLite.
	 *
	 * Lowered from the Cloudflare AIChatAgent default of 200. When compaction
	 * is enabled, one slot is reserved for the summary message so the verbatim
	 * tail is maxPersistedMessages - 1 recent messages. Raise or lower per agent.
	 */
	maxPersistedMessages = 50;
	/**
	 * Return a LanguageModel to use for compaction summarisation.
	 *
	 * Returns undefined by default (empty body), which disables compaction —
	 * messages are kept up to maxPersistedMessages and older ones are dropped
	 * by the Cloudflare AIChatAgent's built-in cap.
	 *
	 * Override to enable compaction, e.g. with a cheaper/faster model.
	 */
	getCompactionModel() {}
	/**
	 * Return the D1 database binding for persisting loaded skill state.
	 *
	 * Override in your subclass to return the binding from env:
	 * ```typescript
	 * protected getDB() { return this.env.AGENT_DB; }
	 * ```
	 *
	 * Defaults to undefined — when undefined, loaded skills reset on every new
	 * conversation (skills still work within a turn, just not across turns);
	 * the `?.` chains in _readSkillState/_writeSkillState make D1 access a no-op.
	 */
	getDB() {}
	/**
	 * Optional permission hook. Return false to deny the agent access to a
	 * skill when activate_skill is called. Defaults to allow-all.
	 */
	async filterSkill(_skillName) {
		return true;
	}
	/**
	 * Buffered skill state from the current turn.
	 *
	 * Set by the onSkillsChanged callback when activate_skill loads new skills
	 * mid-turn. Flushed to D1 in persistMessages at turn end — only written
	 * when this value is set, so D1 is not touched on turns where no new skills
	 * are loaded.
	 */
	_pendingSkills;
	/**
	 * Reads loaded skill names from D1 for this agent.
	 *
	 * Returns an empty array if no DB is configured or no record exists
	 * (first turn, or no skills loaded yet). Conversation messages are not
	 * read here — the Cloudflare AIChatAgent provides those via this.messages.
	 *
	 * NOTE(review): assumes an `agent_state(agent_id, loaded_skills, last_updated)`
	 * table already exists — no migration is run here; confirm schema setup.
	 */
	async _readSkillState() {
		const row = await this.getDB()?.prepare("SELECT loaded_skills FROM agent_state WHERE agent_id = ?").bind(this.name).first();
		if (!row?.loaded_skills) return [];
		// loaded_skills column stores a JSON array of skill-name strings.
		return JSON.parse(row.loaded_skills);
	}
	/**
	 * Writes loaded skill names to D1 for this agent.
	 *
	 * Uses INSERT OR REPLACE so the first skill load creates the row and
	 * subsequent loads update it. Only called when skills actually changed
	 * this turn (_pendingSkills is set). No-op when getDB() returns undefined.
	 */
	async _writeSkillState(skills) {
		await this.getDB()?.prepare("INSERT OR REPLACE INTO agent_state (agent_id, loaded_skills, last_updated) VALUES (?, ?, ?)").bind(this.name, JSON.stringify(skills), Date.now()).run();
	}
	/**
	 * Flush persisted message history to a newly connected client.
	 *
	 * The Cloudflare AIChatAgent broadcasts message updates to existing
	 * connections via persistMessages, but does nothing for connections that
	 * arrive after a conversation has ended. Without this override, a page
	 * refresh produces an empty UI even though the history is intact in DO SQLite.
	 *
	 * Skips replay when a stream is active — stream resumption handles that
	 * case and replays in-progress chunks via its own protocol.
	 *
	 * NOTE(review): `_activeStreamId` is an internal field of the Cloudflare
	 * base class — verify it still exists when upgrading @cloudflare/ai-chat.
	 */
	async onConnect(connection, ctx) {
		await super.onConnect(connection, ctx);
		if (!this._activeStreamId && this.messages.length > 0) connection.send(JSON.stringify({
			type: "cf_agent_chat_messages",
			messages: this.messages
		}));
	}
	/**
	 * Strips ephemeral content, conditionally saves skill state to D1, then
	 * delegates to super for DO SQLite persistence and WebSocket broadcast.
	 *
	 * The Cloudflare AIChatAgent calls persistMessages once per turn after all
	 * steps complete, so overriding here is the correct place to act — it runs
	 * after the full assistant message (including all tool results) is assembled.
	 *
	 * Three things happen here:
	 *
	 * 1. Ephemeral tool calls are stripped — list_capabilities (always) and
	 *    activate_skill when nothing was newly loaded. No guidance string is
	 *    passed: guidance is injected into model messages only, so it should
	 *    never appear in this UI-message history.
	 *
	 * 2. If skills changed this turn (_pendingSkills is set), the updated list
	 *    is written to D1 and the buffer cleared. Other turns do not touch D1.
	 *
	 * 3. History is compacted via compactIfNeeded — a no-op unless
	 *    getCompactionModel() returns a model and the history is large.
	 */
	async persistMessages(messages, excludeBroadcastIds = [], options) {
		const filtered = filterEphemeralMessages(messages);
		if (this._pendingSkills !== void 0) {
			await this._writeSkillState(this._pendingSkills);
			this._pendingSkills = void 0;
		}
		// Reserve one slot for the compaction summary message.
		const toSave = await compactIfNeeded(filtered, this.getCompactionModel(), this.maxPersistedMessages - 1);
		return super.persistMessages(toSave, excludeBroadcastIds, options);
	}
	/**
	 * Widened onChatMessage signature that accommodates the @withSkills decorator.
	 *
	 * The decorator transforms the consumer's 3-arg form (onFinish, ctx, options)
	 * into a 2-arg wrapper at runtime, so at runtime this pass-through only ever
	 * receives (onFinish, options). The widening exists for the type layer; the
	 * runtime body simply delegates to super.
	 */
	onChatMessage(onFinish, ctxOrOptions) {
		return super.onChatMessage(onFinish, ctxOrOptions);
	}
	/**
	 * Called by the @withSkills decorator at the start of each turn.
	 *
	 * Reads loaded skill state from D1, seeds createSkills with it, injects
	 * guidance, and returns a SkillContext ready to spread into a streamText
	 * call. Newly loaded skills are buffered in _pendingSkills (via the
	 * onSkillsChanged callback) and flushed to D1 by persistMessages.
	 *
	 * The returned `messages` already has guidance injected just before the
	 * current user turn. Guidance is never stored in DO SQLite, so loaded
	 * skill state in D1 is the single source of truth.
	 *
	 * NOTE(review): getTools()/getSkills() are abstract — they must be
	 * implemented by the subclass; they are not defined on this class.
	 */
	async _prepareSkillContext() {
		const loadedSkills = await this._readSkillState();
		const skills = createSkills({
			tools: this.getTools(),
			skills: this.getSkills(),
			initialLoadedSkills: loadedSkills,
			onSkillsChanged: async (updated) => {
				this._pendingSkills = updated;
			},
			filterSkill: (name) => this.filterSkill(name)
		});
		const guidance = skills.getLoadedGuidance();
		const messages = injectGuidance(await convertToModelMessages(this.messages), guidance);
		return {
			tools: skills.tools,
			activeTools: skills.activeTools,
			prepareStep: skills.prepareStep,
			messages
		};
	}
};
|
|
607
|
+
/**
 * Method decorator used with AIChatAgentBase.
 *
 * Wraps the consumer's 3-arg onChatMessage (onFinish, skillContext, options)
 * in a 2-arg function matching the Cloudflare AIChatAgent's runtime call
 * shape. The wrapper builds the SkillContext via _prepareSkillContext() on
 * every turn, then invokes the original method with it spliced in.
 *
 * @param fn The decorated onChatMessage implementation.
 * @param _context Decorator context (unused).
 * @returns The 2-arg replacement method.
 */
function withSkills(fn, _context) {
	return async function wrapper(onFinish, maybeOptions) {
		const skillContext = await this._prepareSkillContext();
		return fn.call(this, onFinish, skillContext, maybeOptions);
	};
}
|
|
614
|
+
//#endregion
|
|
615
|
+
//#region src/agents/chat/AIChatAgent.ts
|
|
616
|
+
/**
|
|
617
|
+
* Batteries-included base class for chat agents with lazy skill loading.
|
|
618
|
+
*
|
|
619
|
+
* Owns the full `onChatMessage` lifecycle. Implement four abstract methods and
|
|
620
|
+
* get lazy skill loading, cross-turn skill persistence, guidance injection,
|
|
621
|
+
* ephemeral message cleanup, and message compaction for free.
|
|
622
|
+
*
|
|
623
|
+
* Conversation messages are stored in Durable Object SQLite by the Cloudflare
|
|
624
|
+
* AIChatAgent automatically — available as this.messages at the start of each
|
|
625
|
+
* turn. Loaded skill state is stored in D1 (via getDB()) and read at turn start.
|
|
626
|
+
* Guidance is injected as a system message just before the current user turn,
|
|
627
|
+
* keeping the `system` param static and cacheable across all turns.
|
|
628
|
+
*
|
|
629
|
+
* ```typescript
|
|
630
|
+
* export class MyAgent extends AIChatAgent {
|
|
631
|
+
* getModel() { return openai("gpt-4o"); }
|
|
632
|
+
* getTools() { return []; }
|
|
633
|
+
* getSkills() { return [searchSkill, codeSkill]; }
|
|
634
|
+
* getSystemPrompt() { return "You are a helpful assistant."; }
|
|
635
|
+
* getDB() { return this.env.AGENT_DB; }
|
|
636
|
+
* }
|
|
637
|
+
* ```
|
|
638
|
+
*
|
|
639
|
+
* If you need full control over the `streamText` call (custom model options,
|
|
640
|
+
* streaming transforms, varying the model per request, etc.) use
|
|
641
|
+
* `AIChatAgentBase` with the `@withSkills` decorator instead.
|
|
642
|
+
*/
|
|
643
|
+
var AIChatAgent = class extends AIChatAgentBase {
	/**
	* Model used for message-compaction summarisation.
	*
	* Defaults to getModel() — the agent's primary model — so compaction is
	* enabled automatically. Override to substitute a cheaper or faster model
	* for summarisation, or return undefined to opt out of compaction.
	*/
	getCompactionModel() {
		return this.getModel();
	}
	/**
	* Owns the full turn lifecycle: wires lazy skills, injects guidance, and
	* streams the model response back as a UI message stream.
	*
	* Skill wiring (reading loaded-skill state, createSkills, guidance
	* injection into the message list) is delegated to the inherited
	* _prepareSkillContext() helper instead of being duplicated inline — the
	* helper performs exactly the same steps and returns the same
	* {tools, activeTools, prepareStep, messages} shape, so behaviour is
	* unchanged and the two code paths cannot drift apart.
	*
	* @param onFinish forwarded to streamText's onFinish callback
	* @param options  optional; only abortSignal is consumed here
	* @returns a UI-message-stream Response
	*/
	async onChatMessage(onFinish, options) {
		const { tools, activeTools, prepareStep, messages } = await this._prepareSkillContext();
		return streamText({
			model: this.getModel(),
			system: this.getSystemPrompt(),
			messages,
			tools,
			activeTools,
			prepareStep,
			// Cap agentic tool-use loops at 20 steps per turn.
			stopWhen: stepCountIs(20),
			abortSignal: options?.abortSignal,
			onFinish
		}).toUIMessageStreamResponse();
	}
};
|
|
682
|
+
//#endregion
|
|
683
|
+
export { AIChatAgent, AIChatAgentBase, COMPACT_TOKEN_THRESHOLD, compactIfNeeded, compactMessages, createSkills, estimateMessagesTokens, filterEphemeralMessages, injectGuidance, withSkills };
|
package/package.json
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@economic/agents",
|
|
3
|
+
"version": "0.0.1-alpha.2",
|
|
4
|
+
"description": "A starter for creating a TypeScript package.",
|
|
5
|
+
"homepage": "https://github.com/author/library#readme",
|
|
6
|
+
"bugs": {
|
|
7
|
+
"url": "https://github.com/author/library/issues"
|
|
8
|
+
},
|
|
9
|
+
"license": "MIT",
|
|
10
|
+
"author": "Author Name <author.name@mail.com>",
|
|
11
|
+
"repository": {
|
|
12
|
+
"type": "git",
|
|
13
|
+
"url": "git+https://github.com/author/library.git"
|
|
14
|
+
},
|
|
15
|
+
"files": [
|
|
16
|
+
"dist"
|
|
17
|
+
],
|
|
18
|
+
"type": "module",
|
|
19
|
+
"exports": {
|
|
20
|
+
".": "./dist/index.mjs",
|
|
21
|
+
"./package.json": "./package.json"
|
|
22
|
+
},
|
|
23
|
+
"scripts": {
|
|
24
|
+
"build": "vp pack",
|
|
25
|
+
"dev": "vp pack --watch",
|
|
26
|
+
"test": "vp test",
|
|
27
|
+
"typecheck": "tsc --noEmit",
|
|
28
|
+
"release": "bumpp",
|
|
29
|
+
"prepublishOnly": "npm run build"
|
|
30
|
+
},
|
|
31
|
+
"dependencies": {
|
|
32
|
+
"@cloudflare/ai-chat": "^0.1.9",
|
|
33
|
+
"ai": "^6.0.116"
|
|
34
|
+
},
|
|
35
|
+
"devDependencies": {
|
|
36
|
+
"@cloudflare/workers-types": "^4.20260317.1",
|
|
37
|
+
"@types/node": "^25.5.0",
|
|
38
|
+
"@typescript/native-preview": "7.0.0-dev.20260316.1",
|
|
39
|
+
"bumpp": "^11.0.1",
|
|
40
|
+
"typescript": "^5.9.3",
|
|
41
|
+
"vite-plus": "latest",
|
|
42
|
+
"vitest": "npm:@voidzero-dev/vite-plus-test@latest"
|
|
43
|
+
},
|
|
44
|
+
"inlinedDependencies": {
|
|
45
|
+
"partyserver": "0.3.3",
|
|
46
|
+
"zod": "3.25.76",
|
|
47
|
+
"@modelcontextprotocol/sdk": "1.26.0",
|
|
48
|
+
"agents": "0.7.6"
|
|
49
|
+
}
|
|
50
|
+
}
|