stratus-sdk 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +429 -0
- package/dist/azure/chat-completions-model.d.ts +19 -0
- package/dist/azure/chat-completions-model.d.ts.map +1 -0
- package/dist/azure/chat-completions-model.js +256 -0
- package/dist/azure/chat-completions-model.js.map +1 -0
- package/dist/azure/endpoint.d.ts +18 -0
- package/dist/azure/endpoint.d.ts.map +1 -0
- package/dist/azure/endpoint.js +56 -0
- package/dist/azure/endpoint.js.map +1 -0
- package/dist/azure/index.d.ts +5 -0
- package/dist/azure/index.d.ts.map +1 -0
- package/dist/azure/index.js +3 -0
- package/dist/azure/index.js.map +1 -0
- package/dist/azure/responses-model.d.ts +20 -0
- package/dist/azure/responses-model.d.ts.map +1 -0
- package/dist/azure/responses-model.js +373 -0
- package/dist/azure/responses-model.js.map +1 -0
- package/dist/azure/sse-parser.d.ts +2 -0
- package/dist/azure/sse-parser.d.ts.map +1 -0
- package/dist/azure/sse-parser.js +39 -0
- package/dist/azure/sse-parser.js.map +1 -0
- package/dist/core/agent.d.ts +47 -0
- package/dist/core/agent.d.ts.map +1 -0
- package/dist/core/agent.js +74 -0
- package/dist/core/agent.js.map +1 -0
- package/dist/core/context.d.ts +10 -0
- package/dist/core/context.d.ts.map +1 -0
- package/dist/core/context.js +32 -0
- package/dist/core/context.js.map +1 -0
- package/dist/core/cost.d.ts +9 -0
- package/dist/core/cost.d.ts.map +1 -0
- package/dist/core/cost.js +14 -0
- package/dist/core/cost.js.map +1 -0
- package/dist/core/errors.d.ts +43 -0
- package/dist/core/errors.d.ts.map +1 -0
- package/dist/core/errors.js +75 -0
- package/dist/core/errors.js.map +1 -0
- package/dist/core/guardrails.d.ts +15 -0
- package/dist/core/guardrails.d.ts.map +1 -0
- package/dist/core/guardrails.js +24 -0
- package/dist/core/guardrails.js.map +1 -0
- package/dist/core/handoff.d.ts +18 -0
- package/dist/core/handoff.d.ts.map +1 -0
- package/dist/core/handoff.js +32 -0
- package/dist/core/handoff.js.map +1 -0
- package/dist/core/hooks.d.ts +90 -0
- package/dist/core/hooks.d.ts.map +1 -0
- package/dist/core/hooks.js +2 -0
- package/dist/core/hooks.js.map +1 -0
- package/dist/core/index.d.ts +27 -0
- package/dist/core/index.d.ts.map +1 -0
- package/dist/core/index.js +14 -0
- package/dist/core/index.js.map +1 -0
- package/dist/core/model.d.ts +49 -0
- package/dist/core/model.d.ts.map +1 -0
- package/dist/core/model.js +2 -0
- package/dist/core/model.js.map +1 -0
- package/dist/core/result.d.ts +25 -0
- package/dist/core/result.d.ts.map +1 -0
- package/dist/core/result.js +21 -0
- package/dist/core/result.js.map +1 -0
- package/dist/core/run.d.ts +22 -0
- package/dist/core/run.d.ts.map +1 -0
- package/dist/core/run.js +688 -0
- package/dist/core/run.js.map +1 -0
- package/dist/core/session.d.ts +64 -0
- package/dist/core/session.d.ts.map +1 -0
- package/dist/core/session.js +143 -0
- package/dist/core/session.js.map +1 -0
- package/dist/core/subagent.d.ts +30 -0
- package/dist/core/subagent.d.ts.map +1 -0
- package/dist/core/subagent.js +52 -0
- package/dist/core/subagent.js.map +1 -0
- package/dist/core/tool.d.ts +20 -0
- package/dist/core/tool.d.ts.map +1 -0
- package/dist/core/tool.js +21 -0
- package/dist/core/tool.js.map +1 -0
- package/dist/core/tracing.d.ts +31 -0
- package/dist/core/tracing.d.ts.map +1 -0
- package/dist/core/tracing.js +62 -0
- package/dist/core/tracing.js.map +1 -0
- package/dist/core/types.d.ts +90 -0
- package/dist/core/types.d.ts.map +1 -0
- package/dist/core/types.js +2 -0
- package/dist/core/types.js.map +1 -0
- package/dist/core/utils/zod.d.ts +5 -0
- package/dist/core/utils/zod.d.ts.map +1 -0
- package/dist/core/utils/zod.js +73 -0
- package/dist/core/utils/zod.js.map +1 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +3 -0
- package/dist/index.js.map +1 -0
- package/package.json +38 -0
package/README.md
ADDED
@@ -0,0 +1,429 @@
<p align="center">
  <img src=".github/logo.svg" alt="Stratus" width="80" height="80">
</p>

# Stratus

A TypeScript agent framework for Azure OpenAI. Build multi-agent systems with tools, handoffs, guardrails, streaming, structured output, and more.

`agents` `tools` `streaming` `structured output` `handoffs` `subagents` `guardrails` `hooks` `tracing` `sessions` `abort signals`

## Install

```bash
bun add stratus
```

Stratus requires [Zod](https://zod.dev) as a peer dependency:

```bash
bun add zod
```

## Quick Start

```ts
import { z } from "zod";
import { Agent, AzureChatCompletionsModel, run, tool } from "stratus";

const model = new AzureChatCompletionsModel({
  endpoint: process.env.AZURE_OPENAI_ENDPOINT!,
  apiKey: process.env.AZURE_OPENAI_API_KEY!,
  deployment: "gpt-5.2",
});

const getWeather = tool({
  name: "get_weather",
  description: "Get the current weather for a city",
  parameters: z.object({
    city: z.string().describe("The city name"),
  }),
  execute: async (_ctx, { city }) => {
    return `72°F and sunny in ${city}`;
  },
});

const agent = new Agent({
  name: "weather-assistant",
  instructions: "You are a helpful weather assistant.",
  model,
  tools: [getWeather],
});

const result = await run(agent, "What's the weather in New York?");
console.log(result.output);
```

## Core Concepts

### Agents

Agents are the primary building block. Each agent has a name, instructions, a model, and optional tools, handoffs, guardrails, and hooks.

```ts
const agent = new Agent({
  name: "my-agent",
  instructions: "You are a helpful assistant.",
  model,
  tools: [myTool],
});

// Dynamic instructions based on context
const agent = new Agent({
  name: "my-agent",
  instructions: (ctx) => `You are helping ${ctx.userName}.`,
  model,
});
```

### Tools

Define tools with Zod schemas for type-safe parameter validation:

```ts
const searchTool = tool({
  name: "search",
  description: "Search for information",
  parameters: z.object({
    query: z.string().describe("Search query"),
    limit: z.number().optional().describe("Max results"),
  }),
  execute: async (context, { query, limit }) => {
    // Tool logic here
    return "search results";
  },
});
```

### Streaming

Stream responses token-by-token:

```ts
const { stream: s, result } = stream(agent, "Tell me a story");

for await (const event of s) {
  if (event.type === "content_delta") {
    process.stdout.write(event.content);
  } else if (event.type === "tool_call_start") {
    console.log(`Calling: ${event.toolCall.name}`);
  }
}

const finalResult = await result;
```

### Structured Output

Use Zod schemas to get typed, validated output:

```ts
const PersonSchema = z.object({
  name: z.string(),
  age: z.number(),
  occupation: z.string(),
});

const agent = new Agent({
  name: "extractor",
  instructions: "Extract person information.",
  model,
  outputType: PersonSchema,
});

const result = await run(agent, "Marie Curie was a 66-year-old physicist.");
console.log(result.finalOutput); // { name: "Marie Curie", age: 66, occupation: "physicist" }
```

### Sessions

Sessions maintain conversation history across multiple interactions:

```ts
import { createSession, forkSession, resumeSession } from "stratus";

const session = createSession({ model, tools: [myTool] });

session.send("Hello!");
for await (const event of session.stream()) {
  // handle events
}

session.send("Follow-up question");
for await (const event of session.stream()) {
  // handle events
}

// Save and resume sessions
const snapshot = session.save();
const resumed = resumeSession(snapshot, { model });

// Fork a session (new ID, same history)
const forked = forkSession(snapshot, { model });

// Cleanup
session.close();
// Or use Symbol.asyncDispose:
await using session = createSession({ model });
```

### Handoffs

Transfer control between specialized agents:

```ts
import { handoff } from "stratus";

const orderAgent = new Agent({
  name: "order_specialist",
  instructions: "Help with order inquiries.",
  model,
  tools: [lookupOrder],
  handoffDescription: "Transfer for order questions",
});

const triageAgent = new Agent({
  name: "triage",
  instructions: "Route to the right specialist.",
  model,
  handoffs: [
    orderAgent, // shorthand
    handoff({ // with options
      agent: refundAgent,
      onHandoff: () => console.log("Transferring..."),
    }),
  ],
});

const result = await run(triageAgent, "Where is my order?");
console.log(result.lastAgent.name); // "order_specialist"
```

### Subagents

Delegate subtasks to child agents that run independently:

```ts
import { subagent } from "stratus";

const researcher = new Agent({
  name: "researcher",
  instructions: "Research topics thoroughly.",
  model,
});

const parentAgent = new Agent({
  name: "parent",
  instructions: "Use the researcher for deep dives.",
  model,
  subagents: [
    subagent({
      agent: researcher,
      inputSchema: z.object({ topic: z.string() }),
      mapInput: ({ topic }) => `Research: ${topic}`,
    }),
  ],
});
```

### Guardrails

Validate inputs and outputs with guardrails:

```ts
import type { InputGuardrail, OutputGuardrail } from "stratus";

const profanityFilter: InputGuardrail = {
  name: "profanity_filter",
  execute: (input) => ({
    tripwireTriggered: containsProfanity(input),
    outputInfo: "Blocked by profanity filter",
  }),
};

const piiFilter: OutputGuardrail = {
  name: "pii_filter",
  execute: (output) => ({
    tripwireTriggered: /\d{3}-\d{2}-\d{4}/.test(output),
    outputInfo: "Output contained PII",
  }),
};

const agent = new Agent({
  name: "guarded",
  model,
  inputGuardrails: [profanityFilter],
  outputGuardrails: [piiFilter],
});
```

Guardrails run in parallel. When a tripwire is triggered, an `InputGuardrailTripwireTriggered` or `OutputGuardrailTripwireTriggered` error is thrown.

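
For example, a guarded run can be wrapped in a `try/catch`. This is a minimal sketch; it assumes the tripwire error classes are exported alongside the other error classes listed under Error Handling below:

```ts
import { run, InputGuardrailTripwireTriggered, OutputGuardrailTripwireTriggered } from "stratus";

try {
  await run(agent, "Tell me about your refund policy.");
} catch (error) {
  if (error instanceof InputGuardrailTripwireTriggered) {
    // The input guardrail blocked the request before it reached the model
    console.log("Input blocked:", error.message);
  } else if (error instanceof OutputGuardrailTripwireTriggered) {
    // The output guardrail blocked the model's response
    console.log("Output blocked:", error.message);
  } else {
    throw error;
  }
}
```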

### Hooks

Lifecycle hooks for observability and control:

```ts
import type { AgentHooks } from "stratus";

const hooks: AgentHooks = {
  beforeRun: ({ agent, input }) => { /* ... */ },
  afterRun: ({ agent, result }) => { /* ... */ },

  // Return a decision to allow, deny, or modify tool calls
  beforeToolCall: ({ toolCall }) => {
    if (toolCall.function.name === "dangerous_tool") {
      return { decision: "deny", reason: "Not allowed" };
    }
    return { decision: "allow" };
  },
  afterToolCall: ({ toolCall, result }) => { /* ... */ },

  // Allow or deny handoffs
  beforeHandoff: ({ fromAgent, toAgent }) => {
    return { decision: "allow" };
  },
};
```

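
Hooks take effect once attached to an agent. The README does not show the exact option name, so the `hooks` property below is an assumption based on the agent options listed earlier:

```ts
// Sketch only: `hooks` as an Agent option is assumed, not confirmed by this README.
const monitoredAgent = new Agent({
  name: "monitored",
  instructions: "You are a helpful assistant.",
  model,
  tools: [myTool],
  hooks,
});
```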

### Tracing

Opt-in tracing with zero overhead when inactive:

```ts
import { withTrace } from "stratus";

const { result, trace } = await withTrace("my-workflow", () =>
  run(agent, "Hello"),
);

console.log(trace.id);
console.log(trace.duration);
for (const span of trace.spans) {
  console.log(`[${span.type}] ${span.name} (${span.duration}ms)`);
  // span.type: "model_call" | "tool_execution" | "handoff" | "guardrail" | "subagent" | "custom"
}
```

### Abort Signals

Cancel runs with `AbortSignal`:

```ts
import { RunAbortedError } from "stratus";

const controller = new AbortController();

setTimeout(() => controller.abort(), 5000);

try {
  const result = await run(agent, "Long task...", {
    signal: controller.signal,
  });
} catch (error) {
  if (error instanceof RunAbortedError) {
    console.log("Run was cancelled");
  }
}
```

### Tool Choice & Tool Use Behavior

Control how the model uses tools:

```ts
const agent = new Agent({
  name: "my-agent",
  model,
  tools: [myTool],
  modelSettings: {
    // "auto" | "none" | "required" | { type: "function", function: { name: "..." } }
    toolChoice: "required",
  },
  // "run_llm_again" (default) | "stop_on_first_tool" | { stopAtToolNames: ["..."] }
  toolUseBehavior: "stop_on_first_tool",
});
```

## Imports

Stratus provides three export paths:

```ts
// Everything (core + Azure)
import { Agent, run, tool, AzureChatCompletionsModel, AzureResponsesModel } from "stratus";

// Core only (provider-agnostic)
import { Agent, run, tool } from "stratus/core";

// Azure provider only
import { AzureChatCompletionsModel, AzureResponsesModel } from "stratus/azure";
```

## Configuration

### Azure OpenAI

Stratus includes two interchangeable Azure model implementations:

```ts
// Chat Completions API
const model = new AzureChatCompletionsModel({
  endpoint: process.env.AZURE_OPENAI_ENDPOINT!,
  apiKey: process.env.AZURE_OPENAI_API_KEY!,
  deployment: "gpt-5.2",
  apiVersion: "2025-03-01-preview", // optional, this is the default
});

// Responses API
const model = new AzureResponsesModel({
  endpoint: process.env.AZURE_OPENAI_ENDPOINT!,
  apiKey: process.env.AZURE_OPENAI_API_KEY!,
  deployment: "gpt-5.2",
  apiVersion: "2025-04-01-preview", // optional, this is the default
});
```

Both implement the same `Model` interface — swap one for the other without changing any agent, tool, or session code.

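
Because both constructors accept the same config fields, switching is a config-level change. A minimal sketch (the `USE_RESPONSES_API` flag is purely illustrative; Stratus does not read it):

```ts
const azureConfig = {
  endpoint: process.env.AZURE_OPENAI_ENDPOINT!,
  apiKey: process.env.AZURE_OPENAI_API_KEY!,
  deployment: "gpt-5.2",
};

// Illustrative switch only; pick whichever implementation fits your deployment.
const model = process.env.USE_RESPONSES_API
  ? new AzureResponsesModel(azureConfig)
  : new AzureChatCompletionsModel(azureConfig);

const agent = new Agent({
  name: "my-agent",
  instructions: "You are a helpful assistant.",
  model,
});
```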

### Environment Variables

```
AZURE_OPENAI_ENDPOINT=https://your-resource.openai.azure.com
AZURE_OPENAI_API_KEY=your-api-key
```

## Error Handling

All errors extend `StratusError`:

| Error | Description |
|---|---|
| `StratusError` | Base error class |
| `ModelError` | API call failures (includes `status` and `code`) |
| `ContentFilterError` | Content filtered by Azure's content management policy |
| `MaxTurnsExceededError` | Agent exceeded the `maxTurns` limit |
| `OutputParseError` | Structured output failed Zod validation |
| `RunAbortedError` | Run cancelled via `AbortSignal` |
| `InputGuardrailTripwireTriggered` | Input guardrail blocked the request |
| `OutputGuardrailTripwireTriggered` | Output guardrail blocked the response |

```ts
import { ModelError, MaxTurnsExceededError, RunAbortedError } from "stratus";

try {
  await run(agent, input);
} catch (error) {
  if (error instanceof MaxTurnsExceededError) {
    // Agent ran too many turns
  } else if (error instanceof ModelError) {
    console.log(error.status, error.code);
  }
}
```

## Development

```bash
bun test           # Run tests
bun run lint       # Lint with Biome
bun run typecheck  # TypeScript type checking
```

package/dist/azure/chat-completions-model.d.ts
ADDED
@@ -0,0 +1,19 @@
import type { Model, ModelRequest, ModelRequestOptions, ModelResponse, StreamEvent } from "../core/model";
export interface AzureChatCompletionsModelConfig {
    endpoint: string;
    apiKey: string;
    deployment: string;
    apiVersion?: string;
}
export declare class AzureChatCompletionsModel implements Model {
    private readonly url;
    private readonly apiKey;
    constructor(config: AzureChatCompletionsModelConfig);
    getResponse(request: ModelRequest, options?: ModelRequestOptions): Promise<ModelResponse>;
    getStreamedResponse(request: ModelRequest, options?: ModelRequestOptions): AsyncGenerator<StreamEvent>;
    private buildRequestBody;
    private doFetch;
    private handleErrorResponse;
    private parseResponse;
}
//# sourceMappingURL=chat-completions-model.d.ts.map
package/dist/azure/chat-completions-model.d.ts.map
ADDED
@@ -0,0 +1 @@
{"version":3,"file":"chat-completions-model.d.ts","sourceRoot":"","sources":["../../src/azure/chat-completions-model.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EACX,KAAK,EACL,YAAY,EACZ,mBAAmB,EACnB,aAAa,EACb,WAAW,EAEX,MAAM,eAAe,CAAC;AAKvB,MAAM,WAAW,+BAA+B;IAC/C,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;CACpB;AAID,qBAAa,yBAA0B,YAAW,KAAK;IACtD,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAS;IAC7B,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAS;gBAEpB,MAAM,EAAE,+BAA+B;IAS7C,WAAW,CAChB,OAAO,EAAE,YAAY,EACrB,OAAO,CAAC,EAAE,mBAAmB,GAC3B,OAAO,CAAC,aAAa,CAAC;IAOlB,mBAAmB,CACzB,OAAO,EAAE,YAAY,EACrB,OAAO,CAAC,EAAE,mBAAmB,GAC3B,cAAc,CAAC,WAAW,CAAC;IAiG9B,OAAO,CAAC,gBAAgB;YAyCV,OAAO;YAmCP,mBAAmB;IA2BjC,OAAO,CAAC,aAAa;CAyCrB"}
package/dist/azure/chat-completions-model.js
ADDED
@@ -0,0 +1,256 @@
import { ContentFilterError, ModelError } from "../core/errors";
import { resolveChatCompletionsUrl } from "./endpoint";
import { parseSSE } from "./sse-parser";
const DEFAULT_API_VERSION = "2025-03-01-preview";
export class AzureChatCompletionsModel {
    url;
    apiKey;
    constructor(config) {
        this.apiKey = config.apiKey;
        this.url = resolveChatCompletionsUrl(config.endpoint, config.deployment, config.apiVersion ?? DEFAULT_API_VERSION);
    }
    async getResponse(request, options) {
        const body = this.buildRequestBody(request, false);
        const response = await this.doFetch(body, options?.signal);
        const json = await response.json();
        return this.parseResponse(json);
    }
    async *getStreamedResponse(request, options) {
        const body = this.buildRequestBody(request, true);
        const response = await this.doFetch(body, options?.signal);
        if (!response.body) {
            throw new ModelError("Response body is null");
        }
        let content = "";
        const toolCalls = new Map();
        let finishReason;
        let usage;
        for await (const data of parseSSE(response.body)) {
            let chunk;
            try {
                chunk = JSON.parse(data);
            }
            catch {
                continue;
            }
            if (chunk.usage) {
                usage = {
                    promptTokens: chunk.usage.prompt_tokens,
                    completionTokens: chunk.usage.completion_tokens,
                    totalTokens: chunk.usage.total_tokens,
                    ...(chunk.usage.prompt_tokens_details?.cached_tokens !== undefined
                        ? { cacheReadTokens: chunk.usage.prompt_tokens_details.cached_tokens }
                        : {}),
                    ...(chunk.usage.completion_tokens_details?.reasoning_tokens !== undefined
                        ? { reasoningTokens: chunk.usage.completion_tokens_details.reasoning_tokens }
                        : {}),
                };
            }
            const choice = chunk.choices?.[0];
            if (!choice)
                continue;
            if (choice.finish_reason) {
                finishReason = choice.finish_reason;
            }
            const delta = choice.delta;
            if (!delta)
                continue;
            if (delta.content) {
                content += delta.content;
                yield { type: "content_delta", content: delta.content };
            }
            if (delta.tool_calls) {
                for (const tc of delta.tool_calls) {
                    const existing = toolCalls.get(tc.index);
                    if (!existing) {
                        const id = tc.id ?? "";
                        const name = tc.function?.name ?? "";
                        toolCalls.set(tc.index, { id, name, arguments: tc.function?.arguments ?? "" });
                        if (id && name) {
                            yield { type: "tool_call_start", toolCall: { id, name } };
                        }
                    }
                    else {
                        if (tc.id)
                            existing.id = tc.id;
                        if (tc.function?.name)
                            existing.name = tc.function.name;
                        if (tc.function?.arguments) {
                            existing.arguments += tc.function.arguments;
                            yield {
                                type: "tool_call_delta",
                                toolCallId: existing.id,
                                arguments: tc.function.arguments,
                            };
                        }
                    }
                }
            }
        }
        const finalToolCalls = Array.from(toolCalls.values()).map((tc) => ({
            id: tc.id,
            type: "function",
            function: { name: tc.name, arguments: tc.arguments },
        }));
        for (const tc of finalToolCalls) {
            yield { type: "tool_call_done", toolCallId: tc.id };
        }
        yield {
            type: "done",
            response: {
                content: content || null,
                toolCalls: finalToolCalls,
                usage,
                finishReason,
            },
        };
    }
    buildRequestBody(request, stream) {
        const body = {
            messages: request.messages.map(serializeMessage),
        };
        if (stream) {
            body.stream = true;
            body.stream_options = { include_usage: true };
        }
        if (request.tools && request.tools.length > 0) {
            body.tools = request.tools;
        }
        if (request.responseFormat) {
            body.response_format = request.responseFormat;
        }
        const s = request.modelSettings;
        if (s) {
            if (s.temperature !== undefined)
                body.temperature = s.temperature;
            if (s.topP !== undefined)
                body.top_p = s.topP;
            if (s.maxTokens !== undefined)
                body.max_tokens = s.maxTokens;
            if (s.maxCompletionTokens !== undefined)
                body.max_completion_tokens = s.maxCompletionTokens;
            if (s.stop !== undefined)
                body.stop = s.stop;
            if (s.presencePenalty !== undefined)
                body.presence_penalty = s.presencePenalty;
            if (s.frequencyPenalty !== undefined)
                body.frequency_penalty = s.frequencyPenalty;
            if (s.toolChoice !== undefined)
                body.tool_choice = s.toolChoice;
            if (s.parallelToolCalls !== undefined)
                body.parallel_tool_calls = s.parallelToolCalls;
            if (s.seed !== undefined)
                body.seed = s.seed;
            if (s.reasoningEffort !== undefined)
                body.reasoning_effort = s.reasoningEffort;
            if (s.promptCacheKey !== undefined)
                body.prompt_cache_key = s.promptCacheKey;
        }
        return body;
    }
    async doFetch(body, signal) {
        const maxRetries = 3;
        for (let attempt = 0; attempt <= maxRetries; attempt++) {
            const response = await fetch(this.url, {
                method: "POST",
                headers: {
                    "Content-Type": "application/json",
                    "api-key": this.apiKey,
                },
                body: JSON.stringify(body),
                signal,
            });
            if (response.status === 429 && attempt < maxRetries) {
                const retryAfter = response.headers.get("retry-after");
                const waitMs = retryAfter
                    ? Number.parseInt(retryAfter, 10) * 1000
                    : Math.min(1000 * 2 ** attempt, 30000);
                await new Promise((r) => setTimeout(r, waitMs));
                continue;
            }
            if (!response.ok) {
                await this.handleErrorResponse(response);
            }
            return response;
        }
        throw new ModelError("Max retries exceeded for Azure API request");
    }
    async handleErrorResponse(response) {
        let errorBody;
        try {
            errorBody = await response.text();
        }
        catch {
            errorBody = "";
        }
        if (response.status === 400) {
            let parsed;
            try {
                parsed = JSON.parse(errorBody);
            }
            catch {
                // ignore parse errors
            }
            if (parsed?.error?.code === "content_filter") {
                throw new ContentFilterError(parsed.error.message, { status: 400 });
            }
        }
        throw new ModelError(`Azure API error (${response.status}): ${errorBody || response.statusText}`, { status: response.status });
    }
    parseResponse(json) {
        const choice = json.choices?.[0];
        if (!choice) {
            throw new ModelError("No choices in response");
        }
        if (choice.finish_reason === "content_filter") {
            throw new ContentFilterError();
        }
        const toolCalls = (choice.message.tool_calls ?? []).map((tc) => ({
            id: tc.id,
            type: "function",
            function: {
                name: tc.function.name,
                arguments: tc.function.arguments,
            },
        }));
        const usage = json.usage
            ? {
                promptTokens: json.usage.prompt_tokens,
                completionTokens: json.usage.completion_tokens,
                totalTokens: json.usage.total_tokens,
                ...(json.usage.prompt_tokens_details?.cached_tokens !== undefined
                    ? { cacheReadTokens: json.usage.prompt_tokens_details.cached_tokens }
                    : {}),
                ...(json.usage.completion_tokens_details?.reasoning_tokens !== undefined
                    ? { reasoningTokens: json.usage.completion_tokens_details.reasoning_tokens }
                    : {}),
            }
            : undefined;
        return {
            content: choice.message.content,
            toolCalls,
            usage,
            finishReason: choice.finish_reason,
        };
    }
}
function serializeMessage(msg) {
    switch (msg.role) {
        case "system":
            return { role: "system", content: msg.content };
        case "developer":
            return { role: "developer", content: msg.content };
        case "user":
            return { role: "user", content: msg.content };
        case "assistant": {
            const out = { role: "assistant", content: msg.content };
            if (msg.tool_calls && msg.tool_calls.length > 0) {
                out.tool_calls = msg.tool_calls;
            }
            return out;
        }
        case "tool":
            return { role: "tool", tool_call_id: msg.tool_call_id, content: msg.content };
    }
}
//# sourceMappingURL=chat-completions-model.js.map