@radaros/core 0.3.2 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/agent/agent.ts +113 -14
- package/src/agent/llm-loop.ts +31 -4
- package/src/agent/types.ts +13 -1
- package/src/index.ts +8 -1
- package/src/logger/logger.ts +19 -2
- package/src/memory/user-memory.ts +211 -0
- package/src/models/providers/anthropic.ts +34 -4
- package/src/models/providers/google.ts +29 -4
- package/src/models/providers/openai.ts +28 -6
- package/src/models/providers/vertex.ts +31 -4
- package/src/models/types.ts +12 -0
- package/src/tools/define-tool.ts +3 -1
- package/src/tools/tool-executor.ts +86 -4
- package/src/tools/types.ts +7 -0
- package/src/utils/retry.ts +56 -0
- package/dist/index.d.ts +0 -1317
- package/dist/index.js +0 -4823
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
import { v4 as uuidv4 } from "uuid";
|
|
2
|
+
import { z } from "zod";
|
|
3
|
+
import { InMemoryStorage } from "../storage/in-memory.js";
|
|
4
|
+
import type { StorageDriver } from "../storage/driver.js";
|
|
5
|
+
import type { ModelProvider } from "../models/provider.js";
|
|
6
|
+
import type { ChatMessage } from "../models/types.js";
|
|
7
|
+
import type { ToolDef } from "../tools/types.js";
|
|
8
|
+
|
|
9
|
+
/** Storage namespace passed to every `storage.get` / `set` / `delete` call made by `UserMemory`; the key within it is the user id. */
const USER_MEMORY_NS = "memory:user";
|
|
10
|
+
|
|
11
|
+
/** Options accepted by the `UserMemory` constructor. Every field is optional. */
export interface UserMemoryConfig {
  /** Backend that persists the fact lists. When omitted, `UserMemory` creates a new `InMemoryStorage`. */
  storage?: StorageDriver;
  /**
   * LLM used for auto-extraction of facts from conversations. When omitted,
   * `extractAndStore` uses the fallback model passed to it, and does nothing
   * if neither is available.
   */
  model?: ModelProvider;
  /** Maximum number of facts stored per user (default 100). When exceeded, the earliest entries in the list are dropped. */
  maxFacts?: number;
  /**
   * Master switch (default true). When false, `extractAndStore` returns
   * without calling the model and `getContextString` returns an empty string.
   */
  enabled?: boolean;
}
|
|
20
|
+
|
|
21
|
+
/** One remembered statement about a user, as stored by `UserMemory`. */
export interface UserFact {
  /** Unique identifier (a UUID v4 generated when the fact is added); used by `removeFact`. */
  id: string;
  /** The fact text, whitespace-trimmed before it is stored. */
  fact: string;
  /**
   * Time the fact was added.
   * NOTE(review): a storage driver that serialises values may hand this back
   * as a string rather than a `Date` — confirm against the driver in use.
   */
  createdAt: Date;
  /** `"auto"` for facts produced by LLM extraction, `"manual"` for facts added directly by a caller. */
  source: "auto" | "manual";
}
|
|
27
|
+
|
|
28
|
+
/**
 * Prompt template sent to the extraction model by `UserMemory.extractAndStore`.
 *
 * Contains two placeholders, each appearing once:
 * - `{existingFacts}` — the already-stored facts, so the model avoids duplicates
 * - `{conversation}` — the user/assistant turns to analyse
 *
 * The model is instructed to answer with a bare JSON array of strings.
 */
const EXTRACTION_PROMPT = `You are a memory extraction assistant. Analyze the conversation below and extract important facts about the user that would be useful for future personalization.

Rules:
- Extract concrete facts like preferences, location, profession, interests, goals, communication style
- Each fact should be a short, self-contained statement (e.g., "Lives in Mumbai", "Prefers concise answers")
- Do NOT extract transient information (e.g., "asked about weather today")
- Do NOT extract information about the assistant
- If there are no new meaningful facts, return an empty array
- Return ONLY a valid JSON array of strings, nothing else

Existing facts about this user (avoid duplicates):
{existingFacts}

Conversation:
{conversation}

Return a JSON array of new fact strings:`;
|
|
45
|
+
|
|
46
|
+
/**
 * Per-user memory of short facts.
 *
 * Facts are kept as one `UserFact[]` per user in a `StorageDriver`, under the
 * `USER_MEMORY_NS` namespace and keyed by user id. They can be added manually
 * (`addFacts`), extracted from a conversation by an LLM (`extractAndStore`),
 * rendered for a system prompt (`getContextString`) or exposed to the agent
 * as a tool (`asTool`).
 *
 * Updates are read-modify-write on the whole list; no locking is performed
 * here, so concurrent writers for the same user may overwrite each other.
 */
export class UserMemory {
  private readonly storage: StorageDriver;
  private readonly model?: ModelProvider;
  private readonly maxFacts: number;
  private readonly enabled: boolean;
  /** Memoised storage initialisation; cleared again if initialisation fails. */
  private initPromise: Promise<void> | null = null;

  /**
   * @param config Optional settings. Defaults: a fresh `InMemoryStorage`,
   *   no dedicated model, `maxFacts` 100, `enabled` true.
   */
  constructor(config?: UserMemoryConfig) {
    this.storage = config?.storage ?? new InMemoryStorage();
    this.model = config?.model;
    this.maxFacts = config?.maxFacts ?? 100;
    this.enabled = config?.enabled ?? true;
  }

  /**
   * Runs the storage driver's optional `initialize()` hook at most once per
   * successful attempt and returns the shared promise.
   *
   * A failed attempt is not cached: the next call retries, so a transient
   * storage outage does not disable this instance permanently.
   */
  private ensureInitialized(): Promise<void> {
    if (this.initPromise) return this.initPromise;

    const pending = this.initializeStorage();
    this.initPromise = pending;
    // The caller still receives `pending` (and its rejection); this side
    // branch only forgets the failed attempt so that it can be retried.
    pending.catch(() => {
      if (this.initPromise === pending) this.initPromise = null;
    });
    return pending;
  }

  /** Calls `storage.initialize()` when the driver provides such a method. */
  private async initializeStorage(): Promise<void> {
    // `initialize` is probed at runtime rather than through the StorageDriver type.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const driver = this.storage as any;
    if (typeof driver.initialize === "function") {
      await driver.initialize();
    }
  }

  /** Renders facts as a `- fact` bullet list, one per line. */
  private formatFactList(facts: UserFact[]): string {
    return facts.map((f) => `- ${f.fact}`).join("\n");
  }

  /**
   * Returns every fact stored for `userId`, or an empty array when the
   * storage holds nothing for that user.
   */
  async getFacts(userId: string): Promise<UserFact[]> {
    await this.ensureInitialized();
    return (await this.storage.get<UserFact[]>(USER_MEMORY_NS, userId)) ?? [];
  }

  /**
   * Appends facts for a user.
   *
   * Each fact is trimmed; empty strings and case-insensitive duplicates
   * (of stored facts or of earlier entries in the same call) are skipped.
   * If the list would exceed `maxFacts`, entries are dropped from the front
   * so that only the last `maxFacts` remain. Nothing is written when no new
   * fact survives filtering.
   *
   * @param userId Key of the user the facts belong to.
   * @param facts  Candidate fact strings.
   * @param source Provenance recorded on each new fact (default `"manual"`).
   */
  async addFacts(userId: string, facts: string[], source: "auto" | "manual" = "manual"): Promise<void> {
    const existing = await this.getFacts(userId);
    const seen = new Set(existing.map((f) => f.fact.toLowerCase()));

    const additions: UserFact[] = [];
    for (const raw of facts) {
      const fact = raw.trim();
      const key = fact.toLowerCase();
      if (!fact || seen.has(key)) continue;
      seen.add(key);
      additions.push({ id: uuidv4(), fact, createdAt: new Date(), source });
    }

    if (additions.length === 0) return;

    const combined = [...existing, ...additions];
    const capped =
      combined.length > this.maxFacts
        ? combined.slice(combined.length - this.maxFacts)
        : combined;

    await this.storage.set(USER_MEMORY_NS, userId, capped);
  }

  /**
   * Removes the fact with id `factId` from the user's list. When no stored
   * fact has that id the storage is left untouched (no write is issued).
   */
  async removeFact(userId: string, factId: string): Promise<void> {
    const existing = await this.getFacts(userId);
    const remaining = existing.filter((f) => f.id !== factId);
    if (remaining.length === existing.length) return;
    await this.storage.set(USER_MEMORY_NS, userId, remaining);
  }

  /** Deletes everything stored for `userId`. */
  async clear(userId: string): Promise<void> {
    await this.ensureInitialized();
    await this.storage.delete(USER_MEMORY_NS, userId);
  }

  /**
   * Builds a prompt fragment listing the user's facts.
   *
   * @returns `"What you know about this user:"` followed by a bullet list,
   *   or `""` when memory is disabled or the user has no facts.
   */
  async getContextString(userId: string): Promise<string> {
    if (!this.enabled) return "";
    const facts = await this.getFacts(userId);
    if (facts.length === 0) return "";
    return `What you know about this user:\n${this.formatFactList(facts)}`;
  }

  /**
   * Wraps this memory as a parameterless tool that returns the stored facts
   * of the user identified by `ctx.userId`.
   *
   * @param config Optional overrides for the tool name (default
   *   `"recall_user_facts"`) and description.
   */
  asTool(config?: { name?: string; description?: string }): ToolDef {
    return {
      name: config?.name ?? "recall_user_facts",
      description:
        config?.description ??
        "Retrieve stored facts about the current user — preferences, background, interests, and other personal details from past conversations. Call this when the user asks what you know or remember about them.",
      parameters: z.object({}),
      // Arrow function: `this` is the UserMemory instance.
      execute: async (_args, ctx) => {
        const uid = ctx.userId;
        if (!uid) return "No user identified for this session.";
        const facts = await this.getFacts(uid);
        if (facts.length === 0) return "No stored facts about this user yet.";
        return this.formatFactList(facts);
      },
    };
  }

  /**
   * Asks an LLM to extract new facts from a conversation and stores them
   * with source `"auto"`.
   *
   * Best-effort: does nothing when memory is disabled or no model is
   * available, and any failure (model error, unparsable reply, storage
   * error) is logged with `console.warn` instead of being thrown.
   *
   * @param userId        User the conversation belongs to.
   * @param messages      Conversation so far; only `user` and `assistant`
   *   turns are sent, and non-string content is replaced by `(multimodal)`.
   * @param fallbackModel Model to use when none was configured on this instance.
   */
  async extractAndStore(
    userId: string,
    messages: ChatMessage[],
    fallbackModel?: ModelProvider
  ): Promise<void> {
    if (!this.enabled) return;

    const model = this.model ?? fallbackModel;
    if (!model) return;

    try {
      const existing = await this.getFacts(userId);
      const existingStr = existing.length > 0 ? this.formatFactList(existing) : "(none)";

      const conversationStr = messages
        .filter((m) => m.role === "user" || m.role === "assistant")
        .map((m) => {
          const content = typeof m.content === "string" ? m.content : "(multimodal)";
          return `${m.role}: ${content}`;
        })
        .join("\n");

      // Both placeholders are filled in one pass through a replacer function:
      //  - a function's return value is inserted verbatim, so `$&`, `$$`, `$'`
      //    in user text are not treated as replacement patterns;
      //  - inserted text is never rescanned, so a stored fact that happens to
      //    contain "{conversation}" cannot capture the conversation slot.
      const prompt = EXTRACTION_PROMPT.replace(
        /\{(existingFacts|conversation)\}/g,
        (_match, slot: string) => (slot === "existingFacts" ? existingStr : conversationStr)
      );

      const response = await model.generate(
        [{ role: "user", content: prompt }],
        { temperature: 0, maxTokens: 500 }
      );

      const text =
        typeof response.message.content === "string" ? response.message.content : "";
      if (!text) return;

      const parsed: unknown = JSON.parse(this.extractJsonArray(text));
      if (!Array.isArray(parsed)) return;

      const validFacts = parsed.filter(
        (f): f is string => typeof f === "string" && f.trim().length > 0
      );
      if (validFacts.length > 0) {
        await this.addFacts(userId, validFacts, "auto");
      }
    } catch (err) {
      // `err` may be anything, including null/undefined; optional chaining
      // keeps the handler itself from throwing.
      const detail = (err as { message?: unknown } | null | undefined)?.message ?? err;
      console.warn("[UserMemory] extractAndStore failed:", detail);
    }
  }

  /**
   * Pulls the JSON payload out of a model reply.
   *
   * Order of preference: the body of the first Markdown code fence; else the
   * span from the first `[` to the last `]`; else the trimmed reply as-is.
   * The result is not validated here — the caller parses it.
   */
  private extractJsonArray(text: string): string {
    const fenced = /```(?:json)?\s*\n?([\s\S]*?)```/.exec(text)?.[1];
    if (fenced !== undefined) return fenced.trim();

    const bracketStart = text.indexOf("[");
    const bracketEnd = text.lastIndexOf("]");
    if (bracketStart !== -1 && bracketEnd > bracketStart) {
      return text.slice(bracketStart, bracketEnd + 1);
    }

    return text.trim();
  }
}
|
|
@@ -81,6 +81,13 @@ export class AnthropicProvider implements ModelProvider {
|
|
|
81
81
|
if (options?.tools?.length) {
|
|
82
82
|
params.tools = this.toAnthropicTools(options.tools);
|
|
83
83
|
}
|
|
84
|
+
if (options?.reasoning?.enabled) {
|
|
85
|
+
params.thinking = {
|
|
86
|
+
type: "enabled",
|
|
87
|
+
budget_tokens: options.reasoning.budgetTokens ?? 10000,
|
|
88
|
+
};
|
|
89
|
+
delete params.temperature;
|
|
90
|
+
}
|
|
84
91
|
|
|
85
92
|
const client = this.getClient(options?.apiKey);
|
|
86
93
|
const response = await client.messages.create(params);
|
|
@@ -109,11 +116,19 @@ export class AnthropicProvider implements ModelProvider {
|
|
|
109
116
|
if (options?.tools?.length) {
|
|
110
117
|
params.tools = this.toAnthropicTools(options.tools);
|
|
111
118
|
}
|
|
119
|
+
if (options?.reasoning?.enabled) {
|
|
120
|
+
params.thinking = {
|
|
121
|
+
type: "enabled",
|
|
122
|
+
budget_tokens: options.reasoning.budgetTokens ?? 10000,
|
|
123
|
+
};
|
|
124
|
+
delete params.temperature;
|
|
125
|
+
}
|
|
112
126
|
|
|
113
127
|
const client = this.getClient(options?.apiKey);
|
|
114
128
|
const stream = await client.messages.create(params);
|
|
115
129
|
|
|
116
130
|
let currentToolId = "";
|
|
131
|
+
let inThinkingBlock = false;
|
|
117
132
|
|
|
118
133
|
for await (const event of stream) {
|
|
119
134
|
switch (event.type) {
|
|
@@ -127,11 +142,15 @@ export class AnthropicProvider implements ModelProvider {
|
|
|
127
142
|
name: event.content_block.name,
|
|
128
143
|
},
|
|
129
144
|
};
|
|
145
|
+
} else if (event.content_block?.type === "thinking") {
|
|
146
|
+
inThinkingBlock = true;
|
|
130
147
|
}
|
|
131
148
|
break;
|
|
132
149
|
}
|
|
133
150
|
case "content_block_delta": {
|
|
134
|
-
if (event.delta?.type === "
|
|
151
|
+
if (event.delta?.type === "thinking_delta") {
|
|
152
|
+
yield { type: "thinking", text: event.delta.thinking };
|
|
153
|
+
} else if (event.delta?.type === "text_delta") {
|
|
135
154
|
yield { type: "text", text: event.delta.text };
|
|
136
155
|
} else if (event.delta?.type === "input_json_delta") {
|
|
137
156
|
yield {
|
|
@@ -143,7 +162,9 @@ export class AnthropicProvider implements ModelProvider {
|
|
|
143
162
|
break;
|
|
144
163
|
}
|
|
145
164
|
case "content_block_stop": {
|
|
146
|
-
if (
|
|
165
|
+
if (inThinkingBlock) {
|
|
166
|
+
inThinkingBlock = false;
|
|
167
|
+
} else if (currentToolId) {
|
|
147
168
|
yield { type: "tool_call_end", toolCallId: currentToolId };
|
|
148
169
|
currentToolId = "";
|
|
149
170
|
}
|
|
@@ -288,13 +309,16 @@ export class AnthropicProvider implements ModelProvider {
|
|
|
288
309
|
}));
|
|
289
310
|
}
|
|
290
311
|
|
|
291
|
-
private normalizeResponse(response: any): ModelResponse {
|
|
312
|
+
private normalizeResponse(response: any): ModelResponse & { thinking?: string } {
|
|
292
313
|
const toolCalls: ToolCall[] = [];
|
|
293
314
|
let textContent = "";
|
|
315
|
+
let thinkingContent = "";
|
|
294
316
|
|
|
295
317
|
for (const block of response.content ?? []) {
|
|
296
318
|
if (block.type === "text") {
|
|
297
319
|
textContent += block.text;
|
|
320
|
+
} else if (block.type === "thinking") {
|
|
321
|
+
thinkingContent += block.thinking;
|
|
298
322
|
} else if (block.type === "tool_use") {
|
|
299
323
|
toolCalls.push({
|
|
300
324
|
id: block.id,
|
|
@@ -316,7 +340,7 @@ export class AnthropicProvider implements ModelProvider {
|
|
|
316
340
|
if (response.stop_reason === "tool_use") finishReason = "tool_calls";
|
|
317
341
|
else if (response.stop_reason === "max_tokens") finishReason = "length";
|
|
318
342
|
|
|
319
|
-
|
|
343
|
+
const result: ModelResponse & { thinking?: string } = {
|
|
320
344
|
message: {
|
|
321
345
|
role: "assistant",
|
|
322
346
|
content: textContent || null,
|
|
@@ -326,5 +350,11 @@ export class AnthropicProvider implements ModelProvider {
|
|
|
326
350
|
finishReason,
|
|
327
351
|
raw: response,
|
|
328
352
|
};
|
|
353
|
+
|
|
354
|
+
if (thinkingContent) {
|
|
355
|
+
result.thinking = thinkingContent;
|
|
356
|
+
}
|
|
357
|
+
|
|
358
|
+
return result;
|
|
329
359
|
}
|
|
330
360
|
}
|
|
@@ -82,6 +82,12 @@ export class GoogleProvider implements ModelProvider {
|
|
|
82
82
|
}
|
|
83
83
|
}
|
|
84
84
|
|
|
85
|
+
if (options?.reasoning?.enabled) {
|
|
86
|
+
config.thinkingConfig = {
|
|
87
|
+
thinkingBudget: options.reasoning.budgetTokens ?? 10000,
|
|
88
|
+
};
|
|
89
|
+
}
|
|
90
|
+
|
|
85
91
|
const params: Record<string, unknown> = {
|
|
86
92
|
model: this.modelId,
|
|
87
93
|
contents,
|
|
@@ -116,6 +122,12 @@ export class GoogleProvider implements ModelProvider {
|
|
|
116
122
|
if (options?.topP !== undefined) config.topP = options.topP;
|
|
117
123
|
if (options?.stop) config.stopSequences = options.stop;
|
|
118
124
|
|
|
125
|
+
if (options?.reasoning?.enabled) {
|
|
126
|
+
config.thinkingConfig = {
|
|
127
|
+
thinkingBudget: options.reasoning.budgetTokens ?? 10000,
|
|
128
|
+
};
|
|
129
|
+
}
|
|
130
|
+
|
|
119
131
|
const params: Record<string, unknown> = {
|
|
120
132
|
model: this.modelId,
|
|
121
133
|
contents,
|
|
@@ -142,7 +154,9 @@ export class GoogleProvider implements ModelProvider {
|
|
|
142
154
|
if (!candidate?.content?.parts) continue;
|
|
143
155
|
|
|
144
156
|
for (const part of candidate.content.parts) {
|
|
145
|
-
if (part.
|
|
157
|
+
if (part.thought) {
|
|
158
|
+
yield { type: "thinking", text: part.text ?? "" };
|
|
159
|
+
} else if (part.text) {
|
|
146
160
|
yield { type: "text", text: part.text };
|
|
147
161
|
}
|
|
148
162
|
|
|
@@ -313,16 +327,19 @@ export class GoogleProvider implements ModelProvider {
|
|
|
313
327
|
return cleaned;
|
|
314
328
|
}
|
|
315
329
|
|
|
316
|
-
private normalizeResponse(response: any): ModelResponse {
|
|
330
|
+
private normalizeResponse(response: any): ModelResponse & { thinking?: string } {
|
|
317
331
|
const candidate = response.candidates?.[0];
|
|
318
332
|
const parts = candidate?.content?.parts ?? [];
|
|
319
333
|
|
|
320
334
|
let textContent = "";
|
|
335
|
+
let thinkingContent = "";
|
|
321
336
|
const toolCalls: ToolCall[] = [];
|
|
322
337
|
let toolCallCounter = 0;
|
|
323
338
|
|
|
324
339
|
for (const part of parts) {
|
|
325
|
-
if (part.text) {
|
|
340
|
+
if (part.thought && part.text) {
|
|
341
|
+
thinkingContent += part.text;
|
|
342
|
+
} else if (part.text) {
|
|
326
343
|
textContent += part.text;
|
|
327
344
|
}
|
|
328
345
|
if (part.functionCall) {
|
|
@@ -334,10 +351,12 @@ export class GoogleProvider implements ModelProvider {
|
|
|
334
351
|
}
|
|
335
352
|
}
|
|
336
353
|
|
|
354
|
+
const thinkingTokens = response.usageMetadata?.thoughtsTokenCount ?? 0;
|
|
337
355
|
const usage: TokenUsage = {
|
|
338
356
|
promptTokens: response.usageMetadata?.promptTokenCount ?? 0,
|
|
339
357
|
completionTokens: response.usageMetadata?.candidatesTokenCount ?? 0,
|
|
340
358
|
totalTokens: response.usageMetadata?.totalTokenCount ?? 0,
|
|
359
|
+
...(thinkingTokens > 0 ? { reasoningTokens: thinkingTokens } : {}),
|
|
341
360
|
};
|
|
342
361
|
|
|
343
362
|
let finishReason: ModelResponse["finishReason"] = "stop";
|
|
@@ -347,7 +366,7 @@ export class GoogleProvider implements ModelProvider {
|
|
|
347
366
|
else if (candidate?.finishReason === "SAFETY")
|
|
348
367
|
finishReason = "content_filter";
|
|
349
368
|
|
|
350
|
-
|
|
369
|
+
const result: ModelResponse & { thinking?: string } = {
|
|
351
370
|
message: {
|
|
352
371
|
role: "assistant",
|
|
353
372
|
content: textContent || null,
|
|
@@ -357,5 +376,11 @@ export class GoogleProvider implements ModelProvider {
|
|
|
357
376
|
finishReason,
|
|
358
377
|
raw: response,
|
|
359
378
|
};
|
|
379
|
+
|
|
380
|
+
if (thinkingContent) {
|
|
381
|
+
result.thinking = thinkingContent;
|
|
382
|
+
}
|
|
383
|
+
|
|
384
|
+
return result;
|
|
360
385
|
}
|
|
361
386
|
}
|
|
@@ -72,8 +72,12 @@ export class OpenAIProvider implements ModelProvider {
|
|
|
72
72
|
messages: this.toOpenAIMessages(messages),
|
|
73
73
|
};
|
|
74
74
|
|
|
75
|
-
if (options?.
|
|
76
|
-
params.
|
|
75
|
+
if (options?.reasoning?.enabled) {
|
|
76
|
+
params.reasoning_effort = options.reasoning.effort ?? "medium";
|
|
77
|
+
} else {
|
|
78
|
+
if (options?.temperature !== undefined)
|
|
79
|
+
params.temperature = options.temperature;
|
|
80
|
+
}
|
|
77
81
|
if (options?.maxTokens !== undefined)
|
|
78
82
|
params.max_tokens = options.maxTokens;
|
|
79
83
|
if (options?.topP !== undefined) params.top_p = options.topP;
|
|
@@ -98,8 +102,12 @@ export class OpenAIProvider implements ModelProvider {
|
|
|
98
102
|
stream: true,
|
|
99
103
|
};
|
|
100
104
|
|
|
101
|
-
if (options?.
|
|
102
|
-
params.
|
|
105
|
+
if (options?.reasoning?.enabled) {
|
|
106
|
+
params.reasoning_effort = options.reasoning.effort ?? "medium";
|
|
107
|
+
} else {
|
|
108
|
+
if (options?.temperature !== undefined)
|
|
109
|
+
params.temperature = options.temperature;
|
|
110
|
+
}
|
|
103
111
|
if (options?.maxTokens !== undefined)
|
|
104
112
|
params.max_tokens = options.maxTokens;
|
|
105
113
|
if (options?.topP !== undefined) params.top_p = options.topP;
|
|
@@ -165,11 +173,16 @@ export class OpenAIProvider implements ModelProvider {
|
|
|
165
173
|
}
|
|
166
174
|
}
|
|
167
175
|
|
|
176
|
+
if (delta?.reasoning_content) {
|
|
177
|
+
yield { type: "thinking", text: delta.reasoning_content };
|
|
178
|
+
}
|
|
179
|
+
|
|
168
180
|
if (choice.finish_reason) {
|
|
169
181
|
for (const [, tc] of activeToolCalls) {
|
|
170
182
|
yield { type: "tool_call_end", toolCallId: tc.id };
|
|
171
183
|
}
|
|
172
184
|
|
|
185
|
+
const reasoningTkns = chunk.usage?.completion_tokens_details?.reasoning_tokens ?? 0;
|
|
173
186
|
yield {
|
|
174
187
|
type: "finish",
|
|
175
188
|
finishReason:
|
|
@@ -181,6 +194,7 @@ export class OpenAIProvider implements ModelProvider {
|
|
|
181
194
|
promptTokens: chunk.usage.prompt_tokens ?? 0,
|
|
182
195
|
completionTokens: chunk.usage.completion_tokens ?? 0,
|
|
183
196
|
totalTokens: chunk.usage.total_tokens ?? 0,
|
|
197
|
+
...(reasoningTkns > 0 ? { reasoningTokens: reasoningTkns } : {}),
|
|
184
198
|
}
|
|
185
199
|
: undefined,
|
|
186
200
|
};
|
|
@@ -287,7 +301,7 @@ export class OpenAIProvider implements ModelProvider {
|
|
|
287
301
|
}));
|
|
288
302
|
}
|
|
289
303
|
|
|
290
|
-
private normalizeResponse(response: any): ModelResponse {
|
|
304
|
+
private normalizeResponse(response: any): ModelResponse & { thinking?: string } {
|
|
291
305
|
const choice = response.choices[0];
|
|
292
306
|
const msg = choice.message;
|
|
293
307
|
|
|
@@ -297,10 +311,12 @@ export class OpenAIProvider implements ModelProvider {
|
|
|
297
311
|
arguments: JSON.parse(tc.function.arguments || "{}"),
|
|
298
312
|
}));
|
|
299
313
|
|
|
314
|
+
const reasoningTokens = response.usage?.completion_tokens_details?.reasoning_tokens ?? 0;
|
|
300
315
|
const usage: TokenUsage = {
|
|
301
316
|
promptTokens: response.usage?.prompt_tokens ?? 0,
|
|
302
317
|
completionTokens: response.usage?.completion_tokens ?? 0,
|
|
303
318
|
totalTokens: response.usage?.total_tokens ?? 0,
|
|
319
|
+
...(reasoningTokens > 0 ? { reasoningTokens } : {}),
|
|
304
320
|
};
|
|
305
321
|
|
|
306
322
|
let finishReason: ModelResponse["finishReason"] = "stop";
|
|
@@ -309,7 +325,7 @@ export class OpenAIProvider implements ModelProvider {
|
|
|
309
325
|
else if (choice.finish_reason === "content_filter")
|
|
310
326
|
finishReason = "content_filter";
|
|
311
327
|
|
|
312
|
-
|
|
328
|
+
const result: ModelResponse & { thinking?: string } = {
|
|
313
329
|
message: {
|
|
314
330
|
role: "assistant",
|
|
315
331
|
content: msg.content ?? null,
|
|
@@ -319,5 +335,11 @@ export class OpenAIProvider implements ModelProvider {
|
|
|
319
335
|
finishReason,
|
|
320
336
|
raw: response,
|
|
321
337
|
};
|
|
338
|
+
|
|
339
|
+
if (msg.reasoning_content) {
|
|
340
|
+
result.thinking = msg.reasoning_content;
|
|
341
|
+
}
|
|
342
|
+
|
|
343
|
+
return result;
|
|
322
344
|
}
|
|
323
345
|
}
|
|
@@ -108,6 +108,12 @@ export class VertexAIProvider implements ModelProvider {
|
|
|
108
108
|
}
|
|
109
109
|
}
|
|
110
110
|
|
|
111
|
+
if (options?.reasoning?.enabled) {
|
|
112
|
+
config.thinkingConfig = {
|
|
113
|
+
thinkingBudget: options.reasoning.budgetTokens ?? 10000,
|
|
114
|
+
};
|
|
115
|
+
}
|
|
116
|
+
|
|
111
117
|
const params: Record<string, unknown> = {
|
|
112
118
|
model: this.modelId,
|
|
113
119
|
contents,
|
|
@@ -140,6 +146,12 @@ export class VertexAIProvider implements ModelProvider {
|
|
|
140
146
|
if (options?.topP !== undefined) config.topP = options.topP;
|
|
141
147
|
if (options?.stop) config.stopSequences = options.stop;
|
|
142
148
|
|
|
149
|
+
if (options?.reasoning?.enabled) {
|
|
150
|
+
config.thinkingConfig = {
|
|
151
|
+
thinkingBudget: options.reasoning.budgetTokens ?? 10000,
|
|
152
|
+
};
|
|
153
|
+
}
|
|
154
|
+
|
|
143
155
|
const params: Record<string, unknown> = {
|
|
144
156
|
model: this.modelId,
|
|
145
157
|
contents,
|
|
@@ -163,7 +175,9 @@ export class VertexAIProvider implements ModelProvider {
|
|
|
163
175
|
if (!candidate?.content?.parts) continue;
|
|
164
176
|
|
|
165
177
|
for (const part of candidate.content.parts) {
|
|
166
|
-
if (part.
|
|
178
|
+
if (part.thought) {
|
|
179
|
+
yield { type: "thinking", text: part.text ?? "" };
|
|
180
|
+
} else if (part.text) {
|
|
167
181
|
yield { type: "text", text: part.text };
|
|
168
182
|
}
|
|
169
183
|
|
|
@@ -354,16 +368,21 @@ export class VertexAIProvider implements ModelProvider {
|
|
|
354
368
|
return cleaned;
|
|
355
369
|
}
|
|
356
370
|
|
|
357
|
-
private normalizeResponse(response: any): ModelResponse {
|
|
371
|
+
private normalizeResponse(response: any): ModelResponse & { thinking?: string } {
|
|
358
372
|
const candidate = response.candidates?.[0];
|
|
359
373
|
const parts = candidate?.content?.parts ?? [];
|
|
360
374
|
|
|
361
375
|
let textContent = "";
|
|
376
|
+
let thinkingContent = "";
|
|
362
377
|
const toolCalls: ToolCall[] = [];
|
|
363
378
|
let toolCallCounter = 0;
|
|
364
379
|
|
|
365
380
|
for (const part of parts) {
|
|
366
|
-
if (part.
|
|
381
|
+
if (part.thought && part.text) {
|
|
382
|
+
thinkingContent += part.text;
|
|
383
|
+
} else if (part.text) {
|
|
384
|
+
textContent += part.text;
|
|
385
|
+
}
|
|
367
386
|
if (part.functionCall) {
|
|
368
387
|
toolCalls.push({
|
|
369
388
|
id: `vertex_tc_${toolCallCounter++}`,
|
|
@@ -373,10 +392,12 @@ export class VertexAIProvider implements ModelProvider {
|
|
|
373
392
|
}
|
|
374
393
|
}
|
|
375
394
|
|
|
395
|
+
const thinkingTokens = response.usageMetadata?.thoughtsTokenCount ?? 0;
|
|
376
396
|
const usage: TokenUsage = {
|
|
377
397
|
promptTokens: response.usageMetadata?.promptTokenCount ?? 0,
|
|
378
398
|
completionTokens: response.usageMetadata?.candidatesTokenCount ?? 0,
|
|
379
399
|
totalTokens: response.usageMetadata?.totalTokenCount ?? 0,
|
|
400
|
+
...(thinkingTokens > 0 ? { reasoningTokens: thinkingTokens } : {}),
|
|
380
401
|
};
|
|
381
402
|
|
|
382
403
|
let finishReason: ModelResponse["finishReason"] = "stop";
|
|
@@ -386,7 +407,7 @@ export class VertexAIProvider implements ModelProvider {
|
|
|
386
407
|
else if (candidate?.finishReason === "SAFETY")
|
|
387
408
|
finishReason = "content_filter";
|
|
388
409
|
|
|
389
|
-
|
|
410
|
+
const result: ModelResponse & { thinking?: string } = {
|
|
390
411
|
message: {
|
|
391
412
|
role: "assistant",
|
|
392
413
|
content: textContent || null,
|
|
@@ -396,5 +417,11 @@ export class VertexAIProvider implements ModelProvider {
|
|
|
396
417
|
finishReason,
|
|
397
418
|
raw: response,
|
|
398
419
|
};
|
|
420
|
+
|
|
421
|
+
if (thinkingContent) {
|
|
422
|
+
result.thinking = thinkingContent;
|
|
423
|
+
}
|
|
424
|
+
|
|
425
|
+
return result;
|
|
399
426
|
}
|
|
400
427
|
}
|
package/src/models/types.ts
CHANGED
|
@@ -64,6 +64,7 @@ export interface TokenUsage {
|
|
|
64
64
|
promptTokens: number;
|
|
65
65
|
completionTokens: number;
|
|
66
66
|
totalTokens: number;
|
|
67
|
+
reasoningTokens?: number;
|
|
67
68
|
}
|
|
68
69
|
|
|
69
70
|
// ── Model response ────────────────────────────────────────────────────────
|
|
@@ -77,6 +78,7 @@ export interface ModelResponse {
|
|
|
77
78
|
|
|
78
79
|
export type StreamChunk =
|
|
79
80
|
| { type: "text"; text: string }
|
|
81
|
+
| { type: "thinking"; text: string }
|
|
80
82
|
| { type: "tool_call_start"; toolCall: { id: string; name: string } }
|
|
81
83
|
| { type: "tool_call_delta"; toolCallId: string; argumentsDelta: string }
|
|
82
84
|
| { type: "tool_call_end"; toolCallId: string }
|
|
@@ -84,6 +86,14 @@ export type StreamChunk =
|
|
|
84
86
|
|
|
85
87
|
// ── Model config ──────────────────────────────────────────────────────────
|
|
86
88
|
|
|
89
|
+
/**
 * Extended thinking / reasoning options, passed as `ModelConfig.reasoning`.
 * Providers only act on this when `enabled` is true; each provider reads the
 * field that applies to it and ignores the other.
 */
export interface ReasoningConfig {
  /** Turns reasoning on for the request. */
  enabled: boolean;
  /**
   * Reasoning effort for OpenAI o-series models. Sent as `reasoning_effort`;
   * the OpenAI provider uses `"medium"` when this is omitted.
   */
  effort?: "low" | "medium" | "high";
  /**
   * Token budget for thinking (Anthropic / Gemini). Sent as `budget_tokens`
   * (Anthropic) or `thinkingBudget` (Google, Vertex); those providers use
   * 10000 when this is omitted.
   */
  budgetTokens?: number;
}
|
|
96
|
+
|
|
87
97
|
export interface ModelConfig {
|
|
88
98
|
temperature?: number;
|
|
89
99
|
maxTokens?: number;
|
|
@@ -92,6 +102,8 @@ export interface ModelConfig {
|
|
|
92
102
|
responseFormat?: "text" | "json" | { type: "json_schema"; schema: Record<string, unknown>; name?: string };
|
|
93
103
|
/** Per-request API key override. When provided, the provider uses this key instead of the one set at construction. */
|
|
94
104
|
apiKey?: string;
|
|
105
|
+
/** Enable extended thinking / reasoning. */
|
|
106
|
+
reasoning?: ReasoningConfig;
|
|
95
107
|
}
|
|
96
108
|
|
|
97
109
|
// ── Helpers ───────────────────────────────────────────────────────────────
|
package/src/tools/define-tool.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import type { z } from "zod";
|
|
2
2
|
import type { RunContext } from "../agent/run-context.js";
|
|
3
|
-
import type { ToolDef, ToolResult } from "./types.js";
|
|
3
|
+
import type { ToolDef, ToolResult, ToolCacheConfig } from "./types.js";
|
|
4
4
|
|
|
5
5
|
export function defineTool<T extends z.ZodObject<any>>(config: {
|
|
6
6
|
name: string;
|
|
@@ -10,11 +10,13 @@ export function defineTool<T extends z.ZodObject<any>>(config: {
|
|
|
10
10
|
args: z.infer<T>,
|
|
11
11
|
ctx: RunContext
|
|
12
12
|
) => Promise<string | ToolResult>;
|
|
13
|
+
cache?: ToolCacheConfig;
|
|
13
14
|
}): ToolDef {
|
|
14
15
|
return {
|
|
15
16
|
name: config.name,
|
|
16
17
|
description: config.description,
|
|
17
18
|
parameters: config.parameters,
|
|
18
19
|
execute: config.execute as ToolDef["execute"],
|
|
20
|
+
cache: config.cache,
|
|
19
21
|
};
|
|
20
22
|
}
|