@strayl/agent 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +5 -1
- package/skills/api-creation/SKILL.md +631 -0
- package/skills/authentication/SKILL.md +294 -0
- package/skills/frontend-design/SKILL.md +108 -0
- package/skills/landing-creation/SKILL.md +125 -0
- package/skills/reference/SKILL.md +149 -0
- package/skills/web-application-creation/SKILL.md +231 -0
- package/src/agent.ts +0 -465
- package/src/checkpoints/manager.ts +0 -112
- package/src/context/manager.ts +0 -185
- package/src/context/summarizer.ts +0 -104
- package/src/context/trim.ts +0 -55
- package/src/emitter.ts +0 -14
- package/src/hitl/manager.ts +0 -77
- package/src/hitl/transport.ts +0 -13
- package/src/index.ts +0 -116
- package/src/llm/client.ts +0 -276
- package/src/llm/gemini-native.ts +0 -307
- package/src/llm/models.ts +0 -64
- package/src/middleware/compose.ts +0 -24
- package/src/middleware/credential-scrubbing.ts +0 -31
- package/src/middleware/forbidden-packages.ts +0 -107
- package/src/middleware/plan-mode.ts +0 -143
- package/src/middleware/prompt-caching.ts +0 -21
- package/src/middleware/tool-compression.ts +0 -25
- package/src/middleware/tool-filter.ts +0 -13
- package/src/prompts/implementation-mode.md +0 -16
- package/src/prompts/plan-mode.md +0 -51
- package/src/prompts/system.ts +0 -173
- package/src/skills/loader.ts +0 -53
- package/src/stdin-listener.ts +0 -61
- package/src/subagents/definitions.ts +0 -72
- package/src/subagents/manager.ts +0 -161
- package/src/todos/manager.ts +0 -61
- package/src/tools/builtin/delete.ts +0 -29
- package/src/tools/builtin/edit.ts +0 -74
- package/src/tools/builtin/exec.ts +0 -216
- package/src/tools/builtin/glob.ts +0 -104
- package/src/tools/builtin/grep.ts +0 -115
- package/src/tools/builtin/ls.ts +0 -54
- package/src/tools/builtin/move.ts +0 -31
- package/src/tools/builtin/read.ts +0 -69
- package/src/tools/builtin/write.ts +0 -42
- package/src/tools/executor.ts +0 -51
- package/src/tools/external/database.ts +0 -285
- package/src/tools/external/enter-plan-mode.ts +0 -34
- package/src/tools/external/generate-image.ts +0 -110
- package/src/tools/external/hitl-tools.ts +0 -118
- package/src/tools/external/preview.ts +0 -28
- package/src/tools/external/proxy-fetch.ts +0 -51
- package/src/tools/external/task.ts +0 -38
- package/src/tools/external/wait.ts +0 -20
- package/src/tools/external/web-fetch.ts +0 -57
- package/src/tools/external/web-search.ts +0 -61
- package/src/tools/registry.ts +0 -36
- package/src/tools/zod-to-json-schema.ts +0 -86
- package/src/types.ts +0 -151
package/src/llm/client.ts
DELETED
|
@@ -1,276 +0,0 @@
|
|
|
1
|
-
import OpenAI from "openai";
|
|
2
|
-
import type { Message } from "../types.js";
|
|
3
|
-
import { resolveModel, getModelLimits } from "./models.js";
|
|
4
|
-
import { streamGeminiNative } from "./gemini-native.js";
|
|
5
|
-
|
|
6
|
-
/**
 * One event produced while streaming a model response.
 *
 * The `type` field is the discriminant:
 * - `text` / `reasoning`: a fragment of text.
 * - `tool_call_delta`: a fragment of a tool call's argument string, keyed by `index`.
 * - `tool_call_complete`: a tool call with its full argument string.
 * - `usage`: token counts, plus an optional cost.
 */
export type LLMChunk =
  | {
      type: "text";
      text: string;
    }
  | {
      type: "reasoning";
      text: string;
    }
  | {
      type: "tool_call_delta";
      index: number;
      id?: string;
      name?: string;
      arguments: string;
    }
  | {
      type: "tool_call_complete";
      id: string;
      name: string;
      arguments: string;
    }
  | {
      type: "usage";
      input_tokens: number;
      output_tokens: number;
      cost?: number;
    };
|
|
12
|
-
|
|
13
|
-
// Provider routing is handled by the proxy (api.strayl.dev).
|
|
14
|
-
// The SDK sends all requests to STRAYL_API_URL with a session token.
|
|
15
|
-
// The proxy adds real API keys, routes to the correct provider, and checks billing.
|
|
16
|
-
//
|
|
17
|
-
// For local development without a proxy, set STRAYL_LLM_DIRECT=1 and provide
|
|
18
|
-
// provider-specific env vars (OPENROUTER_API_KEY, GOOGLE_GENERATIVE_AI_API_KEY).
|
|
19
|
-
|
|
20
|
-
/** How requests reach a provider: via the Strayl proxy, or straight to the provider. */
type Mode = "direct" | "proxy";

/** Model ids that are treated as Gemini models when running in direct mode. */
const GEMINI_MODELS = new Set<string>([
  "gemini-3-flash-preview",
  "google/gemini-3.1-pro-preview",
]);
|
|
23
|
-
|
|
24
|
-
/** Construction options for {@link LLMClient}. */
export interface LLMClientConfig {
  /** Tier name, alias, or full model id; resolved through `resolveModel`. */
  modelTier: string;

  /**
   * Environment-style settings. The client reads STRAYL_LLM_DIRECT,
   * STRAYL_API_URL, STRAYL_SESSION_TOKEN, GOOGLE_GENERATIVE_AI_API_KEY and
   * OPENROUTER_API_KEY from this map.
   */
  env: Record<string, string>;

  /** Sent to the proxy as the `X-Strayl-Session` header (empty string when absent). */
  sessionId?: string;
}
|
|
29
|
-
|
|
30
|
-
/**
 * Streaming chat-completion client.
 *
 * The constructor resolves `config.modelTier` to a concrete model id and its
 * token limits, then builds an OpenAI-compatible client:
 * - "proxy" mode (default): requests go to `${STRAYL_API_URL}/v1` with the
 *   session token as the API key and `X-Strayl-*` headers.
 * - "direct" mode (`STRAYL_LLM_DIRECT === "1"`): requests go to Gemini's
 *   OpenAI-compatible endpoint (Gemini model + Google key) or to OpenRouter.
 *
 * `stream()` yields {@link LLMChunk} events for a single model turn.
 */
export class LLMClient {
  /** Request-body fields kept when talking to Gemini's OpenAI-compatible endpoint. */
  private static readonly GEMINI_ALLOWED_FIELDS: ReadonlySet<string> = new Set([
    "model", "messages", "tools", "tool_choice",
    "max_tokens", "max_completion_tokens",
    "temperature", "top_p", "n", "stream", "stop",
    "presence_penalty", "frequency_penalty",
    "response_format", "reasoning_effort", "seed", "google",
  ]);

  /** Delta fields that different providers use for reasoning text, in emission order. */
  private static readonly REASONING_FIELDS = ["thought", "reasoning_content", "reasoning"] as const;

  /** Total attempts (first try included) for 429/503 responses. */
  private static readonly MAX_RETRIES = 4;

  /** Upper bound on a single wait derived from `Retry-After`. */
  private static readonly MAX_RETRY_DELAY_MS = 30_000;

  readonly modelName: string;
  readonly maxTokens: number;
  readonly maxInputTokens: number;
  private client: OpenAI;
  private mode: Mode;
  private geminiApiKey?: string;
  private sessionId?: string;

  constructor(config: LLMClientConfig) {
    this.modelName = resolveModel(config.modelTier);
    const limits = getModelLimits(this.modelName);
    this.maxTokens = limits.maxTokens;
    this.maxInputTokens = limits.maxInputTokens;
    this.sessionId = config.sessionId;

    // Direct mode: local dev with raw API keys (no proxy).
    // Proxy mode: production, SDK has no API keys.
    this.mode = config.env.STRAYL_LLM_DIRECT === "1" ? "direct" : "proxy";
    this.geminiApiKey = config.env.GOOGLE_GENERATIVE_AI_API_KEY;

    this.client = this.createClient(config);
  }

  /** Builds the OpenAI-compatible client for the selected mode and model. */
  private createClient(config: LLMClientConfig): OpenAI {
    if (this.mode === "proxy") {
      // All requests go through the Strayl API proxy.
      // Proxy handles: provider routing, API key injection, billing, rate limiting.
      // Auth: STRAYL_SESSION_TOKEN passed as Bearer token.
      // Model header: X-Strayl-Model tells proxy which provider to route to.
      const apiUrl = config.env.STRAYL_API_URL ?? "https://api.strayl.dev";
      return new OpenAI({
        apiKey: config.env.STRAYL_SESSION_TOKEN ?? "no-token",
        baseURL: `${apiUrl}/v1`,
        defaultHeaders: {
          "X-Strayl-Model": this.modelName,
          "X-Strayl-Session": config.sessionId ?? "",
        },
      });
    }

    // Direct mode — for local development.
    if (GEMINI_MODELS.has(this.modelName) && config.env.GOOGLE_GENERATIVE_AI_API_KEY) {
      return new OpenAI({
        apiKey: config.env.GOOGLE_GENERATIVE_AI_API_KEY,
        baseURL: "https://generativelanguage.googleapis.com/v1beta/openai/",
        fetch: this.directGeminiFetch.bind(this),
      });
    }

    return new OpenAI({
      apiKey: config.env.OPENROUTER_API_KEY,
      baseURL: "https://openrouter.ai/api/v1",
    });
  }

  /**
   * Computes how long to wait before retrying a 429/503 response.
   *
   * `Retry-After` may be a number of seconds or an HTTP-date. An absent or
   * unparseable value falls back to exponential backoff (1s, 2s, 4s, ...)
   * instead of producing NaN, which `setTimeout` would treat as "no delay".
   */
  private static retryDelayMs(retryAfter: string | null, attempt: number): number {
    const backoff = 1000 * Math.pow(2, attempt - 1);
    const raw = retryAfter?.trim();
    if (!raw) return backoff;

    if (/^\d+$/.test(raw)) {
      return Math.min(Number(raw) * 1000, LLMClient.MAX_RETRY_DELAY_MS);
    }

    const retryAt = Date.parse(raw);
    if (!Number.isNaN(retryAt)) {
      return Math.min(Math.max(retryAt - Date.now(), 0), LLMClient.MAX_RETRY_DELAY_MS);
    }
    return backoff;
  }

  /**
   * Best-effort read of an error response body as text, gunzipping it when
   * the payload starts with the gzip magic bytes. Returns `undefined` when the
   * body cannot be read or decoded, so the caller can hand back the raw response.
   */
  private static async readErrorText(response: Response): Promise<string | undefined> {
    try {
      const bytes = new Uint8Array(await response.clone().arrayBuffer());
      if (bytes[0] === 0x1f && bytes[1] === 0x8b) {
        const gunzip = new DecompressionStream("gzip");
        return await new Response(new Blob([bytes]).stream().pipeThrough(gunzip)).text();
      }
      return new TextDecoder().decode(bytes);
    } catch {
      return undefined;
    }
  }

  /** Maps an OpenAI-style usage object (optionally carrying `cost`) to a usage chunk. */
  private static usageChunk(usage: {
    prompt_tokens?: number;
    completion_tokens?: number;
    cost?: number;
  }): LLMChunk {
    return {
      type: "usage",
      input_tokens: usage.prompt_tokens ?? 0,
      output_tokens: usage.completion_tokens ?? 0,
      cost: usage.cost,
    };
  }

  /**
   * `fetch` replacement used for Gemini's OpenAI-compatible endpoint.
   *
   * For JSON-object string bodies it drops fields outside the allow-list,
   * forces `reasoning_effort` to "medium", and adds a placeholder
   * `thought_signature` to assistant tool calls that lack one. 429/503
   * responses are retried; any other non-OK response is turned into an
   * `Error("Gemini API <status>: <body>")` when its body can be read.
   * Requests whose body is not a JSON object string are forwarded unchanged.
   */
  private async directGeminiFetch(url: RequestInfo | URL, init?: RequestInit): Promise<Response> {
    if (!init?.body || typeof init.body !== "string") return fetch(url, init);

    let parsed: unknown;
    try {
      parsed = JSON.parse(init.body);
    } catch {
      return fetch(url, init);
    }
    // Only a JSON object can be filtered and augmented; pass anything else through.
    if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
      return fetch(url, init);
    }
    const body = parsed as Record<string, unknown>;

    for (const key of Object.keys(body)) {
      if (!LLMClient.GEMINI_ALLOWED_FIELDS.has(key)) delete body[key];
    }

    body.reasoning_effort = "medium";

    // Inject thought_signature bypass for multi-turn tool calling.
    if (Array.isArray(body.messages)) {
      for (const msg of body.messages as Record<string, unknown>[]) {
        if (msg.role !== "assistant" || !Array.isArray(msg.tool_calls)) continue;
        for (const tc of msg.tool_calls as Record<string, unknown>[]) {
          const extra = tc.extra_content as Record<string, Record<string, unknown>> | undefined;
          if (extra?.google?.thought_signature) continue;
          tc.extra_content = {
            ...(extra ?? {}),
            google: {
              ...(extra?.google ?? {}),
              thought_signature: "skip_thought_signature_validator",
            },
          };
        }
      }
    }

    const modifiedInit = { ...init, body: JSON.stringify(body) };

    // Retry 429/503; the final attempt always returns or throws below.
    for (let attempt = 1; ; attempt++) {
      const response = await fetch(url, modifiedInit);
      const retryable = response.status === 429 || response.status === 503;

      if (retryable && attempt < LLMClient.MAX_RETRIES) {
        const delay = LLMClient.retryDelayMs(response.headers.get("retry-after"), attempt);
        // Discard the unused body so the connection is not held open while waiting.
        await response.body?.cancel().catch(() => undefined);
        await new Promise(resolve => setTimeout(resolve, delay));
        continue;
      }

      if (!response.ok) {
        const errorText = await LLMClient.readErrorText(response);
        if (errorText !== undefined) {
          throw new Error(`Gemini API ${response.status}: ${errorText.slice(0, 500)}`);
        }
      }
      return response;
    }
  }

  /**
   * Streams one model turn.
   *
   * In direct mode with a Gemini model and a Google key, this delegates to
   * the native Gemini stream. Otherwise it issues a streaming chat completion
   * and yields text, reasoning, tool-call deltas, completed tool calls and
   * usage chunks as they arrive.
   */
  async *stream(messages: Message[], tools: OpenAI.ChatCompletionTool[]): AsyncGenerator<LLMChunk> {
    // Native Gemini API for thinking support (direct mode only).
    if (this.mode === "direct" && GEMINI_MODELS.has(this.modelName) && this.geminiApiKey) {
      yield* streamGeminiNative(this.geminiApiKey, this.modelName, messages, tools, this.maxTokens);
      return;
    }

    const params: OpenAI.ChatCompletionCreateParamsStreaming = {
      model: this.modelName,
      messages: messages as OpenAI.ChatCompletionMessageParam[],
      tools: tools.length > 0 ? tools : undefined,
      stream: true,
      max_tokens: this.maxTokens,
    };

    // Request usage tracking (proxy passes this through to OpenRouter).
    (params as unknown as Record<string, unknown>).usage = { include: true };

    const response = await this.client.chat.completions.create(params);

    // Tool calls arrive as fragments keyed by index; accumulate until the turn finishes.
    const partialToolCalls = new Map<number, { id: string; name: string; arguments: string }>();
    let emittedToolCalls = false;

    for await (const chunk of response) {
      // Usage-only chunks may carry an empty (or missing) choices array.
      const choice = chunk.choices?.[0];
      if (!choice) {
        if (chunk.usage) yield LLMClient.usageChunk(chunk.usage);
        continue;
      }

      const delta = choice.delta;

      if (delta?.content) {
        yield { type: "text", text: delta.content };
      }

      // Reasoning content (provider-specific fields — proxy passes them through).
      const anyDelta = delta as Record<string, unknown> | undefined;
      for (const field of LLMClient.REASONING_FIELDS) {
        const value = anyDelta?.[field];
        if (value && typeof value === "string") {
          yield { type: "reasoning", text: value };
        }
      }

      for (const tc of delta?.tool_calls ?? []) {
        const idx = tc.index;
        let partial = partialToolCalls.get(idx);
        if (!partial) {
          partial = { id: "", name: "", arguments: "" };
          partialToolCalls.set(idx, partial);
        }

        if (tc.id) partial.id = tc.id;
        if (tc.function?.name) partial.name = tc.function.name;

        const fragment = tc.function?.arguments;
        if (fragment) {
          partial.arguments += fragment;
          yield {
            type: "tool_call_delta",
            index: idx,
            id: partial.id,
            name: partial.name,
            arguments: fragment,
          };
        }
      }

      const finished = choice.finish_reason === "tool_calls" || choice.finish_reason === "stop";
      if (finished && !emittedToolCalls) {
        emittedToolCalls = true;
        for (const partial of partialToolCalls.values()) {
          if (partial.id && partial.name) {
            yield { type: "tool_call_complete", id: partial.id, name: partial.name, arguments: partial.arguments };
          }
        }
      }

      if (chunk.usage) yield LLMClient.usageChunk(chunk.usage);
    }

    // Stream ended without a "tool_calls"/"stop" finish reason: flush whatever
    // was accumulated. Unlike the in-loop path this also requires a non-empty
    // argument string.
    // NOTE(review): presumably to drop calls cut off before any arguments arrived — confirm.
    if (!emittedToolCalls) {
      for (const partial of partialToolCalls.values()) {
        if (partial.id && partial.name && partial.arguments) {
          yield { type: "tool_call_complete", id: partial.id, name: partial.name, arguments: partial.arguments };
        }
      }
    }
  }
}
|
package/src/llm/gemini-native.ts
DELETED
|
@@ -1,307 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Native Gemini API client for streaming with thinking support.
|
|
3
|
-
* Uses the generateContent endpoint directly (not OpenAI-compat).
|
|
4
|
-
*
|
|
5
|
-
* Key difference from OpenAI-compat: thinking text is returned as `thought: true`
|
|
6
|
-
* parts, and tool calls carry `thoughtSignature` that must be replayed in
|
|
7
|
-
* subsequent requests for multi-turn to work.
|
|
8
|
-
*/
|
|
9
|
-
|
|
10
|
-
import type { Message } from "../types.js";
|
|
11
|
-
import type { LLMChunk } from "./client.js";
|
|
12
|
-
import type OpenAI from "openai";
|
|
13
|
-
|
|
14
|
-
/**
 * {@link LLMChunk} extended with a Gemini-only event that carries the
 * thought signature for the tool call at `index`.
 */
export type GeminiLLMChunk =
  | LLMChunk
  | {
      type: "thought_signature";
      index: number;
      signature: string;
    };
|
|
17
|
-
|
|
18
|
-
/** One part of a Gemini content entry; every field is optional. */
interface GeminiPart {
  /** Plain or thinking text. */
  text?: string;
  /** True when `text` is thinking output rather than answer text. */
  thought?: boolean;
  /** Signature attached to a part; replayed on later requests for tool calls. */
  thoughtSignature?: string;
  /** A tool invocation requested by the model. */
  functionCall?: {
    name: string;
    args: Record<string, unknown>;
  };
  /** The result of a tool invocation, sent back to the model. */
  functionResponse?: {
    name: string;
    response: Record<string, unknown>;
  };
}
|
|
25
|
-
|
|
26
|
-
/** One conversation entry in a Gemini request: a role plus its parts. */
interface GeminiContent {
  /** Gemini distinguishes only these two roles in `contents`. */
  role: "model" | "user";
  parts: GeminiPart[];
}
|
|
30
|
-
|
|
31
|
-
/** A tool definition in the shape used for Gemini `functionDeclarations`. */
interface GeminiFunctionDeclaration {
  name: string;
  description: string;
  /** Parameter schema, copied as-is from the OpenAI tool definition when present. */
  parameters?: Record<string, unknown>;
}
|
|
36
|
-
|
|
37
|
-
// ─── Message Conversion ──────────────────────────────────────────────
|
|
38
|
-
|
|
39
|
-
/** True for non-null, non-array objects — the only shape Gemini accepts for args/response. */
function isPlainJsonObject(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

/**
 * Parses a tool call's JSON argument string. Invalid JSON, and JSON that is
 * not an object (null, arrays, scalars), yields an empty argument object.
 */
function parseToolCallArgs(raw: string): Record<string, unknown> {
  try {
    const parsed: unknown = JSON.parse(raw);
    return isPlainJsonObject(parsed) ? parsed : {};
  } catch {
    return {};
  }
}

/**
 * Turns a tool result string into a Gemini function response payload.
 * JSON objects are used directly; any other JSON value, or non-JSON text, is
 * wrapped as `{ result: ... }`.
 */
function toFunctionResponsePayload(text: string): Record<string, unknown> {
  try {
    const parsed: unknown = JSON.parse(text);
    // Gemini requires response to be a plain object (not array).
    return isPlainJsonObject(parsed) ? parsed : { result: parsed };
  } catch {
    return { result: text };
  }
}

/**
 * Converts OpenAI-style chat messages into Gemini request inputs.
 *
 * - "system" messages are collected into `systemInstruction.parts`.
 * - "user" messages become `user` contents with a single text part.
 * - "assistant" messages become `model` contents (text plus function calls);
 *   an assistant message with neither is skipped.
 * - "tool" messages become `user` contents holding `functionResponse` parts;
 *   consecutive tool messages are merged into one entry.
 *
 * We store thoughtSignature in the ToolCall id field as a suffix:
 *   "gemini_fc_<ts>_<idx>|<signature>"
 * This is a hack, but it avoids changing the Message type just for Gemini.
 *
 * @param messages Conversation history in OpenAI chat format.
 * @returns The system instruction (if any system message exists) and the contents list.
 */
function convertMessages(messages: Message[]): {
  systemInstruction: { parts: Array<{ text: string }> } | undefined;
  contents: GeminiContent[];
} {
  let systemInstruction: { parts: Array<{ text: string }> } | undefined;
  const contents: GeminiContent[] = [];

  for (const msg of messages) {
    // Array content is flattened to the concatenation of its text parts.
    const textContent = typeof msg.content === "string"
      ? msg.content
      : msg.content?.map(p => p.text ?? "").join("") ?? "";

    switch (msg.role) {
      case "system":
        if (systemInstruction) {
          systemInstruction.parts.push({ text: textContent });
        } else {
          systemInstruction = { parts: [{ text: textContent }] };
        }
        break;

      case "user":
        contents.push({ role: "user", parts: [{ text: textContent }] });
        break;

      case "assistant": {
        const parts: GeminiPart[] = [];
        if (textContent) parts.push({ text: textContent });

        for (const tc of msg.tool_calls ?? []) {
          const part: GeminiPart = {
            functionCall: { name: tc.function.name, args: parseToolCallArgs(tc.function.arguments) },
          };

          // Extract thoughtSignature from ID (stored as "id|signature").
          const pipeIdx = tc.id.indexOf("|");
          const thoughtSignature = pipeIdx >= 0 ? tc.id.slice(pipeIdx + 1) : undefined;
          if (thoughtSignature) part.thoughtSignature = thoughtSignature;

          parts.push(part);
        }

        if (parts.length > 0) contents.push({ role: "model", parts });
        break;
      }

      case "tool": {
        const responsePart: GeminiPart = {
          functionResponse: {
            name: msg.name ?? "unknown",
            response: toFunctionResponsePayload(textContent),
          },
        };

        // Tool responses are role: "user" in Gemini API; append to the
        // previous entry when it already holds function responses.
        const last = contents[contents.length - 1];
        if (last?.role === "user" && last.parts.some(p => p.functionResponse)) {
          last.parts.push(responsePart);
        } else {
          contents.push({ role: "user", parts: [responsePart] });
        }
        break;
      }
    }
  }

  return { systemInstruction, contents };
}
|
|
132
|
-
|
|
133
|
-
// ─── Tool Conversion ─────────────────────────────────────────────────
|
|
134
|
-
|
|
135
|
-
/**
 * Maps OpenAI tool definitions to a Gemini `tools` array.
 *
 * Only tools whose `type` is "function" are kept; a missing description
 * becomes an empty string. Returns `undefined` when no function tool remains.
 */
function convertTools(tools: OpenAI.ChatCompletionTool[]): Array<{ functionDeclarations: GeminiFunctionDeclaration[] }> | undefined {
  const functionDeclarations: GeminiFunctionDeclaration[] = tools.flatMap(tool =>
    tool.type === "function"
      ? [{
          name: tool.function.name,
          description: tool.function.description ?? "",
          parameters: tool.function.parameters as Record<string, unknown> | undefined,
        }]
      : [],
  );

  if (functionDeclarations.length === 0) return undefined;
  return [{ functionDeclarations }];
}
|
|
147
|
-
|
|
148
|
-
// ─── SSE Stream Parser ───────────────────────────────────────────────
|
|
149
|
-
|
|
150
|
-
/**
 * Parses a server-sent-events response body and yields the JSON payload of
 * each `data:` line.
 *
 * - Lines are split on "\n" and trimmed, so "\r\n" endings are handled.
 * - Both `data: {...}` and `data:{...}` are accepted (the space after the
 *   colon is optional in the SSE format).
 * - A `[DONE]` payload ends the stream; payloads that are not valid JSON are skipped.
 * - A final line without a trailing newline is still processed, and the
 *   decoder is flushed so a multi-byte character split across the last
 *   chunks is not lost.
 * - The body reader is cancelled when the generator finishes or is abandoned early.
 *
 * @throws TypeError when the response has no body.
 */
async function* parseSSEStream(response: Response): AsyncGenerator<Record<string, unknown>> {
  if (!response.body) {
    throw new TypeError("SSE response has no body");
  }

  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  let buffer = "";

  try {
    while (true) {
      const { done, value } = await reader.read();

      // On the last read, flush any bytes still buffered inside the decoder.
      buffer += done ? decoder.decode() : decoder.decode(value, { stream: true });

      const lines = buffer.split("\n");
      // Keep an unterminated trailing line for the next read, unless the stream ended.
      buffer = done ? "" : lines.pop() ?? "";

      for (const line of lines) {
        const trimmed = line.trim();
        if (!trimmed.startsWith("data:")) continue;

        const payload = trimmed.slice("data:".length).trimStart();
        if (payload === "[DONE]") return;

        let event: unknown;
        try {
          event = JSON.parse(payload);
        } catch {
          // Not JSON (e.g. a keep-alive or partial payload) — ignore the line.
          continue;
        }
        yield event as Record<string, unknown>;
      }

      if (done) return;
    }
  } finally {
    // Release the underlying connection even when the consumer stops early.
    await reader.cancel().catch(() => undefined);
  }
}
|
|
179
|
-
|
|
180
|
-
// ─── Main Streaming Function ─────────────────────────────────────────
|
|
181
|
-
|
|
182
|
-
/** Total attempts (first try included) for 429/503 responses from the Gemini API. */
const GEMINI_NATIVE_MAX_RETRIES = 4;

/** Upper bound on a single wait derived from `Retry-After`. */
const GEMINI_NATIVE_MAX_RETRY_DELAY_MS = 30_000;

/**
 * Computes the wait before retrying a 429/503 response.
 *
 * `Retry-After` may be a number of seconds or an HTTP-date. An absent or
 * unparseable value falls back to exponential backoff (1s, 2s, 4s, ...)
 * rather than producing NaN, which `setTimeout` would treat as "no delay".
 */
function geminiNativeRetryDelayMs(retryAfter: string | null, attempt: number): number {
  const backoff = 1000 * Math.pow(2, attempt - 1);
  const raw = retryAfter?.trim();
  if (!raw) return backoff;

  if (/^\d+$/.test(raw)) {
    return Math.min(Number(raw) * 1000, GEMINI_NATIVE_MAX_RETRY_DELAY_MS);
  }

  const retryAt = Date.parse(raw);
  if (!Number.isNaN(retryAt)) {
    return Math.min(Math.max(retryAt - Date.now(), 0), GEMINI_NATIVE_MAX_RETRY_DELAY_MS);
  }
  return backoff;
}

/**
 * Streams one model turn from the native Gemini `streamGenerateContent`
 * endpoint and yields {@link LLMChunk} events.
 *
 * Thinking parts are emitted as "reasoning", other text as "text". Each
 * function call is emitted as a "tool_call_delta" immediately followed by a
 * "tool_call_complete"; when the part carries a thought signature it is
 * appended to the generated id as "<id>|<signature>" so it survives the
 * message round-trip. Usage is emitted once, from the first chunk that has
 * both usage metadata and a finish reason.
 *
 * @param apiKey Google Generative AI API key (sent in a header, not the URL).
 * @param modelName Model id; an OpenRouter-style "google/" prefix is removed.
 * @param messages Conversation history in OpenAI chat format.
 * @param tools OpenAI-format tool definitions.
 * @param maxTokens Value for `generationConfig.maxOutputTokens`.
 * @throws Error "Gemini API <status>: <body>" for a non-OK response, or
 *   "Gemini API returned no body" when the response has no body.
 */
export async function* streamGeminiNative(
  apiKey: string,
  modelName: string,
  messages: Message[],
  tools: OpenAI.ChatCompletionTool[],
  maxTokens: number,
): AsyncGenerator<LLMChunk> {
  const { systemInstruction, contents } = convertMessages(messages);
  const geminiTools = convertTools(tools);

  const body: Record<string, unknown> = {
    contents,
    generationConfig: {
      maxOutputTokens: maxTokens,
      thinkingConfig: {
        thinkingBudget: 8192,
        includeThoughts: true,
      },
    },
  };
  if (systemInstruction) body.systemInstruction = systemInstruction;
  if (geminiTools) body.tools = geminiTools;

  // The native endpoint addresses models as "models/<id>"; ids written in the
  // OpenRouter form "google/<id>" would otherwise produce an invalid path.
  const nativeModel = modelName.startsWith("google/") ? modelName.slice("google/".length) : modelName;
  const url = `https://generativelanguage.googleapis.com/v1beta/models/${nativeModel}:streamGenerateContent?alt=sse`;

  const requestInit: RequestInit = {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      // Header auth keeps the key out of URLs, which tend to end up in logs.
      "x-goog-api-key": apiKey,
    },
    body: JSON.stringify(body),
  };

  let response: Response | null = null;

  for (let attempt = 1; attempt <= GEMINI_NATIVE_MAX_RETRIES; attempt++) {
    response = await fetch(url, requestInit);

    const retryable = response.status === 429 || response.status === 503;
    if (retryable && attempt < GEMINI_NATIVE_MAX_RETRIES) {
      const delay = geminiNativeRetryDelayMs(response.headers.get("retry-after"), attempt);
      // Discard the unused body so the connection is not held open while waiting.
      await response.body?.cancel().catch(() => undefined);
      await new Promise(resolve => setTimeout(resolve, delay));
      continue;
    }

    if (!response.ok) {
      let errorText = "";
      try {
        errorText = await response.text();
      } catch {
        // Best effort: report the status even when the body cannot be read.
      }
      throw new Error(`Gemini API ${response.status}: ${errorText.slice(0, 500)}`);
    }

    break;
  }

  if (!response?.body) {
    throw new Error("Gemini API returned no body");
  }

  let toolCallIndex = 0;
  let usageEmitted = false;

  for await (const chunk of parseSSEStream(response)) {
    const candidates = chunk.candidates as Array<{
      content?: { parts?: GeminiPart[] };
      finishReason?: string;
    }> | undefined;
    const firstCandidate = candidates?.[0];

    for (const part of firstCandidate?.content?.parts ?? []) {
      // Thinking content.
      if (part.thought && part.text) {
        yield { type: "reasoning", text: part.text };
        continue;
      }

      // Text content (skip empty text and parts that carry a thoughtSignature).
      if (part.text && !part.thought && !part.thoughtSignature) {
        yield { type: "text", text: part.text };
        continue;
      }

      // Function call — include thoughtSignature in the ID.
      if (part.functionCall) {
        const baseId = `gemini_fc_${Date.now()}_${toolCallIndex}`;
        // Encode thoughtSignature into the ID so it survives the message round-trip.
        const id = part.thoughtSignature ? `${baseId}|${part.thoughtSignature}` : baseId;
        const name = part.functionCall.name;
        const argsStr = JSON.stringify(part.functionCall.args ?? {});

        yield { type: "tool_call_delta", index: toolCallIndex, id, name, arguments: argsStr };
        yield { type: "tool_call_complete", id, name, arguments: argsStr };
        toolCallIndex++;
      }
    }

    // Usage in final chunk.
    const meta = chunk.usageMetadata as {
      promptTokenCount?: number;
      candidatesTokenCount?: number;
      thoughtsTokenCount?: number;
    } | undefined;

    if (meta && firstCandidate?.finishReason && !usageEmitted) {
      usageEmitted = true;
      yield {
        type: "usage",
        input_tokens: meta.promptTokenCount ?? 0,
        // Thinking tokens are counted as output.
        output_tokens: (meta.candidatesTokenCount ?? 0) + (meta.thoughtsTokenCount ?? 0),
      };
    }
  }
}
|
package/src/llm/models.ts
DELETED
|
@@ -1,64 +0,0 @@
|
|
|
1
|
-
/**
 * Lookup from a user-facing model selector to a concrete model id.
 * Accepts tier names, short aliases, and full ids (which map to themselves).
 */
export const MODEL_MAP: Record<string, string> = {
  // ── Tier defaults ──
  "auto": "gemini-3-flash-preview",
  "light": "x-ai/grok-4.1-fast",
  "pro": "anthropic/claude-sonnet-4.6",
  "deep": "anthropic/claude-opus-4.6",

  // ── Short aliases ──
  "haiku": "anthropic/claude-haiku-4.5",
  "kimi": "moonshotai/kimi-k2.5",
  "grok": "x-ai/grok-4.1-fast",
  "gemini": "google/gemini-3.1-pro-preview",
  "sonnet": "anthropic/claude-sonnet-4.6",
  "codex": "openai/gpt-5.3-codex",
  "opus": "anthropic/claude-opus-4.6",
  "flash": "gemini-3-flash-preview",

  // ── Full ids (identity mappings) ──
  "anthropic/claude-haiku-4.5": "anthropic/claude-haiku-4.5",
  "moonshotai/kimi-k2.5": "moonshotai/kimi-k2.5",
  "x-ai/grok-4.1-fast": "x-ai/grok-4.1-fast",
  "google/gemini-3.1-pro-preview": "google/gemini-3.1-pro-preview",
  "anthropic/claude-sonnet-4.6": "anthropic/claude-sonnet-4.6",
  "openai/gpt-5.3-codex": "openai/gpt-5.3-codex",
  "anthropic/claude-opus-4.6": "anthropic/claude-opus-4.6",
  "gemini-3-flash-preview": "gemini-3-flash-preview",
};
|
|
28
|
-
|
|
29
|
-
/** Per-model token limits, keyed by concrete model id. */
export const MODEL_LIMITS: Record<string, { maxTokens: number; maxInputTokens: number }> = {
  "gemini-3-flash-preview": {
    maxTokens: 65_536,
    maxInputTokens: 1_000_000,
  },
  "openai/gpt-5.3-codex": {
    maxTokens: 100_000,
    maxInputTokens: 280_000,
  },
  "anthropic/claude-opus-4.6": {
    maxTokens: 100_000,
    maxInputTokens: 900_000,
  },
  "anthropic/claude-haiku-4.5": {
    maxTokens: 16_384,
    maxInputTokens: 200_000,
  },
  "moonshotai/kimi-k2.5": {
    maxTokens: 65_536,
    maxInputTokens: 262_000,
  },
  "x-ai/grok-4.1-fast": {
    maxTokens: 100_000,
    maxInputTokens: 800_000,
  },
  "google/gemini-3.1-pro-preview": {
    maxTokens: 65_536,
    maxInputTokens: 900_000,
  },
  "anthropic/claude-sonnet-4.6": {
    maxTokens: 100_000,
    maxInputTokens: 900_000,
  },
};

/** Limits used for any model id that has no entry in {@link MODEL_LIMITS}. */
export const DEFAULT_LIMITS = {
  maxTokens: 100_000,
  maxInputTokens: 280_000,
};
|
|
41
|
-
|
|
42
|
-
/** Tier labels assigned to models by {@link MODEL_TO_TIER}. */
export type ModelTier = "light" | "pro" | "deep";

/** Tier of each known model id; the "auto" selector is listed as "light". */
export const MODEL_TO_TIER: Record<string, ModelTier> = {
  // light
  "auto": "light",
  "gemini-3-flash-preview": "light",
  "anthropic/claude-haiku-4.5": "light",
  "moonshotai/kimi-k2.5": "light",
  "x-ai/grok-4.1-fast": "light",

  // pro
  "google/gemini-3.1-pro-preview": "pro",
  "anthropic/claude-sonnet-4.6": "pro",
  "openai/gpt-5.3-codex": "pro",

  // deep
  "anthropic/claude-opus-4.6": "deep",
};

/** Model id exported for sub-agent use. */
export const SUBAGENT_MODEL = "x-ai/grok-4.1-fast";

/** Model id exported for summarization use. */
export const SUMMARIZATION_MODEL = "google/gemini-2.5-flash-lite";
|
|
57
|
-
|
|
58
|
-
/**
 * Resolves a tier name, alias, or model id to a concrete model id.
 *
 * Selectors found in `MODEL_MAP` are translated; anything else is returned
 * unchanged. Only the map's own keys are consulted, so names inherited from
 * `Object.prototype` (e.g. "constructor", "toString") are treated as
 * unknown ids instead of resolving to a function.
 *
 * @param tier Tier name, short alias, or full model id.
 * @returns The mapped model id, or `tier` itself when it is not in the map.
 */
export function resolveModel(tier: string): string {
  const isKnown = Object.prototype.hasOwnProperty.call(MODEL_MAP, tier);
  return isKnown ? MODEL_MAP[tier] ?? tier : tier;
}
|
|
61
|
-
|
|
62
|
-
/**
 * Returns the token limits for a model id.
 *
 * Ids without an entry in `MODEL_LIMITS` get `DEFAULT_LIMITS`. Only the
 * table's own keys are consulted, so names inherited from `Object.prototype`
 * (e.g. "constructor") fall back to the defaults instead of returning a
 * non-limits value.
 *
 * @param model Concrete model id.
 * @returns The model's `maxTokens` / `maxInputTokens`, or the defaults.
 */
export function getModelLimits(model: string): { maxTokens: number; maxInputTokens: number } {
  const isKnown = Object.prototype.hasOwnProperty.call(MODEL_LIMITS, model);
  return isKnown ? MODEL_LIMITS[model] ?? DEFAULT_LIMITS : DEFAULT_LIMITS;
}
|
|
@@ -1,24 +0,0 @@
|
|
|
1
|
-
import type { Middleware, Message } from "../types.js";
|
|
2
|
-
import type OpenAI from "openai";
|
|
3
|
-
|
|
4
|
-
/**
 * Combines a list of middlewares into a single pair of hooks.
 *
 * Each hook threads its input through the middlewares in list order, feeding
 * every middleware the previous one's output. Middlewares that do not define
 * a given hook are skipped.
 *
 * @param middlewares Middlewares to apply, first to last.
 * @returns `beforeModel` (transforms the message list) and `filterTools`
 *   (transforms the tool list).
 */
export function composeMiddleware(middlewares: Middleware[]): {
  beforeModel: (messages: Message[]) => Message[];
  filterTools: (tools: OpenAI.ChatCompletionTool[]) => OpenAI.ChatCompletionTool[];
} {
  const beforeModel = (messages: Message[]): Message[] =>
    middlewares.reduce(
      (current, mw) => (mw.beforeModel ? mw.beforeModel(current) : current),
      messages,
    );

  const filterTools = (tools: OpenAI.ChatCompletionTool[]): OpenAI.ChatCompletionTool[] =>
    middlewares.reduce(
      (current, mw) => (mw.filterTools ? mw.filterTools(current) : current),
      tools,
    );

  return { beforeModel, filterTools };
}
|