@strayl/agent 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. package/dist/agent.js +6 -7
  2. package/package.json +5 -1
  3. package/skills/api-creation/SKILL.md +631 -0
  4. package/skills/authentication/SKILL.md +294 -0
  5. package/skills/frontend-design/SKILL.md +108 -0
  6. package/skills/landing-creation/SKILL.md +125 -0
  7. package/skills/reference/SKILL.md +149 -0
  8. package/skills/web-application-creation/SKILL.md +231 -0
  9. package/src/agent.ts +0 -465
  10. package/src/checkpoints/manager.ts +0 -112
  11. package/src/context/manager.ts +0 -185
  12. package/src/context/summarizer.ts +0 -104
  13. package/src/context/trim.ts +0 -55
  14. package/src/emitter.ts +0 -14
  15. package/src/hitl/manager.ts +0 -77
  16. package/src/hitl/transport.ts +0 -13
  17. package/src/index.ts +0 -116
  18. package/src/llm/client.ts +0 -276
  19. package/src/llm/gemini-native.ts +0 -307
  20. package/src/llm/models.ts +0 -64
  21. package/src/middleware/compose.ts +0 -24
  22. package/src/middleware/credential-scrubbing.ts +0 -31
  23. package/src/middleware/forbidden-packages.ts +0 -107
  24. package/src/middleware/plan-mode.ts +0 -143
  25. package/src/middleware/prompt-caching.ts +0 -21
  26. package/src/middleware/tool-compression.ts +0 -25
  27. package/src/middleware/tool-filter.ts +0 -13
  28. package/src/prompts/implementation-mode.md +0 -16
  29. package/src/prompts/plan-mode.md +0 -51
  30. package/src/prompts/system.ts +0 -173
  31. package/src/skills/loader.ts +0 -53
  32. package/src/stdin-listener.ts +0 -61
  33. package/src/subagents/definitions.ts +0 -72
  34. package/src/subagents/manager.ts +0 -161
  35. package/src/todos/manager.ts +0 -61
  36. package/src/tools/builtin/delete.ts +0 -29
  37. package/src/tools/builtin/edit.ts +0 -74
  38. package/src/tools/builtin/exec.ts +0 -216
  39. package/src/tools/builtin/glob.ts +0 -104
  40. package/src/tools/builtin/grep.ts +0 -115
  41. package/src/tools/builtin/ls.ts +0 -54
  42. package/src/tools/builtin/move.ts +0 -31
  43. package/src/tools/builtin/read.ts +0 -69
  44. package/src/tools/builtin/write.ts +0 -42
  45. package/src/tools/executor.ts +0 -51
  46. package/src/tools/external/database.ts +0 -285
  47. package/src/tools/external/enter-plan-mode.ts +0 -34
  48. package/src/tools/external/generate-image.ts +0 -110
  49. package/src/tools/external/hitl-tools.ts +0 -118
  50. package/src/tools/external/preview.ts +0 -28
  51. package/src/tools/external/proxy-fetch.ts +0 -51
  52. package/src/tools/external/task.ts +0 -38
  53. package/src/tools/external/wait.ts +0 -20
  54. package/src/tools/external/web-fetch.ts +0 -57
  55. package/src/tools/external/web-search.ts +0 -61
  56. package/src/tools/registry.ts +0 -36
  57. package/src/tools/zod-to-json-schema.ts +0 -86
  58. package/src/types.ts +0 -151
package/src/llm/client.ts DELETED
@@ -1,276 +0,0 @@
1
- import OpenAI from "openai";
2
- import type { Message } from "../types.js";
3
- import { resolveModel, getModelLimits } from "./models.js";
4
- import { streamGeminiNative } from "./gemini-native.js";
5
-
6
- export type LLMChunk =
7
- | { type: "text"; text: string }
8
- | { type: "reasoning"; text: string }
9
- | { type: "tool_call_delta"; index: number; id?: string; name?: string; arguments: string }
10
- | { type: "tool_call_complete"; id: string; name: string; arguments: string }
11
- | { type: "usage"; input_tokens: number; output_tokens: number; cost?: number };
12
-
13
- // Provider routing is handled by the proxy (api.strayl.dev).
14
- // The SDK sends all requests to STRAYL_API_URL with a session token.
15
- // The proxy adds real API keys, routes to the correct provider, and checks billing.
16
- //
17
- // For local development without a proxy, set STRAYL_LLM_DIRECT=1 and provide
18
- // provider-specific env vars (OPENROUTER_API_KEY, GOOGLE_GENERATIVE_AI_API_KEY).
19
-
20
- type Mode = "proxy" | "direct";
21
-
22
- const GEMINI_MODELS = new Set(["gemini-3-flash-preview", "google/gemini-3.1-pro-preview"]);
23
-
24
- export interface LLMClientConfig {
25
- modelTier: string;
26
- env: Record<string, string>;
27
- sessionId?: string;
28
- }
29
-
30
- export class LLMClient {
31
- readonly modelName: string;
32
- readonly maxTokens: number;
33
- readonly maxInputTokens: number;
34
- private client: OpenAI;
35
- private mode: Mode;
36
- private geminiApiKey?: string;
37
- private sessionId?: string;
38
-
39
- constructor(config: LLMClientConfig) {
40
- this.modelName = resolveModel(config.modelTier);
41
- const limits = getModelLimits(this.modelName);
42
- this.maxTokens = limits.maxTokens;
43
- this.maxInputTokens = limits.maxInputTokens;
44
- this.sessionId = config.sessionId;
45
-
46
- // Direct mode: local dev with raw API keys (no proxy)
47
- // Proxy mode: production, SDK has no API keys
48
- this.mode = config.env.STRAYL_LLM_DIRECT === "1" ? "direct" : "proxy";
49
- this.geminiApiKey = config.env.GOOGLE_GENERATIVE_AI_API_KEY;
50
-
51
- this.client = this.createClient(config);
52
- }
53
-
54
- private createClient(config: LLMClientConfig): OpenAI {
55
- if (this.mode === "proxy") {
56
- // All requests go through the Strayl API proxy.
57
- // Proxy handles: provider routing, API key injection, billing, rate limiting.
58
- // Auth: STRAYL_SESSION_TOKEN passed as Bearer token.
59
- // Model header: X-Strayl-Model tells proxy which provider to route to.
60
- const apiUrl = config.env.STRAYL_API_URL ?? "https://api.strayl.dev";
61
- return new OpenAI({
62
- apiKey: config.env.STRAYL_SESSION_TOKEN ?? "no-token",
63
- baseURL: `${apiUrl}/v1`,
64
- defaultHeaders: {
65
- "X-Strayl-Model": this.modelName,
66
- "X-Strayl-Session": config.sessionId ?? "",
67
- },
68
- });
69
- }
70
-
71
- // Direct mode — for local development
72
- if (GEMINI_MODELS.has(this.modelName) && config.env.GOOGLE_GENERATIVE_AI_API_KEY) {
73
- return new OpenAI({
74
- apiKey: config.env.GOOGLE_GENERATIVE_AI_API_KEY,
75
- baseURL: "https://generativelanguage.googleapis.com/v1beta/openai/",
76
- fetch: this.directGeminiFetch.bind(this),
77
- });
78
- }
79
-
80
- return new OpenAI({
81
- apiKey: config.env.OPENROUTER_API_KEY,
82
- baseURL: "https://openrouter.ai/api/v1",
83
- });
84
- }
85
-
86
- private async directGeminiFetch(url: RequestInfo | URL, init?: RequestInit): Promise<Response> {
87
- if (!init?.body || typeof init.body !== "string") return fetch(url, init);
88
-
89
- let body: Record<string, unknown>;
90
- try {
91
- body = JSON.parse(init.body);
92
- } catch {
93
- return fetch(url, init);
94
- }
95
-
96
- const ALLOWED = new Set([
97
- "model", "messages", "tools", "tool_choice",
98
- "max_tokens", "max_completion_tokens",
99
- "temperature", "top_p", "n", "stream", "stop",
100
- "presence_penalty", "frequency_penalty",
101
- "response_format", "reasoning_effort", "seed", "google",
102
- ]);
103
-
104
- for (const key of Object.keys(body)) {
105
- if (!ALLOWED.has(key)) delete body[key];
106
- }
107
-
108
- body.reasoning_effort = "medium";
109
-
110
- // Inject thought_signature bypass for multi-turn tool calling
111
- if (Array.isArray(body.messages)) {
112
- for (const msg of body.messages as Record<string, unknown>[]) {
113
- if (msg.role === "assistant" && Array.isArray(msg.tool_calls)) {
114
- for (const tc of msg.tool_calls as Record<string, unknown>[]) {
115
- const extra = tc.extra_content as Record<string, Record<string, unknown>> | undefined;
116
- if (!extra?.google?.thought_signature) {
117
- tc.extra_content = {
118
- ...(extra ?? {}),
119
- google: {
120
- ...(extra?.google ?? {}),
121
- thought_signature: "skip_thought_signature_validator",
122
- },
123
- };
124
- }
125
- }
126
- }
127
- }
128
- }
129
-
130
- const modifiedInit = { ...init, body: JSON.stringify(body) };
131
-
132
- // Retry with exponential backoff for 429/503
133
- const MAX_RETRIES = 4;
134
- for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) {
135
- const response = await fetch(url, modifiedInit);
136
- if ((response.status === 429 || response.status === 503) && attempt < MAX_RETRIES) {
137
- const retryAfter = response.headers.get("retry-after");
138
- const delay = retryAfter
139
- ? Math.min(parseInt(retryAfter, 10) * 1000, 30_000)
140
- : 1000 * Math.pow(2, attempt - 1);
141
- await new Promise(r => setTimeout(r, delay));
142
- continue;
143
- }
144
- if (!response.ok) {
145
- try {
146
- const clone = response.clone();
147
- const arrayBuf = await clone.arrayBuffer();
148
- const bytes = new Uint8Array(arrayBuf);
149
- let errorText: string;
150
- if (bytes[0] === 0x1f && bytes[1] === 0x8b) {
151
- const ds = new DecompressionStream("gzip");
152
- const decompressed = new Response(new Blob([bytes]).stream().pipeThrough(ds));
153
- errorText = await decompressed.text();
154
- } else {
155
- errorText = new TextDecoder().decode(bytes);
156
- }
157
- throw new Error(`Gemini API ${response.status}: ${errorText.slice(0, 500)}`);
158
- } catch (e) {
159
- if (e instanceof Error && e.message.startsWith("Gemini API")) throw e;
160
- }
161
- }
162
- return response;
163
- }
164
- return fetch(url, modifiedInit);
165
- }
166
-
167
- async *stream(messages: Message[], tools: OpenAI.ChatCompletionTool[]): AsyncGenerator<LLMChunk> {
168
- // Native Gemini API for thinking support (direct mode only)
169
- if (this.mode === "direct" && GEMINI_MODELS.has(this.modelName) && this.geminiApiKey) {
170
- yield* streamGeminiNative(this.geminiApiKey, this.modelName, messages, tools, this.maxTokens);
171
- return;
172
- }
173
-
174
- const params: OpenAI.ChatCompletionCreateParamsStreaming = {
175
- model: this.modelName,
176
- messages: messages as OpenAI.ChatCompletionMessageParam[],
177
- tools: tools.length > 0 ? tools : undefined,
178
- stream: true,
179
- max_tokens: this.maxTokens,
180
- };
181
-
182
- // Request usage tracking (proxy passes this through to OpenRouter)
183
- (params as unknown as Record<string, unknown>).usage = { include: true };
184
-
185
- const response = await this.client.chat.completions.create(params);
186
-
187
- const partialToolCalls = new Map<number, { id: string; name: string; arguments: string }>();
188
- let emittedToolCalls = false;
189
-
190
- for await (const chunk of response) {
191
- const choice = chunk.choices[0];
192
- if (!choice) {
193
- if (chunk.usage) {
194
- yield {
195
- type: "usage",
196
- input_tokens: chunk.usage.prompt_tokens ?? 0,
197
- output_tokens: chunk.usage.completion_tokens ?? 0,
198
- cost: (chunk as unknown as { usage?: { cost?: number } }).usage?.cost,
199
- };
200
- }
201
- continue;
202
- }
203
-
204
- const delta = choice.delta;
205
-
206
- if (delta?.content) {
207
- yield { type: "text", text: delta.content };
208
- }
209
-
210
- // Reasoning content (provider-specific fields — proxy passes them through)
211
- const anyDelta = delta as Record<string, unknown> | undefined;
212
- if (anyDelta?.thought && typeof anyDelta.thought === "string") {
213
- yield { type: "reasoning", text: anyDelta.thought };
214
- }
215
- if (anyDelta?.reasoning_content && typeof anyDelta.reasoning_content === "string") {
216
- yield { type: "reasoning", text: anyDelta.reasoning_content };
217
- }
218
- if (anyDelta?.reasoning && typeof anyDelta.reasoning === "string") {
219
- yield { type: "reasoning", text: anyDelta.reasoning };
220
- }
221
-
222
- if (delta?.tool_calls) {
223
- for (const tc of delta.tool_calls) {
224
- const idx = tc.index;
225
- let partial = partialToolCalls.get(idx);
226
-
227
- if (!partial) {
228
- partial = { id: tc.id || "", name: tc.function?.name || "", arguments: "" };
229
- partialToolCalls.set(idx, partial);
230
- }
231
-
232
- if (tc.id) partial.id = tc.id;
233
- if (tc.function?.name) partial.name = tc.function.name;
234
- if (tc.function?.arguments) {
235
- partial.arguments += tc.function.arguments;
236
- yield {
237
- type: "tool_call_delta",
238
- index: idx,
239
- id: partial.id,
240
- name: partial.name,
241
- arguments: tc.function.arguments,
242
- };
243
- }
244
- }
245
- }
246
-
247
- if (choice.finish_reason === "tool_calls" || choice.finish_reason === "stop") {
248
- if (!emittedToolCalls) {
249
- emittedToolCalls = true;
250
- for (const [, partial] of partialToolCalls) {
251
- if (partial.id && partial.name) {
252
- yield { type: "tool_call_complete", id: partial.id, name: partial.name, arguments: partial.arguments };
253
- }
254
- }
255
- }
256
- }
257
-
258
- if (chunk.usage) {
259
- yield {
260
- type: "usage",
261
- input_tokens: chunk.usage.prompt_tokens ?? 0,
262
- output_tokens: chunk.usage.completion_tokens ?? 0,
263
- cost: (chunk as unknown as { usage?: { cost?: number } }).usage?.cost,
264
- };
265
- }
266
- }
267
-
268
- if (!emittedToolCalls) {
269
- for (const [, partial] of partialToolCalls) {
270
- if (partial.id && partial.name && partial.arguments) {
271
- yield { type: "tool_call_complete", id: partial.id, name: partial.name, arguments: partial.arguments };
272
- }
273
- }
274
- }
275
- }
276
- }
@@ -1,307 +0,0 @@
1
- /**
2
- * Native Gemini API client for streaming with thinking support.
3
- * Uses the generateContent endpoint directly (not OpenAI-compat).
4
- *
5
- * Key difference from OpenAI-compat: thinking text is returned as `thought: true`
6
- * parts, and tool calls carry `thoughtSignature` that must be replayed in
7
- * subsequent requests for multi-turn to work.
8
- */
9
-
10
- import type { Message } from "../types.js";
11
- import type { LLMChunk } from "./client.js";
12
- import type OpenAI from "openai";
13
-
14
- // Extended chunk type for Gemini thought signatures
15
- export type GeminiLLMChunk = LLMChunk
16
- | { type: "thought_signature"; index: number; signature: string };
17
-
18
- interface GeminiPart {
19
- text?: string;
20
- thought?: boolean;
21
- thoughtSignature?: string;
22
- functionCall?: { name: string; args: Record<string, unknown> };
23
- functionResponse?: { name: string; response: Record<string, unknown> };
24
- }
25
-
26
- interface GeminiContent {
27
- role: "user" | "model";
28
- parts: GeminiPart[];
29
- }
30
-
31
- interface GeminiFunctionDeclaration {
32
- name: string;
33
- description: string;
34
- parameters?: Record<string, unknown>;
35
- }
36
-
37
- // ─── Message Conversion ──────────────────────────────────────────────
38
-
39
- /**
40
- * We store thoughtSignature in the ToolCall id field as a prefix:
41
- * "gemini_fc_<ts>_<idx>|<signature>"
42
- * This is a hack, but it avoids changing the Message type just for Gemini.
43
- */
44
-
45
- function convertMessages(messages: Message[]): {
46
- systemInstruction: { parts: Array<{ text: string }> } | undefined;
47
- contents: GeminiContent[];
48
- } {
49
- let systemInstruction: { parts: Array<{ text: string }> } | undefined;
50
- const contents: GeminiContent[] = [];
51
-
52
- for (const msg of messages) {
53
- const textContent = typeof msg.content === "string"
54
- ? msg.content
55
- : msg.content?.map(p => p.text ?? "").join("") ?? "";
56
-
57
- switch (msg.role) {
58
- case "system":
59
- if (!systemInstruction) {
60
- systemInstruction = { parts: [{ text: textContent }] };
61
- } else {
62
- systemInstruction.parts.push({ text: textContent });
63
- }
64
- break;
65
-
66
- case "user":
67
- contents.push({ role: "user", parts: [{ text: textContent }] });
68
- break;
69
-
70
- case "assistant": {
71
- const parts: GeminiPart[] = [];
72
- if (textContent) parts.push({ text: textContent });
73
- if (msg.tool_calls) {
74
- for (const tc of msg.tool_calls) {
75
- let args: Record<string, unknown> = {};
76
- try { args = JSON.parse(tc.function.arguments); } catch {}
77
-
78
- // Extract thoughtSignature from ID (stored as "id|signature")
79
- const pipeIdx = tc.id.indexOf("|");
80
- const thoughtSignature = pipeIdx >= 0 ? tc.id.slice(pipeIdx + 1) : undefined;
81
-
82
- const part: GeminiPart = {
83
- functionCall: { name: tc.function.name, args },
84
- };
85
- if (thoughtSignature) {
86
- part.thoughtSignature = thoughtSignature;
87
- }
88
- parts.push(part);
89
- }
90
- }
91
- if (parts.length > 0) {
92
- contents.push({ role: "model", parts });
93
- }
94
- break;
95
- }
96
-
97
- case "tool": {
98
- let response: Record<string, unknown> = {};
99
- try {
100
- const parsed = JSON.parse(textContent);
101
- // Gemini requires response to be a plain object (not array)
102
- if (typeof parsed === "object" && parsed !== null && !Array.isArray(parsed)) {
103
- response = parsed as Record<string, unknown>;
104
- } else {
105
- response = { result: parsed };
106
- }
107
- } catch {
108
- response = { result: textContent };
109
- }
110
-
111
- // Tool responses are role: "user" in Gemini API
112
- const last = contents[contents.length - 1];
113
- if (last?.role === "user" && last.parts.some(p => p.functionResponse)) {
114
- last.parts.push({
115
- functionResponse: { name: msg.name ?? "unknown", response },
116
- });
117
- } else {
118
- contents.push({
119
- role: "user",
120
- parts: [{
121
- functionResponse: { name: msg.name ?? "unknown", response },
122
- }],
123
- });
124
- }
125
- break;
126
- }
127
- }
128
- }
129
-
130
- return { systemInstruction, contents };
131
- }
132
-
133
- // ─── Tool Conversion ─────────────────────────────────────────────────
134
-
135
- function convertTools(tools: OpenAI.ChatCompletionTool[]): Array<{ functionDeclarations: GeminiFunctionDeclaration[] }> | undefined {
136
- const decls: GeminiFunctionDeclaration[] = [];
137
- for (const t of tools) {
138
- if (t.type !== "function") continue;
139
- decls.push({
140
- name: t.function.name,
141
- description: t.function.description ?? "",
142
- parameters: t.function.parameters as Record<string, unknown> | undefined,
143
- });
144
- }
145
- return decls.length > 0 ? [{ functionDeclarations: decls }] : undefined;
146
- }
147
-
148
- // ─── SSE Stream Parser ───────────────────────────────────────────────
149
-
150
- async function* parseSSEStream(response: Response): AsyncGenerator<Record<string, unknown>> {
151
- const reader = response.body!.getReader();
152
- const decoder = new TextDecoder();
153
- let buffer = "";
154
-
155
- while (true) {
156
- const { done, value } = await reader.read();
157
- if (done) break;
158
-
159
- buffer += decoder.decode(value, { stream: true });
160
- const lines = buffer.split("\n");
161
- buffer = lines.pop()!;
162
-
163
- for (const line of lines) {
164
- const trimmed = line.trim();
165
- if (trimmed.startsWith("data: ")) {
166
- const jsonStr = trimmed.slice(6);
167
- if (jsonStr === "[DONE]") return;
168
- try {
169
- yield JSON.parse(jsonStr);
170
- } catch {}
171
- }
172
- }
173
- }
174
-
175
- if (buffer.trim().startsWith("data: ")) {
176
- try { yield JSON.parse(buffer.trim().slice(6)); } catch {}
177
- }
178
- }
179
-
180
- // ─── Main Streaming Function ─────────────────────────────────────────
181
-
182
- export async function* streamGeminiNative(
183
- apiKey: string,
184
- modelName: string,
185
- messages: Message[],
186
- tools: OpenAI.ChatCompletionTool[],
187
- maxTokens: number,
188
- ): AsyncGenerator<LLMChunk> {
189
- const { systemInstruction, contents } = convertMessages(messages);
190
- const geminiTools = convertTools(tools);
191
-
192
- const body: Record<string, unknown> = {
193
- contents,
194
- generationConfig: {
195
- maxOutputTokens: maxTokens,
196
- thinkingConfig: {
197
- thinkingBudget: 8192,
198
- includeThoughts: true,
199
- },
200
- },
201
- };
202
-
203
- if (systemInstruction) body.systemInstruction = systemInstruction;
204
- if (geminiTools) body.tools = geminiTools;
205
-
206
- const url = `https://generativelanguage.googleapis.com/v1beta/models/${modelName}:streamGenerateContent?alt=sse&key=${apiKey}`;
207
-
208
- const MAX_RETRIES = 4;
209
- let response: Response | null = null;
210
-
211
- for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) {
212
- response = await fetch(url, {
213
- method: "POST",
214
- headers: { "Content-Type": "application/json" },
215
- body: JSON.stringify(body),
216
- });
217
-
218
- if ((response.status === 429 || response.status === 503) && attempt < MAX_RETRIES) {
219
- const retryAfter = response.headers.get("retry-after");
220
- const delay = retryAfter
221
- ? Math.min(parseInt(retryAfter, 10) * 1000, 30_000)
222
- : 1000 * Math.pow(2, attempt - 1);
223
- await new Promise(r => setTimeout(r, delay));
224
- continue;
225
- }
226
-
227
- if (!response.ok) {
228
- let errorText = "";
229
- try { errorText = await response.text(); } catch {}
230
- throw new Error(`Gemini API ${response.status}: ${errorText.slice(0, 500)}`);
231
- }
232
-
233
- break;
234
- }
235
-
236
- if (!response?.body) {
237
- throw new Error("Gemini API returned no body");
238
- }
239
-
240
- let toolCallIndex = 0;
241
- let usageEmitted = false;
242
-
243
- for await (const chunk of parseSSEStream(response)) {
244
- const candidates = chunk.candidates as Array<{
245
- content?: { parts?: GeminiPart[] };
246
- finishReason?: string;
247
- }> | undefined;
248
-
249
- // Process parts
250
- if (candidates?.[0]?.content?.parts) {
251
- for (const part of candidates[0].content!.parts!) {
252
- // Thinking content
253
- if (part.thought && part.text) {
254
- yield { type: "reasoning", text: part.text };
255
- continue;
256
- }
257
-
258
- // Text content (skip empty text and thoughtSignature-only parts)
259
- if (part.text && !part.thought && !part.thoughtSignature) {
260
- yield { type: "text", text: part.text };
261
- continue;
262
- }
263
-
264
- // Function call — include thoughtSignature in the ID
265
- if (part.functionCall) {
266
- const baseId = `gemini_fc_${Date.now()}_${toolCallIndex}`;
267
- // Encode thoughtSignature into the ID so it survives the message round-trip
268
- const id = part.thoughtSignature
269
- ? `${baseId}|${part.thoughtSignature}`
270
- : baseId;
271
- const argsStr = JSON.stringify(part.functionCall.args ?? {});
272
-
273
- yield {
274
- type: "tool_call_delta",
275
- index: toolCallIndex,
276
- id,
277
- name: part.functionCall.name,
278
- arguments: argsStr,
279
- };
280
- yield {
281
- type: "tool_call_complete",
282
- id,
283
- name: part.functionCall.name,
284
- arguments: argsStr,
285
- };
286
- toolCallIndex++;
287
- }
288
- }
289
- }
290
-
291
- // Usage in final chunk
292
- const meta = chunk.usageMetadata as {
293
- promptTokenCount?: number;
294
- candidatesTokenCount?: number;
295
- thoughtsTokenCount?: number;
296
- } | undefined;
297
-
298
- if (meta && candidates?.[0]?.finishReason && !usageEmitted) {
299
- usageEmitted = true;
300
- yield {
301
- type: "usage",
302
- input_tokens: meta.promptTokenCount ?? 0,
303
- output_tokens: (meta.candidatesTokenCount ?? 0) + (meta.thoughtsTokenCount ?? 0),
304
- };
305
- }
306
- }
307
- }
package/src/llm/models.ts DELETED
@@ -1,64 +0,0 @@
1
- export const MODEL_MAP: Record<string, string> = {
2
- // Tier defaults
3
- auto: "gemini-3-flash-preview",
4
- light: "x-ai/grok-4.1-fast",
5
- pro: "anthropic/claude-sonnet-4.6",
6
- deep: "anthropic/claude-opus-4.6",
7
-
8
- // Short aliases
9
- haiku: "anthropic/claude-haiku-4.5",
10
- kimi: "moonshotai/kimi-k2.5",
11
- grok: "x-ai/grok-4.1-fast",
12
- gemini: "google/gemini-3.1-pro-preview",
13
- sonnet: "anthropic/claude-sonnet-4.6",
14
- codex: "openai/gpt-5.3-codex",
15
- opus: "anthropic/claude-opus-4.6",
16
- flash: "gemini-3-flash-preview",
17
-
18
- // Full IDs map to themselves
19
- "anthropic/claude-haiku-4.5": "anthropic/claude-haiku-4.5",
20
- "moonshotai/kimi-k2.5": "moonshotai/kimi-k2.5",
21
- "x-ai/grok-4.1-fast": "x-ai/grok-4.1-fast",
22
- "google/gemini-3.1-pro-preview": "google/gemini-3.1-pro-preview",
23
- "anthropic/claude-sonnet-4.6": "anthropic/claude-sonnet-4.6",
24
- "openai/gpt-5.3-codex": "openai/gpt-5.3-codex",
25
- "anthropic/claude-opus-4.6": "anthropic/claude-opus-4.6",
26
- "gemini-3-flash-preview": "gemini-3-flash-preview",
27
- };
28
-
29
- export const MODEL_LIMITS: Record<string, { maxTokens: number; maxInputTokens: number }> = {
30
- "gemini-3-flash-preview": { maxTokens: 65_536, maxInputTokens: 1_000_000 },
31
- "openai/gpt-5.3-codex": { maxTokens: 100_000, maxInputTokens: 280_000 },
32
- "anthropic/claude-opus-4.6": { maxTokens: 100_000, maxInputTokens: 900_000 },
33
- "anthropic/claude-haiku-4.5": { maxTokens: 16_384, maxInputTokens: 200_000 },
34
- "moonshotai/kimi-k2.5": { maxTokens: 65_536, maxInputTokens: 262_000 },
35
- "x-ai/grok-4.1-fast": { maxTokens: 100_000, maxInputTokens: 800_000 },
36
- "google/gemini-3.1-pro-preview": { maxTokens: 65_536, maxInputTokens: 900_000 },
37
- "anthropic/claude-sonnet-4.6": { maxTokens: 100_000, maxInputTokens: 900_000 },
38
- };
39
-
40
- export const DEFAULT_LIMITS = { maxTokens: 100_000, maxInputTokens: 280_000 };
41
-
42
- export type ModelTier = "light" | "pro" | "deep";
43
-
44
- export const MODEL_TO_TIER: Record<string, ModelTier> = {
45
- "auto": "light",
46
- "gemini-3-flash-preview": "light",
47
- "anthropic/claude-haiku-4.5": "light",
48
- "moonshotai/kimi-k2.5": "light",
49
- "x-ai/grok-4.1-fast": "light",
50
- "google/gemini-3.1-pro-preview": "pro",
51
- "anthropic/claude-sonnet-4.6": "pro",
52
- "openai/gpt-5.3-codex": "pro",
53
- "anthropic/claude-opus-4.6": "deep",
54
- };
55
- export const SUBAGENT_MODEL = "x-ai/grok-4.1-fast";
56
- export const SUMMARIZATION_MODEL = "google/gemini-2.5-flash-lite";
57
-
58
- export function resolveModel(tier: string): string {
59
- return MODEL_MAP[tier] ?? tier;
60
- }
61
-
62
- export function getModelLimits(model: string): { maxTokens: number; maxInputTokens: number } {
63
- return MODEL_LIMITS[model] ?? DEFAULT_LIMITS;
64
- }
@@ -1,24 +0,0 @@
1
- import type { Middleware, Message } from "../types.js";
2
- import type OpenAI from "openai";
3
-
4
- export function composeMiddleware(middlewares: Middleware[]): {
5
- beforeModel: (messages: Message[]) => Message[];
6
- filterTools: (tools: OpenAI.ChatCompletionTool[]) => OpenAI.ChatCompletionTool[];
7
- } {
8
- return {
9
- beforeModel: (messages) => {
10
- let result = messages;
11
- for (const mw of middlewares) {
12
- if (mw.beforeModel) result = mw.beforeModel(result);
13
- }
14
- return result;
15
- },
16
- filterTools: (tools) => {
17
- let result = tools;
18
- for (const mw of middlewares) {
19
- if (mw.filterTools) result = mw.filterTools(result);
20
- }
21
- return result;
22
- },
23
- };
24
- }