keystone-cli 1.3.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. package/README.md +114 -140
  2. package/package.json +6 -3
  3. package/src/cli.ts +54 -369
  4. package/src/commands/init.ts +15 -29
  5. package/src/db/memory-db.test.ts +45 -0
  6. package/src/db/memory-db.ts +47 -21
  7. package/src/db/sqlite-setup.ts +26 -3
  8. package/src/db/workflow-db.ts +12 -5
  9. package/src/parser/config-schema.ts +11 -13
  10. package/src/parser/schema.ts +4 -2
  11. package/src/runner/__test__/llm-mock-setup.ts +173 -0
  12. package/src/runner/__test__/llm-test-setup.ts +271 -0
  13. package/src/runner/engine-executor.test.ts +25 -18
  14. package/src/runner/executors/blueprint-executor.ts +0 -1
  15. package/src/runner/executors/dynamic-executor.ts +11 -6
  16. package/src/runner/executors/engine-executor.ts +5 -1
  17. package/src/runner/executors/llm-executor.ts +502 -1033
  18. package/src/runner/executors/memory-executor.ts +35 -19
  19. package/src/runner/executors/plan-executor.ts +0 -1
  20. package/src/runner/executors/types.ts +4 -4
  21. package/src/runner/llm-adapter.integration.test.ts +151 -0
  22. package/src/runner/llm-adapter.ts +263 -1401
  23. package/src/runner/llm-clarification.test.ts +91 -106
  24. package/src/runner/llm-executor.test.ts +217 -1181
  25. package/src/runner/memoization.test.ts +0 -1
  26. package/src/runner/recovery-security.test.ts +51 -20
  27. package/src/runner/reflexion.test.ts +55 -18
  28. package/src/runner/standard-tools-integration.test.ts +137 -87
  29. package/src/runner/step-executor.test.ts +36 -80
  30. package/src/runner/step-executor.ts +0 -2
  31. package/src/runner/test-harness.ts +3 -29
  32. package/src/runner/tool-integration.test.ts +122 -73
  33. package/src/runner/workflow-runner.ts +92 -35
  34. package/src/runner/workflow-scheduler.ts +11 -1
  35. package/src/runner/workflow-summary.ts +144 -0
  36. package/src/utils/auth-manager.test.ts +10 -520
  37. package/src/utils/auth-manager.ts +3 -756
  38. package/src/utils/config-loader.ts +12 -0
  39. package/src/utils/constants.ts +0 -17
  40. package/src/utils/process-sandbox.ts +15 -3
  41. package/src/runner/llm-adapter-runtime.test.ts +0 -209
  42. package/src/runner/llm-adapter.test.ts +0 -1012
@@ -1,335 +1,97 @@
1
- import { randomUUID } from 'node:crypto';
2
- import { copyFileSync, existsSync, readdirSync } from 'node:fs';
3
- import { Module } from 'node:module';
4
- import { homedir } from 'node:os';
5
- import { basename, dirname, join } from 'node:path';
6
- import { pathToFileURL } from 'node:url';
7
- import { AuthManager, COPILOT_HEADERS } from '../utils/auth-manager';
1
+ import { execSync } from 'node:child_process';
2
+ import { createRequire } from 'node:module';
3
+ import { join } from 'node:path';
4
+ import type { EmbeddingModel, LanguageModel } from 'ai';
5
+ import type { Config } from '../parser/config-schema';
8
6
  import { ConfigLoader } from '../utils/config-loader';
9
7
  import { ConsoleLogger } from '../utils/logger';
10
- import { processOpenAIStream } from './stream-utils';
11
8
 
12
- // Maximum response size to prevent memory exhaustion (1MB)
13
- const MAX_RESPONSE_SIZE = 1024 * 1024;
14
- const ANTHROPIC_OAUTH_BETAS = [
15
- 'oauth-2025-04-20',
16
- 'claude-code-20250219',
17
- 'interleaved-thinking-2025-05-14',
18
- 'fine-grained-tool-streaming-2025-05-14',
19
- ].join(',');
20
- const GEMINI_DEFAULT_BASE_URL = 'https://cloudcode-pa.googleapis.com';
21
- const GEMINI_DEFAULT_PROJECT_ID = 'rising-fact-p41fc';
22
- const GEMINI_HEADERS = {
23
- 'User-Agent': 'antigravity/1.11.5 windows/amd64',
24
- 'X-Goog-Api-Client': 'google-cloud-sdk vscode_cloudshelleditor/0.1',
25
- 'Client-Metadata':
26
- '{"ideType":"IDE_UNSPECIFIED","platform":"PLATFORM_UNSPECIFIED","pluginType":"GEMINI"}',
27
- };
28
- const defaultLogger = new ConsoleLogger();
29
- type TransformersPipeline = (...args: unknown[]) => Promise<unknown>;
30
- let cachedPipeline: TransformersPipeline | null = null;
31
- let runtimeResolverRegistered = false;
32
- let nativeFallbacksRegistered = false;
33
-
34
- export function resetRuntimeHelpers(): void {
35
- runtimeResolverRegistered = false;
36
- nativeFallbacksRegistered = false;
9
+ // --- Keystone Types & Extensions ---
10
+
11
+ /**
12
+ * A provider instance in the AI SDK can be a function that returns a language model,
13
+ * or an object with methods for different model types.
14
+ */
15
+ export interface ProviderInstance {
16
+ (modelId: string): LanguageModel;
17
+ languageModel?: (modelId: string) => LanguageModel;
18
+ textEmbeddingModel?: (modelId: string) => EmbeddingModel;
19
+ embedding?: (modelId: string) => EmbeddingModel;
20
+ textEmbedding?: (modelId: string) => EmbeddingModel;
37
21
  }
38
22
 
39
- const ONNX_RUNTIME_LIB_PATTERN =
40
- process.platform === 'win32' ? /^onnxruntime.*\.dll$/i : /^libonnxruntime/i;
41
-
42
- function hasOnnxRuntimeLibrary(dir: string): boolean {
43
- try {
44
- return readdirSync(dir, { withFileTypes: true }).some(
45
- (entry) => entry.isFile() && ONNX_RUNTIME_LIB_PATTERN.test(entry.name)
23
+ /**
24
+ * A provider factory is a function that takes configuration options and returns a provider instance.
25
+ */
26
+ export type ProviderFactory = (options: Record<string, unknown>) => ProviderInstance;
27
+
28
+ /**
29
+ * Local embedding model implementation using @xenova/transformers.
30
+ * Maintains the "zero-setup local memory" feature.
31
+ */
32
+ class LocalEmbeddingModel {
33
+ readonly specificationVersion = 'v2';
34
+ readonly modelId = 'local-minilm';
35
+ readonly provider = 'local';
36
+ readonly maxEmbeddingsPerCall = 1;
37
+ readonly supportsParallelCalls = false;
38
+ private static pipelinePromise: Promise<any> | null = null;
39
+
40
+ private async getPipeline() {
41
+ if (!LocalEmbeddingModel.pipelinePromise) {
42
+ LocalEmbeddingModel.pipelinePromise = (async () => {
43
+ const { pipeline } = await import('@xenova/transformers');
44
+ return pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
45
+ })() as Promise<any>;
46
+ }
47
+ return LocalEmbeddingModel.pipelinePromise;
48
+ }
49
+
50
+ async doEmbed(options: { values: string[]; abortSignal?: AbortSignal }) {
51
+ const pipe = await this.getPipeline();
52
+ const embeddings = await Promise.all(
53
+ options.values.map(async (text) => {
54
+ const output = await pipe(text, { pooling: 'mean', normalize: true });
55
+ return Array.from(output.data) as number[];
56
+ })
46
57
  );
47
- } catch {
48
- return false;
49
- }
50
- }
51
-
52
- export function collectOnnxRuntimeLibraryDirs(): string[] {
53
- const candidates = new Set<string>();
54
-
55
- if (process.env.KEYSTONE_ONNX_RUNTIME_LIB_DIR) {
56
- candidates.add(process.env.KEYSTONE_ONNX_RUNTIME_LIB_DIR);
57
- }
58
-
59
- const runtimeDir = getRuntimeDir();
60
- const runtimeOnnxDir = join(
61
- runtimeDir,
62
- 'node_modules',
63
- 'onnxruntime-node',
64
- 'bin',
65
- 'napi-v3',
66
- process.platform,
67
- process.arch
68
- );
69
- if (existsSync(runtimeOnnxDir)) {
70
- candidates.add(runtimeOnnxDir);
58
+ return { embeddings };
71
59
  }
72
60
 
73
- const nodeModulesDir = join(
74
- process.cwd(),
75
- 'node_modules',
76
- 'onnxruntime-node',
77
- 'bin',
78
- 'napi-v3',
79
- process.platform,
80
- process.arch
81
- );
82
- if (existsSync(nodeModulesDir)) {
83
- candidates.add(nodeModulesDir);
61
+ /**
62
+ * Dispose of the cached pipeline to free memory.
63
+ * Call this when the embedding model is no longer needed.
64
+ */
65
+ static dispose(): void {
66
+ LocalEmbeddingModel.pipelinePromise = null;
84
67
  }
85
-
86
- const execDir = dirname(process.execPath);
87
- candidates.add(execDir);
88
- candidates.add(join(execDir, 'lib'));
89
-
90
- return Array.from(candidates).filter(hasOnnxRuntimeLibrary);
91
68
  }
92
69
 
93
- export function findOnnxRuntimeLibraryPath(dirs: string[]): string | null {
94
- for (const dir of dirs) {
95
- try {
96
- for (const entry of readdirSync(dir, { withFileTypes: true })) {
97
- if (entry.isFile() && ONNX_RUNTIME_LIB_PATTERN.test(entry.name)) {
98
- return join(dir, entry.name);
99
- }
100
- }
101
- } catch {
102
- // Ignore unreadable directories.
103
- }
104
- }
105
- return null;
106
- }
107
-
108
- export function ensureOnnxRuntimeLibraryPath(): void {
109
- const libDirs = collectOnnxRuntimeLibraryDirs();
110
- if (!libDirs.length) return;
111
-
112
- const runtimePath = findOnnxRuntimeLibraryPath(libDirs);
113
- if (runtimePath) {
114
- const tempDirs = process.platform === 'darwin' ? ['/private/tmp', '/tmp'] : ['/tmp'];
115
- for (const tempDir of tempDirs) {
116
- try {
117
- const target = join(tempDir, basename(runtimePath));
118
- if (!existsSync(target)) {
119
- copyFileSync(runtimePath, target);
120
- }
121
- } catch {
122
- // Best-effort copy for runtimes that extract native modules into temp.
123
- }
124
- }
125
- }
126
-
127
- const envKey =
128
- process.platform === 'darwin'
129
- ? 'DYLD_LIBRARY_PATH'
130
- : process.platform === 'win32'
131
- ? 'PATH'
132
- : 'LD_LIBRARY_PATH';
133
- const delimiter = process.platform === 'win32' ? ';' : ':';
134
- const existing = (process.env[envKey] || '').split(delimiter).filter(Boolean);
135
- const merged: string[] = [];
136
- const seen = new Set<string>();
137
-
138
- for (const dir of [...libDirs, ...existing]) {
139
- if (seen.has(dir)) continue;
140
- seen.add(dir);
141
- merged.push(dir);
142
- }
143
-
144
- process.env[envKey] = merged.join(delimiter);
145
- if (runtimePath && typeof Bun !== 'undefined' && typeof (Bun as any).dlopen === 'function') {
146
- try {
147
- (Bun as any).dlopen(runtimePath, {});
148
- } catch {
149
- // Best-effort preloading for compiled binaries.
150
- }
151
- }
152
- }
70
+ // Re-export specific AI SDK types
71
+ export type { LanguageModel, EmbeddingModel } from 'ai';
153
72
 
154
- export function resolveNativeModuleFallback(
155
- request: string,
156
- parentFilename: string
157
- ): string | null {
158
- const normalizedRequest = request.replace(/\\/g, '/');
159
- const fileName = normalizedRequest.split('/').pop();
160
- if (!fileName) return null;
73
+ const userRequire = createRequire(join(process.cwd(), 'package.json'));
161
74
 
162
- if (fileName.startsWith('sharp-') || /[\\/]sharp[\\/]/.test(parentFilename)) {
163
- const candidate = join(getRuntimeDir(), 'node_modules', 'sharp', 'build', 'Release', fileName);
164
- if (existsSync(candidate)) {
165
- return candidate;
166
- }
167
- }
75
+ // Lazy-loaded global require to avoid blocking import time
76
+ let globalRequire: NodeRequire | undefined;
77
+ let globalRequireResolved = false;
168
78
 
169
- if (
170
- fileName === 'onnxruntime_binding.node' ||
171
- /[\\/]onnxruntime-node[\\/]/.test(parentFilename)
172
- ) {
173
- const candidate = join(
174
- getRuntimeDir(),
175
- 'node_modules',
176
- 'onnxruntime-node',
177
- 'bin',
178
- 'napi-v3',
179
- process.platform,
180
- process.arch,
181
- 'onnxruntime_binding.node'
182
- );
183
- if (existsSync(candidate)) {
184
- return candidate;
185
- }
79
+ function getGlobalRequire(): NodeRequire | undefined {
80
+ if (globalRequireResolved) {
81
+ return globalRequire;
186
82
  }
187
-
188
- return null;
189
- }
190
-
191
- export function ensureNativeModuleFallbacks(): void {
192
- if (nativeFallbacksRegistered) return;
193
- nativeFallbacksRegistered = true;
194
-
195
- const moduleAny = Module as unknown as {
196
- _resolveFilename: (
197
- request: string,
198
- parent?: { filename?: string },
199
- isMain?: boolean,
200
- options?: unknown
201
- ) => string;
202
- };
203
- const originalResolve = moduleAny._resolveFilename;
204
- if (typeof originalResolve !== 'function') return;
205
-
206
- moduleAny._resolveFilename = function resolveFilename(request, parent, isMain, options) {
207
- if (typeof request === 'string' && request.endsWith('.node')) {
208
- try {
209
- return originalResolve.call(this, request, parent, isMain, options);
210
- } catch (error) {
211
- const parentFilename = parent && typeof parent.filename === 'string' ? parent.filename : '';
212
- const fallback = resolveNativeModuleFallback(request, parentFilename);
213
- if (fallback) {
214
- return fallback;
215
- }
216
- throw error;
217
- }
218
- }
219
- return originalResolve.call(this, request, parent, isMain, options);
220
- };
221
- }
222
-
223
- export function resolveTransformersCacheDir(): string | null {
224
- if (process.env.TRANSFORMERS_CACHE) {
225
- return process.env.TRANSFORMERS_CACHE;
226
- }
227
- if (process.env.XDG_CACHE_HOME) {
228
- return join(process.env.XDG_CACHE_HOME, 'keystone', 'transformers');
229
- }
230
- const home = process.env.HOME || homedir();
231
- if (home) {
232
- return join(home, '.cache', 'keystone', 'transformers');
233
- }
234
- return null;
235
- }
236
-
237
- async function getTransformersPipeline(): Promise<TransformersPipeline> {
238
- if (!cachedPipeline) {
239
- ensureNativeModuleFallbacks();
240
- ensureRuntimeResolver();
241
- const resolved = resolveTransformersPath();
242
- const module = resolved
243
- ? await import(pathToFileURL(resolved).href)
244
- : await import('@xenova/transformers');
245
- if (module.env?.cacheDir?.includes('/$bunfs')) {
246
- const cacheDir = resolveTransformersCacheDir();
247
- if (cacheDir) {
248
- module.env.cacheDir = cacheDir;
249
- }
250
- }
251
- cachedPipeline = module.pipeline;
252
- }
253
- if (!cachedPipeline) {
254
- throw new Error('Failed to load transformers pipeline');
255
- }
256
- return cachedPipeline;
257
- }
258
-
259
- export function resolveTransformersPath(): string | null {
83
+ globalRequireResolved = true;
260
84
  try {
261
- if (
262
- process.env.KEYSTONE_TRANSFORMERS_PATH &&
263
- existsSync(process.env.KEYSTONE_TRANSFORMERS_PATH)
264
- ) {
265
- return process.env.KEYSTONE_TRANSFORMERS_PATH;
266
- }
85
+ const globalRoot = execSync('npm root -g', { encoding: 'utf-8' }).trim();
86
+ globalRequire = createRequire(join(globalRoot, 'package.json'));
267
87
  } catch {
268
- // Ignore resolve failures and fall back to bundled module.
88
+ // Global npm root not found - this is expected in some environments (e.g., containers, CI)
89
+ // Global package resolution will be disabled silently
269
90
  }
270
- return null;
271
- }
272
-
273
- export function getRuntimeDir(): string {
274
- return process.env.KEYSTONE_RUNTIME_DIR || join(dirname(process.execPath), 'keystone-runtime');
275
- }
276
-
277
- function resolveRuntimePackageEntry(pkg: string, entry: string): string | null {
278
- const runtimePath = join(getRuntimeDir(), 'node_modules', ...pkg.split('/'), entry);
279
- if (existsSync(runtimePath)) {
280
- return runtimePath;
281
- }
282
- const cwdPath = join(process.cwd(), 'node_modules', ...pkg.split('/'), entry);
283
- if (existsSync(cwdPath)) {
284
- return cwdPath;
285
- }
286
- return null;
287
- }
288
-
289
- export function ensureRuntimeResolver(): void {
290
- if (runtimeResolverRegistered) return;
291
- if (typeof Bun === 'undefined' || typeof Bun.plugin !== 'function') {
292
- return;
293
- }
294
-
295
- const entryMap: Record<string, string> = {
296
- '@huggingface/jinja': 'dist/index.js',
297
- sharp: 'lib/index.js',
298
- 'onnxruntime-node': 'dist/index.js',
299
- 'onnxruntime-common': 'dist/ort-common.node.js',
300
- };
301
-
302
- Bun.plugin({
303
- name: 'keystone-runtime-resolver',
304
- setup(builder) {
305
- builder.onResolve(
306
- { filter: /^(sharp|onnxruntime-node|onnxruntime-common|@huggingface\/jinja)$/ },
307
- (args) => {
308
- const entry = entryMap[args.path];
309
- if (!entry) return null;
310
- const resolved = resolveRuntimePackageEntry(args.path, entry);
311
- if (!resolved) return null;
312
- return { path: resolved };
313
- }
314
- );
315
- },
316
- });
317
-
318
- runtimeResolverRegistered = true;
319
- }
320
-
321
- export interface LLMMessage {
322
- role: 'system' | 'user' | 'assistant' | 'tool';
323
- content: string | null;
324
- tool_call_id?: string;
325
- name?: string;
326
- tool_calls?: LLMToolCall[];
327
- reasoning?: {
328
- encrypted_content: string;
329
- summary?: string;
330
- };
91
+ return globalRequire;
331
92
  }
332
93
 
94
+ // Compatibility types for Keystone
333
95
  export interface LLMToolCall {
334
96
  id: string;
335
97
  type: 'function';
@@ -339,9 +101,14 @@ export interface LLMToolCall {
339
101
  };
340
102
  }
341
103
 
342
- type LLMMessageWithId = LLMMessage & { id?: string };
343
- type ChatGPTToolCall = Omit<LLMToolCall, 'id'>;
344
- type ChatGPTMessage = Omit<LLMMessage, 'tool_calls'> & { tool_calls?: ChatGPTToolCall[] };
104
+ export interface LLMMessage {
105
+ role: 'system' | 'user' | 'assistant' | 'tool';
106
+ content?: string;
107
+ name?: string;
108
+ tool_calls?: LLMToolCall[];
109
+ tool_call_id?: string;
110
+ reasoning?: { summary?: string }; // Keystone extension
111
+ }
345
112
 
346
113
  export interface LLMResponse {
347
114
  message: LLMMessage;
@@ -352,1155 +119,250 @@ export interface LLMResponse {
352
119
  };
353
120
  }
354
121
 
355
- export interface LLMTool {
356
- type: 'function';
357
- function: {
358
- name: string;
359
- description?: string;
360
- parameters?: Record<string, unknown>;
361
- };
362
- }
363
-
364
- interface GeminiFunctionCall {
365
- name: string;
366
- args?: Record<string, unknown> | string;
367
- }
368
-
369
- interface GeminiPart {
370
- text?: string;
371
- functionCall?: GeminiFunctionCall;
372
- functionResponse?: {
373
- name: string;
374
- response: Record<string, unknown>;
375
- };
376
- }
377
-
378
- interface GeminiContent {
379
- role: 'user' | 'model';
380
- parts: GeminiPart[];
381
- }
382
-
383
- interface GeminiSystemInstruction {
384
- role?: 'system';
385
- parts: GeminiPart[];
386
- }
387
-
388
- export interface LLMAdapter {
389
- chat(
390
- messages: LLMMessage[],
391
- options?: {
392
- model?: string;
393
- tools?: LLMTool[];
394
- onStream?: (chunk: string) => void;
395
- signal?: AbortSignal;
396
- responseSchema?: any; // Native JSON schema for structured output
397
- }
398
- ): Promise<LLMResponse>;
399
- embed?(text: string, model?: string, options?: { signal?: AbortSignal }): Promise<number[]>;
400
- }
401
-
402
- export class OpenAIAdapter implements LLMAdapter {
403
- private apiKey: string;
404
- private baseUrl: string;
405
-
406
- constructor(apiKey?: string, baseUrl?: string) {
407
- this.apiKey = apiKey || ConfigLoader.getSecret('OPENAI_API_KEY') || '';
408
- this.baseUrl =
409
- baseUrl || ConfigLoader.getSecret('OPENAI_BASE_URL') || 'https://api.openai.com/v1';
410
-
411
- if (!this.apiKey && this.baseUrl === 'https://api.openai.com/v1') {
412
- defaultLogger.warn('Warning: OPENAI_API_KEY is not set.');
413
- }
414
- }
415
-
416
- async chat(
417
- messages: LLMMessage[],
418
- options?: {
419
- model?: string;
420
- tools?: LLMTool[];
421
- onStream?: (chunk: string) => void;
422
- signal?: AbortSignal;
423
- responseSchema?: any;
424
- }
425
- ): Promise<LLMResponse> {
426
- const isStreaming = !!options?.onStream;
427
-
428
- const response = await fetch(`${this.baseUrl}/chat/completions`, {
429
- method: 'POST',
430
- headers: {
431
- 'Content-Type': 'application/json',
432
- Authorization: `Bearer ${this.apiKey}`,
433
- },
434
- body: JSON.stringify({
435
- model: options?.model || 'gpt-4o',
436
- messages,
437
- tools: options?.tools,
438
- stream: isStreaming,
439
- response_format: options?.responseSchema
440
- ? {
441
- type: 'json_schema',
442
- json_schema: {
443
- name: 'output',
444
- strict: true,
445
- schema: options.responseSchema,
446
- },
447
- }
448
- : undefined,
449
- }),
450
- signal: options?.signal,
451
- });
452
-
453
- if (!response.ok) {
454
- const error = await response.text();
455
- throw new Error(`OpenAI API error: ${response.status} ${response.statusText} - ${error}`);
456
- }
457
-
458
- if (isStreaming) {
459
- if (!response.body) throw new Error('Response body is null');
460
- return processOpenAIStream(response, options, 'OpenAI');
461
- }
122
+ // --- Dynamic Provider Registry ---
462
123
 
463
- const data = (await response.json()) as {
464
- choices: { message: LLMMessage }[];
465
- usage?: { prompt_tokens: number; completion_tokens: number; total_tokens: number };
466
- };
124
+ export class DynamicProviderRegistry {
125
+ private static loadedProviders = new Map<string, ProviderFactory | ProviderInstance>();
467
126
 
468
- // Validate response size to prevent memory exhaustion
469
- const contentLength = data.choices[0]?.message?.content?.length ?? 0;
470
- if (contentLength > MAX_RESPONSE_SIZE) {
471
- throw new Error(`LLM response exceeds maximum size of ${MAX_RESPONSE_SIZE} bytes`);
127
+ static async getProvider(
128
+ providerName: string,
129
+ config: Config['providers'][string]
130
+ ): Promise<ProviderFactory | ProviderInstance> {
131
+ if (DynamicProviderRegistry.loadedProviders.has(providerName)) {
132
+ return DynamicProviderRegistry.loadedProviders.get(providerName) as
133
+ | ProviderFactory
134
+ | ProviderInstance;
472
135
  }
473
136
 
474
- return {
475
- message: data.choices[0].message,
476
- usage: data.usage,
477
- };
478
- }
479
-
480
- async embed(
481
- text: string,
482
- model = 'text-embedding-3-small',
483
- options?: { signal?: AbortSignal }
484
- ): Promise<number[]> {
485
- const response = await fetch(`${this.baseUrl}/embeddings`, {
486
- method: 'POST',
487
- headers: {
488
- 'Content-Type': 'application/json',
489
- Authorization: `Bearer ${this.apiKey}`,
490
- },
491
- body: JSON.stringify({
492
- model,
493
- input: text,
494
- }),
495
- signal: options?.signal,
496
- });
497
-
498
- if (!response.ok) {
499
- const error = await response.text();
500
- throw new Error(
501
- `OpenAI Embeddings API error: ${response.status} ${response.statusText} - ${error}`
502
- );
503
- }
504
-
505
- const data = (await response.json()) as {
506
- data: { embedding: number[] }[];
507
- };
508
- return data.data[0].embedding;
509
- }
510
- }
511
-
512
- export class AnthropicAdapter implements LLMAdapter {
513
- private apiKey: string;
514
- private baseUrl: string;
515
- private authMode: 'api-key' | 'oauth';
516
-
517
- constructor(apiKey?: string, baseUrl?: string, authMode: 'api-key' | 'oauth' = 'api-key') {
518
- this.apiKey = apiKey || ConfigLoader.getSecret('ANTHROPIC_API_KEY') || '';
519
- this.baseUrl =
520
- baseUrl || ConfigLoader.getSecret('ANTHROPIC_BASE_URL') || 'https://api.anthropic.com/v1';
521
- this.authMode = authMode;
522
-
523
- if (
524
- this.authMode === 'api-key' &&
525
- !this.apiKey &&
526
- this.baseUrl === 'https://api.anthropic.com/v1'
527
- ) {
528
- defaultLogger.warn('Warning: ANTHROPIC_API_KEY is not set.');
529
- }
530
- }
531
-
532
- private async getAuthHeaders(): Promise<Record<string, string>> {
533
- if (this.authMode === 'oauth') {
534
- const token = await AuthManager.getAnthropicClaudeToken();
535
- if (!token) {
137
+ // 1. Custom Script
138
+ if (config.script) {
139
+ const scriptPath = join(process.cwd(), config.script);
140
+ try {
141
+ const module = await import(scriptPath);
142
+ if (!module.default) {
143
+ throw new Error(`Custom provider script '${scriptPath}' must export a default function.`);
144
+ }
145
+ DynamicProviderRegistry.loadedProviders.set(providerName, module.default);
146
+ return module.default;
147
+ } catch (err) {
536
148
  throw new Error(
537
- 'Anthropic Claude authentication not found. Please run "keystone auth login anthropic-claude" first.'
149
+ `Failed to load custom provider script '${scriptPath}': ${err instanceof Error ? err.message : String(err)}`
538
150
  );
539
151
  }
540
- return {
541
- Authorization: `Bearer ${token}`,
542
- 'anthropic-beta': ANTHROPIC_OAUTH_BETAS,
543
- };
544
- }
545
-
546
- return {
547
- 'x-api-key': this.apiKey,
548
- };
549
- }
550
-
551
- async chat(
552
- messages: LLMMessage[],
553
- options?: {
554
- model?: string;
555
- tools?: LLMTool[];
556
- onStream?: (chunk: string) => void;
557
- signal?: AbortSignal;
558
- responseSchema?: any;
559
152
  }
560
- ): Promise<LLMResponse> {
561
- const isStreaming = !!options?.onStream;
562
- const system = messages.find((m) => m.role === 'system')?.content || undefined;
563
153
 
564
- // Anthropic requires alternating user/assistant roles.
565
- // Sequential tool results must be grouped into a single user message.
566
- const anthropicMessages: Array<{
567
- role: 'user' | 'assistant';
568
- content: string | Array<Record<string, unknown>>;
569
- }> = [];
570
-
571
- for (const m of messages) {
572
- if (m.role === 'system') continue;
573
-
574
- if (m.role === 'tool') {
575
- const lastMsg = anthropicMessages[anthropicMessages.length - 1];
576
- const toolResult = {
577
- type: 'tool_result' as const,
578
- tool_use_id: m.tool_call_id,
579
- content: m.content,
580
- };
581
-
582
- if (lastMsg && lastMsg.role === 'user' && Array.isArray(lastMsg.content)) {
583
- // Append to existing tool result block if previous message was also a tool result
584
- lastMsg.content.push(toolResult);
585
- } else {
586
- // Start a new user message for tool results
587
- anthropicMessages.push({
588
- role: 'user',
589
- content: [toolResult],
590
- });
591
- }
592
- } else if (m.tool_calls) {
593
- anthropicMessages.push({
594
- role: 'assistant',
595
- content: [
596
- ...(m.content ? [{ type: 'text' as const, text: m.content }] : []),
597
- ...m.tool_calls.map((tc) => {
598
- let input = {};
154
+ // 2. Package
155
+ if (config.package) {
156
+ try {
157
+ let pkg: any;
158
+ try {
159
+ // Try local project first
160
+ pkg = await import(config.package);
161
+ } catch {
162
+ try {
163
+ const pkgPath = userRequire.resolve(config.package);
164
+ pkg = await import(pkgPath);
165
+ } catch {
166
+ // Try global if local resolution fails
167
+ const globalReq = getGlobalRequire();
168
+ if (globalReq) {
599
169
  try {
600
- input =
601
- typeof tc.function.arguments === 'string'
602
- ? JSON.parse(tc.function.arguments)
603
- : tc.function.arguments;
604
- } catch (e) {
605
- defaultLogger.error(`Failed to parse tool arguments: ${tc.function.arguments}`);
170
+ const globalPkgPath = globalReq.resolve(config.package);
171
+ pkg = await import(globalPkgPath);
172
+ } catch {
173
+ throw new Error(
174
+ `Failed to resolve package '${config.package}' locally or globally.`
175
+ );
606
176
  }
607
- return {
608
- type: 'tool_use' as const,
609
- id: tc.id,
610
- name: tc.function.name,
611
- input,
612
- };
613
- }),
614
- ],
615
- });
616
- } else {
617
- const role = m.role as 'user' | 'assistant';
618
- const lastMsg = anthropicMessages[anthropicMessages.length - 1];
619
-
620
- if (
621
- lastMsg &&
622
- lastMsg.role === role &&
623
- typeof lastMsg.content === 'string' &&
624
- typeof m.content === 'string'
625
- ) {
626
- lastMsg.content += `\n\n${m.content}`;
627
- } else {
628
- anthropicMessages.push({
629
- role,
630
- content: m.content || '',
631
- });
177
+ } else {
178
+ throw new Error(`Failed to resolve package '${config.package}' locally.`);
179
+ }
180
+ }
632
181
  }
633
- }
634
- }
635
-
636
- const anthropicTools = options?.tools
637
- ? options.tools.map((t) => ({
638
- name: t.function.name,
639
- description: t.function.description,
640
- input_schema: t.function.parameters,
641
- }))
642
- : undefined;
643
182
 
644
- // If responseSchema is provided, Anthropic requires using tool call to force output
645
- const responseTool = options?.responseSchema
646
- ? {
647
- name: 'record_output',
648
- description: 'Record the structured output matching the requested schema',
649
- input_schema: options.responseSchema,
183
+ // If a specific factory is configured, try to use it first
184
+ const factoryKey = config.factory || config.type || 'default';
185
+ if (pkg[factoryKey] && typeof pkg[factoryKey] === 'function') {
186
+ DynamicProviderRegistry.loadedProviders.set(providerName, pkg[factoryKey]);
187
+ return pkg[factoryKey];
650
188
  }
651
- : undefined;
652
-
653
- const combinedTools = [...(anthropicTools || []), ...(responseTool ? [responseTool] : [])];
654
-
655
- const authHeaders = await this.getAuthHeaders();
656
- const response = await fetch(`${this.baseUrl}/messages`, {
657
- method: 'POST',
658
- headers: {
659
- 'Content-Type': 'application/json',
660
- ...authHeaders,
661
- 'anthropic-version': '2023-06-01',
662
- },
663
- body: JSON.stringify({
664
- model: options?.model || 'claude-3-5-sonnet-20240620',
665
- system,
666
- messages: anthropicMessages,
667
- tools: combinedTools.length > 0 ? combinedTools : undefined,
668
- tool_choice: responseTool ? { type: 'tool', name: 'record_output' } : undefined,
669
- max_tokens: 4096,
670
- stream: isStreaming,
671
- }),
672
- signal: options?.signal,
673
- });
674
-
675
- if (!response.ok) {
676
- const error = await response.text();
677
- throw new Error(`Anthropic API error: ${response.status} ${response.statusText} - ${error}`);
678
- }
679
-
680
- if (isStreaming) {
681
- if (!response.body) throw new Error('Response body is null');
682
- const reader = response.body.getReader();
683
- const decoder = new TextDecoder();
684
- let fullContent = '';
685
- // Track tool calls by content block index for robust correlation
686
- const toolCallsMap = new Map<number, { id: string; name: string; inputString: string }>();
687
- const usage = { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 };
688
-
689
- while (true) {
690
- const { done, value } = await reader.read();
691
- if (done) break;
692
-
693
- const chunk = decoder.decode(value);
694
- const lines = chunk.split('\n').filter((line) => line.trim() !== '');
695
-
696
- for (const line of lines) {
697
- if (!line.startsWith('data: ')) continue;
698
189
 
699
- try {
700
- const data = JSON.parse(line.slice(6));
701
- if (data.type === 'content_block_delta' && data.delta?.text) {
702
- if (fullContent.length + data.delta.text.length > MAX_RESPONSE_SIZE) {
703
- throw new Error(`LLM response exceeds maximum size of ${MAX_RESPONSE_SIZE} bytes`);
704
- }
705
- fullContent += data.delta.text;
706
- options.onStream?.(data.delta.text);
707
- }
708
-
709
- // Track tool calls by their index in the content blocks
710
- if (data.type === 'content_block_start' && data.content_block?.type === 'tool_use') {
711
- const index = data.index ?? toolCallsMap.size;
712
- toolCallsMap.set(index, {
713
- id: data.content_block.id || '',
714
- name: data.content_block.name || '',
715
- inputString: '',
716
- });
717
- }
718
-
719
- // Handle tool input streaming - Anthropic uses content_block_delta with input_json_delta
720
- if (
721
- data.type === 'content_block_delta' &&
722
- data.delta?.type === 'input_json_delta' &&
723
- data.delta?.partial_json
724
- ) {
725
- const index = data.index;
726
- const toolCall = toolCallsMap.get(index);
727
- if (toolCall) {
728
- toolCall.inputString += data.delta.partial_json;
729
- }
730
- }
731
-
732
- // Update tool call ID if it arrives later (some edge cases)
733
- if (data.type === 'content_block_delta' && data.content_block?.id) {
734
- const index = data.index;
735
- const toolCall = toolCallsMap.get(index);
736
- if (toolCall && !toolCall.id) {
737
- toolCall.id = data.content_block.id;
738
- }
739
- }
740
-
741
- if (data.type === 'message_start' && data.message?.usage) {
742
- usage.prompt_tokens += data.message.usage.input_tokens || 0;
743
- }
744
- if (data.type === 'message_delta' && data.usage) {
745
- usage.completion_tokens += data.usage.output_tokens || 0;
746
- }
747
- usage.total_tokens = usage.prompt_tokens + usage.completion_tokens;
748
- } catch (e) {
749
- // Log non-SyntaxError exceptions at warning level (they indicate real issues)
750
- if (!(e instanceof SyntaxError)) {
751
- defaultLogger.warn(`[Anthropic Stream] Error processing chunk: ${e}`);
752
- } else if (process.env.DEBUG || process.env.LLM_DEBUG) {
753
- // SyntaxErrors are normal for incomplete chunks - only log in debug mode
754
- process.stderr.write(
755
- `[Anthropic Stream] Incomplete chunk parse: ${line.slice(0, 50)}...\n`
756
- );
190
+ // Discovery fallback: Search for common factory patterns case-insensitively
191
+ const searchTerms = [
192
+ `create${providerName.replace(/[-_]/g, '')}provider`,
193
+ `create${providerName.split(/[-_]/)[0]}provider`,
194
+ providerName.replace(/[-_]/g, ''),
195
+ providerName.split(/[-_]/)[0],
196
+ ];
197
+
198
+ const allKeys = Object.keys(pkg);
199
+ for (const key of allKeys) {
200
+ const lowerKey = key.toLowerCase();
201
+ if (
202
+ searchTerms.some(
203
+ (term) =>
204
+ lowerKey === term ||
205
+ lowerKey === `create${term}provider` ||
206
+ lowerKey.includes(`${term}provider`)
207
+ )
208
+ ) {
209
+ if (typeof pkg[key] === 'function') {
210
+ DynamicProviderRegistry.loadedProviders.set(providerName, pkg[key]);
211
+ return pkg[key];
757
212
  }
758
213
  }
759
214
  }
760
- }
761
-
762
- // Convert map to array and filter out incomplete tool calls
763
- const toolCalls = Array.from(toolCallsMap.values())
764
- .filter((tc) => tc.id && tc.name) // Only include complete tool calls
765
- .map((tc) => ({
766
- id: tc.id,
767
- type: 'function' as const,
768
- function: { name: tc.name, arguments: tc.inputString },
769
- }));
770
215
 
771
- return {
772
- message: {
773
- role: 'assistant',
774
- content: fullContent || null,
775
- tool_calls: toolCalls.length > 0 ? toolCalls : undefined,
776
- },
777
- usage: usage.total_tokens > 0 ? usage : undefined,
778
- };
779
- }
780
-
781
- const data = (await response.json()) as {
782
- content: {
783
- type: 'text' | 'tool_use';
784
- text?: string;
785
- id?: string;
786
- name?: string;
787
- input?: Record<string, unknown>;
788
- }[];
789
- usage: { input_tokens: number; output_tokens: number };
790
- };
216
+ if (pkg.default && typeof pkg.default === 'function') {
217
+ DynamicProviderRegistry.loadedProviders.set(providerName, pkg.default);
218
+ return pkg.default;
219
+ }
791
220
 
792
- const textBlocks = data.content.filter((c) => c.type === 'text');
793
- const thinkingBlocks = data.content.filter((c) => c.type === ('thinking' as any));
221
+ const firstFn = Object.values(pkg).find((v) => typeof v === 'function');
222
+ if (firstFn) {
223
+ DynamicProviderRegistry.loadedProviders.set(providerName, firstFn as any);
224
+ return firstFn as any;
225
+ }
794
226
 
795
- let content =
796
- textBlocks
797
- .map((tb) => tb.text)
798
- .filter(Boolean)
799
- .join('\n') || null;
800
- if (thinkingBlocks.length > 0) {
801
- const thoughts = thinkingBlocks
802
- .map((tb) => (tb as any).thinking)
803
- .filter(Boolean)
804
- .join('\n');
805
- if (thoughts) {
806
- content = `<thinking>\n${thoughts}\n</thinking>${content ? `\n\n${content}` : ''}`;
227
+ throw new Error(
228
+ `Could not find a valid factory function in package '${config.package}'. Available keys: ${allKeys.join(', ')}`
229
+ );
230
+ } catch (err) {
231
+ throw new Error(
232
+ `Failed to load provider package '${config.package}': ${err instanceof Error ? err.message : String(err)}. Please run 'npm install -g ${config.package}' or 'npm install ${config.package}'.`
233
+ );
807
234
  }
808
235
  }
809
236
 
810
- // Validate response size to prevent memory exhaustion
811
- if (content && content.length > MAX_RESPONSE_SIZE) {
812
- throw new Error(`LLM response exceeds maximum size of ${MAX_RESPONSE_SIZE} bytes`);
813
- }
814
-
815
- const toolCalls = data.content
816
- .filter((c) => c.type === 'tool_use')
817
- .map((c) => ({
818
- id: c.id as string,
819
- type: 'function' as const,
820
- function: {
821
- name: c.name as string,
822
- arguments: JSON.stringify(c.input),
823
- },
824
- }));
825
-
826
- return {
827
- message: {
828
- role: 'assistant',
829
- content,
830
- tool_calls: toolCalls.length > 0 ? toolCalls : undefined,
831
- },
832
- usage: {
833
- prompt_tokens: data.usage.input_tokens,
834
- completion_tokens: data.usage.output_tokens,
835
- total_tokens: data.usage.input_tokens + data.usage.output_tokens,
836
- },
837
- };
237
+ throw new Error(
238
+ `Provider '${providerName}' must have a 'package' or 'script' configured in your keystone settings.`
239
+ );
838
240
  }
839
241
  }
840
242
 
841
- export class AnthropicClaudeAdapter extends AnthropicAdapter {
842
- constructor(baseUrl?: string) {
843
- super(undefined, baseUrl, 'oauth');
844
- }
243
+ export function resetProviderRegistry(): void {
244
+ // @ts-ignore: private static property access for test cleanup
245
+ DynamicProviderRegistry.loadedProviders.clear();
845
246
  }
846
247
 
847
- export class OpenAIChatGPTAdapter implements LLMAdapter {
848
- private baseUrl: string;
248
+ async function prepareProvider(
249
+ model: string
250
+ ): Promise<{ provider: ProviderInstance; resolvedModel: string }> {
251
+ const providerName = ConfigLoader.getProviderForModel(model);
252
+ const config = ConfigLoader.load();
253
+ const providerConfig = config.providers[providerName];
849
254
 
850
- constructor(baseUrl?: string) {
851
- this.baseUrl =
852
- baseUrl || ConfigLoader.getSecret('OPENAI_CHATGPT_BASE_URL') || 'https://api.openai.com/v1';
255
+ if (!providerConfig) {
256
+ throw new Error(
257
+ `Provider configuration not found for: ${providerName}. Ensure it is defined in your keystone configuration.`
258
+ );
853
259
  }
854
260
 
855
- private filterMessages(messages: LLMMessage[], model: string): ChatGPTMessage[] {
856
- // Stateless mode requires stripping all IDs and filtering out item_references
857
- const normalizedModel = this.normalizeModel(model);
858
- return messages.map((m): ChatGPTMessage => {
859
- // Create a shallow copy and remove id if it exists
860
- const { id: _id, ...rest } = m as LLMMessageWithId;
261
+ // Pure BYOP: Load provider factory from user configuration
262
+ const providerFactory = await DynamicProviderRegistry.getProvider(providerName, providerConfig);
861
263
 
862
- if (m.tool_calls) {
863
- const toolCalls = m.tool_calls.map((tc) => {
864
- const { id: _toolCallId, ...tcRest } = tc;
865
- return tcRest;
866
- });
867
- return {
868
- ...rest,
869
- tool_calls: toolCalls,
870
- };
871
- }
264
+ // Initialize provider with AuthManager secrets
265
+ const options: Record<string, unknown> = {};
872
266
 
873
- if (
874
- m.role === 'system' &&
875
- (normalizedModel === 'gpt-4o' || normalizedModel.startsWith('o1-'))
876
- ) {
877
- return { ...rest, role: 'developer' as any };
878
- }
879
-
880
- return rest;
881
- });
267
+ if (providerConfig.base_url) {
268
+ options.baseURL = providerConfig.base_url;
882
269
  }
883
270
 
884
- private normalizeModel(model: string): string {
885
- // Map Keystone model names to Codex API expected names
886
- if (model.includes('gpt-5')) return 'gpt-5-codex';
887
- if (model.includes('gpt-4o-codex')) return 'gpt-4o';
888
- return model;
271
+ // Fallback to env var lookup via ConfigLoader if not found above
272
+ if (!options.apiKey && providerConfig.api_key_env) {
273
+ options.apiKey = ConfigLoader.getSecret(providerConfig.api_key_env);
889
274
  }
890
275
 
891
- async chat(
892
- messages: LLMMessage[],
893
- options?: {
894
- model?: string;
895
- tools?: LLMTool[];
896
- onStream?: (chunk: string) => void;
897
- signal?: AbortSignal;
898
- responseSchema?: any;
899
- }
900
- ): Promise<LLMResponse> {
901
- const isStreaming = !!options?.onStream;
902
- const token = await AuthManager.getOpenAIChatGPTToken();
903
- if (!token) {
904
- throw new Error(
905
- 'OpenAI ChatGPT authentication not found. Please run "keystone auth login openai-chatgpt" first.'
906
- );
907
- }
908
-
909
- const filteredMessages = this.filterMessages(messages, options?.model || 'gpt-5-codex');
910
- const resolvedModel = this.normalizeModel(options?.model || 'gpt-5-codex');
911
-
912
- const response = await fetch(`${this.baseUrl}/chat/completions`, {
913
- method: 'POST',
914
- headers: {
915
- 'Content-Type': 'application/json',
916
- Authorization: `Bearer ${token}`,
917
- 'OpenAI-Organization': '', // Ensure clear org context
918
- },
919
- body: JSON.stringify({
920
- model: resolvedModel,
921
- messages: filteredMessages,
922
- tools: options?.tools,
923
- stream: isStreaming,
924
- // Critical for ChatGPT Plus/Pro backend compatibility
925
- store: false,
926
- include: ['reasoning.encrypted_content'],
927
- response_format: options?.responseSchema
928
- ? {
929
- type: 'json_schema',
930
- json_schema: {
931
- name: 'output',
932
- strict: true,
933
- schema: options.responseSchema,
934
- },
935
- }
936
- : undefined,
937
- }),
938
- signal: options?.signal,
939
- });
940
-
941
- if (!response.ok) {
942
- const error = await response.text();
943
- // Handle usage limit messages gracefully
944
- if (response.status === 429 && error.includes('limit')) {
945
- throw new Error(
946
- 'ChatGPT subscription limit reached. Please wait and try again or switch to another provider.'
947
- );
948
- }
949
- throw new Error(
950
- `OpenAI ChatGPT API error: ${response.status} ${response.statusText} - ${error}`
951
- );
952
- }
953
-
954
- if (isStreaming) {
955
- if (!response.body) throw new Error('Response body is null');
956
- return processOpenAIStream(response, options, 'OpenAIChatGPT');
957
- }
958
-
959
- const data = (await response.json()) as {
960
- choices: { message: LLMMessage }[];
961
- usage?: { prompt_tokens: number; completion_tokens: number; total_tokens: number };
962
- };
276
+ // Create the provider instance
277
+ const provider =
278
+ typeof providerFactory === 'function'
279
+ ? (providerFactory as ProviderFactory)(options)
280
+ : providerFactory;
963
281
 
964
- return {
965
- message: data.choices[0].message,
966
- usage: data.usage,
967
- };
968
- }
969
- }
970
-
971
- export class GoogleGeminiAdapter implements LLMAdapter {
972
- private baseUrl: string;
973
- private projectId?: string;
974
-
975
- constructor(baseUrl?: string, projectId?: string) {
976
- this.baseUrl = (baseUrl || Bun.env.GOOGLE_GEMINI_BASE_URL || GEMINI_DEFAULT_BASE_URL).replace(
977
- /\/$/,
978
- ''
282
+ if (!provider) {
283
+ throw new Error(
284
+ `Provider factory for '${providerName}' returned undefined. Check your provider implementation.`
979
285
  );
980
- this.projectId =
981
- projectId || Bun.env.GOOGLE_GEMINI_PROJECT_ID || Bun.env.KEYSTONE_GEMINI_PROJECT_ID;
982
- }
983
-
984
- private sanitizeToolName(name: string, index: number, used: Set<string>): string {
985
- let sanitized = name.replace(/[^a-zA-Z0-9_-]/g, '_').slice(0, 64);
986
- if (!sanitized) {
987
- sanitized = `tool_${index}`;
988
- }
989
- while (used.has(sanitized)) {
990
- sanitized = `${sanitized}_${index}`.slice(0, 64);
991
- }
992
- used.add(sanitized);
993
- return sanitized;
994
- }
995
-
996
- private buildToolMaps(tools?: LLMTool[]): {
997
- nameToSanitized: Map<string, string>;
998
- sanitizedToName: Map<string, string>;
999
- tools?: {
1000
- functionDeclarations: Array<{ name: string; description: string; parameters: unknown }>;
1001
- }[];
1002
- toolConfig?: { functionCallingConfig: { mode: 'AUTO' } };
1003
- } {
1004
- const nameToSanitized = new Map<string, string>();
1005
- const sanitizedToName = new Map<string, string>();
1006
-
1007
- if (!tools || tools.length === 0) {
1008
- return { nameToSanitized, sanitizedToName };
1009
- }
1010
-
1011
- const usedNames = new Set<string>();
1012
- const functionDeclarations = tools.map((tool, index) => {
1013
- const originalName = tool.function.name;
1014
- const sanitized = this.sanitizeToolName(originalName, index, usedNames);
1015
- nameToSanitized.set(originalName, sanitized);
1016
- sanitizedToName.set(sanitized, originalName);
1017
- return {
1018
- name: sanitized,
1019
- description: tool.function.description ?? '',
1020
- parameters: tool.function.parameters ?? { type: 'object', properties: {} },
1021
- };
1022
- });
1023
-
1024
- return {
1025
- nameToSanitized,
1026
- sanitizedToName,
1027
- tools: [{ functionDeclarations }],
1028
- toolConfig: { functionCallingConfig: { mode: 'AUTO' } },
1029
- };
1030
286
  }
1031
287
 
1032
- private parseToolResponse(content: string | null): Record<string, unknown> {
1033
- if (!content) return {};
1034
- try {
1035
- const parsed = JSON.parse(content);
1036
- if (parsed && typeof parsed === 'object') {
1037
- return parsed as Record<string, unknown>;
1038
- }
1039
- return { content: parsed };
1040
- } catch {
1041
- return { content };
288
+ // Resolve model name (strip prefix if typical "provider:model" format)
289
+ let resolvedModel = model;
290
+ if (model.includes(':')) {
291
+ const [prefix, ...rest] = model.split(':');
292
+ if (config.providers[prefix]) {
293
+ resolvedModel = rest.join(':');
1042
294
  }
1043
295
  }
1044
296
 
1045
- private buildContents(
1046
- messages: LLMMessage[],
1047
- nameToSanitized: Map<string, string>
1048
- ): { contents: GeminiContent[]; systemInstruction?: GeminiSystemInstruction } {
1049
- const contents: GeminiContent[] = [];
1050
- const systemParts: string[] = [];
1051
-
1052
- for (const message of messages) {
1053
- if (message.role === 'system') {
1054
- if (message.content) systemParts.push(message.content);
1055
- continue;
1056
- }
1057
-
1058
- const role: GeminiContent['role'] = message.role === 'assistant' ? 'model' : 'user';
1059
- const parts: GeminiPart[] = [];
1060
-
1061
- if (message.role === 'tool') {
1062
- const toolName = message.name
1063
- ? nameToSanitized.get(message.name) || message.name
1064
- : undefined;
1065
- if (toolName) {
1066
- parts.push({
1067
- functionResponse: {
1068
- name: toolName,
1069
- response: this.parseToolResponse(message.content),
1070
- },
1071
- });
1072
- } else if (message.content) {
1073
- parts.push({ text: message.content });
1074
- }
1075
- } else {
1076
- if (message.content) {
1077
- parts.push({ text: message.content });
1078
- }
1079
-
1080
- if (message.tool_calls) {
1081
- for (const toolCall of message.tool_calls) {
1082
- const toolName = nameToSanitized.get(toolCall.function.name) || toolCall.function.name;
1083
- let args: Record<string, unknown> | string = {};
1084
- if (typeof toolCall.function.arguments === 'string') {
1085
- try {
1086
- args = JSON.parse(toolCall.function.arguments) as Record<string, unknown>;
1087
- } catch {
1088
- args = toolCall.function.arguments;
1089
- }
1090
- } else {
1091
- args = toolCall.function.arguments as unknown as Record<string, unknown>;
1092
- }
1093
- parts.push({
1094
- functionCall: {
1095
- name: toolName,
1096
- args,
1097
- },
1098
- });
1099
- }
1100
- }
1101
- }
1102
-
1103
- if (parts.length > 0) {
1104
- contents.push({ role, parts });
1105
- }
1106
- }
297
+ return { provider, resolvedModel };
298
+ }
1107
299
 
1108
- const systemInstruction =
1109
- systemParts.length > 0
1110
- ? {
1111
- parts: [{ text: systemParts.join('\n\n') }],
1112
- }
1113
- : undefined;
300
+ export async function getModel(model: string): Promise<LanguageModel> {
301
+ const { provider, resolvedModel } = await prepareProvider(model);
1114
302
 
1115
- return { contents, systemInstruction };
303
+ // AI SDK convention: provider(modelId)
304
+ if (typeof provider === 'function') {
305
+ return (provider as any)(resolvedModel);
1116
306
  }
1117
307
 
1118
- private buildEndpoint(isStreaming: boolean): string {
1119
- const action = isStreaming ? 'streamGenerateContent' : 'generateContent';
1120
- const suffix = isStreaming ? '?alt=sse' : '';
1121
- return `${this.baseUrl}/v1internal:${action}${suffix}`;
308
+ // Fallback for objects that aren't callable but have standard methods
309
+ if (typeof (provider as any).languageModel === 'function') {
310
+ return (provider as any).languageModel(resolvedModel);
1122
311
  }
1123
-
1124
- private buildUsage(usage?: {
1125
- promptTokenCount?: number;
1126
- candidatesTokenCount?: number;
1127
- totalTokenCount?: number;
1128
- }): LLMResponse['usage'] | undefined {
1129
- if (!usage) return undefined;
1130
- const promptTokens = usage.promptTokenCount ?? 0;
1131
- const completionTokens = usage.candidatesTokenCount ?? 0;
1132
- const totalTokens = usage.totalTokenCount ?? promptTokens + completionTokens;
1133
- return {
1134
- prompt_tokens: promptTokens,
1135
- completion_tokens: completionTokens,
1136
- total_tokens: totalTokens,
1137
- };
312
+ if (typeof (provider as any).chatModel === 'function') {
313
+ return (provider as any).chatModel(resolvedModel);
1138
314
  }
1139
315
 
1140
- private extractGeminiParts(
1141
- data: {
1142
- candidates?: Array<{ content?: { parts?: GeminiPart[] } }>;
1143
- usageMetadata?: {
1144
- promptTokenCount?: number;
1145
- candidatesTokenCount?: number;
1146
- totalTokenCount?: number;
1147
- };
1148
- },
1149
- sanitizedToName: Map<string, string>,
1150
- onStream?: (chunk: string) => void,
1151
- toolCalls?: LLMToolCall[]
1152
- ): { content: string; usage?: LLMResponse['usage'] } {
1153
- let content = '';
1154
- if (Array.isArray(data.candidates)) {
1155
- const candidate = data.candidates[0];
1156
- const parts = candidate?.content?.parts || [];
1157
- for (const part of parts) {
1158
- if (part.text) {
1159
- if (content.length + part.text.length > MAX_RESPONSE_SIZE) {
1160
- throw new Error(`LLM response exceeds maximum size of ${MAX_RESPONSE_SIZE} bytes`);
1161
- }
1162
- content += part.text;
1163
- onStream?.(part.text);
1164
- }
1165
- if (part.functionCall && toolCalls) {
1166
- const originalName =
1167
- sanitizedToName.get(part.functionCall.name) || part.functionCall.name;
1168
- const args = part.functionCall.args ?? {};
1169
- const argsString = typeof args === 'string' ? args : JSON.stringify(args);
1170
- toolCalls.push({
1171
- id: `gemini_tool_${toolCalls.length + 1}`,
1172
- type: 'function',
1173
- function: {
1174
- name: originalName,
1175
- arguments: argsString,
1176
- },
1177
- });
1178
- }
1179
- }
1180
- }
1181
-
1182
- return { content, usage: this.buildUsage(data.usageMetadata) };
1183
- }
1184
-
1185
- async chat(
1186
- messages: LLMMessage[],
1187
- options?: {
1188
- model?: string;
1189
- tools?: LLMTool[];
1190
- onStream?: (chunk: string) => void;
1191
- signal?: AbortSignal;
1192
- responseSchema?: any;
1193
- }
1194
- ): Promise<LLMResponse> {
1195
- const isStreaming = !!options?.onStream;
1196
- const token = await AuthManager.getGoogleGeminiToken();
1197
- if (!token) {
1198
- throw new Error(
1199
- 'Google Gemini authentication not found. Please run "keystone auth login gemini" first.'
1200
- );
1201
- }
1202
-
1203
- const { nameToSanitized, sanitizedToName, tools, toolConfig } = this.buildToolMaps(
1204
- options?.tools
1205
- );
1206
- const { contents, systemInstruction } = this.buildContents(messages, nameToSanitized);
1207
-
1208
- const requestPayload: Record<string, unknown> = {
1209
- contents,
1210
- sessionId: randomUUID(),
1211
- };
1212
- if (systemInstruction) requestPayload.systemInstruction = systemInstruction;
1213
- if (tools) requestPayload.tools = tools;
1214
- if (toolConfig) requestPayload.toolConfig = toolConfig;
1215
-
1216
- if (options?.responseSchema) {
1217
- requestPayload.generationConfig = {
1218
- responseMimeType: 'application/json',
1219
- responseSchema: options.responseSchema,
1220
- };
1221
- }
1222
-
1223
- const authProjectId = this.projectId ? undefined : AuthManager.load().google_gemini?.project_id;
1224
- const resolvedProjectId = this.projectId || authProjectId || GEMINI_DEFAULT_PROJECT_ID;
1225
-
1226
- const wrappedBody = {
1227
- project: resolvedProjectId,
1228
- model: options?.model || 'gemini-3-pro-high',
1229
- request: requestPayload,
1230
- userAgent: 'antigravity',
1231
- requestId: `keystone-${randomUUID()}`,
1232
- };
1233
-
1234
- const response = await fetch(this.buildEndpoint(isStreaming), {
1235
- method: 'POST',
1236
- headers: {
1237
- 'Content-Type': 'application/json',
1238
- Authorization: `Bearer ${token}`,
1239
- ...GEMINI_HEADERS,
1240
- ...(isStreaming ? { Accept: 'text/event-stream' } : {}),
1241
- },
1242
- body: JSON.stringify(wrappedBody),
1243
- signal: options?.signal,
1244
- });
1245
-
1246
- if (!response.ok) {
1247
- const error = await response.text();
1248
- throw new Error(
1249
- `Google Gemini API error: ${response.status} ${response.statusText} - ${error}`
1250
- );
1251
- }
1252
-
1253
- if (isStreaming) {
1254
- if (!response.body) throw new Error('Response body is null');
1255
- const reader = response.body.getReader();
1256
- const decoder = new TextDecoder();
1257
- let buffer = '';
1258
- let fullContent = '';
1259
- const toolCalls: LLMToolCall[] = [];
1260
- let usage: LLMResponse['usage'] | undefined;
1261
-
1262
- while (true) {
1263
- const { done, value } = await reader.read();
1264
- if (done) break;
1265
-
1266
- buffer += decoder.decode(value, { stream: true });
1267
- const lines = buffer.split('\n');
1268
- buffer = lines.pop() || '';
1269
-
1270
- for (const line of lines) {
1271
- const trimmed = line.trim();
1272
- if (!trimmed.startsWith('data:')) continue;
1273
- const payload = trimmed.slice(5).trim();
1274
- if (!payload || payload === '[DONE]') continue;
1275
-
1276
- try {
1277
- const data = JSON.parse(payload) as {
1278
- candidates?: Array<{ content?: { parts?: GeminiPart[] } }>;
1279
- usageMetadata?: {
1280
- promptTokenCount?: number;
1281
- candidatesTokenCount?: number;
1282
- totalTokenCount?: number;
1283
- };
1284
- };
1285
- const result = this.extractGeminiParts(
1286
- data,
1287
- sanitizedToName,
1288
- options?.onStream,
1289
- toolCalls
1290
- );
1291
- if (result.content) {
1292
- if (fullContent.length + result.content.length > MAX_RESPONSE_SIZE) {
1293
- throw new Error(`LLM response exceeds maximum size of ${MAX_RESPONSE_SIZE} bytes`);
1294
- }
1295
- fullContent += result.content;
1296
- }
1297
- if (result.usage) {
1298
- usage = result.usage;
1299
- }
1300
- } catch (e) {
1301
- if (e instanceof Error && e.message.includes('LLM response exceeds')) {
1302
- throw e;
1303
- }
1304
- if (process.env.DEBUG || process.env.LLM_DEBUG) {
1305
- process.stderr.write(`[Gemini Stream] Failed to parse chunk: ${payload}\n`);
1306
- }
1307
- }
1308
- }
1309
- }
1310
-
1311
- const finalToolCalls = toolCalls.length > 0 ? toolCalls : undefined;
1312
- return {
1313
- message: {
1314
- role: 'assistant',
1315
- content: fullContent || null,
1316
- tool_calls: finalToolCalls,
1317
- },
1318
- usage,
1319
- };
1320
- }
1321
-
1322
- const data = (await response.json()) as {
1323
- candidates?: Array<{ content?: { parts?: GeminiPart[] } }>;
1324
- usageMetadata?: {
1325
- promptTokenCount?: number;
1326
- candidatesTokenCount?: number;
1327
- totalTokenCount?: number;
1328
- };
1329
- };
1330
-
1331
- const toolCalls: LLMToolCall[] = [];
1332
- const extracted = this.extractGeminiParts(data, sanitizedToName, undefined, toolCalls);
1333
- const content = extracted.content || null;
1334
-
1335
- return {
1336
- message: {
1337
- role: 'assistant',
1338
- content,
1339
- tool_calls: toolCalls.length > 0 ? toolCalls : undefined,
1340
- },
1341
- usage: extracted.usage,
1342
- };
1343
- }
316
+ const keys = Object.keys(provider as any);
317
+ const type = typeof provider;
318
+ throw new Error(
319
+ `Provider for model '${model}' is not a function (type: ${type}) and has no .languageModel() method. Available keys: ${keys.join(', ')}`
320
+ );
1344
321
  }
1345
322
 
1346
- export class CopilotAdapter implements LLMAdapter {
1347
- private baseUrl: string;
1348
-
1349
- constructor(baseUrl?: string) {
1350
- this.baseUrl = baseUrl || 'https://api.githubcopilot.com';
323
+ export async function getEmbeddingModel(model: string): Promise<EmbeddingModel> {
324
+ // 1. Check for local fallback
325
+ if (model === 'local' || model === 'keystone-local') {
326
+ return new LocalEmbeddingModel();
1351
327
  }
1352
328
 
1353
- async chat(
1354
- messages: LLMMessage[],
1355
- options?: {
1356
- model?: string;
1357
- tools?: LLMTool[];
1358
- onStream?: (chunk: string) => void;
1359
- signal?: AbortSignal;
1360
- responseSchema?: any;
1361
- }
1362
- ): Promise<LLMResponse> {
1363
- const isStreaming = !!options?.onStream;
1364
- const token = await AuthManager.getCopilotToken();
1365
- if (!token) {
1366
- throw new Error('GitHub Copilot token not found. Please run "keystone auth login" first.');
1367
- }
1368
-
1369
- const response = await fetch(`${this.baseUrl}/chat/completions`, {
1370
- method: 'POST',
1371
- headers: {
1372
- 'Content-Type': 'application/json',
1373
- Authorization: `Bearer ${token}`,
1374
- 'vscode-editorid': 'vscode-chat',
1375
- 'vscode-machineid': 'default',
1376
- ...COPILOT_HEADERS,
1377
- },
1378
- body: JSON.stringify({
1379
- model: options?.model || 'gpt-4o',
1380
- messages,
1381
- tools: options?.tools,
1382
- stream: isStreaming,
1383
- }),
1384
- signal: options?.signal,
1385
- });
329
+ try {
330
+ const { provider, resolvedModel } = await prepareProvider(model);
1386
331
 
1387
- if (!response.ok) {
1388
- const error = await response.text();
1389
- throw new Error(`Copilot API error: ${response.status} ${response.statusText} - ${error}`);
332
+ // AI SDK convention: provider.textEmbeddingModel(modelId) OR provider.embedding(modelId)
333
+ // We check all known variations to be safe with different provider implementations or versions
334
+ if (typeof provider.textEmbeddingModel === 'function') {
335
+ return provider.textEmbeddingModel(resolvedModel);
1390
336
  }
1391
-
1392
- if (isStreaming) {
1393
- // Use the same streaming logic as OpenAIAdapter since Copilot uses OpenAI API
1394
- if (!response.body) throw new Error('Response body is null');
1395
- return processOpenAIStream(response, options, 'Copilot');
337
+ if (typeof provider.embedding === 'function') {
338
+ return provider.embedding(resolvedModel);
1396
339
  }
1397
-
1398
- const data = (await response.json()) as {
1399
- choices: { message: LLMMessage }[];
1400
- usage?: { prompt_tokens: number; completion_tokens: number; total_tokens: number };
1401
- };
1402
-
1403
- // Validate response size to prevent memory exhaustion
1404
- const contentLength = data.choices[0]?.message?.content?.length ?? 0;
1405
- if (contentLength > MAX_RESPONSE_SIZE) {
1406
- throw new Error(`LLM response exceeds maximum size of ${MAX_RESPONSE_SIZE} bytes`);
340
+ if (typeof provider.textEmbedding === 'function') {
341
+ return provider.textEmbedding(resolvedModel);
1407
342
  }
1408
-
1409
- return {
1410
- message: data.choices[0].message,
1411
- usage: data.usage,
1412
- };
1413
- }
1414
- }
1415
-
1416
- export class LocalEmbeddingAdapter implements LLMAdapter {
1417
- private static extractor: unknown = null;
1418
-
1419
- async chat(
1420
- _messages: LLMMessage[],
1421
- _options?: {
1422
- model?: string;
1423
- tools?: LLMTool[];
1424
- onStream?: (chunk: string) => void;
1425
- signal?: AbortSignal;
343
+ } catch (err) {
344
+ // If explicit provider setup fails AND it's a default attempt, fallback to local
345
+ const config = ConfigLoader.load();
346
+ if (model === config.embedding_model || !model) {
347
+ new ConsoleLogger().warn(
348
+ `⚠️ Embedding provider for '${model}' failed, falling back to local embeddings: ${err instanceof Error ? err.message : String(err)}`
349
+ );
350
+ return new LocalEmbeddingModel();
1426
351
  }
1427
- ): Promise<LLMResponse> {
1428
- throw new Error(
1429
- 'Local models in Keystone currently only support memory/embedding operations. ' +
1430
- 'To use a local LLM for chat/generation, please use an OpenAI-compatible local server ' +
1431
- '(like Ollama, LM Studio, or LocalAI) and configure it as an OpenAI provider in your config.'
1432
- );
352
+ throw err;
1433
353
  }
1434
354
 
1435
- async embed(
1436
- text: string,
1437
- model = 'Xenova/all-MiniLM-L6-v2',
1438
- options?: { signal?: AbortSignal }
1439
- ): Promise<number[]> {
1440
- const modelToUse = model === 'local' ? 'Xenova/all-MiniLM-L6-v2' : model;
1441
- if (options?.signal?.aborted) throw new Error('Embedding aborted');
1442
- if (!LocalEmbeddingAdapter.extractor) {
1443
- try {
1444
- ensureOnnxRuntimeLibraryPath();
1445
- const pipeline = await getTransformersPipeline();
1446
- LocalEmbeddingAdapter.extractor = await pipeline('feature-extraction', modelToUse);
1447
- } catch (error) {
1448
- const details = error instanceof Error ? error.message : String(error);
1449
- throw new Error(
1450
- `Failed to initialize local embeddings. If you are running a compiled binary, ensure the keystone-runtime directory is next to the executable (or set KEYSTONE_RUNTIME_DIR), and that the ONNX Runtime shared library is available (set KEYSTONE_ONNX_RUNTIME_LIB_DIR or place it next to the executable). Original error: ${details}`
1451
- );
1452
- }
1453
- }
1454
- const output = await LocalEmbeddingAdapter.extractor(text, {
1455
- pooling: 'mean',
1456
- normalize: true,
1457
- });
1458
- return Array.from(output.data);
1459
- }
355
+ // Some providers might just return the model directly if called, but usually that's for LanguageModel.
356
+ // We assume standard AI SDK provider structure here.
357
+ throw new Error(
358
+ `Provider for model '${model}' does not support embeddings (no .textEmbeddingModel, .embedding, or .textEmbedding method found).`
359
+ );
1460
360
  }
1461
361
 
1462
- export function getAdapter(model: string): { adapter: LLMAdapter; resolvedModel: string } {
1463
- if (model === 'local' || model.startsWith('local:')) {
1464
- const resolvedModel = model === 'local' ? 'Xenova/all-MiniLM-L6-v2' : model.substring(6);
1465
- return { adapter: new LocalEmbeddingAdapter(), resolvedModel };
1466
- }
1467
-
1468
- const providerName = ConfigLoader.getProviderForModel(model);
1469
- const config = ConfigLoader.load();
1470
- const providerConfig = config.providers[providerName];
1471
-
1472
- if (!providerConfig) {
1473
- throw new Error(`Provider configuration not found for: ${providerName}`);
1474
- }
1475
-
1476
- let resolvedModel = model;
1477
- if (model.includes(':')) {
1478
- const [prefix, ...rest] = model.split(':');
1479
- if (config.providers[prefix]) {
1480
- resolvedModel = rest.join(':');
1481
- }
1482
- }
1483
-
1484
- let adapter: LLMAdapter;
1485
- if (providerConfig.type === 'copilot') {
1486
- adapter = new CopilotAdapter(providerConfig.base_url);
1487
- } else if (providerConfig.type === 'openai-chatgpt') {
1488
- adapter = new OpenAIChatGPTAdapter(providerConfig.base_url);
1489
- } else if (providerConfig.type === 'google-gemini') {
1490
- adapter = new GoogleGeminiAdapter(providerConfig.base_url, providerConfig.project_id);
1491
- } else if (providerConfig.type === 'anthropic-claude') {
1492
- adapter = new AnthropicClaudeAdapter(providerConfig.base_url);
1493
- } else {
1494
- const apiKey = providerConfig.api_key_env
1495
- ? ConfigLoader.getSecret(providerConfig.api_key_env)
1496
- : undefined;
1497
-
1498
- if (providerConfig.type === 'anthropic') {
1499
- adapter = new AnthropicAdapter(apiKey, providerConfig.base_url);
1500
- } else {
1501
- adapter = new OpenAIAdapter(apiKey, providerConfig.base_url);
1502
- }
1503
- }
1504
-
1505
- return { adapter, resolvedModel };
362
+ /**
363
+ * Dispose of the local embedding model's cached pipeline to free memory.
364
+ * Call this during graceful shutdown or when embeddings are no longer needed.
365
+ */
366
+ export function disposeLocalEmbeddingModel(): void {
367
+ LocalEmbeddingModel.dispose();
1506
368
  }