keystone-cli 1.3.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. package/README.md +114 -140
  2. package/package.json +6 -3
  3. package/src/cli.ts +54 -369
  4. package/src/commands/init.ts +15 -29
  5. package/src/db/memory-db.test.ts +45 -0
  6. package/src/db/memory-db.ts +47 -21
  7. package/src/db/sqlite-setup.ts +26 -3
  8. package/src/db/workflow-db.ts +12 -5
  9. package/src/parser/config-schema.ts +11 -13
  10. package/src/parser/schema.ts +4 -2
  11. package/src/runner/__test__/llm-mock-setup.ts +173 -0
  12. package/src/runner/__test__/llm-test-setup.ts +271 -0
  13. package/src/runner/engine-executor.test.ts +25 -18
  14. package/src/runner/executors/blueprint-executor.ts +0 -1
  15. package/src/runner/executors/dynamic-executor.ts +11 -6
  16. package/src/runner/executors/engine-executor.ts +5 -1
  17. package/src/runner/executors/llm-executor.ts +502 -1033
  18. package/src/runner/executors/memory-executor.ts +35 -19
  19. package/src/runner/executors/plan-executor.ts +0 -1
  20. package/src/runner/executors/types.ts +4 -4
  21. package/src/runner/llm-adapter.integration.test.ts +151 -0
  22. package/src/runner/llm-adapter.ts +263 -1401
  23. package/src/runner/llm-clarification.test.ts +91 -106
  24. package/src/runner/llm-executor.test.ts +217 -1181
  25. package/src/runner/memoization.test.ts +0 -1
  26. package/src/runner/recovery-security.test.ts +51 -20
  27. package/src/runner/reflexion.test.ts +55 -18
  28. package/src/runner/standard-tools-integration.test.ts +137 -87
  29. package/src/runner/step-executor.test.ts +36 -80
  30. package/src/runner/step-executor.ts +0 -2
  31. package/src/runner/test-harness.ts +3 -29
  32. package/src/runner/tool-integration.test.ts +122 -73
  33. package/src/runner/workflow-runner.ts +92 -35
  34. package/src/runner/workflow-scheduler.ts +11 -1
  35. package/src/runner/workflow-summary.ts +144 -0
  36. package/src/utils/auth-manager.test.ts +10 -520
  37. package/src/utils/auth-manager.ts +3 -756
  38. package/src/utils/config-loader.ts +12 -0
  39. package/src/utils/constants.ts +0 -17
  40. package/src/utils/process-sandbox.ts +15 -3
  41. package/src/runner/llm-adapter-runtime.test.ts +0 -209
  42. package/src/runner/llm-adapter.test.ts +0 -1012
@@ -3,7 +3,8 @@ import { randomUUID } from 'node:crypto';
3
3
  import { existsSync, mkdirSync } from 'node:fs';
4
4
  import { dirname, join } from 'node:path';
5
5
  import * as sqliteVec from 'sqlite-vec';
6
- import './sqlite-setup.ts';
6
+ import { ConsoleLogger } from '../utils/logger';
7
+ import { setupSqlite } from './sqlite-setup.ts';
7
8
 
8
9
  export interface MemoryEntry {
9
10
  id: string;
@@ -64,9 +65,16 @@ export class MemoryDb {
64
65
  private db: Database;
65
66
  // Cache connections by path to avoid reloading extensions
66
67
  private static connectionCache = new Map<string, { db: Database; refCount: number }>();
67
- static readonly EMBEDDING_DIMENSION = 384;
68
+ private tableName: string;
68
69
 
69
- constructor(public readonly dbPath = '.keystone/memory.db') {
70
+ constructor(
71
+ public readonly dbPath = '.keystone/memory.db',
72
+ private readonly embeddingDimension = 384
73
+ ) {
74
+ // Ensure SQLite is set up with custom library on macOS (idempotent)
75
+ setupSqlite();
76
+
77
+ this.tableName = `vec_memory_${embeddingDimension}`;
70
78
  const cached = MemoryDb.connectionCache.get(dbPath);
71
79
  if (cached) {
72
80
  cached.refCount++;
@@ -89,10 +97,36 @@ export class MemoryDb {
89
97
  }
90
98
 
91
99
  private initSchema(): void {
100
+ // Check if the legacy 'vec_memory' table exists and what its dimension is
101
+ const legacyTable = this.db
102
+ .prepare("SELECT sql FROM sqlite_master WHERE type='table' AND name='vec_memory'")
103
+ .get() as { sql: string } | undefined;
104
+
105
+ if (legacyTable) {
106
+ const match = legacyTable.sql.match(/FLOAT\[(\d+)\]/i);
107
+ if (match && Number.parseInt(match[1], 10) === this.embeddingDimension) {
108
+ // Legacy table exists and matches our dimension, reuse it
109
+ this.tableName = 'vec_memory';
110
+ } else {
111
+ // Mismatch or couldn't parse. We will use the specific table name `vec_memory_{dim}`.
112
+ // We log a warning to stdout since we don't have a logger instance here,
113
+ // but only if we haven't already created the specific table (to avoid spamming on every init).
114
+ const specificTableExists = this.db
115
+ .prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='${this.tableName}'`)
116
+ .get();
117
+ if (!specificTableExists) {
118
+ new ConsoleLogger().warn(
119
+ `\n⚠️ Vector DB: Found legacy table 'vec_memory' with dimension mismatch (expected ${this.embeddingDimension}).\n` +
120
+ `Using new table '${this.tableName}' instead. Old data is preserved in 'vec_memory'.\n`
121
+ );
122
+ }
123
+ }
124
+ }
125
+
92
126
  this.db.run(`
93
- CREATE VIRTUAL TABLE IF NOT EXISTS vec_memory USING vec0(
127
+ CREATE VIRTUAL TABLE IF NOT EXISTS ${this.tableName} USING vec0(
94
128
  id TEXT PRIMARY KEY,
95
- embedding FLOAT[${MemoryDb.EMBEDDING_DIMENSION}]
129
+ embedding FLOAT[${this.embeddingDimension}]
96
130
  );
97
131
  `);
98
132
 
@@ -106,10 +140,10 @@ export class MemoryDb {
106
140
  `);
107
141
  }
108
142
 
109
- private static assertEmbeddingDimension(embedding: number[]): void {
110
- if (embedding.length !== MemoryDb.EMBEDDING_DIMENSION) {
143
+ private assertEmbeddingDimension(embedding: number[]): void {
144
+ if (embedding.length !== this.embeddingDimension) {
111
145
  throw new Error(
112
- `Embedding dimension mismatch: expected ${MemoryDb.EMBEDDING_DIMENSION}, got ${embedding.length}`
146
+ `Embedding dimension mismatch: expected ${this.embeddingDimension}, got ${embedding.length}`
113
147
  );
114
148
  }
115
149
  }
@@ -117,12 +151,8 @@ export class MemoryDb {
117
151
  /**
118
152
  * Store an embedding and its associated text/metadata.
119
153
  *
120
- * Note: The async signature provides interface compatibility with potentially
121
- * async backends (e.g., remote vector DBs). The current implementation uses
122
- * synchronous bun:sqlite operations internally.
123
- *
124
154
  * @param text - The text content to store
125
- * @param embedding - The embedding vector (384 dimensions)
155
+ * @param embedding - The embedding vector
126
156
  * @param metadata - Optional metadata to associate with the entry
127
157
  * @returns The generated entry ID
128
158
  */
@@ -133,11 +163,11 @@ export class MemoryDb {
133
163
  ): Promise<string> {
134
164
  const id = randomUUID();
135
165
  const createdAt = new Date().toISOString();
136
- MemoryDb.assertEmbeddingDimension(embedding);
166
+ this.assertEmbeddingDimension(embedding);
137
167
 
138
168
  // bun:sqlite transaction wrapper ensures atomicity synchronously
139
169
  const insertTransaction = this.db.transaction(() => {
140
- this.db.run('INSERT INTO vec_memory(id, embedding) VALUES (?, ?)', [
170
+ this.db.run(`INSERT INTO ${this.tableName}(id, embedding) VALUES (?, ?)`, [
141
171
  id,
142
172
  new Float32Array(embedding),
143
173
  ]);
@@ -155,23 +185,19 @@ export class MemoryDb {
155
185
  /**
156
186
  * Search for similar embeddings using vector similarity.
157
187
  *
158
- * Note: The async signature provides interface compatibility with potentially
159
- * async backends (e.g., remote vector DBs). The current implementation uses
160
- * synchronous bun:sqlite operations internally.
161
- *
162
188
  * @param embedding - The query embedding vector
163
189
  * @param limit - Maximum number of results to return (default: 5)
164
190
  * @returns Array of matching entries with distance scores
165
191
  */
166
192
  async search(embedding: number[], limit = 5): Promise<MemoryEntry[]> {
167
- MemoryDb.assertEmbeddingDimension(embedding);
193
+ this.assertEmbeddingDimension(embedding);
168
194
  const query = `
169
195
  SELECT
170
196
  v.id,
171
197
  v.distance,
172
198
  m.text,
173
199
  m.metadata
174
- FROM vec_memory v
200
+ FROM ${this.tableName} v
175
201
  JOIN memory_metadata m ON v.id = m.id
176
202
  WHERE embedding MATCH ? AND k = ?
177
203
  ORDER BY distance
@@ -2,7 +2,19 @@ import { Database } from 'bun:sqlite';
2
2
  import { existsSync } from 'node:fs';
3
3
  import { ConsoleLogger, type Logger } from '../utils/logger.ts';
4
4
 
5
- export function setupSqlite(logger: Logger = new ConsoleLogger()) {
5
+ let sqliteSetupComplete = false;
6
+
7
+ /**
8
+ * Setup SQLite with a custom library on macOS to support extensions.
9
+ * This is idempotent - calling it multiple times is safe.
10
+ */
11
+ export function setupSqlite(logger: Logger = new ConsoleLogger()): void {
12
+ // Only run setup once
13
+ if (sqliteSetupComplete) {
14
+ return;
15
+ }
16
+ sqliteSetupComplete = true;
17
+
6
18
  // macOS typically comes with a system SQLite that doesn't support extensions
7
19
  // We need to try to load a custom one (e.g. from Homebrew) if on macOS
8
20
  if (process.platform === 'darwin') {
@@ -44,5 +56,16 @@ export function setupSqlite(logger: Logger = new ConsoleLogger()) {
44
56
  }
45
57
  }
46
58
 
47
- // Run setup immediately when imported
48
- setupSqlite();
59
+ /**
60
+ * Reset SQLite setup state (mainly for testing).
61
+ */
62
+ export function resetSqliteSetup(): void {
63
+ sqliteSetupComplete = false;
64
+ }
65
+
66
+ /**
67
+ * Check if SQLite setup has been completed.
68
+ */
69
+ export function isSqliteSetupComplete(): boolean {
70
+ return sqliteSetupComplete;
71
+ }
@@ -2,7 +2,6 @@ import { Database, type Statement } from 'bun:sqlite';
2
2
  import { randomUUID } from 'node:crypto';
3
3
  import { existsSync, mkdirSync } from 'node:fs';
4
4
  import { dirname } from 'node:path';
5
- import './sqlite-setup.ts';
6
5
  import {
7
6
  StepStatus as StepStatusConst,
8
7
  type StepStatusType,
@@ -11,6 +10,7 @@ import {
11
10
  } from '../types/status';
12
11
  import { DB, LIMITS } from '../utils/constants';
13
12
  import { PathResolver } from '../utils/paths';
13
+ import { setupSqlite } from './sqlite-setup.ts';
14
14
 
15
15
  export type RunStatus = WorkflowStatusType | 'pending';
16
16
  export type StepStatus = StepStatusType;
@@ -162,6 +162,9 @@ export class WorkflowDb {
162
162
  private isClosed = false;
163
163
 
164
164
  constructor(public readonly dbPath = PathResolver.resolveDbPath()) {
165
+ // Ensure SQLite is set up with custom library on macOS (idempotent)
166
+ setupSqlite();
167
+
165
168
  const dir = dirname(dbPath);
166
169
  if (!existsSync(dir)) {
167
170
  mkdirSync(dir, { recursive: true });
@@ -197,7 +200,11 @@ export class WorkflowDb {
197
200
  ORDER BY started_at DESC
198
201
  LIMIT ?
199
202
  `);
200
- this.pruneRunsStmt = this.db.prepare('DELETE FROM workflow_runs WHERE started_at < ?');
203
+ this.pruneRunsStmt = this.db.prepare(`
204
+ DELETE FROM workflow_runs
205
+ WHERE started_at < ?
206
+ AND status IN ('success', 'failed', 'canceled')
207
+ `);
201
208
  this.createStepStmt = this.db.prepare(`
202
209
  INSERT INTO step_executions (id, run_id, step_id, iteration_index, status, retry_count)
203
210
  VALUES (?, ?, ?, ?, ?, ?)
@@ -448,7 +455,7 @@ export class WorkflowDb {
448
455
  * Uses exponential backoff with jitter to reduce contention.
449
456
  */
450
457
  private async withRetry<T>(operation: () => T, maxRetries = LIMITS.MAX_DB_RETRIES): Promise<T> {
451
- let lastError: any;
458
+ let lastError: unknown;
452
459
 
453
460
  for (let attempt = 0; attempt < maxRetries; attempt++) {
454
461
  try {
@@ -477,13 +484,13 @@ export class WorkflowDb {
477
484
 
478
485
  // Wrap non-busy errors in DatabaseError
479
486
  const msg = error instanceof Error ? error.message : String(error);
480
- const code = (error as any)?.code;
487
+ const code = (error as { code?: string | number })?.code;
481
488
  throw new DatabaseError(msg, code, false);
482
489
  }
483
490
  }
484
491
 
485
492
  const msg = lastError instanceof Error ? lastError.message : String(lastError);
486
- const code = (lastError as any)?.code;
493
+ const code = (lastError as { code?: string | number })?.code;
487
494
  throw new DatabaseError(
488
495
  `SQLite operation failed after ${maxRetries} retries: ${msg}`,
489
496
  code,
@@ -3,43 +3,41 @@ import { z } from 'zod';
3
3
  export const ConfigSchema = z.object({
4
4
  default_provider: z.string().default('openai'),
5
5
  default_model: z.string().optional(),
6
+ embedding_model: z.string().optional(),
7
+ embedding_dimension: z.number().int().positive().default(384),
6
8
  providers: z
7
9
  .record(
8
10
  z.object({
9
11
  type: z
10
- .enum([
11
- 'openai',
12
- 'anthropic',
13
- 'anthropic-claude',
14
- 'copilot',
15
- 'openai-chatgpt',
16
- 'google-gemini',
17
- ])
12
+ .enum(['openai', 'anthropic'])
13
+ .or(z.string()) // Allow custom types for BYOP
18
14
  .default('openai'),
19
15
  base_url: z.string().optional(),
20
16
  api_key_env: z.string().optional(),
21
17
  default_model: z.string().optional(),
22
18
  project_id: z.string().optional(),
19
+ embedding_dimension: z.number().int().positive().optional(),
20
+ // BYOP fields
21
+ package: z.string().optional(),
22
+ factory: z.string().optional(),
23
+ script: z.string().optional(),
23
24
  })
24
25
  )
25
26
  .default({
26
27
  openai: {
27
28
  type: 'openai',
29
+ package: '@ai-sdk/openai',
28
30
  base_url: 'https://api.openai.com/v1',
29
31
  api_key_env: 'OPENAI_API_KEY',
30
32
  default_model: 'gpt-4o',
31
33
  },
32
34
  anthropic: {
33
35
  type: 'anthropic',
36
+ package: '@ai-sdk/anthropic',
34
37
  base_url: 'https://api.anthropic.com/v1',
35
38
  api_key_env: 'ANTHROPIC_API_KEY',
36
39
  default_model: 'claude-3-5-sonnet-20240620',
37
40
  },
38
- copilot: {
39
- type: 'copilot',
40
- base_url: 'https://api.githubcopilot.com',
41
- default_model: 'gpt-4o',
42
- },
43
41
  }),
44
42
  model_mappings: z.record(z.string()).default({
45
43
  'claude-*': 'anthropic',
@@ -444,8 +444,10 @@ const DynamicStepSchema = BaseStepSchema.extend({
444
444
  allowInsecure: z.boolean().optional(), // Allow generated steps to use insecure commands (e.g. shell redirects)
445
445
  });
446
446
 
447
- // ===== Discriminated Union for Steps =====
448
-
447
+ // Note: `as any` casts are required here because of circular type references:
448
+ // BaseStepSchema.compensate → StepSchema → all step schemas → BaseStepSchema
449
+ // TypeScript cannot infer types through this cycle, so we use z.ZodType<any>
450
+ // and cast each schema. This is a known Zod limitation with recursive schemas.
449
451
  export const StepSchema: z.ZodType<any> = z.lazy(() =>
450
452
  z.discriminatedUnion('type', [
451
453
  ShellStepSchema as any,
@@ -0,0 +1,173 @@
1
+ /**
2
+ * Shared test mock setup for LLM adapter
3
+ *
4
+ * This file provides a unified mock model and setup utilities for tests
5
+ * that need to mock the LLM adapter without affecting other test files.
6
+ *
7
+ * Usage:
8
+ * 1. Import this at the top of your test file BEFORE any SUT imports
9
+ * 2. Call setupLlmAdapterMocks() before your tests
10
+ * 3. Use setCurrentChatFn() to control mock responses
11
+ */
12
+ import { mock } from 'bun:test';
13
+
14
+ // Mock response type
15
+ export interface MockLLMResponse {
16
+ message: {
17
+ role: string;
18
+ content?: string | null;
19
+ tool_calls?: Array<{
20
+ id: string;
21
+ type: 'function';
22
+ function: { name: string; arguments: string };
23
+ }>;
24
+ };
25
+ usage?: { prompt_tokens: number; completion_tokens: number; total_tokens: number };
26
+ }
27
+
28
+ // Shared mock getModel function
29
+ export const mockGetModel = mock();
30
+ export const mockGetEmbeddingModel = mock();
31
+
32
+ // Current chat function - set this in your test to control responses
33
+ let _currentChatFn: (messages: any[], options?: any) => Promise<MockLLMResponse> = async () => ({
34
+ message: { role: 'assistant', content: 'Default mock response' },
35
+ });
36
+
37
+ export function setCurrentChatFn(fn: typeof _currentChatFn) {
38
+ _currentChatFn = fn;
39
+ }
40
+
41
+ export function getCurrentChatFn() {
42
+ return _currentChatFn;
43
+ }
44
+
45
+ /**
46
+ * Creates a unified mock model that simulates AI SDK LanguageModel behavior.
47
+ * This is used as the return value for mockGetModel.
48
+ */
49
+ export function createUnifiedMockModel() {
50
+ return {
51
+ specificationVersion: 'v2',
52
+ provider: 'mock',
53
+ modelId: 'mock-model',
54
+ doStream: async (options: any) => {
55
+ // Convert AI SDK prompt format to our test format
56
+ const mapMessages = (prompt: any[]) =>
57
+ prompt.flatMap((m: any) => {
58
+ let content = m.content;
59
+ if (Array.isArray(m.content)) {
60
+ const toolResults = m.content.filter((p: any) => p.type === 'tool-result');
61
+ if (toolResults.length > 0) {
62
+ return toolResults.map((tr: any) => ({
63
+ role: 'tool',
64
+ tool_call_id: tr.toolCallId,
65
+ content: JSON.stringify(tr.result),
66
+ }));
67
+ }
68
+ const textParts = m.content
69
+ .filter((p: any) => p.type === 'text')
70
+ .map((p: any) => p.text)
71
+ .join('');
72
+ if (textParts) content = textParts;
73
+ }
74
+ return [
75
+ {
76
+ role: m.role,
77
+ content: typeof content === 'string' ? content : JSON.stringify(content),
78
+ },
79
+ ];
80
+ });
81
+
82
+ const messages = mapMessages(options.prompt || options.input);
83
+ const tools = (options.tools || options.mode?.tools)?.map((t: any) => ({
84
+ type: 'function',
85
+ function: {
86
+ name: t.name,
87
+ description: t.description,
88
+ parameters: t.parameters || t.inputSchema,
89
+ },
90
+ }));
91
+
92
+ const response = await _currentChatFn(messages, { tools });
93
+
94
+ const stream = new ReadableStream({
95
+ async start(controller) {
96
+ if (response.message.content) {
97
+ controller.enqueue({
98
+ type: 'text-delta',
99
+ delta: response.message.content,
100
+ text: response.message.content,
101
+ });
102
+ }
103
+
104
+ const toolCalls = response.message.tool_calls?.map((tc: any) => ({
105
+ type: 'tool-call',
106
+ toolCallId: tc.id,
107
+ toolName: tc.function.name,
108
+ args:
109
+ typeof tc.function.arguments === 'string'
110
+ ? JSON.parse(tc.function.arguments)
111
+ : tc.function.arguments,
112
+ id: tc.id,
113
+ name: tc.function.name,
114
+ input:
115
+ typeof tc.function.arguments === 'string'
116
+ ? tc.function.arguments
117
+ : JSON.stringify(tc.function.arguments),
118
+ }));
119
+
120
+ if (toolCalls?.length) {
121
+ for (const tc of toolCalls) {
122
+ controller.enqueue(tc);
123
+ }
124
+ }
125
+
126
+ controller.enqueue({
127
+ type: 'finish',
128
+ finishReason: toolCalls?.length ? 'tool-calls' : 'stop',
129
+ usage: { promptTokens: 10, completionTokens: 5 },
130
+ });
131
+
132
+ controller.close();
133
+ },
134
+ });
135
+
136
+ return { stream, rawResponse: { headers: {} } };
137
+ },
138
+ };
139
+ }
140
+
141
+ /**
142
+ * Sets up the LLM adapter module mocks.
143
+ * Call this at the TOP of your test file, before any imports of the SUT.
144
+ */
145
+ export function setupLlmAdapterMocks() {
146
+ mock.module('../llm-adapter', () => ({
147
+ getModel: mockGetModel,
148
+ getEmbeddingModel: mockGetEmbeddingModel,
149
+ DynamicProviderRegistry: { getProvider: mock() },
150
+ }));
151
+
152
+ // Also mock with relative paths that might be used
153
+ mock.module('./llm-adapter', () => ({
154
+ getModel: mockGetModel,
155
+ getEmbeddingModel: mockGetEmbeddingModel,
156
+ DynamicProviderRegistry: { getProvider: mock() },
157
+ }));
158
+
159
+ // Reset mocks to use the unified model
160
+ mockGetModel.mockReset();
161
+ mockGetModel.mockResolvedValue(createUnifiedMockModel());
162
+ }
163
+
164
+ /**
165
+ * Resets all mocks to default state. Call in afterEach if needed.
166
+ */
167
+ export function resetLlmMocks() {
168
+ mockGetModel.mockReset();
169
+ mockGetModel.mockResolvedValue(createUnifiedMockModel());
170
+ _currentChatFn = async () => ({
171
+ message: { role: 'assistant', content: 'Default mock response' },
172
+ });
173
+ }