keystone-cli 1.2.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +163 -138
- package/package.json +6 -3
- package/src/cli.ts +54 -369
- package/src/commands/init.ts +19 -27
- package/src/db/dynamic-state-manager.test.ts +319 -0
- package/src/db/dynamic-state-manager.ts +411 -0
- package/src/db/memory-db.test.ts +45 -0
- package/src/db/memory-db.ts +47 -21
- package/src/db/sqlite-setup.ts +26 -3
- package/src/db/workflow-db.ts +76 -5
- package/src/parser/config-schema.ts +11 -13
- package/src/parser/schema.ts +37 -2
- package/src/parser/workflow-parser.test.ts +3 -4
- package/src/parser/workflow-parser.ts +3 -62
- package/src/runner/__test__/llm-mock-setup.ts +173 -0
- package/src/runner/__test__/llm-test-setup.ts +271 -0
- package/src/runner/engine-executor.test.ts +25 -18
- package/src/runner/executors/blueprint-executor.ts +0 -1
- package/src/runner/executors/dynamic-executor.test.ts +613 -0
- package/src/runner/executors/dynamic-executor.ts +723 -0
- package/src/runner/executors/dynamic-types.ts +69 -0
- package/src/runner/executors/engine-executor.ts +5 -1
- package/src/runner/executors/llm-executor.ts +502 -1033
- package/src/runner/executors/memory-executor.ts +35 -19
- package/src/runner/executors/plan-executor.ts +0 -1
- package/src/runner/executors/types.ts +4 -4
- package/src/runner/llm-adapter.integration.test.ts +151 -0
- package/src/runner/llm-adapter.ts +263 -1401
- package/src/runner/llm-clarification.test.ts +91 -106
- package/src/runner/llm-executor.test.ts +217 -1181
- package/src/runner/memoization.test.ts +0 -1
- package/src/runner/recovery-security.test.ts +51 -20
- package/src/runner/reflexion.test.ts +55 -18
- package/src/runner/standard-tools-integration.test.ts +137 -87
- package/src/runner/step-executor.test.ts +36 -80
- package/src/runner/step-executor.ts +20 -2
- package/src/runner/test-harness.ts +3 -29
- package/src/runner/tool-integration.test.ts +122 -73
- package/src/runner/workflow-runner.ts +92 -35
- package/src/runner/workflow-scheduler.ts +11 -1
- package/src/runner/workflow-summary.ts +144 -0
- package/src/templates/dynamic-demo.yaml +31 -0
- package/src/templates/scaffolding/decompose-problem.yaml +1 -1
- package/src/templates/scaffolding/dynamic-decompose.yaml +39 -0
- package/src/utils/auth-manager.test.ts +10 -520
- package/src/utils/auth-manager.ts +3 -756
- package/src/utils/config-loader.ts +12 -0
- package/src/utils/constants.ts +0 -17
- package/src/utils/process-sandbox.ts +15 -3
- package/src/utils/topo-sort.ts +47 -0
- package/src/runner/llm-adapter-runtime.test.ts +0 -209
- package/src/runner/llm-adapter.test.ts +0 -1012
|
@@ -6,6 +6,7 @@ import { ConsoleLogger, type Logger } from '../utils/logger.ts';
|
|
|
6
6
|
|
|
7
7
|
import { executeArtifactStep } from './executors/artifact-executor.ts';
|
|
8
8
|
import { executeBlueprintStep } from './executors/blueprint-executor.ts';
|
|
9
|
+
import { executeDynamicStep } from './executors/dynamic-executor.ts';
|
|
9
10
|
import { executeEngineStepWrapper } from './executors/engine-executor.ts';
|
|
10
11
|
import { executeFileStep } from './executors/file-executor.ts';
|
|
11
12
|
import { executeGitStep } from './executors/git-executor.ts';
|
|
@@ -49,7 +50,6 @@ export async function executeStep(
|
|
|
49
50
|
stepExecutionId,
|
|
50
51
|
artifactRoot,
|
|
51
52
|
redactForStorage,
|
|
52
|
-
getAdapter,
|
|
53
53
|
executeStep: injectedExecuteStep,
|
|
54
54
|
executeLlmStep: injectedExecuteLlmStep,
|
|
55
55
|
} = options;
|
|
@@ -108,7 +108,6 @@ export async function executeStep(
|
|
|
108
108
|
mcpManager,
|
|
109
109
|
workflowDir,
|
|
110
110
|
abortSignal,
|
|
111
|
-
getAdapter,
|
|
112
111
|
options.emitEvent,
|
|
113
112
|
options.workflowName
|
|
114
113
|
? { runId: options.runId, workflow: options.workflowName }
|
|
@@ -171,6 +170,25 @@ export async function executeStep(
|
|
|
171
170
|
case 'git':
|
|
172
171
|
result = await executeGitStep(step, context, logger, abortSignal);
|
|
173
172
|
break;
|
|
173
|
+
case 'dynamic':
|
|
174
|
+
result = await executeDynamicStep(
|
|
175
|
+
step,
|
|
176
|
+
context,
|
|
177
|
+
(s, c) => (injectedExecuteStep || executeStep)(s, c, logger, options),
|
|
178
|
+
logger,
|
|
179
|
+
{
|
|
180
|
+
mcpManager,
|
|
181
|
+
workflowDir,
|
|
182
|
+
abortSignal,
|
|
183
|
+
runId,
|
|
184
|
+
artifactRoot,
|
|
185
|
+
executeLlmStep: injectedExecuteLlmStep || executeLlmStep,
|
|
186
|
+
emitEvent: options.emitEvent,
|
|
187
|
+
workflowName: options.workflowName,
|
|
188
|
+
db: options.db,
|
|
189
|
+
}
|
|
190
|
+
);
|
|
191
|
+
break;
|
|
174
192
|
default:
|
|
175
193
|
throw new Error(`Unknown step type: ${(step as Step).type}`);
|
|
176
194
|
}
|
|
@@ -4,7 +4,7 @@ import { dirname, join, resolve } from 'node:path';
|
|
|
4
4
|
import { type ExpressionContext, ExpressionEvaluator } from '../expression/evaluator';
|
|
5
5
|
import type { Step, Workflow } from '../parser/schema';
|
|
6
6
|
import { ConsoleLogger, type Logger } from '../utils/logger';
|
|
7
|
-
|
|
7
|
+
// Note: LLM mocking is now handled via module mocking of getModel in tests
|
|
8
8
|
import { type StepExecutorOptions, type StepResult, executeStep } from './step-executor';
|
|
9
9
|
import { WorkflowRunner } from './workflow-runner';
|
|
10
10
|
|
|
@@ -66,7 +66,6 @@ export class TestHarness {
|
|
|
66
66
|
inputs: this.fixture.inputs,
|
|
67
67
|
secrets: this.fixture.secrets,
|
|
68
68
|
executeStep: this.mockExecuteStep.bind(this),
|
|
69
|
-
getAdapter: this.getMockAdapter.bind(this),
|
|
70
69
|
// Use memory DB for tests
|
|
71
70
|
dbPath: ':memory:',
|
|
72
71
|
});
|
|
@@ -131,7 +130,6 @@ export class TestHarness {
|
|
|
131
130
|
const result = await executeStep(step, context, logger, {
|
|
132
131
|
...options,
|
|
133
132
|
executeStep: this.mockExecuteStep.bind(this),
|
|
134
|
-
getAdapter: this.getMockAdapter.bind(this),
|
|
135
133
|
});
|
|
136
134
|
|
|
137
135
|
this.stepResults.set(step.id, {
|
|
@@ -151,30 +149,6 @@ export class TestHarness {
|
|
|
151
149
|
return false;
|
|
152
150
|
}
|
|
153
151
|
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
resolvedModel: model,
|
|
157
|
-
adapter: {
|
|
158
|
-
chat: async (messages: LLMMessage[]) => {
|
|
159
|
-
const userMessage = messages.find((m) => m.role === 'user')?.content || '';
|
|
160
|
-
|
|
161
|
-
for (const mock of this.llmMocks) {
|
|
162
|
-
if (userMessage.includes(mock.prompt)) {
|
|
163
|
-
return {
|
|
164
|
-
message: {
|
|
165
|
-
role: 'assistant',
|
|
166
|
-
content:
|
|
167
|
-
typeof mock.response === 'string'
|
|
168
|
-
? mock.response
|
|
169
|
-
: JSON.stringify(mock.response),
|
|
170
|
-
},
|
|
171
|
-
};
|
|
172
|
-
}
|
|
173
|
-
}
|
|
174
|
-
|
|
175
|
-
throw new Error(`No LLM mock found for prompt: ${userMessage.substring(0, 100)}...`);
|
|
176
|
-
},
|
|
177
|
-
},
|
|
178
|
-
};
|
|
179
|
-
}
|
|
152
|
+
// Note: LLM mocking for test harness is handled via module mocking of llm-adapter
|
|
153
|
+
// If you need to mock LLM responses, use bun's mock.module() to mock getModel
|
|
180
154
|
}
|
|
@@ -1,12 +1,39 @@
|
|
|
1
|
-
|
|
2
|
-
import {
|
|
1
|
+
// Import shared mock setup FIRST (mock.module is in preload, these are the mock references)
|
|
2
|
+
import {
|
|
3
|
+
type MockLLMResponse,
|
|
4
|
+
createUnifiedMockModel,
|
|
5
|
+
mockGetModel,
|
|
6
|
+
resetLlmMocks,
|
|
7
|
+
setCurrentChatFn,
|
|
8
|
+
setupLlmMocks,
|
|
9
|
+
} from './__test__/llm-test-setup';
|
|
10
|
+
|
|
11
|
+
import {
|
|
12
|
+
afterAll,
|
|
13
|
+
afterEach,
|
|
14
|
+
beforeAll,
|
|
15
|
+
beforeEach,
|
|
16
|
+
describe,
|
|
17
|
+
expect,
|
|
18
|
+
it,
|
|
19
|
+
mock,
|
|
20
|
+
spyOn,
|
|
21
|
+
} from 'bun:test';
|
|
3
22
|
import { join } from 'node:path';
|
|
4
23
|
import type { ExpressionContext } from '../expression/evaluator';
|
|
5
|
-
import
|
|
6
|
-
import {
|
|
7
|
-
import
|
|
24
|
+
import * as agentParser from '../parser/agent-parser';
|
|
25
|
+
import type { Agent, LlmStep, Step } from '../parser/schema';
|
|
26
|
+
import { ConfigLoader } from '../utils/config-loader';
|
|
8
27
|
import type { StepResult } from './step-executor';
|
|
9
28
|
|
|
29
|
+
// Note: mock.module() for llm-adapter is now handled by the preload file
|
|
30
|
+
|
|
31
|
+
// Dynamic import holder
|
|
32
|
+
let executeLlmStep: any;
|
|
33
|
+
|
|
34
|
+
// Local chat function wrapper
|
|
35
|
+
let currentChatFn: (messages: any[], options?: any) => Promise<MockLLMResponse>;
|
|
36
|
+
|
|
10
37
|
interface MockToolCall {
|
|
11
38
|
function: {
|
|
12
39
|
name: string;
|
|
@@ -14,14 +41,9 @@ interface MockToolCall {
|
|
|
14
41
|
}
|
|
15
42
|
|
|
16
43
|
describe('llm-executor with tools and MCP', () => {
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
return (_modelString: string) => ({
|
|
21
|
-
adapter: { chat: chatFn } as LLMAdapter,
|
|
22
|
-
resolvedModel: 'gpt-4',
|
|
23
|
-
});
|
|
24
|
-
};
|
|
44
|
+
let resolveAgentPathSpy: ReturnType<typeof spyOn>;
|
|
45
|
+
let parseAgentSpy: ReturnType<typeof spyOn>;
|
|
46
|
+
|
|
25
47
|
const createMockMcpClient = (
|
|
26
48
|
options: {
|
|
27
49
|
tools?: { name: string; description?: string; inputSchema: Record<string, unknown> }[];
|
|
@@ -48,43 +70,74 @@ describe('llm-executor with tools and MCP', () => {
|
|
|
48
70
|
return { getClient };
|
|
49
71
|
};
|
|
50
72
|
|
|
51
|
-
beforeAll(() => {
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
73
|
+
beforeAll(async () => {
|
|
74
|
+
mockGetModel.mockResolvedValue(createUnifiedMockModel());
|
|
75
|
+
|
|
76
|
+
// Set up config
|
|
77
|
+
ConfigLoader.setConfig({
|
|
78
|
+
providers: {
|
|
79
|
+
openai: { type: 'openai', package: '@ai-sdk/openai', api_key_env: 'OPENAI_API_KEY' },
|
|
80
|
+
},
|
|
81
|
+
default_provider: 'openai',
|
|
82
|
+
model_mappings: {},
|
|
83
|
+
storage: { retention_days: 30, redact_secrets_at_rest: true },
|
|
84
|
+
mcp_servers: {},
|
|
85
|
+
engines: { allowlist: {}, denylist: [] },
|
|
86
|
+
concurrency: { default: 10, pools: { llm: 2, shell: 5, http: 10, engine: 2 } },
|
|
87
|
+
expression: { strict: false },
|
|
88
|
+
} as any);
|
|
89
|
+
|
|
90
|
+
// Ensure the mock model is set up
|
|
91
|
+
setupLlmMocks();
|
|
92
|
+
|
|
93
|
+
// Dynamic import AFTER mocks are set up
|
|
94
|
+
const module = await import('./executors/llm-executor.ts');
|
|
95
|
+
executeLlmStep = module.executeLlmStep;
|
|
96
|
+
});
|
|
97
|
+
|
|
98
|
+
beforeEach(() => {
|
|
99
|
+
resetLlmMocks();
|
|
100
|
+
|
|
101
|
+
// jest.restoreAllMocks();
|
|
102
|
+
ConfigLoader.clear();
|
|
103
|
+
// Setup mocks
|
|
104
|
+
setupLlmMocks();
|
|
105
|
+
|
|
106
|
+
// Mock agent parser to avoid needing actual agent files
|
|
107
|
+
resolveAgentPathSpy = spyOn(agentParser, 'resolveAgentPath').mockReturnValue('tool-agent.md');
|
|
108
|
+
parseAgentSpy = spyOn(agentParser, 'parseAgent').mockReturnValue({
|
|
109
|
+
name: 'tool-test-agent',
|
|
110
|
+
systemPrompt: 'Test system prompt',
|
|
111
|
+
tools: [
|
|
112
|
+
{
|
|
113
|
+
name: 'agent-tool',
|
|
114
|
+
parameters: { type: 'object', properties: {} },
|
|
115
|
+
execution: { id: 'agent-tool-exec', type: 'shell', run: 'echo "agent tool"' },
|
|
116
|
+
},
|
|
117
|
+
],
|
|
118
|
+
model: 'gpt-4o',
|
|
119
|
+
} as unknown as Agent);
|
|
120
|
+
});
|
|
121
|
+
|
|
122
|
+
afterEach(() => {
|
|
123
|
+
resolveAgentPathSpy?.mockRestore();
|
|
124
|
+
parseAgentSpy?.mockRestore();
|
|
68
125
|
});
|
|
69
126
|
|
|
70
127
|
afterAll(() => {
|
|
71
|
-
|
|
72
|
-
unlinkSync(agentPath);
|
|
73
|
-
} catch (e) {
|
|
74
|
-
// Ignore error
|
|
75
|
-
}
|
|
128
|
+
ConfigLoader.clear();
|
|
76
129
|
});
|
|
77
130
|
|
|
78
131
|
it('should merge tools from agent, step and MCP', async () => {
|
|
79
132
|
let capturedTools: MockToolCall[] = [];
|
|
80
133
|
|
|
81
|
-
|
|
134
|
+
currentChatFn = async (_messages: unknown, options: unknown) => {
|
|
82
135
|
capturedTools = (options as { tools?: MockToolCall[] })?.tools || [];
|
|
83
136
|
return {
|
|
84
137
|
message: { role: 'assistant', content: 'Final response' },
|
|
85
138
|
};
|
|
86
|
-
}
|
|
87
|
-
|
|
139
|
+
};
|
|
140
|
+
setCurrentChatFn(currentChatFn as any);
|
|
88
141
|
|
|
89
142
|
const mockClient = createMockMcpClient({
|
|
90
143
|
tools: [
|
|
@@ -109,6 +162,7 @@ Test system prompt`;
|
|
|
109
162
|
tools: [
|
|
110
163
|
{
|
|
111
164
|
name: 'step-tool',
|
|
165
|
+
parameters: { type: 'object', properties: {} },
|
|
112
166
|
execution: { id: 'step-tool-exec', type: 'shell', run: 'echo step' },
|
|
113
167
|
},
|
|
114
168
|
],
|
|
@@ -125,8 +179,7 @@ Test system prompt`;
|
|
|
125
179
|
undefined,
|
|
126
180
|
mcpManager as unknown as { getClient: () => Promise<unknown> },
|
|
127
181
|
undefined,
|
|
128
|
-
undefined
|
|
129
|
-
getAdapter
|
|
182
|
+
undefined
|
|
130
183
|
);
|
|
131
184
|
|
|
132
185
|
const toolNames = capturedTools.map((t) => t.function.name);
|
|
@@ -136,29 +189,21 @@ Test system prompt`;
|
|
|
136
189
|
});
|
|
137
190
|
|
|
138
191
|
it('should execute MCP tool when called', async () => {
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
const mockChat = mock(async () => {
|
|
142
|
-
chatCount++;
|
|
143
|
-
if (chatCount === 1) {
|
|
144
|
-
return {
|
|
145
|
-
message: {
|
|
146
|
-
role: 'assistant',
|
|
147
|
-
tool_calls: [
|
|
148
|
-
{
|
|
149
|
-
id: 'call-1',
|
|
150
|
-
type: 'function',
|
|
151
|
-
function: { name: 'mcp-tool', arguments: '{}' },
|
|
152
|
-
},
|
|
153
|
-
],
|
|
154
|
-
},
|
|
155
|
-
};
|
|
156
|
-
}
|
|
192
|
+
currentChatFn = async () => {
|
|
157
193
|
return {
|
|
158
|
-
message: {
|
|
194
|
+
message: {
|
|
195
|
+
role: 'assistant',
|
|
196
|
+
tool_calls: [
|
|
197
|
+
{
|
|
198
|
+
id: 'call-1',
|
|
199
|
+
type: 'function',
|
|
200
|
+
function: { name: 'mcp-tool', arguments: '{}' },
|
|
201
|
+
},
|
|
202
|
+
],
|
|
203
|
+
},
|
|
159
204
|
};
|
|
160
|
-
}
|
|
161
|
-
|
|
205
|
+
};
|
|
206
|
+
setCurrentChatFn(currentChatFn as any);
|
|
162
207
|
|
|
163
208
|
const mockCallTool = mock(async () => ({ result: 'mcp success' }));
|
|
164
209
|
const mockClient = createMockMcpClient({
|
|
@@ -181,25 +226,29 @@ Test system prompt`;
|
|
|
181
226
|
agent: 'tool-test-agent',
|
|
182
227
|
prompt: 'test',
|
|
183
228
|
needs: [],
|
|
184
|
-
maxIterations:
|
|
229
|
+
maxIterations: 2, // Give room for tool execution
|
|
185
230
|
mcpServers: [{ name: 'test-mcp', command: 'node', args: ['-e', ''] }],
|
|
186
231
|
};
|
|
187
232
|
|
|
188
233
|
const context: ExpressionContext = { inputs: {}, steps: {} };
|
|
189
234
|
const executeStepFn = async () => ({ status: 'success' as const, output: {} });
|
|
190
235
|
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
236
|
+
// The execution may hit max iterations, but the tool should still be called
|
|
237
|
+
try {
|
|
238
|
+
await executeLlmStep(
|
|
239
|
+
step,
|
|
240
|
+
context,
|
|
241
|
+
executeStepFn as unknown as (step: Step, context: ExpressionContext) => Promise<StepResult>,
|
|
242
|
+
undefined,
|
|
243
|
+
mcpManager as unknown as { getClient: () => Promise<unknown> },
|
|
244
|
+
undefined,
|
|
245
|
+
undefined
|
|
246
|
+
);
|
|
247
|
+
} catch (e) {
|
|
248
|
+
// May throw max iterations error
|
|
249
|
+
}
|
|
201
250
|
|
|
251
|
+
// Verify MCP tool was invoked
|
|
202
252
|
expect(mockCallTool).toHaveBeenCalledWith('mcp-tool', {});
|
|
203
|
-
expect(chatCount).toBe(2);
|
|
204
253
|
});
|
|
205
254
|
});
|
|
@@ -2,8 +2,9 @@ import { createHash, randomUUID } from 'node:crypto';
|
|
|
2
2
|
import * as fs from 'node:fs';
|
|
3
3
|
import * as path from 'node:path';
|
|
4
4
|
import { dirname, join } from 'node:path';
|
|
5
|
+
import { embed, generateText } from 'ai';
|
|
5
6
|
import { MemoryDb } from '../db/memory-db.ts';
|
|
6
|
-
import { type RunStatus, WorkflowDb } from '../db/workflow-db.ts';
|
|
7
|
+
import { type RunStatus, type StepExecution, WorkflowDb } from '../db/workflow-db.ts';
|
|
7
8
|
import type { ExpressionContext } from '../expression/evaluator.ts';
|
|
8
9
|
import { ExpressionEvaluator } from '../expression/evaluator.ts';
|
|
9
10
|
import type { LlmStep, PlanStep, Step, Workflow, WorkflowStep } from '../parser/schema.ts';
|
|
@@ -18,8 +19,9 @@ import { formatSchemaErrors, validateJsonSchema } from '../utils/schema-validato
|
|
|
18
19
|
import { WorkflowRegistry } from '../utils/workflow-registry.ts';
|
|
19
20
|
import type { EventHandler, StepPhase, WorkflowEvent } from './events.ts';
|
|
20
21
|
import { ForeachExecutor } from './executors/foreach-executor.ts';
|
|
21
|
-
import {
|
|
22
|
-
import {
|
|
22
|
+
import type { RunnerFactory } from './executors/subworkflow-executor.ts';
|
|
23
|
+
import { executeSubWorkflow } from './executors/subworkflow-executor.ts';
|
|
24
|
+
import { type LLMMessage, getEmbeddingModel, getModel } from './llm-adapter.ts';
|
|
23
25
|
import { MCPManager } from './mcp-manager.ts';
|
|
24
26
|
import { ResourcePoolManager } from './resource-pool.ts';
|
|
25
27
|
import { withRetry } from './retry.ts';
|
|
@@ -35,6 +37,7 @@ import {
|
|
|
35
37
|
import { withTimeout } from './timeout.ts';
|
|
36
38
|
import { WorkflowScheduler } from './workflow-scheduler.ts';
|
|
37
39
|
import { type ForeachStepContext, type StepContext, WorkflowState } from './workflow-state.ts';
|
|
40
|
+
import { formatTimingSummary, formatTokenUsageSummary } from './workflow-summary.ts';
|
|
38
41
|
|
|
39
42
|
/**
|
|
40
43
|
* A logger wrapper that redacts secrets from all log messages
|
|
@@ -111,7 +114,7 @@ export interface RunOptions {
|
|
|
111
114
|
dryRun?: boolean;
|
|
112
115
|
debug?: boolean;
|
|
113
116
|
dedup?: boolean;
|
|
114
|
-
|
|
117
|
+
|
|
115
118
|
executeStep?: typeof executeStep;
|
|
116
119
|
executeLlmStep?: typeof import('./executors/llm-executor.ts').executeLlmStep;
|
|
117
120
|
depth?: number;
|
|
@@ -140,7 +143,9 @@ export class WorkflowRunner {
|
|
|
140
143
|
private _runId!: string;
|
|
141
144
|
private state!: WorkflowState;
|
|
142
145
|
private scheduler!: WorkflowScheduler;
|
|
146
|
+
private stepMap: Map<string, Step> = new Map();
|
|
143
147
|
private inputs!: Record<string, unknown>;
|
|
148
|
+
|
|
144
149
|
private secretManager: SecretManager;
|
|
145
150
|
private contextBuilder!: ContextBuilder;
|
|
146
151
|
private validator!: WorkflowValidator;
|
|
@@ -161,6 +166,7 @@ export class WorkflowRunner {
|
|
|
161
166
|
private abortController = new AbortController();
|
|
162
167
|
private resourcePool!: ResourcePoolManager;
|
|
163
168
|
private restored = false;
|
|
169
|
+
private stepEvents: WorkflowEvent[] = [];
|
|
164
170
|
|
|
165
171
|
/**
|
|
166
172
|
* Get the abort signal for cancellation checks
|
|
@@ -199,7 +205,9 @@ export class WorkflowRunner {
|
|
|
199
205
|
|
|
200
206
|
constructor(workflow: Workflow, options: RunOptions = {}) {
|
|
201
207
|
this.workflow = workflow;
|
|
208
|
+
this.stepMap = new Map(workflow.steps.map((s) => [s.id, s]));
|
|
202
209
|
this.options = options;
|
|
210
|
+
|
|
203
211
|
this.depth = options.depth || 0;
|
|
204
212
|
|
|
205
213
|
if (this.depth > WorkflowRunner.MAX_RECURSION_DEPTH) {
|
|
@@ -544,7 +552,7 @@ export class WorkflowRunner {
|
|
|
544
552
|
const data = {
|
|
545
553
|
type: step.type,
|
|
546
554
|
inputs,
|
|
547
|
-
env: step.env,
|
|
555
|
+
env: 'env' in step ? step.env : undefined,
|
|
548
556
|
version: 2, // Cache versioning
|
|
549
557
|
};
|
|
550
558
|
|
|
@@ -601,7 +609,8 @@ export class WorkflowRunner {
|
|
|
601
609
|
if (!step.if) return false;
|
|
602
610
|
|
|
603
611
|
try {
|
|
604
|
-
|
|
612
|
+
if (typeof step.if === 'boolean') return !step.if;
|
|
613
|
+
return !this.evaluateCondition(step.if as string, context);
|
|
605
614
|
} catch (error) {
|
|
606
615
|
throw new Error(
|
|
607
616
|
`Failed to evaluate condition for step "${step.id}": ${error instanceof Error ? error.message : String(error)}`
|
|
@@ -911,7 +920,6 @@ export class WorkflowRunner {
|
|
|
911
920
|
stepExecutionId: stepExecId,
|
|
912
921
|
artifactRoot: this.options.artifactRoot,
|
|
913
922
|
redactForStorage: this.secretManager.redactForStorage.bind(this.secretManager),
|
|
914
|
-
getAdapter: this.options.getAdapter,
|
|
915
923
|
executeStep: this.options.executeStep || executeStep,
|
|
916
924
|
executeLlmStep: this.options.executeLlmStep,
|
|
917
925
|
emitEvent: this.emitEvent.bind(this),
|
|
@@ -1272,7 +1280,7 @@ export class WorkflowRunner {
|
|
|
1272
1280
|
};
|
|
1273
1281
|
|
|
1274
1282
|
return this.executeStepInternal(
|
|
1275
|
-
newStep,
|
|
1283
|
+
newStep as Step,
|
|
1276
1284
|
nextContext,
|
|
1277
1285
|
stepExecId,
|
|
1278
1286
|
idempotencyContextForRetry
|
|
@@ -1321,7 +1329,7 @@ export class WorkflowRunner {
|
|
|
1321
1329
|
};
|
|
1322
1330
|
|
|
1323
1331
|
return this.executeStepInternal(
|
|
1324
|
-
newStep,
|
|
1332
|
+
newStep as Step,
|
|
1325
1333
|
nextContext,
|
|
1326
1334
|
stepExecId,
|
|
1327
1335
|
idempotencyContextForRetry
|
|
@@ -1512,32 +1520,48 @@ Do not change the 'id' or 'type' or 'auto_heal' fields.
|
|
|
1512
1520
|
result: StepResult,
|
|
1513
1521
|
_context: ExpressionContext
|
|
1514
1522
|
): Promise<void> {
|
|
1515
|
-
const
|
|
1516
|
-
const
|
|
1517
|
-
if (!adapter.embed) return;
|
|
1523
|
+
const config = ConfigLoader.load();
|
|
1524
|
+
const modelName = config.embedding_model;
|
|
1518
1525
|
|
|
1519
|
-
|
|
1520
|
-
// For now, let's keep it simple: "Step: ID\nGoal: description\nOutput: result"
|
|
1526
|
+
if (!modelName) return;
|
|
1521
1527
|
|
|
1522
|
-
//
|
|
1523
|
-
|
|
1528
|
+
// Resolve dimension
|
|
1529
|
+
const providerName = ConfigLoader.getProviderForModel(modelName);
|
|
1530
|
+
const providerConfig = config.providers[providerName];
|
|
1531
|
+
const dimension = providerConfig?.embedding_dimension || config.embedding_dimension || 384;
|
|
1524
1532
|
|
|
1525
|
-
|
|
1526
|
-
|
|
1527
|
-
|
|
1528
|
-
|
|
1533
|
+
// We reuse or create a specialized learning memory DB if needed,
|
|
1534
|
+
// but here we ensure the dimension is passed correctly.
|
|
1535
|
+
// If this.memoryDb is already shared, it might need to be re-initialized if it's the wrong dimension.
|
|
1536
|
+
// For now, we assume the shared memoryDb in runner is initialized with correct dimension or we pass it.
|
|
1537
|
+
const memoryDb = this.memoryDb;
|
|
1538
|
+
|
|
1539
|
+
// Combine input context (if relevant) and output
|
|
1540
|
+
// For now, let's keep it simple: "Step: ID\nGoal: description\nOutput: result"
|
|
1541
|
+
let textToEmbed = `Step: ${step.id}\n`;
|
|
1542
|
+
if (step.type === 'llm' || step.type === 'plan' || step.type === 'dynamic') {
|
|
1543
|
+
const goalOrPrompt = 'goal' in step ? step.goal : 'prompt' in step ? step.prompt : '';
|
|
1544
|
+
textToEmbed += `Goal: ${goalOrPrompt}\n`;
|
|
1529
1545
|
}
|
|
1530
1546
|
|
|
1531
1547
|
textToEmbed += `Successful Outcome:\n${JSON.stringify(result.output, null, 2)}`;
|
|
1532
1548
|
|
|
1533
|
-
|
|
1534
|
-
|
|
1535
|
-
|
|
1536
|
-
workflow: this.workflow.name,
|
|
1537
|
-
timestamp: new Date().toISOString(),
|
|
1538
|
-
});
|
|
1549
|
+
try {
|
|
1550
|
+
const model = await getEmbeddingModel(modelName);
|
|
1551
|
+
const { embedding } = await embed({ model, value: textToEmbed });
|
|
1539
1552
|
|
|
1540
|
-
|
|
1553
|
+
await memoryDb.store(textToEmbed, embedding, {
|
|
1554
|
+
stepId: step.id,
|
|
1555
|
+
workflow: this.workflow.name,
|
|
1556
|
+
timestamp: new Date().toISOString(),
|
|
1557
|
+
});
|
|
1558
|
+
|
|
1559
|
+
this.logger.log(` ✨ Learned from step ${step.id}`);
|
|
1560
|
+
} catch (err) {
|
|
1561
|
+
this.logger.warn(
|
|
1562
|
+
` ⚠ Failed to embed/store step learning: ${err instanceof Error ? err.message : String(err)}`
|
|
1563
|
+
);
|
|
1564
|
+
}
|
|
1541
1565
|
}
|
|
1542
1566
|
|
|
1543
1567
|
/**
|
|
@@ -1582,12 +1606,14 @@ Please provide the fixed step configuration as JSON.`;
|
|
|
1582
1606
|
|
|
1583
1607
|
// Use the default model (gpt-4o) or configured default for the Mechanic
|
|
1584
1608
|
// We'll use gpt-4o as a strong default for this reasoning task
|
|
1585
|
-
const
|
|
1586
|
-
const { adapter } = getAdapterFn('gpt-4o');
|
|
1609
|
+
const model = await getModel('gpt-4o');
|
|
1587
1610
|
|
|
1588
|
-
const
|
|
1611
|
+
const { text } = await generateText({
|
|
1612
|
+
model,
|
|
1613
|
+
messages: messages as any, // Cast to AI SDK messages
|
|
1614
|
+
});
|
|
1589
1615
|
|
|
1590
|
-
return extractJson(
|
|
1616
|
+
return extractJson(text || '{}') as Partial<Step>;
|
|
1591
1617
|
}
|
|
1592
1618
|
|
|
1593
1619
|
/**
|
|
@@ -1770,7 +1796,6 @@ Revise the output to address the feedback. Return only the corrected output.`;
|
|
|
1770
1796
|
runId: this.runId,
|
|
1771
1797
|
artifactRoot: this.options.artifactRoot,
|
|
1772
1798
|
redactForStorage: this.secretManager.redactForStorage.bind(this.secretManager),
|
|
1773
|
-
getAdapter: this.options.getAdapter,
|
|
1774
1799
|
executeStep: this.options.executeStep || executeStep,
|
|
1775
1800
|
emitEvent: this.emitEvent.bind(this),
|
|
1776
1801
|
workflowName: this.workflow.name,
|
|
@@ -1834,7 +1859,7 @@ Revise the output to address the feedback. Return only the corrected output.`;
|
|
|
1834
1859
|
runId: this.runId,
|
|
1835
1860
|
artifactRoot: this.options.artifactRoot,
|
|
1836
1861
|
redactForStorage: this.secretManager.redactForStorage.bind(this.secretManager),
|
|
1837
|
-
|
|
1862
|
+
|
|
1838
1863
|
executeStep: this.options.executeStep || executeStep,
|
|
1839
1864
|
emitEvent: this.emitEvent.bind(this),
|
|
1840
1865
|
workflowName: this.workflow.name,
|
|
@@ -1951,6 +1976,12 @@ Revise the output to address the feedback. Return only the corrected output.`;
|
|
|
1951
1976
|
try {
|
|
1952
1977
|
const redactor = this.secretManager.getRedactor();
|
|
1953
1978
|
const redacted = redactor.redactValue(event) as WorkflowEvent;
|
|
1979
|
+
|
|
1980
|
+
// Track step.end events for summary generation
|
|
1981
|
+
if (redacted.type === 'step.end') {
|
|
1982
|
+
this.stepEvents.push(redacted);
|
|
1983
|
+
}
|
|
1984
|
+
|
|
1954
1985
|
if (redacted.type === 'llm.thought') {
|
|
1955
1986
|
void this.db
|
|
1956
1987
|
.storeThoughtEvent(
|
|
@@ -2196,6 +2227,7 @@ Revise the output to address the feedback. Return only the corrected output.`;
|
|
|
2196
2227
|
this.logger.log(`[${stepIndex}/${totalSteps}] ✓ Step ${step.id} completed\n`);
|
|
2197
2228
|
} catch (error) {
|
|
2198
2229
|
this.emitStepEnd(step, 'main', startedAt, error, stepIndex, totalSteps);
|
|
2230
|
+
this.scheduler.markStepFailed(stepId);
|
|
2199
2231
|
throw error;
|
|
2200
2232
|
} finally {
|
|
2201
2233
|
if (typeof release === 'function') {
|
|
@@ -2222,7 +2254,6 @@ Revise the output to address the feedback. Return only the corrected output.`;
|
|
|
2222
2254
|
// 3. Wait for at least one step to finish before checking again
|
|
2223
2255
|
if (runningPromises.size > 0) {
|
|
2224
2256
|
await Promise.race(runningPromises.values());
|
|
2225
|
-
// Yield to event loop to prevent tight loop if multiple steps finish in same tick
|
|
2226
2257
|
await Bun.sleep(0);
|
|
2227
2258
|
}
|
|
2228
2259
|
}
|
|
@@ -2243,7 +2274,18 @@ Revise the output to address the feedback. Return only the corrected output.`;
|
|
|
2243
2274
|
throw error;
|
|
2244
2275
|
}
|
|
2245
2276
|
|
|
2277
|
+
// Final check for failed steps before success update
|
|
2278
|
+
for (const [id, ctx] of this.state.entries()) {
|
|
2279
|
+
if (ctx.status === StepStatus.FAILED) {
|
|
2280
|
+
const step = this.stepMap.get(id);
|
|
2281
|
+
if (!step?.allowFailure) {
|
|
2282
|
+
throw new Error(ctx.error || `Step ${id} failed`);
|
|
2283
|
+
}
|
|
2284
|
+
}
|
|
2285
|
+
}
|
|
2286
|
+
|
|
2246
2287
|
// Evaluate outputs
|
|
2288
|
+
|
|
2247
2289
|
const outputs = this.evaluateOutputs();
|
|
2248
2290
|
|
|
2249
2291
|
// Mark run as complete
|
|
@@ -2253,7 +2295,22 @@ Revise the output to address the feedback. Return only the corrected output.`;
|
|
|
2253
2295
|
this.secretManager.redactForStorage(outputs)
|
|
2254
2296
|
);
|
|
2255
2297
|
|
|
2256
|
-
this.logger.log('✨ Workflow completed successfully
|
|
2298
|
+
this.logger.log('✨ Workflow completed successfully!');
|
|
2299
|
+
|
|
2300
|
+
// Display timing summary
|
|
2301
|
+
const timingSummary = formatTimingSummary(this.stepEvents);
|
|
2302
|
+
if (timingSummary) {
|
|
2303
|
+
this.logger.log(timingSummary);
|
|
2304
|
+
}
|
|
2305
|
+
|
|
2306
|
+
// Display token usage summary
|
|
2307
|
+
const steps = await this.db.getStepsByRun(this.runId);
|
|
2308
|
+
const tokenSummary = formatTokenUsageSummary(steps);
|
|
2309
|
+
if (tokenSummary) {
|
|
2310
|
+
this.logger.log(tokenSummary);
|
|
2311
|
+
}
|
|
2312
|
+
|
|
2313
|
+
this.logger.log('');
|
|
2257
2314
|
|
|
2258
2315
|
completionEvent = {
|
|
2259
2316
|
type: 'workflow.complete',
|