keystone-cli 1.3.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. package/README.md +127 -140
  2. package/package.json +6 -3
  3. package/src/cli.ts +54 -369
  4. package/src/commands/init.ts +15 -29
  5. package/src/db/memory-db.test.ts +45 -0
  6. package/src/db/memory-db.ts +47 -21
  7. package/src/db/sqlite-setup.ts +26 -3
  8. package/src/db/workflow-db.ts +12 -5
  9. package/src/parser/config-schema.ts +17 -13
  10. package/src/parser/schema.ts +4 -2
  11. package/src/runner/__test__/llm-mock-setup.ts +173 -0
  12. package/src/runner/__test__/llm-test-setup.ts +271 -0
  13. package/src/runner/engine-executor.test.ts +25 -18
  14. package/src/runner/executors/blueprint-executor.ts +0 -1
  15. package/src/runner/executors/dynamic-executor.ts +11 -6
  16. package/src/runner/executors/engine-executor.ts +5 -1
  17. package/src/runner/executors/llm-executor.ts +502 -1033
  18. package/src/runner/executors/memory-executor.ts +35 -19
  19. package/src/runner/executors/plan-executor.ts +0 -1
  20. package/src/runner/executors/types.ts +4 -4
  21. package/src/runner/llm-adapter.integration.test.ts +151 -0
  22. package/src/runner/llm-adapter.ts +270 -1398
  23. package/src/runner/llm-clarification.test.ts +91 -106
  24. package/src/runner/llm-executor.test.ts +217 -1181
  25. package/src/runner/memoization.test.ts +0 -1
  26. package/src/runner/recovery-security.test.ts +51 -20
  27. package/src/runner/reflexion.test.ts +55 -18
  28. package/src/runner/standard-tools-integration.test.ts +137 -87
  29. package/src/runner/step-executor.test.ts +36 -80
  30. package/src/runner/step-executor.ts +0 -2
  31. package/src/runner/test-harness.ts +3 -29
  32. package/src/runner/tool-integration.test.ts +122 -73
  33. package/src/runner/workflow-runner.ts +110 -49
  34. package/src/runner/workflow-scheduler.ts +11 -1
  35. package/src/runner/workflow-summary.ts +144 -0
  36. package/src/utils/auth-manager.test.ts +10 -520
  37. package/src/utils/auth-manager.ts +3 -756
  38. package/src/utils/config-loader.ts +12 -0
  39. package/src/utils/constants.ts +0 -17
  40. package/src/utils/process-sandbox.ts +15 -3
  41. package/src/runner/llm-adapter-runtime.test.ts +0 -209
  42. package/src/runner/llm-adapter.test.ts +0 -1012
@@ -2,8 +2,9 @@ import { createHash, randomUUID } from 'node:crypto';
2
2
  import * as fs from 'node:fs';
3
3
  import * as path from 'node:path';
4
4
  import { dirname, join } from 'node:path';
5
+ import { embed, generateText } from 'ai';
5
6
  import { MemoryDb } from '../db/memory-db.ts';
6
- import { type RunStatus, WorkflowDb } from '../db/workflow-db.ts';
7
+ import { type RunStatus, type StepExecution, WorkflowDb } from '../db/workflow-db.ts';
7
8
  import type { ExpressionContext } from '../expression/evaluator.ts';
8
9
  import { ExpressionEvaluator } from '../expression/evaluator.ts';
9
10
  import type { LlmStep, PlanStep, Step, Workflow, WorkflowStep } from '../parser/schema.ts';
@@ -18,8 +19,9 @@ import { formatSchemaErrors, validateJsonSchema } from '../utils/schema-validato
18
19
  import { WorkflowRegistry } from '../utils/workflow-registry.ts';
19
20
  import type { EventHandler, StepPhase, WorkflowEvent } from './events.ts';
20
21
  import { ForeachExecutor } from './executors/foreach-executor.ts';
21
- import { type RunnerFactory, executeSubWorkflow } from './executors/subworkflow-executor.ts';
22
- import { type LLMMessage, getAdapter } from './llm-adapter.ts';
22
+ import type { RunnerFactory } from './executors/subworkflow-executor.ts';
23
+ import { executeSubWorkflow } from './executors/subworkflow-executor.ts';
24
+ import { type LLMMessage, getEmbeddingModel, getModel } from './llm-adapter.ts';
23
25
  import { MCPManager } from './mcp-manager.ts';
24
26
  import { ResourcePoolManager } from './resource-pool.ts';
25
27
  import { withRetry } from './retry.ts';
@@ -35,6 +37,7 @@ import {
35
37
  import { withTimeout } from './timeout.ts';
36
38
  import { WorkflowScheduler } from './workflow-scheduler.ts';
37
39
  import { type ForeachStepContext, type StepContext, WorkflowState } from './workflow-state.ts';
40
+ import { formatTimingSummary, formatTokenUsageSummary } from './workflow-summary.ts';
38
41
 
39
42
  /**
40
43
  * A logger wrapper that redacts secrets from all log messages
@@ -43,7 +46,7 @@ class RedactingLogger implements Logger {
43
46
  constructor(
44
47
  private inner: Logger,
45
48
  private redactor: Redactor
46
- ) {}
49
+ ) { }
47
50
 
48
51
  log(msg: string): void {
49
52
  this.inner.log(this.redactor.redact(msg));
@@ -111,7 +114,7 @@ export interface RunOptions {
111
114
  dryRun?: boolean;
112
115
  debug?: boolean;
113
116
  dedup?: boolean;
114
- getAdapter?: typeof getAdapter;
117
+
115
118
  executeStep?: typeof executeStep;
116
119
  executeLlmStep?: typeof import('./executors/llm-executor.ts').executeLlmStep;
117
120
  depth?: number;
@@ -140,7 +143,9 @@ export class WorkflowRunner {
140
143
  private _runId!: string;
141
144
  private state!: WorkflowState;
142
145
  private scheduler!: WorkflowScheduler;
146
+ private stepMap: Map<string, Step> = new Map();
143
147
  private inputs!: Record<string, unknown>;
148
+
144
149
  private secretManager: SecretManager;
145
150
  private contextBuilder!: ContextBuilder;
146
151
  private validator!: WorkflowValidator;
@@ -161,6 +166,7 @@ export class WorkflowRunner {
161
166
  private abortController = new AbortController();
162
167
  private resourcePool!: ResourcePoolManager;
163
168
  private restored = false;
169
+ private stepEvents: WorkflowEvent[] = [];
164
170
 
165
171
  /**
166
172
  * Get the abort signal for cancellation checks
@@ -187,7 +193,7 @@ export class WorkflowRunner {
187
193
 
188
194
  if (parentSignal.aborted) {
189
195
  controller.abort();
190
- return { controller, cleanup: () => {} };
196
+ return { controller, cleanup: () => { } };
191
197
  }
192
198
 
193
199
  parentSignal.addEventListener('abort', onAbort, { once: true });
@@ -199,7 +205,9 @@ export class WorkflowRunner {
199
205
 
200
206
  constructor(workflow: Workflow, options: RunOptions = {}) {
201
207
  this.workflow = workflow;
208
+ this.stepMap = new Map(workflow.steps.map((s) => [s.id, s]));
202
209
  this.options = options;
210
+
203
211
  this.depth = options.depth || 0;
204
212
 
205
213
  if (this.depth > WorkflowRunner.MAX_RECURSION_DEPTH) {
@@ -544,7 +552,7 @@ export class WorkflowRunner {
544
552
  const data = {
545
553
  type: step.type,
546
554
  inputs,
547
- env: step.env,
555
+ env: 'env' in step ? step.env : undefined,
548
556
  version: 2, // Cache versioning
549
557
  };
550
558
 
@@ -601,7 +609,8 @@ export class WorkflowRunner {
601
609
  if (!step.if) return false;
602
610
 
603
611
  try {
604
- return !this.evaluateCondition(step.if, context);
612
+ if (typeof step.if === 'boolean') return !step.if;
613
+ return !this.evaluateCondition(step.if as string, context);
605
614
  } catch (error) {
606
615
  throw new Error(
607
616
  `Failed to evaluate condition for step "${step.id}": ${error instanceof Error ? error.message : String(error)}`
@@ -807,11 +816,11 @@ export class WorkflowRunner {
807
816
  const idempotencyContextForRetry =
808
817
  idempotencyClaimed && scopedIdempotencyKey
809
818
  ? {
810
- rawKey: idempotencyKey || scopedIdempotencyKey,
811
- scopedKey: scopedIdempotencyKey,
812
- ttlSeconds: idempotencyTtlSeconds,
813
- claimed: true,
814
- }
819
+ rawKey: idempotencyKey || scopedIdempotencyKey,
820
+ scopedKey: scopedIdempotencyKey,
821
+ ttlSeconds: idempotencyTtlSeconds,
822
+ claimed: true,
823
+ }
815
824
  : undefined;
816
825
 
817
826
  let stepToExecute = step;
@@ -911,7 +920,6 @@ export class WorkflowRunner {
911
920
  stepExecutionId: stepExecId,
912
921
  artifactRoot: this.options.artifactRoot,
913
922
  redactForStorage: this.secretManager.redactForStorage.bind(this.secretManager),
914
- getAdapter: this.options.getAdapter,
915
923
  executeStep: this.options.executeStep || executeStep,
916
924
  executeLlmStep: this.options.executeLlmStep,
917
925
  emitEvent: this.emitEvent.bind(this),
@@ -924,9 +932,9 @@ export class WorkflowRunner {
924
932
  try {
925
933
  const outputForValidation =
926
934
  stepToExecute.type === 'engine' &&
927
- result.output &&
928
- typeof result.output === 'object' &&
929
- 'summary' in result.output
935
+ result.output &&
936
+ typeof result.output === 'object' &&
937
+ 'summary' in result.output
930
938
  ? (result.output as { summary?: unknown }).summary
931
939
  : result.output;
932
940
  this.validator.validateSchema(
@@ -1272,7 +1280,7 @@ export class WorkflowRunner {
1272
1280
  };
1273
1281
 
1274
1282
  return this.executeStepInternal(
1275
- newStep,
1283
+ newStep as Step,
1276
1284
  nextContext,
1277
1285
  stepExecId,
1278
1286
  idempotencyContextForRetry
@@ -1321,7 +1329,7 @@ export class WorkflowRunner {
1321
1329
  };
1322
1330
 
1323
1331
  return this.executeStepInternal(
1324
- newStep,
1332
+ newStep as Step,
1325
1333
  nextContext,
1326
1334
  stepExecId,
1327
1335
  idempotencyContextForRetry
@@ -1512,32 +1520,48 @@ Do not change the 'id' or 'type' or 'auto_heal' fields.
1512
1520
  result: StepResult,
1513
1521
  _context: ExpressionContext
1514
1522
  ): Promise<void> {
1515
- const getAdapterFn = this.options.getAdapter || getAdapter;
1516
- const { adapter } = getAdapterFn('local'); // Default for embedding
1517
- if (!adapter.embed) return;
1523
+ const config = ConfigLoader.load();
1524
+ const modelName = config.embedding_model;
1518
1525
 
1519
- // Combine input context (if relevant) and output
1520
- // For now, let's keep it simple: "Step: ID\nGoal: description\nOutput: result"
1526
+ if (!modelName) return;
1527
+
1528
+ // Resolve dimension
1529
+ const providerName = ConfigLoader.getProviderForModel(modelName);
1530
+ const providerConfig = config.providers[providerName];
1531
+ const dimension = providerConfig?.embedding_dimension || config.embedding_dimension || 384;
1521
1532
 
1522
- // We can try to construct a summary of what happened
1523
- let textToEmbed = `Step ID: ${step.id} (${step.type})\n`;
1533
+ // We reuse or create a specialized learning memory DB if needed,
1534
+ // but here we ensure the dimension is passed correctly.
1535
+ // If this.memoryDb is already shared, it might need to be re-initialized if it's the wrong dimension.
1536
+ // For now, we assume the shared memoryDb in runner is initialized with correct dimension or we pass it.
1537
+ const memoryDb = this.memoryDb;
1524
1538
 
1525
- if (step.type === 'llm') {
1526
- textToEmbed += `Task Context/Prompt:\n${(step as LlmStep).prompt}\n\n`;
1527
- } else if (step.type === 'shell') {
1528
- textToEmbed += `Command:\n${(step as unknown as { run: string }).run}\n\n`;
1539
+ // Combine input context (if relevant) and output
1540
+ // For now, let's keep it simple: "Step: ID\nGoal: description\nOutput: result"
1541
+ let textToEmbed = `Step: ${step.id}\n`;
1542
+ if (step.type === 'llm' || step.type === 'plan' || step.type === 'dynamic') {
1543
+ const goalOrPrompt = 'goal' in step ? step.goal : 'prompt' in step ? step.prompt : '';
1544
+ textToEmbed += `Goal: ${goalOrPrompt}\n`;
1529
1545
  }
1530
1546
 
1531
1547
  textToEmbed += `Successful Outcome:\n${JSON.stringify(result.output, null, 2)}`;
1532
1548
 
1533
- const embedding = await adapter.embed(textToEmbed, 'local');
1534
- await this.memoryDb.store(textToEmbed, embedding, {
1535
- stepId: step.id,
1536
- workflow: this.workflow.name,
1537
- timestamp: new Date().toISOString(),
1538
- });
1549
+ try {
1550
+ const model = await getEmbeddingModel(modelName);
1551
+ const { embedding } = await embed({ model, value: textToEmbed });
1552
+
1553
+ await memoryDb.store(textToEmbed, embedding, {
1554
+ stepId: step.id,
1555
+ workflow: this.workflow.name,
1556
+ timestamp: new Date().toISOString(),
1557
+ });
1539
1558
 
1540
- this.logger.log(` ✨ Learned from step ${step.id}`);
1559
+ this.logger.log(` ✨ Learned from step ${step.id}`);
1560
+ } catch (err) {
1561
+ this.logger.warn(
1562
+ ` ⚠ Failed to embed/store step learning: ${err instanceof Error ? err.message : String(err)}`
1563
+ );
1564
+ }
1541
1565
  }
1542
1566
 
1543
1567
  /**
@@ -1582,12 +1606,14 @@ Please provide the fixed step configuration as JSON.`;
1582
1606
 
1583
1607
  // Use the default model (gpt-4o) or configured default for the Mechanic
1584
1608
  // We'll use gpt-4o as a strong default for this reasoning task
1585
- const getAdapterFn = this.options.getAdapter || getAdapter;
1586
- const { adapter } = getAdapterFn('gpt-4o');
1609
+ const model = await getModel('gpt-4o');
1587
1610
 
1588
- const response = await adapter.chat(messages);
1611
+ const { text } = await generateText({
1612
+ model,
1613
+ messages: messages as any, // Cast to AI SDK messages
1614
+ });
1589
1615
 
1590
- return extractJson(response.message.content || '{}') as Partial<Step>;
1616
+ return extractJson(text || '{}') as Partial<Step>;
1591
1617
  }
1592
1618
 
1593
1619
  /**
@@ -1770,7 +1796,6 @@ Revise the output to address the feedback. Return only the corrected output.`;
1770
1796
  runId: this.runId,
1771
1797
  artifactRoot: this.options.artifactRoot,
1772
1798
  redactForStorage: this.secretManager.redactForStorage.bind(this.secretManager),
1773
- getAdapter: this.options.getAdapter,
1774
1799
  executeStep: this.options.executeStep || executeStep,
1775
1800
  emitEvent: this.emitEvent.bind(this),
1776
1801
  workflowName: this.workflow.name,
@@ -1834,7 +1859,7 @@ Revise the output to address the feedback. Return only the corrected output.`;
1834
1859
  runId: this.runId,
1835
1860
  artifactRoot: this.options.artifactRoot,
1836
1861
  redactForStorage: this.secretManager.redactForStorage.bind(this.secretManager),
1837
- getAdapter: this.options.getAdapter,
1862
+
1838
1863
  executeStep: this.options.executeStep || executeStep,
1839
1864
  emitEvent: this.emitEvent.bind(this),
1840
1865
  workflowName: this.workflow.name,
@@ -1951,6 +1976,12 @@ Revise the output to address the feedback. Return only the corrected output.`;
1951
1976
  try {
1952
1977
  const redactor = this.secretManager.getRedactor();
1953
1978
  const redacted = redactor.redactValue(event) as WorkflowEvent;
1979
+
1980
+ // Track step.end events for summary generation
1981
+ if (redacted.type === 'step.end') {
1982
+ this.stepEvents.push(redacted);
1983
+ }
1984
+
1954
1985
  if (redacted.type === 'llm.thought') {
1955
1986
  void this.db
1956
1987
  .storeThoughtEvent(
@@ -2045,12 +2076,16 @@ Revise the output to address the feedback. Return only the corrected output.`;
2045
2076
 
2046
2077
  this.logger.log(`\n🏛️ ${isResume ? 'Resuming' : 'Running'} workflow: ${this.workflow.name}`);
2047
2078
  this.logger.log(`Run ID: ${this.runId}`);
2048
- this.logger.log(
2049
- '\n⚠️ Security Warning: Only run workflows from trusted sources.\n' +
2079
+
2080
+ const config = ConfigLoader.load();
2081
+ if (!config.logging?.suppress_security_warning) {
2082
+ this.logger.log(
2083
+ '\n⚠️ Security Warning: Only run workflows from trusted sources.\n' +
2050
2084
  ' Workflows can execute arbitrary shell commands and access your environment.\n'
2051
- );
2085
+ );
2086
+ }
2052
2087
 
2053
- this.secretManager.redactAtRest = ConfigLoader.load().storage?.redact_secrets_at_rest ?? true;
2088
+ this.secretManager.redactAtRest = config.storage?.redact_secrets_at_rest ?? true;
2054
2089
 
2055
2090
  // Apply defaults and validate inputs
2056
2091
  const validated = this.validator.applyDefaultsAndValidate();
@@ -2196,6 +2231,7 @@ Revise the output to address the feedback. Return only the corrected output.`;
2196
2231
  this.logger.log(`[${stepIndex}/${totalSteps}] ✓ Step ${step.id} completed\n`);
2197
2232
  } catch (error) {
2198
2233
  this.emitStepEnd(step, 'main', startedAt, error, stepIndex, totalSteps);
2234
+ this.scheduler.markStepFailed(stepId);
2199
2235
  throw error;
2200
2236
  } finally {
2201
2237
  if (typeof release === 'function') {
@@ -2222,7 +2258,6 @@ Revise the output to address the feedback. Return only the corrected output.`;
2222
2258
  // 3. Wait for at least one step to finish before checking again
2223
2259
  if (runningPromises.size > 0) {
2224
2260
  await Promise.race(runningPromises.values());
2225
- // Yield to event loop to prevent tight loop if multiple steps finish in same tick
2226
2261
  await Bun.sleep(0);
2227
2262
  }
2228
2263
  }
@@ -2243,7 +2278,18 @@ Revise the output to address the feedback. Return only the corrected output.`;
2243
2278
  throw error;
2244
2279
  }
2245
2280
 
2281
+ // Final check for failed steps before success update
2282
+ for (const [id, ctx] of this.state.entries()) {
2283
+ if (ctx.status === StepStatus.FAILED) {
2284
+ const step = this.stepMap.get(id);
2285
+ if (!step?.allowFailure) {
2286
+ throw new Error(ctx.error || `Step ${id} failed`);
2287
+ }
2288
+ }
2289
+ }
2290
+
2246
2291
  // Evaluate outputs
2292
+
2247
2293
  const outputs = this.evaluateOutputs();
2248
2294
 
2249
2295
  // Mark run as complete
@@ -2253,7 +2299,22 @@ Revise the output to address the feedback. Return only the corrected output.`;
2253
2299
  this.secretManager.redactForStorage(outputs)
2254
2300
  );
2255
2301
 
2256
- this.logger.log('✨ Workflow completed successfully!\n');
2302
+ this.logger.log('✨ Workflow completed successfully!');
2303
+
2304
+ // Display timing summary
2305
+ const timingSummary = formatTimingSummary(this.stepEvents);
2306
+ if (timingSummary) {
2307
+ this.logger.log(timingSummary);
2308
+ }
2309
+
2310
+ // Display token usage summary
2311
+ const steps = await this.db.getStepsByRun(this.runId);
2312
+ const tokenSummary = formatTokenUsageSummary(steps);
2313
+ if (tokenSummary) {
2314
+ this.logger.log(tokenSummary);
2315
+ }
2316
+
2317
+ this.logger.log('');
2257
2318
 
2258
2319
  completionEvent = {
2259
2320
  type: 'workflow.complete',
@@ -4,6 +4,7 @@ import { WorkflowParser } from '../parser/workflow-parser.ts';
4
4
  export class WorkflowScheduler {
5
5
  private executionOrder: string[];
6
6
  private pendingSteps: Set<string>;
7
+ private runningSteps: Set<string>;
7
8
  private completedSteps: Set<string>;
8
9
  private stepMap: Map<string, Step>;
9
10
 
@@ -20,6 +21,7 @@ export class WorkflowScheduler {
20
21
  // Remaining steps to execute
21
22
  const remaining = this.executionOrder.filter((id) => !this.completedSteps.has(id));
22
23
  this.pendingSteps = new Set(remaining);
24
+ this.runningSteps = new Set();
23
25
  }
24
26
 
25
27
  public getExecutionOrder(): string[] {
@@ -31,12 +33,13 @@ export class WorkflowScheduler {
31
33
  }
32
34
 
33
35
  public isComplete(): boolean {
34
- return this.pendingSteps.size === 0;
36
+ return this.pendingSteps.size === 0 && this.runningSteps.size === 0;
35
37
  }
36
38
 
37
39
  public markStepComplete(stepId: string): void {
38
40
  this.completedSteps.add(stepId);
39
41
  this.pendingSteps.delete(stepId);
42
+ this.runningSteps.delete(stepId);
40
43
  }
41
44
 
42
45
  public getRunnableSteps(runningCount: number, globalConcurrencyLimit: number): Step[] {
@@ -60,6 +63,13 @@ export class WorkflowScheduler {
60
63
 
61
64
  public startStep(stepId: string): void {
62
65
  this.pendingSteps.delete(stepId);
66
+ this.runningSteps.add(stepId);
67
+ }
68
+
69
+ public markStepFailed(stepId: string): void {
70
+ this.runningSteps.delete(stepId);
71
+ // Note: We don't add back to pending; it's failed.
72
+ // Resume will handle restoring state and scheduler will see it's not completed.
63
73
  }
64
74
 
65
75
  private isStepReady(step: Step): boolean {
@@ -0,0 +1,144 @@
1
+ import type { StepExecution } from '../db/workflow-db';
2
+ import type { WorkflowEvent } from './events';
3
+
/**
 * Format a duration in milliseconds as a short human-readable string.
 *
 * Durations that round to less than one second are shown as whole
 * milliseconds (for example `250ms`); anything longer is shown in seconds
 * with one decimal place (for example `1.5s`).
 *
 * @param ms - Duration in milliseconds.
 * @returns The formatted duration string.
 */
function formatDuration(ms: number): string {
  // Round BEFORE comparing against the threshold: otherwise values in
  // [999.5, 1000) pass the "< 1000" test and are then rounded up to "1000ms".
  const wholeMs = Math.round(ms);
  if (wholeMs < 1000) {
    return `${wholeMs}ms`;
  }
  return `${(ms / 1000).toFixed(1)}s`;
}
13
+
/**
 * Render a number using US English digit grouping (for example `1,234,567`).
 *
 * @param num - The value to render.
 * @returns The value formatted for the `en-US` locale.
 */
function formatNumber(num: number): string {
  const usEnglish = new Intl.NumberFormat('en-US');
  return usEnglish.format(num);
}
20
+
/** Duration record for a single completed step. */
interface StepTiming {
  stepId: string;
  stepType: string;
  durationMs: number;
}

/**
 * Collect per-step durations from a list of workflow events.
 *
 * Only `step.end` events whose phase is `main` and that carry a `durationMs`
 * value contribute an entry; every other event is ignored. Entries keep the
 * order in which the events were supplied.
 *
 * @param events - Workflow events to scan.
 * @returns One timing record per qualifying event.
 */
export function extractStepTimings(events: WorkflowEvent[]): StepTiming[] {
  return events.flatMap((event): StepTiming[] => {
    if (event.type !== 'step.end') {
      return [];
    }
    if (event.phase !== 'main' || event.durationMs === undefined) {
      return [];
    }
    const { stepId, stepType, durationMs } = event;
    return [{ stepId, stepType, durationMs }];
  });
}
45
+
/**
 * Build the multi-line timing summary for a run.
 *
 * The header reports the sum of all extracted step durations, and each
 * following bullet shows one step's duration and its share of that sum,
 * slowest step first.
 *
 * @param events - Workflow events collected during the run.
 * @returns The summary text, or `null` when there are no timings or they sum to zero.
 */
export function formatTimingSummary(events: WorkflowEvent[]): string | null {
  const stepTimings = extractStepTimings(events);

  let totalMs = 0;
  for (const entry of stepTimings) {
    totalMs += entry.durationMs;
  }

  // An empty list also sums to zero, so this one guard covers both
  // "no timings" and "all durations are zero".
  if (totalMs === 0) {
    return null;
  }

  // Slowest steps first.
  stepTimings.sort((left, right) => right.durationMs - left.durationMs);

  const header = `\n⏱️ Timing Summary (total: ${formatDuration(totalMs)})`;
  const bullets = stepTimings.map((timing) => {
    const percentage = Math.round((timing.durationMs / totalMs) * 100);
    return ` • ${timing.stepId}: ${formatDuration(timing.durationMs)} (${percentage}%)`;
  });

  return [header, ...bullets].join('\n');
}
75
+
/** Aggregated token counts across all steps of a run. */
interface TokenUsage {
  promptTokens: number;
  completionTokens: number;
  totalTokens: number;
}

/**
 * Return `value` when it is a finite, non-negative number; otherwise `undefined`.
 * Guards the sums against strings, nulls and other non-numeric JSON values.
 */
function toTokenCount(value: unknown): number | undefined {
  return typeof value === 'number' && Number.isFinite(value) && value >= 0 ? value : undefined;
}

/**
 * Extract and aggregate token usage from step executions.
 *
 * Each step's `usage` field is parsed as JSON and its `prompt_tokens`,
 * `completion_tokens` and `total_tokens` values are summed. Steps with no
 * usage, unparseable usage, or no numeric token fields are skipped.
 *
 * @param steps - Step execution records to scan.
 * @returns The summed counts, or `null` when no step reported usable token data.
 */
export function aggregateTokenUsage(steps: StepExecution[]): TokenUsage | null {
  const totals: TokenUsage = { promptTokens: 0, completionTokens: 0, totalTokens: 0 };
  let hasUsage = false;

  for (const step of steps) {
    if (!step.usage) {
      continue;
    }

    let parsed: unknown;
    try {
      parsed = JSON.parse(step.usage);
    } catch {
      // Best effort: a corrupt usage payload must not break the summary.
      continue;
    }
    if (typeof parsed !== 'object' || parsed === null) {
      continue;
    }

    const record = parsed as Record<string, unknown>;
    const prompt = toTokenCount(record.prompt_tokens);
    const completion = toTokenCount(record.completion_tokens);
    const total = toTokenCount(record.total_tokens);

    if (prompt === undefined && completion === undefined && total === undefined) {
      continue;
    }

    totals.promptTokens += prompt ?? 0;
    totals.completionTokens += completion ?? 0;
    // Some payloads omit total_tokens; derive it so the total never
    // under-reports relative to the input/output figures.
    totals.totalTokens += total ?? (prompt ?? 0) + (completion ?? 0);
    hasUsage = true;
  }

  return hasUsage ? totals : null;
}
109
+
/**
 * Estimate the dollar cost of a run from its token usage.
 *
 * This is a rough figure: a single price pair is applied to all tokens
 * regardless of which model produced them. The defaults are the GPT-4o
 * baseline of $2.50 per 1M input tokens and $10 per 1M output tokens.
 *
 * @param usage - Aggregated token counts.
 * @param inputPricePerMillion - Dollars per one million prompt tokens.
 * @param outputPricePerMillion - Dollars per one million completion tokens.
 * @returns `<$0.01` for amounts below one cent, otherwise `~$X.XX`.
 */
function estimateCost(
  usage: TokenUsage,
  inputPricePerMillion = 2.5,
  outputPricePerMillion = 10
): string {
  const inputCost = (usage.promptTokens / 1_000_000) * inputPricePerMillion;
  const outputCost = (usage.completionTokens / 1_000_000) * outputPricePerMillion;
  const total = inputCost + outputCost;

  // Anything under a cent would print as "$0.00", which reads as free.
  if (total < 0.01) {
    return '<$0.01';
  }
  return `~$${total.toFixed(2)}`;
}
125
+
/**
 * Build the multi-line token usage summary for a run.
 *
 * The output is a heading, a line with input / output / total token counts,
 * and a line with the estimated cost.
 *
 * @param steps - Step execution records for the run.
 * @returns The summary text, or `null` when no token usage was aggregated.
 */
export function formatTokenUsageSummary(steps: StepExecution[]): string | null {
  const totals = aggregateTokenUsage(steps);
  if (!totals) {
    return null;
  }

  const inputCount = formatNumber(totals.promptTokens);
  const outputCount = formatNumber(totals.completionTokens);
  const overallCount = formatNumber(totals.totalTokens);

  return [
    '\n📊 Token Usage',
    ` • Input: ${inputCount} | Output: ${outputCount} | Total: ${overallCount}`,
    ` • Estimated cost: ${estimateCost(totals)}`,
  ].join('\n');
}