@sylphx/flow 1.8.0 → 1.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +72 -0
- package/assets/output-styles/silent.md +145 -8
- package/assets/rules/core.md +19 -2
- package/package.json +2 -12
- package/src/commands/flow/execute.ts +470 -0
- package/src/commands/flow/index.ts +11 -0
- package/src/commands/flow/prompt.ts +35 -0
- package/src/commands/flow/setup.ts +312 -0
- package/src/commands/flow/targets.ts +18 -0
- package/src/commands/flow/types.ts +47 -0
- package/src/commands/flow-command.ts +18 -967
- package/src/commands/flow-orchestrator.ts +14 -5
- package/src/commands/hook-command.ts +1 -1
- package/src/commands/init-core.ts +12 -3
- package/src/commands/run-command.ts +1 -1
- package/src/config/rules.ts +1 -1
- package/src/core/error-handling.ts +1 -1
- package/src/core/loop-controller.ts +1 -1
- package/src/core/state-detector.ts +1 -1
- package/src/core/target-manager.ts +1 -1
- package/src/index.ts +1 -1
- package/src/shared/files/index.ts +1 -1
- package/src/shared/processing/index.ts +1 -1
- package/src/targets/claude-code.ts +3 -3
- package/src/targets/opencode.ts +3 -3
- package/src/utils/agent-enhancer.ts +2 -2
- package/src/utils/{mcp-config.ts → config/mcp-config.ts} +4 -4
- package/src/utils/{paths.ts → config/paths.ts} +1 -1
- package/src/utils/{settings.ts → config/settings.ts} +1 -1
- package/src/utils/{target-config.ts → config/target-config.ts} +5 -5
- package/src/utils/{target-utils.ts → config/target-utils.ts} +3 -3
- package/src/utils/display/banner.ts +25 -0
- package/src/utils/display/status.ts +55 -0
- package/src/utils/{file-operations.ts → files/file-operations.ts} +2 -2
- package/src/utils/files/jsonc.ts +36 -0
- package/src/utils/{sync-utils.ts → files/sync-utils.ts} +3 -3
- package/src/utils/index.ts +42 -61
- package/src/utils/version.ts +47 -0
- package/src/components/benchmark-monitor.tsx +0 -331
- package/src/components/reindex-progress.tsx +0 -261
- package/src/composables/functional/index.ts +0 -14
- package/src/composables/functional/useEnvironment.ts +0 -171
- package/src/composables/functional/useFileSystem.ts +0 -139
- package/src/composables/index.ts +0 -4
- package/src/composables/useEnv.ts +0 -13
- package/src/composables/useRuntimeConfig.ts +0 -27
- package/src/core/ai-sdk.ts +0 -603
- package/src/core/app-factory.ts +0 -381
- package/src/core/builtin-agents.ts +0 -9
- package/src/core/command-system.ts +0 -550
- package/src/core/config-system.ts +0 -550
- package/src/core/connection-pool.ts +0 -390
- package/src/core/di-container.ts +0 -155
- package/src/core/headless-display.ts +0 -96
- package/src/core/interfaces/index.ts +0 -22
- package/src/core/interfaces/repository.interface.ts +0 -91
- package/src/core/interfaces/service.interface.ts +0 -133
- package/src/core/interfaces.ts +0 -96
- package/src/core/result.ts +0 -351
- package/src/core/service-config.ts +0 -252
- package/src/core/session-service.ts +0 -121
- package/src/core/storage-factory.ts +0 -115
- package/src/core/stream-handler.ts +0 -288
- package/src/core/type-utils.ts +0 -427
- package/src/core/unified-storage.ts +0 -456
- package/src/core/validation/limit.ts +0 -46
- package/src/core/validation/query.ts +0 -20
- package/src/db/auto-migrate.ts +0 -322
- package/src/db/base-database-client.ts +0 -144
- package/src/db/cache-db.ts +0 -218
- package/src/db/cache-schema.ts +0 -75
- package/src/db/database.ts +0 -70
- package/src/db/index.ts +0 -252
- package/src/db/memory-db.ts +0 -153
- package/src/db/memory-schema.ts +0 -29
- package/src/db/schema.ts +0 -289
- package/src/db/session-repository.ts +0 -733
- package/src/domains/index.ts +0 -6
- package/src/domains/utilities/index.ts +0 -6
- package/src/domains/utilities/time/index.ts +0 -5
- package/src/domains/utilities/time/tools.ts +0 -291
- package/src/services/agent-service.ts +0 -273
- package/src/services/evaluation-service.ts +0 -271
- package/src/services/functional/evaluation-logic.ts +0 -296
- package/src/services/functional/file-processor.ts +0 -273
- package/src/services/functional/index.ts +0 -12
- package/src/services/memory.service.ts +0 -476
- package/src/types/api/batch.ts +0 -108
- package/src/types/api/errors.ts +0 -118
- package/src/types/api/index.ts +0 -55
- package/src/types/api/requests.ts +0 -76
- package/src/types/api/responses.ts +0 -180
- package/src/types/api/websockets.ts +0 -85
- package/src/types/benchmark.ts +0 -49
- package/src/types/database.types.ts +0 -510
- package/src/types/memory-types.ts +0 -63
- package/src/utils/advanced-tokenizer.ts +0 -191
- package/src/utils/ai-model-fetcher.ts +0 -19
- package/src/utils/async-file-operations.ts +0 -516
- package/src/utils/audio-player.ts +0 -345
- package/src/utils/codebase-helpers.ts +0 -211
- package/src/utils/console-ui.ts +0 -79
- package/src/utils/database-errors.ts +0 -140
- package/src/utils/debug-logger.ts +0 -49
- package/src/utils/file-scanner.ts +0 -259
- package/src/utils/help.ts +0 -20
- package/src/utils/immutable-cache.ts +0 -106
- package/src/utils/jsonc.ts +0 -158
- package/src/utils/memory-tui.ts +0 -414
- package/src/utils/models-dev.ts +0 -91
- package/src/utils/parallel-operations.ts +0 -487
- package/src/utils/process-manager.ts +0 -155
- package/src/utils/prompts.ts +0 -120
- package/src/utils/search-tool-builder.ts +0 -214
- package/src/utils/session-manager.ts +0 -168
- package/src/utils/session-title.ts +0 -87
- package/src/utils/simplified-errors.ts +0 -410
- package/src/utils/template-engine.ts +0 -94
- package/src/utils/test-audio.ts +0 -71
- package/src/utils/todo-context.ts +0 -46
- package/src/utils/token-counter.ts +0 -288
- /package/src/utils/{cli-output.ts → display/cli-output.ts} +0 -0
- /package/src/utils/{logger.ts → display/logger.ts} +0 -0
- /package/src/utils/{notifications.ts → display/notifications.ts} +0 -0
- /package/src/utils/{secret-utils.ts → security/secret-utils.ts} +0 -0
- /package/src/utils/{security.ts → security/security.ts} +0 -0
|
@@ -1,271 +0,0 @@
|
|
|
1
|
-
import { spawn } from 'node:child_process';
|
|
2
|
-
import fs from 'node:fs/promises';
|
|
3
|
-
import path from 'node:path';
|
|
4
|
-
import type { InkMonitor } from '../components/benchmark-monitor.js';
|
|
5
|
-
import { DEFAULT_AGENTS, PERFORMANCE_SCORE_RANGES } from '../constants/benchmark-constants.js';
|
|
6
|
-
import type { AgentTimings, AgentWork } from '../types/benchmark.js';
|
|
7
|
-
import { ProcessManager } from '../utils/process-manager.js';
|
|
8
|
-
|
|
9
|
-
/**
 * Evaluate agent results by running the `claude` CLI over the work each agent produced.
 *
 * Not a pure function: it reads agent files, writes the prompt, report and summary
 * files, and spawns a child process.
 *
 * @param outputDir - Directory containing one sub-directory per entry of DEFAULT_AGENTS.
 *   The temporary prompt, `evaluation-report.md` and `summary.txt` are written here.
 * @param reportDir - Optional directory, resolved against `process.cwd()`, that receives
 *   a copy of the report and the summary.
 * @param monitor - Optional monitor; progress is reported under the agent name 'evaluator'.
 * @throws Error when the prompt cannot be built, the process cannot be spawned, the process
 *   exits with a non-zero code, or a report file cannot be written.
 */
export async function evaluateResults(
  outputDir: string,
  reportDir: string | undefined,
  monitor?: InkMonitor
): Promise<void> {
  const agentDirs = DEFAULT_AGENTS.map((agent) => path.join(outputDir, agent));

  // First, collect the recorded timing information for each agent.
  const agentTimings: AgentTimings = {};
  for (const agentDir of agentDirs) {
    agentTimings[path.basename(agentDir)] = { duration: await readAgentDuration(agentDir) };
  }

  const evaluatorPrompt = await buildEvaluationPrompt(agentTimings);

  // Collect all agent work by reading the files each agent created.
  const agentWork: AgentWork = {};
  for (const agentDir of agentDirs) {
    agentWork[path.basename(agentDir)] = await readAgentWork(agentDir);
  }

  // Combine all agent work into the input for the evaluator.
  const allWork = Object.values(agentWork).join(`\n${'='.repeat(80)}\n`);
  const fullInput = `${evaluatorPrompt}\n\nAGENT WORK TO EVALUATE:\n${allWork}`;

  // The prompt is handed to the CLI through a temporary file.
  const tempEvalFile = path.join(outputDir, '.evaluation-prompt.md');
  await fs.writeFile(tempEvalFile, fullInput);

  try {
    if (monitor) {
      monitor.addAgent('evaluator');
      monitor.updateAgentStatus('evaluator', 'running');
    }

    const evaluationProcess = spawn(
      'claude',
      [
        '--system-prompt',
        `@${tempEvalFile}`,
        '--dangerously-skip-permissions',
        '--output-format',
        'stream-json',
        '--verbose',
        'Please evaluate the agent work as described in the system prompt.',
      ],
      {
        cwd: outputDir,
        stdio: ['inherit', 'pipe', 'pipe'],
        env: {
          ...process.env,
          FORCE_NO_PROGRESS: '1',
          CI: '1',
          PYTHONUNBUFFERED: '1',
        },
      }
    );

    // Track the evaluation process for cleanup.
    ProcessManager.getInstance().trackChildProcess(evaluationProcess);

    const evaluationOutputChunks: string[] = [];

    // Handle one complete stdout line: assistant text is extracted from stream-json
    // records; anything that fails to parse or process is kept verbatim.
    const handleStdoutLine = (line: string): void => {
      if (!line.trim()) {
        return;
      }

      try {
        const jsonData = JSON.parse(line);

        if (jsonData.type === 'assistant' && jsonData.message?.content) {
          for (const content of jsonData.message.content) {
            if (content.type === 'text') {
              const textContent = content.text.trim();
              if (textContent) {
                evaluationOutputChunks.push(`${textContent}\n`);
                monitor?.addAgentOutput('evaluator', textContent);
              }
            }
          }
        }
      } catch (_e) {
        evaluationOutputChunks.push(`${line}\n`);
        monitor?.addAgentOutput('evaluator', line);
      }
    };

    // Decode in the stream so a multi-byte character split across two chunks is not corrupted.
    let pendingStdout = '';
    evaluationProcess.stdout?.setEncoding('utf8');
    evaluationProcess.stdout?.on('data', (chunk: string) => {
      const lines = (pendingStdout + chunk).split('\n');
      pendingStdout = lines.pop() ?? ''; // Keep the last, possibly incomplete, line.
      for (const line of lines) {
        handleStdoutLine(line);
      }
    });

    // stderr is piped, so it must be drained or the child can block once the pipe fills up.
    const stderrChunks: string[] = [];
    evaluationProcess.stderr?.setEncoding('utf8');
    evaluationProcess.stderr?.on('data', (chunk: string) => {
      stderrChunks.push(chunk);
    });

    // Listeners are attached synchronously after spawn so no 'error'/'close' event is missed.
    let exitCode: number | null;
    try {
      exitCode = await new Promise<number | null>((resolve, reject) => {
        evaluationProcess.once('error', reject);
        evaluationProcess.once('close', (code) => resolve(code));
      });
    } catch (error) {
      monitor?.updateAgentStatus('evaluator', 'error');
      throw error;
    }

    // Flush output that was not terminated by a newline.
    handleStdoutLine(pendingStdout);
    pendingStdout = '';

    monitor?.updateAgentStatus('evaluator', exitCode === 0 ? 'completed' : 'error');

    if (exitCode !== 0) {
      monitor?.addAgentOutput('evaluator', `❌ Evaluation failed with exit code ${exitCode}`);
      for (const line of stderrChunks.join('').split('\n')) {
        if (line.trim()) {
          monitor?.addAgentOutput('evaluator', line);
        }
      }
      throw new Error(`Evaluation failed with code ${exitCode}`);
    }

    // Save the report to the output directory.
    const evaluationOutput = evaluationOutputChunks.join('');
    const tempReportPath = path.join(outputDir, 'evaluation-report.md');
    await fs.writeFile(tempReportPath, evaluationOutput);

    // Save a summary of how many files each agent created.
    const summary = Object.entries(agentWork)
      .map(([agent, content]) => {
        const fileCount = (content.match(/--- File: /g) || []).length;
        return `${agent}: ${fileCount} files created`;
      })
      .join('\n');

    const tempSummaryPath = path.join(outputDir, 'summary.txt');
    await fs.writeFile(tempSummaryPath, summary);

    // Show the completion message followed by the full evaluator output.
    if (monitor) {
      monitor.addAgentOutput('evaluator', '📊 Evaluation completed!');
      monitor.addAgentOutput('evaluator', `📁 Report saved to: ${tempReportPath}`);
      monitor.addAgentOutput('evaluator', '');
      monitor.addAgentOutput('evaluator', '🏆 EVALUATION RESULTS:');
      monitor.addAgentOutput('evaluator', '');

      for (const line of evaluationOutput.split('\n')) {
        if (line.trim()) {
          monitor.addAgentOutput('evaluator', line);
        }
      }

      monitor.addAgentOutput('evaluator', '');
      monitor.addAgentOutput('evaluator', '✅ End of evaluation report');
    }

    // Also save to the project directory if the report option is provided.
    if (reportDir) {
      const projectReportPath = path.join(process.cwd(), reportDir, 'evaluation-report.md');
      const projectSummaryPath = path.join(process.cwd(), reportDir, 'summary.txt');

      await fs.mkdir(path.dirname(projectReportPath), { recursive: true });
      await fs.writeFile(projectReportPath, evaluationOutput);
      await fs.writeFile(projectSummaryPath, summary);

      monitor?.addAgentOutput('evaluator', `📁 Project report saved to: ${projectReportPath}`);
    }
  } finally {
    // Remove the temporary prompt on success and on failure; cleanup errors are ignored.
    try {
      await fs.unlink(tempEvalFile);
    } catch (_error) {
      // Ignore cleanup errors
    }
  }
}

/**
 * Internal helper: read an agent's recorded duration in seconds.
 * Prefers `execution-time.txt` ("Duration: N seconds"); if that file cannot be read,
 * falls back to a numeric `duration` field in `timing.json`. Returns 0 when unknown.
 */
async function readAgentDuration(agentDir: string): Promise<number> {
  try {
    const timingText = await fs.readFile(path.join(agentDir, 'execution-time.txt'), 'utf-8');
    const durationMatch = timingText.match(/Duration:\s*(\d+)\s*seconds/);
    return durationMatch ? Number.parseInt(durationMatch[1], 10) : 0;
  } catch (_error) {
    // Fall through to timing.json
  }

  try {
    const timingJson = await fs.readFile(path.join(agentDir, 'timing.json'), 'utf-8');
    const timingData: unknown = JSON.parse(timingJson);
    const duration =
      typeof timingData === 'object' && timingData !== null
        ? (timingData as { duration?: unknown }).duration
        : undefined;
    return typeof duration === 'number' && Number.isFinite(duration) && duration >= 0
      ? duration
      : 0;
  } catch (_fallbackError) {
    return 0; // No timing data available
  }
}

/**
 * Internal helper: concatenate every regular file directly inside `agentDir` into one
 * labelled text section. Read failures are reported inside the returned text.
 */
async function readAgentWork(agentDir: string): Promise<string> {
  const agentName = path.basename(agentDir);
  try {
    const entries = await fs.readdir(agentDir);
    const fileContents: string[] = [];

    for (const entry of entries) {
      const entryPath = path.join(agentDir, entry);
      const stat = await fs.stat(entryPath);
      if (stat.isFile()) {
        const content = await fs.readFile(entryPath, 'utf-8');
        fileContents.push(`\n--- File: ${entry} ---\n${content}\n`);
      }
    }

    return `=== ${agentName} WORK ===\n\n${fileContents.join('')}`;
  } catch (error) {
    return `=== ${agentName} WORK ===\n\nERROR: Could not read files - ${String(error)}`;
  }
}
|
|
243
|
-
|
|
244
|
-
/**
 * Internal helper: build the evaluator prompt by injecting per-agent performance data
 * into the template at `<cwd>/templates/evaluation-prompt.md`.
 *
 * @param agentTimings - Recorded duration (in seconds) per agent name.
 * @returns The template with every `{{AGENT_PERFORMANCE_DATA}}` placeholder replaced.
 * @throws Error when the template file cannot be read (it is required; there is no
 *   fallback) or when PERFORMANCE_SCORE_RANGES is empty.
 */
async function buildEvaluationPrompt(agentTimings: AgentTimings): Promise<string> {
  const templatePath = path.join(process.cwd(), 'templates', 'evaluation-prompt.md');

  // Only the file read is guarded, so scoring problems are not misreported as a
  // template-loading failure.
  let template: string;
  try {
    template = await fs.readFile(templatePath, 'utf-8');
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    throw new Error(
      `Failed to load evaluation template from ${templatePath}. Error: ${errorMessage}\n\nPlease ensure:\n1. The file exists at: ${templatePath}\n2. The file is readable (check permissions)\n3. The file contains valid markdown content`
    );
  }

  // Durations beyond every range's max fall into the last range.
  const fallbackRange = PERFORMANCE_SCORE_RANGES[PERFORMANCE_SCORE_RANGES.length - 1];

  const performanceData = Object.entries(agentTimings)
    .map(([agent, timing]) => {
      const duration = timing.duration || 0;
      const scoreRange =
        PERFORMANCE_SCORE_RANGES.find((range) => duration <= range.max) ?? fallbackRange;
      if (!scoreRange) {
        throw new Error('PERFORMANCE_SCORE_RANGES is empty; cannot score agent performance');
      }
      return `- ${agent}: ${duration}s execution time (Performance: ${scoreRange.score}/10)`;
    })
    .join('\n');

  // split/join substitutes literally: '$' sequences in the data are not interpreted
  // and every occurrence of the placeholder is replaced.
  return template.split('{{AGENT_PERFORMANCE_DATA}}').join(performanceData);
}
|
|
@@ -1,296 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Business logic for evaluation service
|
|
3
|
-
* Pure functions for evaluation processing
|
|
4
|
-
*
|
|
5
|
-
* DESIGN RATIONALE:
|
|
6
|
-
* - Pure functions for data transformation
|
|
7
|
-
* - Testable without file system or child processes
|
|
8
|
-
* - Clear separation of I/O and logic
|
|
9
|
-
* - Composable evaluation pipeline
|
|
10
|
-
*/
|
|
11
|
-
|
|
12
|
-
import type { FileSystemError } from '../../core/functional/error-types.js';
|
|
13
|
-
import { fileSystemError } from '../../core/functional/error-types.js';
|
|
14
|
-
import { pipe } from '../../core/functional/pipe.js';
|
|
15
|
-
import type { Result } from '../../core/functional/result.js';
|
|
16
|
-
import { failure, success } from '../../core/functional/result.js';
|
|
17
|
-
import { Arr, Obj, Str } from '../../utils/functional/index.js';
|
|
18
|
-
|
|
19
|
-
/**
|
|
20
|
-
* Domain types
|
|
21
|
-
*/
|
|
22
|
-
|
|
23
|
-
/** Timing recorded for a single agent run. */
export interface AgentTiming {
  /** Execution time in seconds; 0 is used when no duration could be determined. */
  duration: number;
}

/** Timing information keyed by agent name. */
export interface AgentTimings {
  [agentName: string]: AgentTiming;
}

/** One bucket of the duration-to-score mapping. */
export interface PerformanceScoreRange {
  /** Inclusive upper bound of the durations that fall into this bucket. */
  max: number;
  /** Score reported for this bucket (rendered as `score/10`). */
  score: number;
  /** Human-readable name of the bucket. */
  label: string;
}

/** An agent's timing combined with the score range it falls into. */
export interface AgentPerformanceData {
  /** Agent name. */
  agent: string;
  /** Execution time in seconds. */
  duration: number;
  /** Score taken from the matching range. */
  score: number;
  /** Label taken from the matching range. */
  label: string;
}

/** A template together with the placeholder names found in it. */
export interface EvaluationTemplate {
  /** Raw template text. */
  content: string;
  /** Distinct `{{NAME}}` placeholder names, in order of first appearance. */
  variables: string[];
}
|
|
48
|
-
|
|
49
|
-
/**
|
|
50
|
-
* Pure functions for timing processing
|
|
51
|
-
*/
|
|
52
|
-
|
|
53
|
-
/**
 * Extract the whole number of seconds from text containing "Duration: N seconds".
 *
 * @param content - Text to search.
 * @returns The parsed integer, or 0 when the pattern is not present.
 */
export const parseDuration = (content: string): number => {
  const [, seconds] = /Duration:\s*(\d+)\s*seconds/.exec(content) ?? [];
  return seconds === undefined ? 0 : Number.parseInt(seconds, 10);
};
|
|
60
|
-
|
|
61
|
-
/**
 * Parse timing information from JSON text.
 *
 * The `duration` field is validated before use: a finite number is returned as is,
 * a numeric string is converted, and anything else (missing, empty, non-numeric)
 * yields a duration of 0.
 *
 * @param content - JSON text expected to describe an object with a `duration` field.
 * @returns A success holding the timing, or a failure when the text is not valid JSON
 *   or is the JSON value `null`.
 */
export const parseTimingJSON = (content: string): Result<AgentTiming, FileSystemError> => {
  let data: unknown;
  try {
    data = JSON.parse(content);
  } catch (error) {
    return failure(
      fileSystemError('Failed to parse timing JSON', '', 'read', {
        cause: error instanceof Error ? error : undefined,
      })
    );
  }

  // `null` is valid JSON but carries no timing data.
  if (data === null) {
    return failure(fileSystemError('Failed to parse timing JSON', '', 'read'));
  }

  const raw = typeof data === 'object' ? (data as { duration?: unknown }).duration : undefined;

  let duration = 0;
  if (typeof raw === 'number' && Number.isFinite(raw)) {
    duration = raw;
  } else if (typeof raw === 'string' && raw.trim() !== '' && Number.isFinite(Number(raw))) {
    duration = Number(raw);
  }

  return success({ duration });
};
|
|
76
|
-
|
|
77
|
-
/**
 * Find the performance score range for a duration.
 *
 * Ranges are checked in array order and the first one whose `max` is greater than or
 * equal to `duration` wins. When none matches, the last range is used.
 *
 * @param duration - Execution time to classify.
 * @param ranges - Candidate ranges; must contain at least one entry.
 * @returns The matching range, or the last range as a fallback.
 * @throws RangeError when `ranges` is empty, instead of returning `undefined`
 *   in violation of the declared return type.
 */
export const findScoreRange = (
  duration: number,
  ranges: PerformanceScoreRange[]
): PerformanceScoreRange => {
  const fallback = ranges[ranges.length - 1];
  if (fallback === undefined) {
    throw new RangeError('findScoreRange requires at least one score range');
  }

  return ranges.find((range) => duration <= range.max) ?? fallback;
};
|
|
87
|
-
|
|
88
|
-
/**
 * Combine an agent's timing with the score range its duration falls into.
 *
 * @param agentName - Name of the agent.
 * @param timing - Recorded timing of the agent.
 * @param scoreRanges - Ranges passed on to `findScoreRange`.
 * @returns The agent name and duration together with the matched score and label.
 */
export const calculatePerformance = (
  agentName: string,
  timing: AgentTiming,
  scoreRanges: PerformanceScoreRange[]
): AgentPerformanceData => {
  const { duration } = timing;
  const { score, label } = findScoreRange(duration, scoreRanges);

  return { agent: agentName, duration, score, label };
};
|
|
104
|
-
|
|
105
|
-
/**
 * Render one agent's performance data as a single bullet line.
 *
 * @param data - Performance data to render; the label is not part of the output.
 * @returns A line of the form `- <agent>: <duration>s execution time (Performance: <score>/10)`.
 */
export const formatPerformance = (data: AgentPerformanceData): string => {
  const { agent, duration, score } = data;
  return `- ${agent}: ${duration}s execution time (Performance: ${score}/10)`;
};
|
|
111
|
-
|
|
112
|
-
/**
 * Build the performance section of the prompt: one formatted line per agent,
 * joined with newlines.
 *
 * @param timings - Timing per agent name.
 * @param scoreRanges - Ranges used to score each duration.
 * @returns The newline-separated performance lines.
 */
export const generatePerformanceSection = (
  timings: AgentTimings,
  scoreRanges: PerformanceScoreRange[]
): string => {
  const timingEntries = Obj.entries(timings);

  return pipe(
    timingEntries,
    // Score each agent first, then render the scored data as text.
    Arr.map(([agent, timing]) => calculatePerformance(agent, timing, scoreRanges)),
    Arr.map(formatPerformance),
    Str.join('\n')
  );
};
|
|
126
|
-
|
|
127
|
-
/**
 * Substitute `{{KEY}}` placeholders in a template (pure).
 *
 * Each entry of `variables` is applied in turn through `Str.replaceAll`, so a later
 * substitution also sees text inserted by an earlier one.
 *
 * @param template - Template text containing `{{KEY}}` placeholders.
 * @param variables - Replacement text keyed by placeholder name.
 * @returns The template after all substitutions.
 */
export const replaceTemplateVariables = (
  template: string,
  variables: Record<string, string>
): string => {
  let rendered = template;

  for (const [name, replacement] of Obj.entries(variables)) {
    rendered = Str.replaceAll(`{{${String(name)}}}`, replacement)(rendered);
  }

  return rendered;
};
|
|
143
|
-
|
|
144
|
-
/**
 * Scan a template for `{{NAME}}` placeholders, where NAME consists of upper-case
 * letters and underscores.
 *
 * @param content - Template text.
 * @returns The unchanged text plus the distinct placeholder names in order of
 *   first appearance.
 */
export const parseTemplate = (content: string): EvaluationTemplate => {
  const names = Array.from(content.matchAll(/\{\{([A-Z_]+)\}\}/g), (hit) => hit[1]);

  // A Set keeps insertion order, so de-duplicating preserves first-appearance order.
  return {
    content,
    variables: [...new Set(names)],
  };
};
|
|
163
|
-
|
|
164
|
-
/**
 * Build the evaluation prompt by filling the `AGENT_PERFORMANCE_DATA` placeholder
 * of a template with the generated performance section.
 *
 * @param template - Template text.
 * @param timings - Timing per agent name.
 * @param scoreRanges - Ranges used to score each duration.
 * @returns The rendered prompt.
 */
export const buildEvaluationPrompt = (
  template: string,
  timings: AgentTimings,
  scoreRanges: PerformanceScoreRange[]
): string =>
  replaceTemplateVariables(template, {
    AGENT_PERFORMANCE_DATA: generatePerformanceSection(timings, scoreRanges),
  });
|
|
178
|
-
|
|
179
|
-
/**
 * Render an agent's work as a header followed by its file sections.
 *
 * @param agentName - Name shown in the header.
 * @param files - Already formatted file sections, joined with a newline between them.
 * @returns `=== <agentName> WORK ===`, a blank line, then the joined sections.
 */
export const formatAgentWork = (agentName: string, files: string[]): string =>
  `=== ${agentName} WORK ===\n\n${files.join('\n')}`;
|
|
187
|
-
|
|
188
|
-
/**
 * Wrap a file's content in a section introduced by a `--- File: <name> ---` marker.
 *
 * @param fileName - Name shown in the marker line.
 * @param content - File content placed after the marker.
 * @returns The section, starting and ending with a newline.
 */
export const formatFileContent = (fileName: string, content: string): string => {
  const marker = `--- File: ${fileName} ---`;
  return `\n${marker}\n${content}\n`;
};
|
|
194
|
-
|
|
195
|
-
/**
 * Join agent work sections, separated by a line of 80 `=` characters.
 *
 * @param workSections - One formatted section per agent.
 * @returns The sections joined by the separator; an empty string for no sections.
 */
export const combineAgentWork = (workSections: string[]): string =>
  workSections.join(`\n${'='.repeat(80)}\n`);
|
|
202
|
-
|
|
203
|
-
/**
 * Assemble the full evaluator input: the prompt, a heading, then the agent work.
 *
 * @param prompt - Rendered evaluation prompt.
 * @param agentWork - Combined agent work text.
 * @returns Prompt and work separated by the "AGENT WORK TO EVALUATE:" heading.
 */
export const buildEvaluationInput = (prompt: string, agentWork: string): string => {
  const heading = '\n\nAGENT WORK TO EVALUATE:\n';
  return prompt + heading + agentWork;
};
|
|
209
|
-
|
|
210
|
-
/**
 * Check that a template is usable for building the evaluation prompt.
 *
 * @param template - Template text to validate.
 * @returns A success holding the unchanged template, or a failure when the template
 *   is blank or lacks the `AGENT_PERFORMANCE_DATA` placeholder.
 */
export const validateTemplate = (template: string): Result<string, FileSystemError> => {
  if (Str.isBlank(template)) {
    return failure(fileSystemError('Template is empty', '', 'read'));
  }

  const { variables } = parseTemplate(template);
  const hasPerformancePlaceholder = variables.includes('AGENT_PERFORMANCE_DATA');

  if (hasPerformancePlaceholder) {
    return success(template);
  }

  return failure(
    fileSystemError('Template missing required variable: AGENT_PERFORMANCE_DATA', '', 'read')
  );
};
|
|
227
|
-
|
|
228
|
-
/**
 * Count, per agent, how many `--- File: ` markers appear in its work text.
 *
 * @param agentWork - Formatted work text keyed by agent name.
 * @returns The marker count keyed by agent name.
 */
export const extractSummaryStats = (agentWork: Record<string, string>): Record<string, number> => {
  const workEntries = Obj.entries(agentWork);

  return pipe(
    workEntries,
    Arr.map(([agent, content]) => {
      // Each file section starts with exactly one marker, so markers equal files.
      const fileCount = content.match(/--- File: /g)?.length ?? 0;
      return [agent, fileCount] as [string, number];
    }),
    Obj.fromEntries
  );
};
|
|
241
|
-
|
|
242
|
-
/**
 * Render summary statistics as one `<agent>: <count> files created` line per agent.
 *
 * @param stats - File count keyed by agent name.
 * @returns The newline-separated summary lines.
 */
export const formatSummaryStats = (stats: Record<string, number>): string => {
  const statEntries = Obj.entries(stats);

  return pipe(
    statEntries,
    Arr.map(([agent, count]) => `${agent}: ${count} files created`),
    Str.join('\n')
  );
};
|
|
252
|
-
|
|
253
|
-
/**
 * Split evaluation output into sections at heading lines.
 *
 * Every line starting with `##` (which includes deeper headings such as `###`) opens
 * a new section. The title is the heading text with all leading `#` characters and
 * surrounding whitespace removed, so deeper headings no longer keep stray `#` marks.
 * Lines before the first heading are not part of any section.
 *
 * @param output - Evaluation output text.
 * @returns The sections in order of appearance; each content string holds the
 *   section's lines, each terminated by a newline.
 */
export const parseEvaluationOutput = (
  output: string
): {
  sections: Array<{ title: string; content: string }>;
} => {
  const sections: Array<{ title: string; content: string }> = [];

  let currentSection: { title: string; content: string } | null = null;

  for (const line of Str.lines(output)) {
    if (line.startsWith('##')) {
      if (currentSection) {
        sections.push(currentSection);
      }
      currentSection = {
        title: line.replace(/^#{2,}\s*/, '').trim(),
        content: '',
      };
    } else if (currentSection) {
      currentSection.content += `${line}\n`;
    }
  }

  // Flush the section that was still open when the input ended.
  if (currentSection) {
    sections.push(currentSection);
  }

  return { sections };
};
|
|
286
|
-
|
|
287
|
-
/**
 * Default performance score ranges, ordered by ascending `max`.
 * The last entry has an infinite `max`, so every finite duration matches a range.
 */
export const DEFAULT_SCORE_RANGES: PerformanceScoreRange[] = [
  { label: 'Excellent', score: 10, max: 30 },
  { label: 'Good', score: 8, max: 60 },
  { label: 'Fair', score: 6, max: 120 },
  { label: 'Slow', score: 4, max: 240 },
  { label: 'Very Slow', score: 2, max: Number.POSITIVE_INFINITY },
];
|