ccjk 13.3.6 → 13.3.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunks/agent-teams.mjs +7 -5
- package/dist/chunks/agent.mjs +2 -2
- package/dist/chunks/agents.mjs +16 -16
- package/dist/chunks/api-cli.mjs +6 -6
- package/dist/chunks/api-providers.mjs +1 -1
- package/dist/chunks/api.mjs +4 -4
- package/dist/chunks/auto-bootstrap.mjs +1 -1
- package/dist/chunks/auto-fix.mjs +49 -4
- package/dist/chunks/auto-fixer.mjs +7 -5
- package/dist/chunks/auto-init.mjs +9 -7208
- package/dist/chunks/auto-memory-bridge.mjs +9 -3
- package/dist/chunks/auto-updater.mjs +9 -9
- package/dist/chunks/auto-upgrade.mjs +5 -3
- package/dist/chunks/banner.mjs +4 -3
- package/dist/chunks/boost.mjs +118 -62
- package/dist/chunks/ccjk-agents.mjs +3 -3
- package/dist/chunks/ccjk-all.mjs +7 -7
- package/dist/chunks/ccjk-config.mjs +2 -2
- package/dist/chunks/ccjk-hooks.mjs +4 -4
- package/dist/chunks/ccjk-mcp.mjs +5 -5
- package/dist/chunks/ccjk-setup.mjs +5 -5
- package/dist/chunks/ccjk-skills.mjs +5 -5
- package/dist/chunks/ccr.mjs +18 -16
- package/dist/chunks/ccu.mjs +2 -2
- package/dist/chunks/check-updates.mjs +8 -8
- package/dist/chunks/claude-code-config-manager.mjs +12 -10
- package/dist/chunks/claude-code-incremental-manager.mjs +7 -7
- package/dist/chunks/claude-config.mjs +1 -1
- package/dist/chunks/claude-wrapper.mjs +1 -1
- package/dist/chunks/cli-hook.mjs +15 -15
- package/dist/chunks/codex-config-switch.mjs +7 -7
- package/dist/chunks/codex-provider-manager.mjs +7 -7
- package/dist/chunks/codex-uninstaller.mjs +2 -2
- package/dist/chunks/codex.mjs +5 -5
- package/dist/chunks/commands.mjs +2 -2
- package/dist/chunks/commands2.mjs +3 -3
- package/dist/chunks/commit.mjs +2 -2
- package/dist/chunks/completion.mjs +2 -2
- package/dist/chunks/config-consolidator.mjs +2 -2
- package/dist/chunks/config-switch.mjs +8 -8
- package/dist/chunks/config.mjs +6 -5
- package/dist/chunks/config2.mjs +5 -5
- package/dist/chunks/config3.mjs +4 -4
- package/dist/chunks/constants.mjs +1 -1
- package/dist/chunks/context-opt.mjs +92 -90
- package/dist/chunks/context.mjs +659 -0
- package/dist/chunks/dashboard.mjs +14 -9
- package/dist/chunks/doctor.mjs +4 -4
- package/dist/chunks/eval.mjs +502 -0
- package/dist/chunks/evolution.mjs +46 -39
- package/dist/chunks/health-alerts.mjs +9 -9
- package/dist/chunks/help.mjs +1 -1
- package/dist/chunks/hook-installer.mjs +6 -3
- package/dist/chunks/index.mjs +23 -0
- package/dist/chunks/index10.mjs +634 -571
- package/dist/chunks/index11.mjs +1061 -569
- package/dist/chunks/index12.mjs +914 -1076
- package/dist/chunks/index13.mjs +136 -951
- package/dist/chunks/index14.mjs +209 -185
- package/dist/chunks/index2.mjs +19 -24
- package/dist/chunks/index3.mjs +19085 -12
- package/dist/chunks/index4.mjs +16 -19092
- package/dist/chunks/index5.mjs +7602 -16
- package/dist/chunks/index6.mjs +159 -7590
- package/dist/chunks/index7.mjs +1602 -171
- package/dist/chunks/index8.mjs +19 -1602
- package/dist/chunks/index9.mjs +612 -15
- package/dist/chunks/init.mjs +26 -19
- package/dist/chunks/installer.mjs +5 -5
- package/dist/chunks/installer2.mjs +2 -2
- package/dist/chunks/intent-engine.mjs +1 -1
- package/dist/chunks/interview.mjs +4 -4
- package/dist/chunks/manager.mjs +1 -1
- package/dist/chunks/marketplace.mjs +2 -2
- package/dist/chunks/mcp-cli.mjs +12 -12
- package/dist/chunks/mcp.mjs +8 -8
- package/dist/chunks/memory.mjs +8 -8
- package/dist/chunks/menu-hierarchical.mjs +24 -22
- package/dist/chunks/menu.mjs +27 -22
- package/dist/chunks/metrics-display.mjs +2 -2
- package/dist/chunks/migrator.mjs +1 -1
- package/dist/chunks/monitor.mjs +2 -2
- package/dist/chunks/notification.mjs +6 -6
- package/dist/chunks/onboarding-wizard.mjs +6 -5
- package/dist/chunks/onboarding.mjs +4 -4
- package/dist/chunks/package.mjs +1 -1
- package/dist/chunks/paradigm.mjs +2 -2
- package/dist/chunks/permission-manager.mjs +2 -2
- package/dist/chunks/permissions.mjs +3 -3
- package/dist/chunks/persistence-manager.mjs +19 -12
- package/dist/chunks/persistence.mjs +5 -3
- package/dist/chunks/plugin.mjs +2 -2
- package/dist/chunks/prompts.mjs +5 -5
- package/dist/chunks/providers.mjs +2 -2
- package/dist/chunks/quick-actions.mjs +7 -6
- package/dist/chunks/quick-provider.mjs +5 -4
- package/dist/chunks/quick-setup.mjs +20 -15
- package/dist/chunks/remote.mjs +15 -16
- package/dist/chunks/{convoy-manager.mjs → session-manager.mjs} +1129 -1095
- package/dist/chunks/session.mjs +2 -2
- package/dist/chunks/sessions.mjs +3 -3
- package/dist/chunks/silent-updater.mjs +1 -1
- package/dist/chunks/simple-config.mjs +2 -2
- package/dist/chunks/skill2.mjs +3 -3
- package/dist/chunks/skills-sync.mjs +5 -5
- package/dist/chunks/skills.mjs +3 -3
- package/dist/chunks/slash-commands.mjs +9 -8
- package/dist/chunks/smart-defaults.mjs +9 -5
- package/dist/chunks/startup.mjs +1 -1
- package/dist/chunks/stats.mjs +2 -2
- package/dist/chunks/status.mjs +37 -22
- package/dist/chunks/team.mjs +3 -3
- package/dist/chunks/thinking.mjs +4 -4
- package/dist/chunks/trace.mjs +2 -2
- package/dist/chunks/uninstall.mjs +9 -9
- package/dist/chunks/update.mjs +14 -11
- package/dist/chunks/upgrade-manager.mjs +3 -3
- package/dist/chunks/upgrade.mjs +25 -9
- package/dist/chunks/version-checker.mjs +4 -4
- package/dist/chunks/vim.mjs +3 -3
- package/dist/chunks/workflows.mjs +1 -1
- package/dist/chunks/wsl.mjs +1 -1
- package/dist/chunks/zero-config.mjs +4 -4
- package/dist/cli.mjs +60 -26
- package/dist/index.d.mts +4392 -4392
- package/dist/index.d.ts +4392 -4392
- package/dist/index.mjs +4314 -4314
- package/dist/shared/{ccjk.DcKLglJQ.mjs → ccjk.BIxuVL3_.mjs} +2 -2
- package/dist/shared/{ccjk.DJdmgr2d.mjs → ccjk.BJMRY2Ra.mjs} +5 -3
- package/dist/shared/{ccjk.B1TwPltj.mjs → ccjk.BOu1yav7.mjs} +3 -2
- package/dist/shared/{ccjk.mJpVRDZ8.mjs → ccjk.BWFpnOr3.mjs} +1 -1
- package/dist/shared/{ccjk.BfIpomdz.mjs → ccjk.CHUEFqmw.mjs} +3 -2
- package/dist/shared/{ccjk.CqdbaXqU.mjs → ccjk.CLUL0pAV.mjs} +9 -5
- package/dist/shared/{ccjk.Cot9p9_n.mjs → ccjk.Cjj8SVrn.mjs} +1 -1
- package/dist/shared/{ccjk.CfrpIIKy.mjs → ccjk.Crd_nEfj.mjs} +38 -20
- package/dist/shared/{ccjk.DCw2WnZU.mjs → ccjk.CvChMYvB.mjs} +1 -1
- package/dist/shared/{ccjk.CXzjn01x.mjs → ccjk.D8ZLYSZZ.mjs} +1 -1
- package/dist/shared/{ccjk.BrPUmTqm.mjs → ccjk.DJuyfrlL.mjs} +164 -82
- package/dist/shared/{ccjk.DHXfsrwn.mjs → ccjk.DRfdq6yl.mjs} +4 -4
- package/dist/shared/{ccjk.DXRAZcix.mjs → ccjk.DScm_NnL.mjs} +8 -4
- package/dist/shared/{ccjk.XsJWJuQP.mjs → ccjk.DfZKjHvG.mjs} +6 -128
- package/dist/shared/{ccjk.BFxsJM0k.mjs → ccjk.DwSebGy0.mjs} +4 -3
- package/dist/shared/ccjk.DxWqH-EF.mjs +170 -0
- package/dist/shared/{ccjk.Cwa_FiTX.mjs → ccjk.I6IuYdc_.mjs} +2 -2
- package/dist/shared/{ccjk.DpstNaeR.mjs → ccjk.KpFl2RDA.mjs} +3 -3
- package/dist/shared/{ccjk.dYDLfmph.mjs → ccjk._dESH4Rk.mjs} +1 -1
- package/dist/shared/{ccjk.BxSmJ8B7.mjs → ccjk.wLJHO0Af.mjs} +2 -1
- package/package.json +2 -1
- package/dist/chunks/index15.mjs +0 -218
- package/dist/shared/{ccjk.c-ETfBZ_.mjs → ccjk.eIn-g1yI.mjs} +96 -96
package/dist/chunks/doctor.mjs
CHANGED
|
@@ -1,15 +1,15 @@
|
|
|
1
1
|
import { existsSync, mkdirSync, statSync, writeFileSync, readFileSync, copyFileSync, readdirSync } from 'node:fs';
|
|
2
2
|
import process__default from 'node:process';
|
|
3
|
-
import a from './
|
|
4
|
-
import { i as inquirer } from './
|
|
3
|
+
import a from './index2.mjs';
|
|
4
|
+
import { i as inquirer } from './index3.mjs';
|
|
5
5
|
import { getApiProviderPresets } from './api-providers.mjs';
|
|
6
6
|
import { SETTINGS_FILE, CLAUDE_DIR } from './constants.mjs';
|
|
7
|
-
import { i18n } from './
|
|
7
|
+
import { i18n } from './index5.mjs';
|
|
8
8
|
import { g as getPermissionManager } from '../shared/ccjk.SPoXMvZD.mjs';
|
|
9
9
|
import { e as commandExists } from './platform.mjs';
|
|
10
10
|
import { P as ProviderHealthMonitor } from '../shared/ccjk.J8YiPsOw.mjs';
|
|
11
11
|
import { platform, userInfo, homedir } from 'node:os';
|
|
12
|
-
import ora from './
|
|
12
|
+
import ora from './index7.mjs';
|
|
13
13
|
import { exec as q } from './main.mjs';
|
|
14
14
|
import { STATUS } from './banner.mjs';
|
|
15
15
|
import { writeFileAtomic } from './fs-operations.mjs';
|
|
@@ -0,0 +1,502 @@
|
|
|
1
|
+
import { mkdir, writeFile, readFile } from 'node:fs/promises';
|
|
2
|
+
import { dirname, join, extname } from 'node:path';
|
|
3
|
+
import { existsSync, readFileSync, mkdirSync, writeFileSync, readdirSync, statSync } from 'node:fs';
|
|
4
|
+
import { spawn } from 'node:child_process';
|
|
5
|
+
|
|
6
|
+
/**
 * Grade a single run by evaluating every assertion declared on the scenario.
 *
 * Assertion results that are already attached to the run (for example the
 * `spawn_error` entry recorded when the process could not be started) are
 * kept and count towards the verdict. Without this, a failed spawn would be
 * graded as a success whenever the scenario declares no assertions, because
 * `[].every(...)` is vacuously true.
 *
 * @param {object} scenario - Scenario definition; `scenario.assertions` is iterated.
 * @param {object} result - Raw run result. It is copied, never mutated.
 * @returns {object} Copy of `result` with `success` and `assertionResults` set.
 */
function gradeRun(scenario, result) {
  const priorResults = Array.isArray(result.assertionResults) ? result.assertionResults : [];
  const assertionResults = [...priorResults];
  for (const assertion of scenario.assertions) {
    assertionResults.push(evaluateAssertion(assertion, result));
  }
  return {
    ...result,
    success: assertionResults.every((r) => r.success),
    assertionResults
  };
}
|
|
19
|
+
/**
 * Route an assertion to the checker that handles its `type`.
 *
 * @param {object} assertion - Assertion definition; `assertion.type` selects the checker.
 * @param {object} result - Run result; only forwarded to the exit-code checker.
 * @returns {{type: *, success: boolean, message: string}} The checker's verdict,
 *   or a failed verdict when the type is not recognized.
 */
function evaluateAssertion(assertion, result) {
  const { type } = assertion;
  if (type === "exit_code") {
    return checkExitCode(assertion, result);
  }
  if (type === "file_exists") {
    return checkFileExists(assertion);
  }
  if (type === "contains_text") {
    return checkContainsText(assertion);
  }
  // Anything else is reported as a failure rather than thrown.
  return {
    type,
    success: false,
    message: `Unknown assertion type: ${type}`
  };
}
|
|
35
|
+
/**
 * Compare the run's exit code with the expected one.
 *
 * @param {object} assertion - `assertion.expected` is the wanted exit code; 0 when null/undefined.
 * @param {object} result - Run result; `result.exitCode` is compared strictly.
 * @returns {{type: string, success: boolean, message: string}} Verdict for this assertion.
 */
function checkExitCode(assertion, result) {
  const wanted = assertion.expected ?? 0;
  const actual = result.exitCode;
  if (actual === wanted) {
    return {
      type: "exit_code",
      success: true,
      message: `Exit code ${actual} matches expected ${wanted}`
    };
  }
  return {
    type: "exit_code",
    success: false,
    message: `Exit code ${actual} does not match expected ${wanted}`
  };
}
|
|
44
|
+
/**
 * Check that the path named by the assertion exists on disk.
 *
 * @param {object} assertion - `assertion.path` is the path to probe.
 * @returns {{type: string, success: boolean, message: string}} Verdict; fails
 *   when no path is given or nothing exists at it.
 */
function checkFileExists(assertion) {
  const target = assertion.path;
  if (!target) {
    return {
      type: "file_exists",
      success: false,
      message: "Missing path for file_exists assertion"
    };
  }
  if (existsSync(target)) {
    return {
      type: "file_exists",
      success: true,
      message: `File exists: ${target}`
    };
  }
  return {
    type: "file_exists",
    success: false,
    message: `File does not exist: ${target}`
  };
}
|
|
59
|
+
/**
 * Check that the file at `assertion.path` contains `assertion.value`.
 *
 * The file is read as UTF-8 and searched with `String.prototype.includes`.
 * A read failure (the path is a directory, permissions, the file vanished
 * between the existence check and the read) is reported as a failed
 * assertion instead of throwing, so one bad path cannot abort the whole run.
 *
 * @param {object} assertion - Needs a truthy `path` and a truthy `value`.
 * @returns {{type: string, success: boolean, message: string}} Verdict for this assertion.
 */
function checkContainsText(assertion) {
  const fail = (message) => ({ type: "contains_text", success: false, message });
  if (!assertion.path) {
    return fail("Missing path for contains_text assertion");
  }
  if (!assertion.value) {
    return fail("Missing value for contains_text assertion");
  }
  if (!existsSync(assertion.path)) {
    return fail(`File does not exist: ${assertion.path}`);
  }
  let content;
  try {
    content = readFileSync(assertion.path, "utf-8");
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    return fail(`Failed to read file ${assertion.path}: ${reason}`);
  }
  const contains = content.includes(assertion.value);
  return {
    type: "contains_text",
    success: contains,
    message: contains ? `File contains text: "${assertion.value}"` : `File does not contain text: "${assertion.value}"`
  };
}
|
|
89
|
+
|
|
90
|
+
/**
 * Render the dashboard for `reports` and write it to `outputPath`,
 * creating the parent directory first when it does not exist.
 *
 * @param {object[]} reports - Scenario reports passed to `generateHtml`.
 * @param {string} outputPath - Destination file for the HTML document.
 * @returns {void}
 */
function writeHtmlDashboard(reports, outputPath) {
  // Render before touching the filesystem so a rendering failure leaves no directory behind.
  const markup = generateHtml(reports);
  const parentDir = dirname(outputPath);
  mkdirSync(parentDir, { recursive: true });
  writeFileSync(outputPath, markup, "utf-8");
}
|
|
95
|
+
/**
 * Build the standalone HTML dashboard for a list of scenario reports.
 *
 * A scenario counts as passed only when its `successRate` is exactly 1.
 * The average duration is reported as 0 for an empty report list (instead of
 * dividing by zero), and the free-text fields `scenarioId` and `suite` are
 * HTML-escaped because they come straight from scenario files.
 *
 * @param {object[]} reports - Reports with `scenarioId`, `suite`, `successRate`,
 *   `passedRuns`, `totalRuns` and `averageDurationMs`.
 * @returns {string} A complete HTML document.
 */
function generateHtml(reports) {
  // Escape the five characters that are significant in HTML text and attributes.
  const escapeHtml = (value) => String(value)
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&#39;");

  const timestamp = new Date().toISOString();
  const totalScenarios = reports.length;
  const passedScenarios = reports.filter((r) => r.successRate === 1).length;
  const failedScenarios = totalScenarios - passedScenarios;
  const avgDuration = totalScenarios === 0
    ? 0
    : reports.reduce((sum, r) => sum + r.averageDurationMs, 0) / totalScenarios;

  const rows = reports.map((r) => `      <tr>
        <td>${escapeHtml(r.scenarioId)}</td>
        <td>${escapeHtml(r.suite)}</td>
        <td class="${r.successRate === 1 ? "pass" : "fail"}">${(r.successRate * 100).toFixed(0)}%</td>
        <td>${r.passedRuns}/${r.totalRuns}</td>
        <td>${Math.round(r.averageDurationMs)}ms</td>
      </tr>`).join("\n");

  return `<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>CCJK Eval Dashboard</title>
  <style>
    body {
      font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
      max-width: 1200px;
      margin: 0 auto;
      padding: 20px;
      background: #f5f5f5;
    }
    h1 { color: #333; }
    .summary {
      background: white;
      padding: 20px;
      border-radius: 8px;
      margin-bottom: 20px;
      box-shadow: 0 2px 4px rgba(0,0,0,0.1);
    }
    .summary-grid {
      display: grid;
      grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
      gap: 15px;
      margin-top: 15px;
    }
    .summary-item {
      padding: 10px;
      background: #f9f9f9;
      border-radius: 4px;
    }
    .summary-item strong { display: block; font-size: 24px; margin-top: 5px; }
    table {
      width: 100%;
      background: white;
      border-collapse: collapse;
      border-radius: 8px;
      overflow: hidden;
      box-shadow: 0 2px 4px rgba(0,0,0,0.1);
    }
    th, td { padding: 12px; text-align: left; }
    th { background: #333; color: white; }
    tr:nth-child(even) { background: #f9f9f9; }
    .pass { color: #22c55e; font-weight: bold; }
    .fail { color: #ef4444; font-weight: bold; }
    .timestamp { color: #666; font-size: 14px; }
  </style>
</head>
<body>
  <h1>CCJK Eval Dashboard</h1>
  <p class="timestamp">Generated: ${timestamp}</p>

  <div class="summary">
    <h2>Summary</h2>
    <div class="summary-grid">
      <div class="summary-item">
        <div>Total Scenarios</div>
        <strong>${totalScenarios}</strong>
      </div>
      <div class="summary-item">
        <div>Passed</div>
        <strong class="pass">${passedScenarios}</strong>
      </div>
      <div class="summary-item">
        <div>Failed</div>
        <strong class="fail">${failedScenarios}</strong>
      </div>
      <div class="summary-item">
        <div>Avg Duration</div>
        <strong>${Math.round(avgDuration)}ms</strong>
      </div>
    </div>
  </div>

  <table>
    <thead>
      <tr>
        <th>Scenario</th>
        <th>Suite</th>
        <th>Success Rate</th>
        <th>Passed/Total</th>
        <th>Avg Duration</th>
      </tr>
    </thead>
    <tbody>
${rows}
    </tbody>
  </table>
</body>
</html>`;
}
|
|
200
|
+
|
|
201
|
+
/**
 * Execute a scenario `scenario.runs` times, one run after another.
 *
 * @param {object} scenario - Scenario definition; a falsy `runs` means one run.
 * @returns {Promise<object[]>} The raw (ungraded) run results in execution order.
 */
async function runScenario(scenario) {
  const wantedRuns = scenario.runs || 1;
  const collected = [];
  let runIndex = 0;
  // Runs are awaited one at a time on purpose: each one is timed.
  while (runIndex < wantedRuns) {
    collected.push(await runOnce(scenario, runIndex));
    runIndex += 1;
  }
  return collected;
}
|
|
210
|
+
/**
 * Run the scenario's command once through the system shell and capture its output.
 *
 * The command line is handed to the shell verbatim. Splitting it on
 * whitespace and letting `spawn` re-join the pieces would collapse runs of
 * whitespace inside quoted arguments. Output is collected as raw chunks and
 * decoded once at the end, so multibyte characters that straddle a chunk
 * boundary decode correctly and `output_bytes` is a real byte count.
 *
 * The returned promise never rejects for process failures: a spawn error
 * resolves with `exitCode: -1` and a `spawn_error` assertion result.
 *
 * @param {object} scenario - Needs `id` and the shell `command` string.
 * @param {number} runIndex - Zero-based index of this run, copied into the result.
 * @returns {Promise<object>} Ungraded run result (`success` is always false here;
 *   the grader sets it).
 */
async function runOnce(scenario, runIndex) {
  const startTime = Date.now();
  return new Promise((resolve) => {
    const proc = spawn(scenario.command, {
      shell: true,
      stdio: ["ignore", "pipe", "pipe"]
    });
    const stdoutChunks = [];
    const stderrChunks = [];
    proc.stdout?.on("data", (chunk) => {
      stdoutChunks.push(chunk);
    });
    proc.stderr?.on("data", (chunk) => {
      stderrChunks.push(chunk);
    });
    proc.on("close", (code) => {
      const durationMs = Date.now() - startTime;
      const stdoutBuffer = Buffer.concat(stdoutChunks);
      const stderrBuffer = Buffer.concat(stderrChunks);
      resolve({
        scenarioId: scenario.id,
        runIndex,
        success: false,
        // grader will set this
        durationMs,
        // `code` is null when the process was terminated by a signal.
        exitCode: code ?? -1,
        metrics: {
          duration_ms: durationMs,
          output_bytes: stdoutBuffer.length + stderrBuffer.length
        },
        assertionResults: [],
        stdout: stdoutBuffer.toString(),
        stderr: stderrBuffer.toString()
      });
    });
    proc.on("error", (err) => {
      const durationMs = Date.now() - startTime;
      // If "close" fires as well, the second resolve() is a no-op.
      resolve({
        scenarioId: scenario.id,
        runIndex,
        success: false,
        durationMs,
        exitCode: -1,
        metrics: { duration_ms: durationMs },
        assertionResults: [{
          type: "spawn_error",
          success: false,
          message: `Failed to spawn process: ${err.message}`
        }],
        stderr: err.message
      });
    });
  });
}
|
|
264
|
+
|
|
265
|
+
/**
 * Recursively collect scenario definitions from `*.json` files under a directory.
 *
 * Every JSON file is parsed and validated; a file that fails either step
 * aborts the load with an error that names the file and carries the
 * original error as `cause`. A scenarios directory that does not exist
 * yields an empty list, so the caller can report "no scenarios" instead of
 * crashing on ENOENT.
 *
 * @param {string} scenariosDir - Root directory to scan.
 * @param {{suite?: string, id?: string}} [filter] - Optional exact-match filters;
 *   a falsy filter value is ignored.
 * @returns {object[]} The scenarios that passed validation and the filters.
 * @throws {Error} When a JSON file cannot be read, parsed or validated.
 */
function loadScenarios(scenariosDir, filter) {
  const scenarios = [];
  if (!existsSync(scenariosDir)) {
    return scenarios;
  }
  const scanDir = (dir) => {
    for (const entry of readdirSync(dir)) {
      const fullPath = join(dir, entry);
      if (statSync(fullPath).isDirectory()) {
        scanDir(fullPath);
        continue;
      }
      if (extname(entry) !== ".json") {
        continue;
      }
      let scenario;
      try {
        scenario = JSON.parse(readFileSync(fullPath, "utf-8"));
        validateScenario(scenario, fullPath);
      } catch (err) {
        throw new Error(`Failed to load scenario from ${fullPath}: ${err}`, { cause: err });
      }
      if (filter?.suite && scenario.suite !== filter.suite) {
        continue;
      }
      if (filter?.id && scenario.id !== filter.id) {
        continue;
      }
      scenarios.push(scenario);
    }
  };
  scanDir(scenariosDir);
  return scenarios;
}
|
|
293
|
+
/**
 * Validate the minimal shape of a parsed scenario, throwing on the first problem.
 *
 * @param {object} scenario - Parsed scenario definition.
 * @param {string} path - File the scenario came from; used only in error messages.
 * @returns {void}
 * @throws {Error} When a required field is falsy or an assertion has no `type`.
 * @throws {TypeError} When `assertions` is present but not an array.
 */
function validateScenario(scenario, path) {
  const requiredFields = ["id", "suite", "description", "command", "assertions"];
  const missingField = requiredFields.find((name) => !scenario[name]);
  if (missingField !== undefined) {
    throw new Error(`Scenario ${path} missing required field: ${missingField}`);
  }
  const { assertions } = scenario;
  if (!Array.isArray(assertions)) {
    throw new TypeError(`Scenario ${path} assertions must be an array`);
  }
  if (assertions.some((item) => !item.type)) {
    throw new Error(`Scenario ${path} has assertion without type`);
  }
}
|
|
309
|
+
|
|
310
|
+
/**
 * Write one scenario report as pretty-printed JSON to
 * `<cwd>/evals/reports/<suite>/<scenarioId>.json`.
 *
 * Captured process output is dropped from every run unless explicitly
 * requested: properties set to `undefined` are omitted by `JSON.stringify`.
 *
 * @param {object} report - Scenario report with `suite`, `scenarioId` and `results`.
 * @param {{includeStdout?: boolean, includeStderr?: boolean}} [options] - Output retention flags.
 * @returns {Promise<string>} The path of the file that was written.
 */
async function writeScenarioReport(report, options = {}) {
  const fileName = `${report.scenarioId}.json`;
  const outputPath = join(process.cwd(), "evals/reports", report.suite, fileName);

  const withFilteredOutput = (run) => {
    const copy = { ...run };
    copy.stdout = options.includeStdout ? run.stdout : void 0;
    copy.stderr = options.includeStderr ? run.stderr : void 0;
    return copy;
  };
  const payload = { ...report, results: report.results.map(withFilteredOutput) };

  await mkdir(dirname(outputPath), { recursive: true });
  await writeFile(outputPath, JSON.stringify(payload, null, 2), "utf-8");
  return outputPath;
}
|
|
329
|
+
/**
 * Aggregate the scenario reports of one suite into a summary record.
 *
 * A scenario counts as passed only when its `successRate` is exactly 1.
 * A scenario without `averageScore` contributes its `successRate` to the
 * average score. Both rates are 0 for an empty suite.
 *
 * @param {string} suite - Suite name, copied into the summary.
 * @param {object[]} scenarios - Scenario reports belonging to the suite.
 * @returns {object} Summary with counts, `successRate`, `averageScore` and the input `scenarios`.
 */
function createSuiteSummary(suite, scenarios) {
  const totalScenarios = scenarios.length;
  let passedScenarios = 0;
  let scoreTotal = 0;
  for (const entry of scenarios) {
    if (entry.successRate === 1) {
      passedScenarios += 1;
    }
    scoreTotal += entry.averageScore ?? entry.successRate;
  }
  const isEmpty = totalScenarios === 0;
  return {
    suite,
    totalScenarios,
    passedScenarios,
    failedScenarios: totalScenarios - passedScenarios,
    successRate: isEmpty ? 0 : passedScenarios / totalScenarios,
    averageScore: isEmpty ? 0 : scoreTotal / totalScenarios,
    scenarios
  };
}
|
|
345
|
+
/**
 * Write a suite summary as pretty-printed JSON to
 * `<cwd>/evals/reports/<suite>/_summary.json`, creating directories as needed.
 *
 * @param {object} summary - Suite summary; `summary.suite` selects the directory.
 * @returns {Promise<string>} The path of the file that was written.
 */
async function writeSuiteSummary(summary) {
  const pathParts = [process.cwd(), "evals/reports", summary.suite, "_summary.json"];
  const outputPath = join(...pathParts);
  const serialized = JSON.stringify(summary, null, 2);
  await mkdir(dirname(outputPath), { recursive: true });
  await writeFile(outputPath, serialized, "utf-8");
  return outputPath;
}
|
|
356
|
+
/**
 * Compare a candidate scenario report against a baseline report.
 *
 * `deltaDurationPercent` is the relative change of the average duration.
 * When the baseline duration is not a positive number, no relative change
 * can be expressed, so it is reported as 0 instead of NaN/Infinity. Those
 * would serialize to `null` in the JSON report and print as "NaN%".
 *
 * A regression is flagged when the success rate drops by more than five
 * percentage points.
 *
 * @param {object} baseline - Report with `scenarioId`, `successRate`, `averageDurationMs`.
 * @param {object} candidate - Report with `successRate`, `averageDurationMs`.
 * @returns {object} Comparison record, including the boolean `regression` flag.
 */
function compareReports(baseline, candidate) {
  const deltaSuccessRate = candidate.successRate - baseline.successRate;
  const baselineDuration = baseline.averageDurationMs;
  const deltaDurationPercent = baselineDuration > 0
    ? (candidate.averageDurationMs - baselineDuration) / baselineDuration * 100
    : 0;
  return {
    scenarioId: baseline.scenarioId,
    baselineSuccessRate: baseline.successRate,
    candidateSuccessRate: candidate.successRate,
    deltaSuccessRate,
    baselineAverageDurationMs: baseline.averageDurationMs,
    candidateAverageDurationMs: candidate.averageDurationMs,
    deltaDurationPercent,
    regression: deltaSuccessRate < -0.05
  };
}
|
|
370
|
+
/**
 * Write a list of comparisons as pretty-printed JSON to a timestamped file,
 * `<cwd>/evals/reports/comparison-<epoch-ms>.json`.
 *
 * @param {object[]} comparisons - Comparison records to serialize.
 * @returns {Promise<string>} The path of the file that was written.
 */
async function writeComparisonReport(comparisons) {
  const fileName = `comparison-${Date.now()}.json`;
  const outputPath = join(process.cwd(), "evals/reports", fileName);
  const serialized = JSON.stringify(comparisons, null, 2);
  await mkdir(dirname(outputPath), { recursive: true });
  await writeFile(outputPath, serialized, "utf-8");
  return outputPath;
}
|
|
380
|
+
|
|
381
|
+
/**
 * Run every matching scenario under `evals/scenarios`, grade the runs, write
 * the requested reports and print a summary.
 *
 * Exits the process with code 1 when no scenario matches or when any
 * scenario has a success rate below 1.
 *
 * @param {object} [options]
 * @param {string} [options.scenario] - Only run the scenario with this id.
 * @param {string} [options.suite] - Only run scenarios of this suite.
 * @param {number} [options.runs=1] - Number of runs per scenario.
 * @param {boolean} [options.verbose=false] - Keep stdout/stderr in the JSON reports.
 * @param {boolean} [options.html=true] - Write the HTML dashboard.
 * @param {boolean} [options.json=true] - Write JSON scenario reports and suite summaries.
 * @returns {Promise<void>}
 */
async function evalRun(options = {}) {
  const {
    scenario,
    suite,
    runs = 1,
    verbose = false,
    html = true,
    json = true
  } = options;

  console.log("\u{1F50D} Loading scenarios...");
  const scenarios = loadScenarios("evals/scenarios", { suite, id: scenario });
  if (scenarios.length === 0) {
    console.error("\u274C No scenarios found");
    process.exit(1);
  }
  console.log(`\u{1F4CB} Found ${scenarios.length} scenario(s)\n`);

  const reports = [];
  for (const current of scenarios) {
    console.log(`\u25B6\uFE0F Running: ${current.id}`);
    const rawRuns = await runScenario({ ...current, runs });
    const gradedResults = rawRuns.map((run) => gradeRun(current, run));

    // Single pass over the graded runs for the three aggregates.
    const totalRuns = gradedResults.length;
    let passedRuns = 0;
    let durationSum = 0;
    let scoreSum = 0;
    for (const graded of gradedResults) {
      if (graded.success) {
        passedRuns += 1;
      }
      durationSum += graded.durationMs;
      scoreSum += graded.score ?? (graded.success ? 1 : 0);
    }

    const report = {
      scenarioId: current.id,
      suite: current.suite,
      totalRuns,
      passedRuns,
      failedRuns: totalRuns - passedRuns,
      averageDurationMs: durationSum / totalRuns,
      successRate: passedRuns / totalRuns,
      averageScore: scoreSum / totalRuns,
      results: gradedResults
    };
    reports.push(report);

    const status = report.successRate === 1 ? "\u2705" : "\u274C";
    const percent = (report.successRate * 100).toFixed(0);
    console.log(`${status} ${current.id}: ${report.passedRuns}/${report.totalRuns} passed (${percent}%)\n`);
    if (json) {
      await writeScenarioReport(report, { includeStdout: verbose, includeStderr: verbose });
    }
  }

  // Group the reports by suite, keeping first-seen suite order.
  const reportsBySuite = new Map();
  for (const report of reports) {
    if (!reportsBySuite.has(report.suite)) {
      reportsBySuite.set(report.suite, []);
    }
    reportsBySuite.get(report.suite).push(report);
  }
  for (const [suiteName, suiteReports] of reportsBySuite) {
    const summary = createSuiteSummary(suiteName, suiteReports);
    if (json) {
      await writeSuiteSummary(summary);
    }
  }

  if (html) {
    const outputPath = join(process.cwd(), "evals/reports", `dashboard-${Date.now()}.html`);
    writeHtmlDashboard(reports, outputPath);
    console.log(`\u{1F4CA} Dashboard: ${outputPath}\n`);
  }

  const passedCount = reports.filter((r) => r.successRate === 1).length;
  const failedCount = reports.filter((r) => r.successRate < 1).length;
  console.log("\u{1F4CA} Summary:");
  console.log(` Total Scenarios: ${reports.length}`);
  console.log(` Passed: ${passedCount}`);
  console.log(` Failed: ${failedCount}`);
  const overallSuccessRate = reports.reduce((sum, r) => sum + r.successRate, 0) / reports.length;
  console.log(` Success Rate: ${(overallSuccessRate * 100).toFixed(1)}%`);

  // A non-zero exit code lets CI fail when any scenario is not fully green.
  if (failedCount > 0) {
    process.exit(1);
  }
}
|
|
460
|
+
/**
 * Entry point of the eval command: compare two reports when `options.compare`
 * is set, otherwise run scenarios.
 *
 * In compare mode both `baseline` and `candidate` are required; when either
 * is missing an error is printed and the process exits with code 1.
 *
 * @param {object} options - Parsed command options.
 * @returns {Promise<void>}
 */
async function evalCommand(options) {
  if (!options.compare) {
    await evalRun({
      scenario: options.scenario,
      suite: options.suite,
      runs: options.runs,
      verbose: options.verbose,
      html: options.html,
      json: options.json
    });
    return;
  }

  const hasBothReports = Boolean(options.baseline) && Boolean(options.candidate);
  if (!hasBothReports) {
    console.error("\u274C Both --baseline and --candidate are required for comparison");
    process.exit(1);
  }
  await evalCompare({
    baseline: options.baseline,
    candidate: options.candidate,
    verbose: options.verbose
  });
}
|
|
478
|
+
/**
 * Compare two scenario report files, print the deltas and persist the comparison.
 *
 * The comparison report is written BEFORE the regression verdict is acted
 * on: `process.exit(1)` terminates the process immediately, so writing the
 * report afterwards would lose it in exactly the case where it matters most.
 *
 * @param {object} options
 * @param {string} options.baseline - Path of the baseline report JSON.
 * @param {string} options.candidate - Path of the candidate report JSON.
 * @returns {Promise<void>} Exits the process with code 1 when a regression is detected.
 */
async function evalCompare(options) {
  const { baseline, candidate } = options;
  console.log("\u{1F4CA} Loading reports...");
  // The two reads are independent, so they run in parallel.
  const [baselineContent, candidateContent] = await Promise.all([
    readFile(baseline, "utf-8"),
    readFile(candidate, "utf-8")
  ]);
  const baselineReport = JSON.parse(baselineContent);
  const candidateReport = JSON.parse(candidateContent);
  console.log(`Baseline: ${baseline}`);
  console.log(`Candidate: ${candidate}\n`);

  const comparison = compareReports(baselineReport, candidateReport);
  const signed = (value) => (value >= 0 ? "+" : "");
  console.log(`Scenario: ${comparison.scenarioId}`);
  console.log(`Success Rate: ${(comparison.baselineSuccessRate * 100).toFixed(0)}% \u2192 ${(comparison.candidateSuccessRate * 100).toFixed(0)}% (${signed(comparison.deltaSuccessRate)}${(comparison.deltaSuccessRate * 100).toFixed(1)}%)`);
  console.log(`Duration: ${comparison.baselineAverageDurationMs.toFixed(0)}ms \u2192 ${comparison.candidateAverageDurationMs.toFixed(0)}ms (${signed(comparison.deltaDurationPercent)}${comparison.deltaDurationPercent.toFixed(1)}%)\n`);

  await writeComparisonReport([comparison]);

  if (comparison.regression) {
    console.log("\u274C REGRESSION DETECTED");
    process.exit(1);
  } else {
    console.log("\u2705 No regression");
  }
}
|
|
501
|
+
|
|
502
|
+
export { evalCommand, evalCompare, evalRun };
|