@neurcode-ai/cli 0.16.6 → 0.16.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/eval.d.ts.map +1 -1
- package/dist/commands/eval.js +120 -0
- package/dist/commands/eval.js.map +1 -1
- package/dist/index.js +5 -0
- package/dist/index.js.map +1 -1
- package/dist/runtime-build.json +5 -5
- package/dist/utils/enterprise-eval-report.d.ts +232 -0
- package/dist/utils/enterprise-eval-report.d.ts.map +1 -0
- package/dist/utils/enterprise-eval-report.js +402 -0
- package/dist/utils/enterprise-eval-report.js.map +1 -0
- package/dist/utils/eval-demo.d.ts +97 -0
- package/dist/utils/eval-demo.d.ts.map +1 -0
- package/dist/utils/eval-demo.js +635 -0
- package/dist/utils/eval-demo.js.map +1 -0
- package/dist/utils/guided-eval.d.ts.map +1 -1
- package/dist/utils/guided-eval.js +27 -13
- package/dist/utils/guided-eval.js.map +1 -1
- package/package.json +3 -4
- package/.telemetry-bundle/dist/__tests__/harvest-verify.test.d.ts +0 -1
- package/.telemetry-bundle/dist/__tests__/harvest-verify.test.js +0 -86
- package/.telemetry-bundle/dist/contracts.d.ts +0 -58
- package/.telemetry-bundle/dist/contracts.js +0 -8
- package/.telemetry-bundle/dist/harvest-verify.d.ts +0 -9
- package/.telemetry-bundle/dist/harvest-verify.js +0 -128
- package/.telemetry-bundle/dist/index.d.ts +0 -10
- package/.telemetry-bundle/dist/index.js +0 -22
- package/.telemetry-bundle/dist/precision/leaderboards.d.ts +0 -20
- package/.telemetry-bundle/dist/precision/leaderboards.js +0 -72
- package/.telemetry-bundle/dist/reader.d.ts +0 -5
- package/.telemetry-bundle/dist/reader.js +0 -46
- package/.telemetry-bundle/dist/stable-json.d.ts +0 -5
- package/.telemetry-bundle/dist/stable-json.js +0 -24
- package/.telemetry-bundle/dist/store.d.ts +0 -10
- package/.telemetry-bundle/dist/store.js +0 -52
- package/.telemetry-bundle/dist/trust-scoring.d.ts +0 -20
- package/.telemetry-bundle/dist/trust-scoring.js +0 -58
- package/.telemetry-bundle/package.json +0 -8
|
@@ -0,0 +1,635 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* `neurcode eval demo` — the one-command local enterprise demo runner.
|
|
4
|
+
*
|
|
5
|
+
* Drives a complete, safe, deterministic governance loop against a throwaway
|
|
6
|
+
* fixture repository and produces a source-free enterprise report + dashboard
|
|
7
|
+
* summary. A first-time engineering manager or senior engineer can run a single
|
|
8
|
+
* command, watch the runtime allow a safe edit, block a protected boundary,
|
|
9
|
+
* contain an exact-path approval, keep a neighbor blocked, and export a
|
|
10
|
+
* source-free AI Change Record — without founder handholding, GitHub Actions, or
|
|
11
|
+
* cloud authentication.
|
|
12
|
+
*
|
|
13
|
+
* The loop is driven by self-spawning the *real* built CLI against the fixture,
|
|
14
|
+
* so what an evaluator sees is the actual product enforcing — not a re-implemented
|
|
15
|
+
* mock. Every expected assertion is checked; any critical failure fails the run
|
|
16
|
+
* loudly and the report records exactly which checkpoint did not hold.
|
|
17
|
+
*
|
|
18
|
+
* Hard rules (shared with utils/guided-eval.ts):
|
|
19
|
+
* - Source-free: only paths, owners, symbol names, counts, verdicts, hashes,
|
|
20
|
+
* and tier labels are read or emitted. {@link assertEnterpriseEvalSourceFree}
|
|
21
|
+
* is the backstop before anything is written.
|
|
22
|
+
* - Honest trust posture: self-attested local record unless a backend signing
|
|
23
|
+
* key is configured and a receipt actually verifies. Never claims public-key
|
|
24
|
+
* cryptographic signing for an HMAC backend receipt.
|
|
25
|
+
* - The only writers are the fixture scaffold and the `.neurcode/eval/`
|
|
26
|
+
* report/summary artifacts (gitignored). User source is never touched.
|
|
27
|
+
*/
|
|
28
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
29
|
+
exports.EVAL_DEMO_RUN_SCHEMA_VERSION = void 0;
|
|
30
|
+
exports.resolveCliEntry = resolveCliEntry;
|
|
31
|
+
exports.buildEvalDemoPreflight = buildEvalDemoPreflight;
|
|
32
|
+
exports.runEvalDemo = runEvalDemo;
|
|
33
|
+
const node_child_process_1 = require("node:child_process");
|
|
34
|
+
const node_fs_1 = require("node:fs");
|
|
35
|
+
const node_path_1 = require("node:path");
|
|
36
|
+
const runtime_state_1 = require("./runtime-state");
|
|
37
|
+
const guided_eval_1 = require("./guided-eval");
|
|
38
|
+
const enterprise_eval_report_1 = require("./enterprise-eval-report");
|
|
39
|
+
exports.EVAL_DEMO_RUN_SCHEMA_VERSION = 'neurcode.eval-demo-run.v1';
|
|
40
|
+
const SIGNING_SECRET_ENV = 'NEURCODE_AI_CHANGE_RECORD_SIGNING_SECRET';
|
|
41
|
+
// Fixture paths (mirror utils/guided-eval.ts scaffoldEvalFixture).
|
|
42
|
+
const SAFE_PATH = 'src/tasks/export_task.py';
|
|
43
|
+
const BOUNDARY_PATH = 'src/billing/charge.py';
|
|
44
|
+
const NEIGHBOR_PATH = 'src/billing/refund.py';
|
|
45
|
+
// ── CLI self-spawn plumbing ───────────────────────────────────────────────────
|
|
46
|
+
/**
 * Locate the entry script of the CLI that is currently running, so the demo
 * can re-spawn the real product rather than a stand-in.
 *
 * Candidates are probed in this order and the first one that exists on disk
 * wins:
 *   1. `<__dirname>/../index.js` — the compiled layout, where this module
 *      lives at dist/utils/eval-demo.js;
 *   2. `<__dirname>/index.js`    — a flattened bundle layout;
 *   3. the resolved `process.argv[1]` (bin shim or dist/index.js), when set.
 *
 * @returns {string} Path of the first existing candidate. When none exists,
 *   the raw `process.argv[1]` is returned (it may be a symlink node can still
 *   run), or the first candidate if `argv[1]` is empty.
 */
function resolveCliEntry() {
    const { resolve } = node_path_1;
    const invokedScript = process.argv[1];
    const candidates = [
        resolve(__dirname, '..', 'index.js'),
        resolve(__dirname, 'index.js'),
    ];
    if (invokedScript) {
        candidates.push(resolve(invokedScript));
    }
    const existing = candidates.find((entry) => node_fs_1.existsSync(entry));
    if (existing !== undefined) {
        return existing;
    }
    // Nothing was found on disk: fall back to argv[1] untouched, then to the
    // compiled-layout guess.
    return invokedScript || candidates[0];
}
|
|
67
|
+
/**
 * Run the CLI synchronously as a child of the current Node binary and collect
 * its result.
 *
 * The child is started as `process.execPath <cliEntry> ...args` with
 * `NEURCODE_NONINTERACTIVE=1` added to the inherited environment, a 120 s
 * timeout and a 64 MiB output buffer.
 *
 * @param {string} cliEntry - Script handed to node as its first argument.
 * @param {string[]} args - Arguments appended after the entry script.
 * @param {string} cwd - Working directory for the child process.
 * @param {*} [input] - When not `undefined`, JSON-serialized (plus a trailing
 *   newline) and written to the child's stdin.
 * @returns {{status: number, stdout: string, stderr: string}} `status` is the
 *   exit code, or 1 when the child produced none (spawn failure, timeout,
 *   killed by a signal). When the spawn itself failed, the failure reason is
 *   appended to `stderr` so the caller can tell it apart from a plain non-zero
 *   exit.
 */
function runCli(cliEntry, args, cwd, input) {
    const result = (0, node_child_process_1.spawnSync)(process.execPath, [cliEntry, ...args], {
        cwd,
        encoding: 'utf8',
        input: input !== undefined ? `${JSON.stringify(input)}\n` : undefined,
        env: { ...process.env, NEURCODE_NONINTERACTIVE: '1' },
        maxBuffer: 1024 * 1024 * 64,
        timeout: 120_000,
    });
    let stderr = result.stderr ?? '';
    if (result.error) {
        // spawnSync reports launch/timeout/buffer failures via `error` instead
        // of throwing; keep the reason instead of silently dropping it.
        const reason = `CLI spawn failed: ${result.error.message}`;
        stderr = stderr ? `${stderr}\n${reason}` : reason;
    }
    return { status: result.status ?? 1, stdout: result.stdout ?? '', stderr };
}
|
|
78
|
+
/**
 * Extract the JSON object from CLI stdout, tolerating non-JSON text (such as
 * warning lines) printed around it.
 *
 * The span from the first `{` to the last `}` is tried first. If that does not
 * parse — for example because a preceding warning line itself contains a
 * brace — parsing is retried from each later `{` that opens a line, still
 * ending at the last `}`.
 *
 * @param {string} stdout - Raw stdout captured from the CLI.
 * @returns {*} The parsed value, or `null` when `stdout` is not a string or
 *   no candidate span parses as JSON.
 */
function parseCliJson(stdout) {
    if (typeof stdout !== 'string')
        return null;
    const end = stdout.lastIndexOf('}');
    let start = stdout.indexOf('{');
    while (start !== -1 && start < end) {
        try {
            return JSON.parse(stdout.slice(start, end + 1));
        }
        catch {
            // Not valid JSON from this offset; fall through to the next candidate.
        }
        // Only braces that begin a line are retried, so a truncated document
        // never yields one of its own nested objects.
        const lineBreak = stdout.indexOf('\n{', start);
        start = lineBreak === -1 ? -1 : lineBreak + 1;
    }
    return null;
}
|
|
91
|
+
/**
 * Best-effort probe for the npm version available on PATH.
 *
 * @returns {string|null} Trimmed stdout of `npm --version`, or `null` when npm
 *   produced no output (not installed, failed to launch, or timed out after
 *   10 s).
 */
function npmVersion() {
    try {
        const probe = (0, node_child_process_1.spawnSync)('npm', ['--version'], {
            encoding: 'utf8',
            timeout: 10_000,
            // On Windows npm is installed as `npm.cmd`, which cannot be spawned
            // directly; it has to go through the shell. The command line is
            // fixed, so no untrusted input reaches the shell.
            shell: process.platform === 'win32',
        });
        const version = (probe.stdout || '').trim();
        return version || null;
    }
    catch {
        // The probe is purely informational: any failure means "not detected".
        return null;
    }
}
|
|
101
|
+
/**
 * Read the CLI's own version from the package manifest two directories above
 * this module.
 *
 * @returns {string|null} The manifest's `version` when it is a string;
 *   `null` when the manifest cannot be loaded or has no string version.
 */
function cliVersion() {
    let manifest;
    try {
        // eslint-disable-next-line @typescript-eslint/no-var-requires
        manifest = require('../../package.json');
    }
    catch {
        return null;
    }
    const version = manifest?.version;
    if (typeof version !== 'string') {
        return null;
    }
    return version;
}
|
|
111
|
+
/**
 * Count the CLI installations reported by `@neurcode-ai/cli-runtime`'s
 * `checkDeploymentConsistency` (non-strict mode) and how many distinct build
 * fingerprints they carry.
 *
 * @returns {{count: number, distinctBuilds: number}} Installation count and
 *   number of unique `buildFingerprint` values. If the runtime package cannot
 *   be loaded or the check throws, `{ count: 1, distinctBuilds: 1 }` is
 *   returned.
 */
function detectMultipleInstallations() {
    try {
        // eslint-disable-next-line @typescript-eslint/no-var-requires
        const { checkDeploymentConsistency } = require('@neurcode-ai/cli-runtime');
        const consistency = checkDeploymentConsistency({
            bundledCliDir: (0, node_path_1.resolve)(__dirname, '..'),
            strict: false,
        });
        const installations = consistency?.installations ?? [];
        const fingerprints = new Set(installations.map((installation) => installation.buildFingerprint));
        return {
            count: installations.length,
            distinctBuilds: fingerprints.size,
        };
    }
    catch {
        // Detection is advisory only; on any failure report a single build.
        return { count: 1, distinctBuilds: 1 };
    }
}
|
|
125
|
+
/**
 * Assemble the short preflight checklist for the demo.
 *
 * Checks are appended in a fixed order: Node.js version, npm, CLI version and
 * multiple-installation detection, repository state, fixture state, GitHub
 * Actions (explicitly not required), and whether evidence will be
 * backend-signed or self-attested.
 *
 * @param {string} repoRoot - Directory whose git state and
 *   `.neurcode/eval/fixture` folder are inspected.
 * @param {{agent?: string, generatedAt?: string}} [options] - `agent`
 *   defaults to `'claude'`; `generatedAt` defaults to the current time as an
 *   ISO string.
 * @returns {{schemaVersion: string, generatedAt: string, agent: string,
 *   ok: boolean, checks: object[], backendSigningConfigured: boolean}}
 *   `ok` is false only when the Node.js check is a warning; warnings on any
 *   other check do not affect it.
 */
function buildEvalDemoPreflight(repoRoot, options = {}) {
    const agent = options.agent ?? 'claude';
    const generatedAt = options.generatedAt ?? new Date().toISOString();
    const checks = [];

    // Node.js: >= 20 is required.
    const runningNode = process.versions.node;
    const nodeMajor = Number.parseInt((runningNode.split('.')[0] || '0'), 10);
    if (nodeMajor >= 20) {
        checks.push({ id: 'node', label: 'Node.js', status: 'ok', detail: `Node ${runningNode} (>= 20 required).` });
    }
    else {
        checks.push({
            id: 'node',
            label: 'Node.js',
            status: 'warn',
            detail: `Node ${runningNode} detected; Neurcode requires Node >= 20.`,
            recovery: 'Install Node 20 (nvm install 20 && nvm use 20), then re-run.',
        });
    }

    // npm: informational, always carries the documented install caveat.
    const npm = npmVersion();
    const npmDetail = npm ? `npm ${npm}.` : 'npm not detected on PATH (optional for npx usage).';
    checks.push({
        id: 'npm',
        label: 'npm',
        status: 'info',
        detail: npmDetail,
        recovery: 'If `npm install -g @neurcode-ai/cli` fails on older npm, use Node 20 / npm 10.8+ or `npx -y @neurcode-ai/cli@latest`.',
    });

    // CLI version, warning when more than one distinct build is visible.
    const version = cliVersion();
    const installs = detectMultipleInstallations();
    const cliCheck = installs.distinctBuilds > 1
        ? {
            id: 'cli',
            label: 'CLI version',
            status: 'warn',
            detail: `Neurcode CLI ${version ?? 'unknown'} running, but ${installs.count} installations (${installs.distinctBuilds} distinct builds) are visible on PATH.`,
            recovery: 'Pin one build: `npm uninstall -g @neurcode-ai/cli` everywhere, then `npm install -g @neurcode-ai/cli@latest` — or always use `npx -y @neurcode-ai/cli@latest`.',
        }
        : {
            id: 'cli',
            label: 'CLI version',
            status: 'ok',
            detail: `Neurcode CLI ${version ?? 'unknown'} (single build on PATH).`,
        };
    checks.push(cliCheck);

    // Repository state of repoRoot.
    const rt = (0, runtime_state_1.detectRuntimeState)(repoRoot);
    let repoCheck;
    if (!rt.isGitRepo) {
        repoCheck = {
            id: 'repo',
            label: 'Repository',
            status: 'info',
            detail: 'No git repository here — the demo scaffolds its own throwaway fixture repo, so this is OK.',
        };
    }
    else if (rt.hasHeadCommit) {
        repoCheck = { id: 'repo', label: 'Repository', status: 'ok', detail: 'Git repository with a HEAD commit detected.' };
    }
    else {
        repoCheck = {
            id: 'repo',
            label: 'Repository',
            status: 'warn',
            detail: 'Git repo found but no HEAD commit.',
            recovery: 'The demo uses its own fixture repo, so this is fine; commit a baseline before evaluating your real repo.',
        };
    }
    checks.push(repoCheck);

    // Fixture state: a CODEOWNERS file marks an existing fixture.
    const fixtureDir = (0, node_path_1.join)(repoRoot, '.neurcode', 'eval', 'fixture');
    const fixturePresent = (0, node_fs_1.existsSync)((0, node_path_1.join)(fixtureDir, 'CODEOWNERS'));
    if (fixturePresent) {
        checks.push({ id: 'fixture', label: 'Fixture', status: 'ok', detail: 'A previous demo fixture exists; it will be reused (idempotent).' });
    }
    else {
        checks.push({ id: 'fixture', label: 'Fixture', status: 'info', detail: 'No fixture yet; the demo will scaffold one under .neurcode/eval/fixture (gitignored).' });
    }

    // GitHub Actions — explicitly not required for this evaluation.
    checks.push({
        id: 'github_actions',
        label: 'GitHub Actions',
        status: 'info',
        detail: 'Not required. This evaluation is fully local — the post-PR Action is optional and never gates the demo.',
    });

    // Evidence trust: backend-signed only when the signing secret is set.
    const backendSigningConfigured = Boolean(process.env[SIGNING_SECRET_ENV]);
    if (backendSigningConfigured) {
        checks.push({
            id: 'backend_signing',
            label: 'Evidence trust',
            status: 'ok',
            detail: 'Backend signing key detected — the runner will attempt a signed-receipt verification.',
        });
    }
    else {
        checks.push({
            id: 'backend_signing',
            label: 'Evidence trust',
            status: 'info',
            detail: 'No backend signing key configured — evidence will be a self-attested local record (clearly labeled). This is expected for a first evaluation.',
        });
    }

    // Only a Node.js warning makes the preflight not-ok.
    const nodeWarned = checks.some((c) => c.status === 'warn' && c.id === 'node');
    return {
        schemaVersion: 'neurcode.eval-preflight.v1',
        generatedAt,
        agent,
        ok: !nodeWarned,
        checks,
        backendSigningConfigured,
    };
}
|
|
233
|
+
/**
 * Normalize the JSON payload of a check command into a small decision record.
 *
 * @param {*} payload - Parsed CLI output; may be null/undefined or lack any
 *   of the expected fields.
 * @returns {{decision: ('allow'|'warn'|'deny'), blockPath: *, owners: Array,
 *   blockType: *}} `decision` is `'deny'` or `'warn'` only when the payload
 *   says exactly that; every other value (including a missing or non-string
 *   decision) becomes `'allow'`. The remaining fields come from
 *   `payload.payload.hookSpecificOutput.blockContext`, defaulting to `null`
 *   (or `[]` for `owners` when it is not an array).
 */
function decisionFromCheck(payload) {
    const verdict = payload?.decision;
    const decision = verdict === 'deny' || verdict === 'warn' ? verdict : 'allow';
    const blockContext = payload?.payload?.hookSpecificOutput?.blockContext;
    const owners = blockContext?.owners;
    return {
        decision,
        blockPath: blockContext?.filePath ?? null,
        owners: Array.isArray(owners) ? owners : [],
        blockType: blockContext?.blockType ?? null,
    };
}
|
|
244
|
+
/**
 * Describe, in one sentence, how the demo drives the boundary check for a
 * given enforcement posture.
 *
 * @param {string} agent - Accepted for signature parity; not consulted.
 * @param {string} enforcement - `'hard_hook'`, `'post_pr'`, or anything else
 *   (treated as the cooperative supervised-guard posture).
 * @returns {string} The human-readable description for that posture.
 */
function enforcementMethodFor(agent, enforcement) {
    switch (enforcement) {
        case 'hard_hook':
            return 'Driven through the governed-session check primitive (the same boundary decision a live Claude Code pre-write hook enforces as a hard deny).';
        case 'post_pr':
            return 'Driven through the governed-session check primitive; in production this agent routes advisory evidence post-PR via the Action.';
        default:
            return 'Driven through the cooperative supervised-guard check primitive (the same path Codex/Cursor/Copilot use for source-free supervisor evidence).';
    }
}
|
|
253
|
+
/**
 * Keep eval artifacts out of the host repo's git status.
 *
 * Creates `<repoRoot>/.neurcode/eval` if needed and, when that directory has
 * no `.gitignore` yet, writes one that ignores everything (`*`). An existing
 * `.gitignore` is left untouched.
 *
 * @param {string} repoRoot - Root directory of the host repository.
 * @returns {void}
 */
function ensureEvalGitignore(repoRoot) {
    const { join } = node_path_1;
    const evalDir = join(repoRoot, '.neurcode', 'eval');
    node_fs_1.mkdirSync(evalDir, { recursive: true });
    const ignoreFile = join(evalDir, '.gitignore');
    if (node_fs_1.existsSync(ignoreFile)) {
        return;
    }
    node_fs_1.writeFileSync(ignoreFile, '*\n', 'utf8');
}
|
|
261
|
+
/**
|
|
262
|
+
* Run the complete one-command enterprise demo. Returns a structured result; the
|
|
263
|
+
* command layer renders it and sets the exit code. Throws only on a programming
|
|
264
|
+
* error — expected governance failures are recorded as failed checkpoints with
|
|
265
|
+
* `ok: false`, so the report still explains exactly what did not hold.
|
|
266
|
+
*/
|
|
267
|
+
function runEvalDemo(options) {
|
|
268
|
+
const repoRoot = options.repoRoot;
|
|
269
|
+
const agent = (0, guided_eval_1.normalizeGuidedEvalAgent)(options.agent);
|
|
270
|
+
const enforcement = (0, guided_eval_1.enforcementForAgent)(agent);
|
|
271
|
+
const generatedAt = options.generatedAt ?? new Date().toISOString();
|
|
272
|
+
const startedAt = Date.now();
|
|
273
|
+
const step = (line) => options.onStep?.(line);
|
|
274
|
+
const preflight = buildEvalDemoPreflight(repoRoot, { agent, generatedAt });
|
|
275
|
+
const cliEntry = options.cliEntry ?? resolveCliEntry();
|
|
276
|
+
const fixtureDir = (0, node_path_1.join)(repoRoot, '.neurcode', 'eval', 'fixture');
|
|
277
|
+
const commandsRun = [];
|
|
278
|
+
const timeline = [];
|
|
279
|
+
const checkpoints = [];
|
|
280
|
+
const add = (c) => {
|
|
281
|
+
checkpoints.push(c);
|
|
282
|
+
step(`${c.status === 'pass' ? '✓' : c.status === 'fail' ? '✗' : '·'} ${c.title}: ${c.observed}`);
|
|
283
|
+
};
|
|
284
|
+
// Facts accumulator (filled as the loop runs).
|
|
285
|
+
let sessionId = null;
|
|
286
|
+
let adapter = null;
|
|
287
|
+
let compatibilityMode = null;
|
|
288
|
+
let safeEditAllowed = false;
|
|
289
|
+
let boundaryBlockPath = null;
|
|
290
|
+
let boundaryOwners = [];
|
|
291
|
+
let boundaryBlockType = null;
|
|
292
|
+
let exactApprovalPath = null;
|
|
293
|
+
let exactApprovalOnly = false;
|
|
294
|
+
let approvedPathAllowedAfter = false;
|
|
295
|
+
let neighborPath = null;
|
|
296
|
+
let neighborContained = false;
|
|
297
|
+
let aiChangeRecordSessionId = null;
|
|
298
|
+
let aiChangeRecordRelativePath = null;
|
|
299
|
+
let admissionBlockedCount = null;
|
|
300
|
+
let admissionApprovedCount = null;
|
|
301
|
+
let repoBrain = {
|
|
302
|
+
status: 'not_evaluated',
|
|
303
|
+
recoveryCommand: 'neurcode brain index',
|
|
304
|
+
filesIndexed: null,
|
|
305
|
+
sensitiveSurfaces: [],
|
|
306
|
+
ownerBoundaries: [],
|
|
307
|
+
reuseAdvisories: [],
|
|
308
|
+
highFanOutSymbols: [],
|
|
309
|
+
reviewFirst: [],
|
|
310
|
+
};
|
|
311
|
+
const backendReceipt = {
|
|
312
|
+
configured: preflight.backendSigningConfigured,
|
|
313
|
+
attempted: false,
|
|
314
|
+
verified: false,
|
|
315
|
+
trustLevel: null,
|
|
316
|
+
provenance: preflight.backendSigningConfigured ? 'configured signing key' : 'self-attested local record (no backend signing key configured)',
|
|
317
|
+
};
|
|
318
|
+
// 1) Fixture scaffold.
|
|
319
|
+
ensureEvalGitignore(repoRoot);
|
|
320
|
+
const fixture = (0, guided_eval_1.scaffoldEvalFixture)(repoRoot);
|
|
321
|
+
add({
|
|
322
|
+
id: 'fixture_scaffolded',
|
|
323
|
+
title: 'Fixture scaffolded',
|
|
324
|
+
truthTier: 'deterministic',
|
|
325
|
+
status: (0, node_fs_1.existsSync)((0, node_path_1.join)(fixtureDir, 'CODEOWNERS')) ? 'pass' : 'fail',
|
|
326
|
+
expected: 'A throwaway fixture repo with a CODEOWNERS boundary.',
|
|
327
|
+
observed: `Fixture at ${fixture.relativeDir} (${fixture.created ? 'created' : 'reused'}).`,
|
|
328
|
+
critical: true,
|
|
329
|
+
});
|
|
330
|
+
// Only proceed with the live loop if the fixture exists.
|
|
331
|
+
const fixtureReady = (0, node_fs_1.existsSync)((0, node_path_1.join)(fixtureDir, 'CODEOWNERS'));
|
|
332
|
+
// 2) Repo brain index (best-effort; advisory).
|
|
333
|
+
if (fixtureReady) {
|
|
334
|
+
commandsRun.push('neurcode brain index # (in the fixture)');
|
|
335
|
+
const brain = runCli(cliEntry, ['brain', 'index', '--json'], fixtureDir);
|
|
336
|
+
const brainPayload = parseCliJson(brain.stdout);
|
|
337
|
+
add({
|
|
338
|
+
id: 'repo_brain_indexed',
|
|
339
|
+
title: 'Repo brain indexed',
|
|
340
|
+
truthTier: 'advisory',
|
|
341
|
+
status: brainPayload?.summary ? 'advisory' : 'skipped',
|
|
342
|
+
expected: 'A structural map of files, owners, and symbols (advisory intelligence).',
|
|
343
|
+
observed: brainPayload?.summary
|
|
344
|
+
? `Indexed ${brainPayload.summary.filesIndexed ?? 'n/a'} files, ${brainPayload.summary.ownerBoundaries ?? 0} owner boundaries.`
|
|
345
|
+
: 'Brain index unavailable in this run (non-blocking).',
|
|
346
|
+
critical: false,
|
|
347
|
+
});
|
|
348
|
+
}
|
|
349
|
+
// 3) Start a governed session.
|
|
350
|
+
if (fixtureReady) {
|
|
351
|
+
const goal = `Modify only ${SAFE_PATH}. Billing requires exact approval.`;
|
|
352
|
+
commandsRun.push(`neurcode agent start ${agent} --goal "<bounded task>" --no-activate`);
|
|
353
|
+
const start = runCli(cliEntry, ['agent', 'start', agent, '--goal', goal, '--dir', fixtureDir, '--no-activate', '--json'], fixtureDir);
|
|
354
|
+
const startPayload = parseCliJson(start.stdout);
|
|
355
|
+
sessionId = startPayload?.session?.sessionId ?? startPayload?.sessionId ?? null;
|
|
356
|
+
adapter = startPayload?.agent?.adapter ?? null;
|
|
357
|
+
compatibilityMode = startPayload?.agent?.compatibilityMode ?? null;
|
|
358
|
+
add({
|
|
359
|
+
id: 'session_started',
|
|
360
|
+
title: 'Governed session live',
|
|
361
|
+
truthTier: 'deterministic',
|
|
362
|
+
status: startPayload?.ok === true && sessionId ? 'pass' : 'fail',
|
|
363
|
+
expected: 'A governed session is created for the selected agent posture.',
|
|
364
|
+
observed: sessionId ? `Session ${sessionId} live (adapter ${adapter ?? 'n/a'}, ${compatibilityMode ?? 'n/a'}).` : `Session did not start (exit ${start.status}).`,
|
|
365
|
+
critical: true,
|
|
366
|
+
});
|
|
367
|
+
}
|
|
368
|
+
const canCheck = fixtureReady && Boolean(sessionId);
|
|
369
|
+
const check = (path, toolName) => {
|
|
370
|
+
commandsRun.push(`neurcode agent check ${path} --agent ${agent} --tool-name ${toolName}`);
|
|
371
|
+
const r = runCli(cliEntry, ['agent', 'check', path, '--agent', agent, '--tool-name', toolName, '--session-id', sessionId, '--dir', fixtureDir, '--json'], fixtureDir);
|
|
372
|
+
return decisionFromCheck(parseCliJson(r.stdout) ?? {});
|
|
373
|
+
};
|
|
374
|
+
// 4) Safe edit allowed.
|
|
375
|
+
if (canCheck) {
|
|
376
|
+
const r = check(SAFE_PATH, 'Edit');
|
|
377
|
+
safeEditAllowed = r.decision !== 'deny';
|
|
378
|
+
timeline.push({ order: 1, phase: 'safe_edit', path: SAFE_PATH, toolName: 'Edit', decision: r.decision, blockType: r.blockType, owners: r.owners });
|
|
379
|
+
add({
|
|
380
|
+
id: 'safe_edit_allowed',
|
|
381
|
+
title: 'Safe edit allowed',
|
|
382
|
+
truthTier: 'deterministic',
|
|
383
|
+
status: safeEditAllowed ? 'pass' : 'fail',
|
|
384
|
+
expected: `In-scope ${SAFE_PATH} is allowed (no false positive).`,
|
|
385
|
+
observed: `Decision: ${r.decision}.`,
|
|
386
|
+
critical: true,
|
|
387
|
+
});
|
|
388
|
+
}
|
|
389
|
+
// 5) Protected boundary block.
|
|
390
|
+
if (canCheck) {
|
|
391
|
+
const r = check(BOUNDARY_PATH, 'Edit');
|
|
392
|
+
boundaryBlockPath = r.blockPath ?? (r.decision === 'deny' ? BOUNDARY_PATH : null);
|
|
393
|
+
boundaryOwners = r.owners;
|
|
394
|
+
boundaryBlockType = r.blockType;
|
|
395
|
+
timeline.push({ order: 2, phase: 'boundary_block', path: BOUNDARY_PATH, toolName: 'Edit', decision: r.decision, blockType: r.blockType, owners: r.owners });
|
|
396
|
+
add({
|
|
397
|
+
id: 'boundary_block',
|
|
398
|
+
title: 'Protected boundary blocked',
|
|
399
|
+
truthTier: 'deterministic',
|
|
400
|
+
status: r.decision === 'deny' ? 'pass' : 'fail',
|
|
401
|
+
expected: `${BOUNDARY_PATH} is denied before the write lands.`,
|
|
402
|
+
observed: r.decision === 'deny' ? `Blocked ${boundaryBlockPath} (owner ${boundaryOwners.join(', ') || 'n/a'}, ${boundaryBlockType ?? 'boundary'}).` : `Decision: ${r.decision} (expected deny).`,
|
|
403
|
+
critical: true,
|
|
404
|
+
});
|
|
405
|
+
}
|
|
406
|
+
// 6) Exact-path approval. Uses the adapter-agnostic session-level approval so
|
|
407
|
+
// it works for every posture (the cooperative `agent approve` event is not
|
|
408
|
+
// supported by the claude-code-hooks / copilot adapters — those approve at the
|
|
409
|
+
// session/operator level).
|
|
410
|
+
if (canCheck) {
|
|
411
|
+
commandsRun.push(`neurcode session approve --path ${BOUNDARY_PATH} --reason "guided eval exact-path approval"`);
|
|
412
|
+
const r = runCli(cliEntry, ['session', 'approve', '--path', BOUNDARY_PATH, '--reason', 'guided eval exact-path approval', '--session-id', sessionId, '--dir', fixtureDir, '--json'], fixtureDir);
|
|
413
|
+
const payload = parseCliJson(r.stdout);
|
|
414
|
+
const approvedPath = payload?.approvedPath ?? payload?.payload?.approvedPath ?? null;
|
|
415
|
+
const approvedPaths = payload?.approvedPaths ?? payload?.payload?.approvedPaths ?? (approvedPath ? [approvedPath] : []);
|
|
416
|
+
exactApprovalPath = approvedPath;
|
|
417
|
+
exactApprovalOnly = approvedPaths.length === 1 && approvedPath === BOUNDARY_PATH;
|
|
418
|
+
add({
|
|
419
|
+
id: 'exact_approval',
|
|
420
|
+
title: 'Exact-path approval',
|
|
421
|
+
truthTier: 'deterministic',
|
|
422
|
+
status: payload?.ok === true && approvedPath === BOUNDARY_PATH && exactApprovalOnly ? 'pass' : 'fail',
|
|
423
|
+
expected: `Approval grants exactly ${BOUNDARY_PATH} — and nothing else.`,
|
|
424
|
+
observed: approvedPath ? `Approved ${approvedPath} (exact-only: ${exactApprovalOnly ? 'yes' : 'no'}, ${approvedPaths.length} path(s)).` : `Approval not applied (exit ${r.status}).`,
|
|
425
|
+
critical: true,
|
|
426
|
+
});
|
|
427
|
+
}
|
|
428
|
+
// 7) Approved path allowed after approval.
|
|
429
|
+
if (canCheck) {
|
|
430
|
+
const r = check(BOUNDARY_PATH, 'Edit');
|
|
431
|
+
approvedPathAllowedAfter = r.decision !== 'deny';
|
|
432
|
+
timeline.push({ order: 3, phase: 'post_approval_allow', path: BOUNDARY_PATH, toolName: 'Edit', decision: r.decision, blockType: r.blockType, owners: r.owners });
|
|
433
|
+
add({
|
|
434
|
+
id: 'approved_path_allowed',
|
|
435
|
+
title: 'Approved path allowed',
|
|
436
|
+
truthTier: 'deterministic',
|
|
437
|
+
status: approvedPathAllowedAfter ? 'pass' : 'fail',
|
|
438
|
+
expected: `${BOUNDARY_PATH} is allowed after its exact approval.`,
|
|
439
|
+
observed: `Decision: ${r.decision}.`,
|
|
440
|
+
critical: true,
|
|
441
|
+
});
|
|
442
|
+
}
|
|
443
|
+
// 8) Neighbor containment.
|
|
444
|
+
if (canCheck) {
|
|
445
|
+
const r = check(NEIGHBOR_PATH, 'Edit');
|
|
446
|
+
neighborPath = r.blockPath ?? NEIGHBOR_PATH;
|
|
447
|
+
neighborContained = r.decision === 'deny';
|
|
448
|
+
timeline.push({ order: 4, phase: 'neighbor_block', path: NEIGHBOR_PATH, toolName: 'Edit', decision: r.decision, blockType: r.blockType, owners: r.owners });
|
|
449
|
+
add({
|
|
450
|
+
id: 'neighbor_contained',
|
|
451
|
+
title: 'Neighbor containment',
|
|
452
|
+
truthTier: 'deterministic',
|
|
453
|
+
status: neighborContained ? 'pass' : 'fail',
|
|
454
|
+
expected: `${NEIGHBOR_PATH} stays blocked — the approval did not widen scope.`,
|
|
455
|
+
observed: neighborContained ? `Neighbor ${neighborPath} stayed blocked.` : `Decision: ${r.decision} (expected deny).`,
|
|
456
|
+
critical: true,
|
|
457
|
+
});
|
|
458
|
+
}
|
|
459
|
+
// 9) Finish the session.
|
|
460
|
+
if (canCheck) {
|
|
461
|
+
commandsRun.push('neurcode agent finish');
|
|
462
|
+
runCli(cliEntry, ['agent', 'finish', '--session-id', sessionId, '--dir', fixtureDir, '--json'], fixtureDir);
|
|
463
|
+
}
|
|
464
|
+
// 10) Export AI Change Record / admission record.
|
|
465
|
+
if (canCheck) {
|
|
466
|
+
commandsRun.push('neurcode session export-admission');
|
|
467
|
+
const r = runCli(cliEntry, ['session', 'export-admission', '--dir', fixtureDir, '--json'], fixtureDir);
|
|
468
|
+
const payload = parseCliJson(r.stdout);
|
|
469
|
+
aiChangeRecordRelativePath = payload?.publicRelativePath ?? null;
|
|
470
|
+
if (aiChangeRecordRelativePath) {
|
|
471
|
+
try {
|
|
472
|
+
const admission = JSON.parse((0, node_fs_1.readFileSync)((0, node_path_1.join)(fixtureDir, aiChangeRecordRelativePath), 'utf8'));
|
|
473
|
+
aiChangeRecordSessionId = admission?.sessionId ?? sessionId;
|
|
474
|
+
admissionBlockedCount = admission?.runtimeContext?.counts?.blockedPaths ?? null;
|
|
475
|
+
admissionApprovedCount = admission?.runtimeContext?.counts?.approvedExactPaths ?? null;
|
|
476
|
+
}
|
|
477
|
+
catch {
|
|
478
|
+
aiChangeRecordSessionId = sessionId;
|
|
479
|
+
}
|
|
480
|
+
}
|
|
481
|
+
add({
|
|
482
|
+
id: 'ai_change_record',
|
|
483
|
+
title: 'AI Change Record exported',
|
|
484
|
+
truthTier: 'deterministic',
|
|
485
|
+
status: aiChangeRecordRelativePath ? 'pass' : 'fail',
|
|
486
|
+
expected: 'A source-free admission record / AI Change Record is exported.',
|
|
487
|
+
observed: aiChangeRecordRelativePath ? `Exported ${aiChangeRecordRelativePath} (${admissionBlockedCount ?? '?'} blocked, ${admissionApprovedCount ?? '?'} approved).` : `Export failed (exit ${r.status}).`,
|
|
488
|
+
critical: true,
|
|
489
|
+
});
|
|
490
|
+
}
|
|
491
|
+
// 11) Backend receipt (optional — only when a signing key is configured).
|
|
492
|
+
if (canCheck && backendReceipt.configured) {
|
|
493
|
+
backendReceipt.attempted = true;
|
|
494
|
+
commandsRun.push('neurcode session export-record <session-id> --json');
|
|
495
|
+
const exp = runCli(cliEntry, ['session', 'export-record', sessionId, '--dir', fixtureDir, '--json'], fixtureDir);
|
|
496
|
+
const expPayload = parseCliJson(exp.stdout);
|
|
497
|
+
const recordRel = expPayload?.publicRelativePath ?? null;
|
|
498
|
+
if (recordRel) {
|
|
499
|
+
commandsRun.push('neurcode session verify-record --record <record> --json');
|
|
500
|
+
const ver = runCli(cliEntry, ['session', 'verify-record', '--record', (0, node_path_1.join)(fixtureDir, recordRel), '--json'], fixtureDir);
|
|
501
|
+
const verPayload = parseCliJson(ver.stdout);
|
|
502
|
+
backendReceipt.trustLevel = verPayload?.trustLevel ?? null;
|
|
503
|
+
backendReceipt.verified = verPayload?.trustLevel === 'backend_signed_verified';
|
|
504
|
+
backendReceipt.provenance = backendReceipt.verified ? 'verified against the configured signing key (HMAC backend receipt)' : 'signing key configured; receipt did not verify in this run';
|
|
505
|
+
}
|
|
506
|
+
}
|
|
507
|
+
add({
|
|
508
|
+
id: 'backend_receipt',
|
|
509
|
+
title: 'Backend receipt verified',
|
|
510
|
+
truthTier: 'backend_signed',
|
|
511
|
+
status: backendReceipt.verified ? 'pass' : backendReceipt.configured ? 'advisory' : 'skipped',
|
|
512
|
+
expected: 'A signed receipt verifies under the configured key (issuance + integrity, not source correctness).',
|
|
513
|
+
observed: backendReceipt.verified
|
|
514
|
+
? `Verified (${backendReceipt.trustLevel}).`
|
|
515
|
+
: backendReceipt.configured
|
|
516
|
+
? 'Signing key configured but no verified receipt — treated as self-attested.'
|
|
517
|
+
: 'No signing key configured — evidence is a self-attested local record (honest default).',
|
|
518
|
+
critical: false,
|
|
519
|
+
});
|
|
520
|
+
// 12) Repo-brain advisory facts (read whatever was indexed, source-free).
|
|
521
|
+
if (fixtureReady) {
|
|
522
|
+
try {
|
|
523
|
+
const ctx = (0, guided_eval_1.gatherGuidedEvalContext)(fixtureDir, { agent, mode: 'fixture', generatedAt });
|
|
524
|
+
repoBrain = ctx.facts.repoBrain;
|
|
525
|
+
}
|
|
526
|
+
catch {
|
|
527
|
+
// keep not_evaluated default
|
|
528
|
+
}
|
|
529
|
+
}
|
|
530
|
+
// Assemble facts.
|
|
531
|
+
const facts = {
|
|
532
|
+
agent,
|
|
533
|
+
enforcement,
|
|
534
|
+
enforcementLabel: (0, guided_eval_1.enforcementLabel)(enforcement),
|
|
535
|
+
enforcementMethod: enforcementMethodFor(agent, enforcement),
|
|
536
|
+
mode: 'fixture',
|
|
537
|
+
generatedAt,
|
|
538
|
+
durationMs: Date.now() - startedAt,
|
|
539
|
+
sessionId,
|
|
540
|
+
repoRootHash: (0, guided_eval_1.hashRepoIdentity)(repoRoot),
|
|
541
|
+
fixtureRelativeDir: fixture.relativeDir,
|
|
542
|
+
adapter,
|
|
543
|
+
compatibilityMode,
|
|
544
|
+
cliVersion: cliVersion(),
|
|
545
|
+
safeEditAllowed,
|
|
546
|
+
boundaryBlockPath,
|
|
547
|
+
boundaryOwners,
|
|
548
|
+
boundaryBlockType,
|
|
549
|
+
exactApprovalPath,
|
|
550
|
+
exactApprovalOnly,
|
|
551
|
+
approvedPathAllowedAfter,
|
|
552
|
+
neighborPath,
|
|
553
|
+
neighborContained,
|
|
554
|
+
aiChangeRecordSessionId,
|
|
555
|
+
aiChangeRecordRelativePath,
|
|
556
|
+
admissionBlockedCount,
|
|
557
|
+
admissionApprovedCount,
|
|
558
|
+
backendReceipt,
|
|
559
|
+
repoBrain,
|
|
560
|
+
boundaryTimeline: timeline,
|
|
561
|
+
commandsRun,
|
|
562
|
+
};
|
|
563
|
+
// 13) Build a preliminary report and summary, then run the source-free leak scan over them before any artifact is written.
|
|
564
|
+
const report = (0, enterprise_eval_report_1.buildEnterpriseEvalReport)(facts, checkpoints);
|
|
565
|
+
const summary = (0, enterprise_eval_report_1.buildEvalDemoSummary)(facts, checkpoints);
|
|
566
|
+
const reportMarkdown = (0, enterprise_eval_report_1.renderEnterpriseEvalReportMarkdown)(report);
|
|
567
|
+
// Cross-check guided-eval state so the dashboard mirror stays consistent.
|
|
568
|
+
let guidedReportMarkdown = '';
|
|
569
|
+
try {
|
|
570
|
+
const ctx = (0, guided_eval_1.gatherGuidedEvalContext)(fixtureDir, { agent, mode: 'fixture', generatedAt });
|
|
571
|
+
const guidedState = (0, guided_eval_1.buildGuidedEvalState)(ctx);
|
|
572
|
+
const guidedReport = (0, guided_eval_1.buildGuidedEvalReport)(guidedState, ctx);
|
|
573
|
+
guidedReportMarkdown = (0, guided_eval_1.renderGuidedEvalReportMarkdown)(guidedReport);
|
|
574
|
+
}
|
|
575
|
+
catch {
|
|
576
|
+
guidedReportMarkdown = '# Guided eval report unavailable for this run\n';
|
|
577
|
+
}
|
|
578
|
+
let sourceFreeOk = true;
|
|
579
|
+
let leakDetail = '';
|
|
580
|
+
for (const [label, value] of [
|
|
581
|
+
['enterprise report json', report],
|
|
582
|
+
['enterprise report markdown', reportMarkdown],
|
|
583
|
+
['eval summary json', summary],
|
|
584
|
+
['guided report markdown', guidedReportMarkdown],
|
|
585
|
+
]) {
|
|
586
|
+
try {
|
|
587
|
+
(0, enterprise_eval_report_1.assertEnterpriseEvalSourceFree)(value, label);
|
|
588
|
+
}
|
|
589
|
+
catch (error) {
|
|
590
|
+
sourceFreeOk = false;
|
|
591
|
+
leakDetail = error instanceof Error ? error.message : String(error);
|
|
592
|
+
}
|
|
593
|
+
}
|
|
594
|
+
add({
|
|
595
|
+
id: 'source_free_scan',
|
|
596
|
+
title: 'Source-free scan',
|
|
597
|
+
truthTier: 'deterministic',
|
|
598
|
+
status: sourceFreeOk ? 'pass' : 'fail',
|
|
599
|
+
expected: 'No source, diffs, prompts, or secrets appear in any generated artifact.',
|
|
600
|
+
observed: sourceFreeOk ? 'Clean across report, summary, and guided artifacts.' : `Leak detected: ${leakDetail}`,
|
|
601
|
+
critical: true,
|
|
602
|
+
});
|
|
603
|
+
// Rebuild the report/summary now that the source-free checkpoint is recorded.
|
|
604
|
+
const finalReport = (0, enterprise_eval_report_1.buildEnterpriseEvalReport)(facts, checkpoints);
|
|
605
|
+
const finalSummary = (0, enterprise_eval_report_1.buildEvalDemoSummary)(facts, checkpoints);
|
|
606
|
+
const finalReportMarkdown = (0, enterprise_eval_report_1.renderEnterpriseEvalReportMarkdown)(finalReport);
|
|
607
|
+
// 14) Write artifacts under .neurcode/eval/ (gitignored).
|
|
608
|
+
const evalDir = (0, node_path_1.join)(repoRoot, '.neurcode', 'eval');
|
|
609
|
+
(0, node_fs_1.mkdirSync)(evalDir, { recursive: true });
|
|
610
|
+
const artifacts = {
|
|
611
|
+
reportMarkdownPath: (0, node_path_1.join)(evalDir, 'enterprise-eval-report.md'),
|
|
612
|
+
reportJsonPath: (0, node_path_1.join)(evalDir, 'enterprise-eval-report.json'),
|
|
613
|
+
summaryJsonPath: (0, node_path_1.join)(evalDir, 'eval-demo-summary.json'),
|
|
614
|
+
guidedReportMarkdownPath: (0, node_path_1.join)(evalDir, 'guided-eval-report.md'),
|
|
615
|
+
};
|
|
616
|
+
(0, node_fs_1.writeFileSync)(artifacts.reportMarkdownPath, finalReportMarkdown, 'utf8');
|
|
617
|
+
(0, node_fs_1.writeFileSync)(artifacts.reportJsonPath, JSON.stringify(finalReport, null, 2) + '\n', 'utf8');
|
|
618
|
+
(0, node_fs_1.writeFileSync)(artifacts.summaryJsonPath, JSON.stringify(finalSummary, null, 2) + '\n', 'utf8');
|
|
619
|
+
if (guidedReportMarkdown)
|
|
620
|
+
(0, node_fs_1.writeFileSync)(artifacts.guidedReportMarkdownPath, guidedReportMarkdown, 'utf8');
|
|
621
|
+
const ok = checkpoints.every((c) => !c.critical || c.status !== 'fail');
|
|
622
|
+
return {
|
|
623
|
+
schemaVersion: exports.EVAL_DEMO_RUN_SCHEMA_VERSION,
|
|
624
|
+
ok,
|
|
625
|
+
agent,
|
|
626
|
+
enforcement,
|
|
627
|
+
preflight,
|
|
628
|
+
checkpoints,
|
|
629
|
+
facts,
|
|
630
|
+
report: finalReport,
|
|
631
|
+
summary: finalSummary,
|
|
632
|
+
artifacts,
|
|
633
|
+
};
|
|
634
|
+
}
|
|
635
|
+
//# sourceMappingURL=eval-demo.js.map
|