@pugi/cli 0.1.0-beta.100 → 0.1.0-beta.101

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. package/README.md +2 -0
  2. package/dist/core/codegraph/parser.js +574 -47
  3. package/dist/core/codegraph/queries/go.scm +57 -0
  4. package/dist/core/codegraph/queries/javascript.scm +56 -0
  5. package/dist/core/codegraph/queries/python.scm +55 -0
  6. package/dist/core/codegraph/queries/rust.scm +63 -0
  7. package/dist/core/codegraph/queries/typescript.scm +91 -0
  8. package/dist/core/codegraph/reindex.js +218 -0
  9. package/dist/core/codegraph/resolve-edges.js +107 -0
  10. package/dist/core/codegraph/watcher.js +440 -0
  11. package/dist/core/diagnostics/probes/sandbox.js +7 -12
  12. package/dist/core/engine/prompts.js +32 -0
  13. package/dist/core/eval/v1/ledger.js +83 -0
  14. package/dist/core/eval/v1/runner.js +280 -0
  15. package/dist/core/eval/v1/scoring.js +68 -0
  16. package/dist/core/eval/v1/task-loader.js +191 -0
  17. package/dist/core/eval/v1/types.js +14 -0
  18. package/dist/core/eval/v1/verifier.js +176 -0
  19. package/dist/core/eval/v1/yaml-parser.js +250 -0
  20. package/dist/core/sandboxing/adapter.js +31 -17
  21. package/dist/core/sandboxing/bubblewrap.js +209 -0
  22. package/dist/core/sandboxing/index.js +32 -3
  23. package/dist/core/sandboxing/policy.js +97 -0
  24. package/dist/core/sandboxing/seatbelt.js +69 -21
  25. package/dist/core/settings.js +31 -7
  26. package/dist/runtime/cli.js +58 -0
  27. package/dist/runtime/commands/eval-v1.js +266 -0
  28. package/dist/runtime/commands/index-cmd.js +125 -19
  29. package/dist/runtime/commands/servers-cli.js +182 -0
  30. package/dist/runtime/version.js +1 -1
  31. package/dist/tools/bash.js +187 -3
  32. package/package.json +10 -3
@@ -0,0 +1,280 @@
1
+ /**
2
+ * Runner for pugi-eval-v1.
3
+ *
4
+ * Per task: spawn a fresh tmp workspace, copy fixture files, invoke
5
+ * the `pugi <command>` subprocess with the brief, capture stdout +
6
+ * exit code + wall-clock, then run the verification checks.
7
+ *
8
+ * The runner is deliberately subprocess-based - mirrors the smoke
9
+ * harness pattern in `core/smoke/headless-driver.ts`. Validating the
10
+ * AS-PUBLISHED CLI is the whole point of a benchmark; bypassing
11
+ * `bin/run.js` would let us miss whole categories of regression
12
+ * (loader cost, env propagation, exit-code handling).
13
+ *
14
+ * Tests inject a `runner` callback that returns a fake `RunCapture`
15
+ * so the meta-spec can exercise scoring + ledger without a real
16
+ * engine.
17
+ */
18
+ import { spawn } from 'node:child_process';
19
+ import { existsSync, mkdirSync, mkdtempSync, rmSync, writeFileSync, readdirSync, statSync } from 'node:fs';
20
+ import { tmpdir } from 'node:os';
21
+ import { dirname, join, resolve } from 'node:path';
22
+ import { computePugiScore } from './scoring.js';
23
+ import { runVerifications } from './verifier.js';
24
/**
 * Default executor: spawns
 * `pugi <command> --print --json --intensity <i> --max-turns <n> [--model <m>] "<brief>"`
 * with the workspace as its working directory.
 *
 * The `--print` flag forces non-interactive mode; `--json` produces the
 * structured envelope the runner parses for `tokensUsed` etc.
 *
 * Lifecycle handling:
 * - After `spec.timeoutMs` the child receives SIGTERM, then SIGKILL 5 s
 *   later if it is still running. Both timers are cleared once the child
 *   has finished so no stray timer outlives the run.
 * - An abort on `input.signal` sends SIGTERM. A signal that is already
 *   aborted when the runner starts is honoured immediately.
 * - The result is resolved on `'close'` (stdio fully drained) rather than
 *   `'exit'`, so the JSON envelope is never parsed from truncated output.
 *   If the pipes stay open after exit (for example a grandchild inherited
 *   them) the runner stops waiting after a short grace period.
 *
 * @param input - `{ spec, workspaceRoot, pugiBin, env, model?, signal? }`.
 * @returns the run capture: stdout, stderr, exitCode (-1 when the child was
 *   signalled or failed to spawn), wallClockMs, envelope counters and the
 *   `timedOut` / `budgetExhausted` / `engineError` flags.
 */
export const subprocessRunner = async (input) => {
    // How long to wait for stdio to drain after the process itself exited.
    const stdioDrainGraceMs = 1_000;
    const args = [
        input.spec.command,
        '--print',
        '--json',
        '--intensity',
        input.spec.intensity,
        '--max-turns',
        String(input.spec.maxTurns),
    ];
    if (input.model) {
        args.push('--model', input.model);
    }
    args.push(input.spec.brief);
    const child = spawn(input.pugiBin, args, {
        cwd: input.workspaceRoot,
        env: input.env,
        stdio: ['ignore', 'pipe', 'pipe'],
    });
    const start = Date.now();
    let stdout = '';
    let stderr = '';
    // Decode at the stream level so a multi-byte UTF-8 sequence that is
    // split across two chunks is not turned into replacement characters.
    child.stdout?.setEncoding('utf8');
    child.stderr?.setEncoding('utf8');
    child.stdout?.on('data', (chunk) => {
        stdout += chunk;
    });
    child.stderr?.on('data', (chunk) => {
        stderr += chunk;
    });
    const killQuietly = (signalName) => {
        try {
            child.kill(signalName);
        }
        catch {
            /* the child is already gone - nothing to do */
        }
    };
    let timedOut = false;
    let escalationTimer;
    const timer = setTimeout(() => {
        timedOut = true;
        killQuietly('SIGTERM');
        escalationTimer = setTimeout(() => killQuietly('SIGKILL'), 5_000);
    }, input.spec.timeoutMs);
    const onAbort = () => killQuietly('SIGTERM');
    input.signal?.addEventListener('abort', onAbort);
    if (input.signal?.aborted) {
        // 'abort' never fires for a signal that was aborted before we
        // subscribed, so act on it right away.
        onAbort();
    }
    let exitedAt;
    const exitCode = await new Promise((resolveExit) => {
        let drainTimer;
        const finish = (code) => {
            clearTimeout(drainTimer);
            resolveExit(code ?? -1);
        };
        child.once('exit', (code) => {
            exitedAt = Date.now();
            // Normally 'close' follows immediately; the grace timer only
            // matters when something else still holds the pipes open.
            drainTimer = setTimeout(() => {
                child.stdout?.destroy();
                child.stderr?.destroy();
                finish(code);
            }, stdioDrainGraceMs);
        });
        child.once('close', (code) => finish(code));
        child.once('error', () => finish(-1));
    });
    clearTimeout(timer);
    clearTimeout(escalationTimer);
    input.signal?.removeEventListener('abort', onAbort);
    // Wall clock is measured to process exit, not to the end of stdio drain.
    const wallClockMs = (exitedAt ?? Date.now()) - start;
    const parsed = parseEnvelope(stdout);
    const budgetExhausted = parsed?.status === 'budget_exceeded' ||
        parsed?.status === 'budget_exhausted';
    const engineError = parsed?.status === 'engine_unavailable' ||
        parsed?.status === 'failed';
    return {
        stdout,
        stderr,
        exitCode,
        wallClockMs,
        tokensUsed: parsed?.tokensUsed ?? 0,
        turnsUsed: parsed?.turnsUsed ?? 0,
        toolCallCount: parsed?.toolCallCount ?? 0,
        timedOut,
        budgetExhausted,
        engineError,
    };
};
110
/**
 * Parse the last JSON envelope from stdout. Pugi `--json` emits one JSON
 * object per invocation; warning lines printed before it must not break
 * parsing.
 *
 * Strategy:
 * 1. Try the whole trimmed payload (common case).
 * 2. Otherwise scan lines from the end. For each line that starts with
 *    `{`, try that line on its own, then try everything from that line to
 *    the end of the output. The second attempt is what makes a
 *    pretty-printed (multi-line) envelope parseable when noise precedes it.
 *
 * @param stdout - raw stdout captured from the CLI.
 * @returns the parsed envelope object, or `null` when stdout is empty or
 *   holds no JSON object. JSON primitives and arrays are not envelopes and
 *   also yield `null`.
 */
function parseEnvelope(stdout) {
    const trimmed = stdout.trim();
    if (trimmed === '')
        return null;
    const parseObject = (text) => {
        try {
            const value = JSON.parse(text);
            const isPlainObject = value !== null && typeof value === 'object' && !Array.isArray(value);
            return isPlainObject ? value : null;
        }
        catch {
            return null;
        }
    };
    const whole = parseObject(trimmed);
    if (whole !== null)
        return whole;
    const lines = trimmed.split(/\r?\n/);
    // A multi-line object can only end the output if the output ends in `}`;
    // skip the (more expensive) tail attempts otherwise.
    const mayEndWithObject = trimmed.endsWith('}');
    for (let i = lines.length - 1; i >= 0; i -= 1) {
        const line = lines[i].trim();
        if (!line.startsWith('{'))
            continue;
        const single = parseObject(line);
        if (single !== null)
            return single;
        if (mayEndWithObject) {
            const tail = parseObject(lines.slice(i).join('\n'));
            if (tail !== null)
                return tail;
        }
    }
    return null;
}
140
/**
 * Recursively collect workspace-relative paths of regular files.
 *
 * Directories named `.pugi`, `node_modules` and `.git` are skipped at
 * every level. Unreadable directories and entries that cannot be stat-ed
 * are ignored silently (best effort).
 *
 * @param root - absolute directory to list.
 * @param prefix - relative path of `root` inside the walk ('' at the top).
 * @param out - array that receives the `/`-joined relative file paths.
 */
function walkFiles(root, prefix, out) {
    const ignoredNames = new Set(['.pugi', 'node_modules', '.git']);
    const statOrNull = (target) => {
        try {
            return statSync(target);
        }
        catch {
            return null;
        }
    };
    let names;
    try {
        names = readdirSync(root);
    }
    catch {
        return;
    }
    names
        .filter((name) => !ignoredNames.has(name))
        .forEach((name) => {
        const absolute = join(root, name);
        const info = statOrNull(absolute);
        if (info === null) {
            return;
        }
        const relative = prefix === '' ? name : `${prefix}/${name}`;
        if (info.isDirectory()) {
            walkFiles(absolute, relative, out);
        }
        else if (info.isFile()) {
            out.push(relative);
        }
    });
}
169
/**
 * Map a run capture to its final task status.
 *
 * Precedence (first match wins): timeout, budget exhaustion, engine
 * error, non-zero exit code. Only a clean run is judged by its
 * verification outcome.
 *
 * @param capture - run capture with `timedOut`, `budgetExhausted`,
 *   `engineError` and `exitCode`.
 * @param verificationsAllPassed - whether every verification check passed.
 * @returns 'timeout' | 'budget_exhausted' | 'engine_error' | 'fail' | 'pass'.
 */
function classifyStatus(capture, verificationsAllPassed) {
    const precedence = [
        [capture.timedOut, 'timeout'],
        [capture.budgetExhausted, 'budget_exhausted'],
        [capture.engineError, 'engine_error'],
        [capture.exitCode !== 0, 'fail'],
    ];
    const firstHit = precedence.find(([triggered]) => triggered);
    if (firstHit !== undefined) {
        return firstHit[1];
    }
    return verificationsAllPassed ? 'pass' : 'fail';
}
180
/**
 * Create a fresh temporary workspace for one task and materialise its
 * fixture files inside it.
 *
 * Every fixture path must stay inside the workspace: paths containing a
 * `..` segment are rejected, and so is any path that resolves outside the
 * workspace root (for example an absolute path, which `resolve()` would
 * otherwise honour verbatim). If writing the fixture fails, the partially
 * built workspace is removed before the error is rethrown.
 *
 * @param spec - task spec; reads `spec.id` and the optional `spec.fixture`
 *   map of relative path -> file body.
 * @returns `{ root, cleanup }` where `cleanup()` removes the workspace and
 *   never throws.
 * @throws Error when a fixture path is unsafe, or any filesystem error
 *   raised while writing the fixture.
 */
export function prepareWorkspace(spec) {
    const root = mkdtempSync(join(tmpdir(), `pugi-eval-v1-${spec.id}-`));
    const cleanup = () => {
        try {
            rmSync(root, { recursive: true, force: true });
        }
        catch {
            /* swallow - cleanup is best effort */
        }
    };
    try {
        const normalizedRoot = resolve(root);
        for (const [relPath, body] of Object.entries(spec.fixture ?? {})) {
            if (relPath.split(/[\\/]/).includes('..')) {
                throw new Error(`eval-v1 task ${spec.id}: fixture path ${relPath} contains ..`);
            }
            const abs = resolve(root, relPath);
            if (!isStrictlyInsideDir(normalizedRoot, abs)) {
                throw new Error(`eval-v1 task ${spec.id}: fixture path ${relPath} escapes the workspace`);
            }
            mkdirSync(dirname(abs), { recursive: true });
            writeFileSync(abs, body, { mode: 0o644 });
        }
    }
    catch (err) {
        // Do not leak a half-populated temp dir when fixture setup fails.
        cleanup();
        throw err;
    }
    return { root, cleanup };
}
/**
 * True when `candidate` lies strictly below `dir`. Both arguments must be
 * absolute, normalised paths. Walks up the parents of `candidate` until it
 * meets `dir` or reaches the filesystem root.
 */
function isStrictlyInsideDir(dir, candidate) {
    let current = candidate;
    for (;;) {
        const parent = dirname(current);
        if (parent === dir)
            return true;
        if (parent === current)
            return false;
        current = parent;
    }
}
202
/**
 * Turn a raw run capture into a scored task result.
 *
 * Steps, in order: extract the final text (the envelope's `finalText`, or
 * the raw stdout when there is none), run the task's verification checks
 * against the workspace, classify the status, list the files present in
 * the workspace, then compute the pugi score.
 *
 * @param spec - task spec (`id`, `verification`, scoring limits).
 * @param workspaceRoot - directory the task ran in.
 * @param capture - capture produced by a runner.
 * @returns the task result including `pugiScore`.
 */
export async function runTaskWithCapture(spec, workspaceRoot, capture) {
    const envelope = parseEnvelope(capture.stdout);
    const finalText = envelope?.finalText ?? capture.stdout;
    const verifications = runVerifications(spec.verification, { workspaceRoot, finalText });
    const anyCheckFailed = verifications.some((check) => !check.passed);
    const status = classifyStatus(capture, !anyCheckFailed);
    // Listing happens after verification, so files created by the checks
    // themselves are part of the listing.
    const filesWritten = [];
    walkFiles(workspaceRoot, '', filesWritten);
    filesWritten.sort();
    const { tokensUsed, toolCallCount, turnsUsed, wallClockMs, exitCode } = capture;
    const base = {
        taskId: spec.id,
        status,
        tokensUsed,
        toolCallCount,
        turnsUsed,
        wallClockMs,
        exitCode,
        verifications,
        finalText,
        filesWritten,
    };
    return { ...base, pugiScore: computePugiScore(base, spec) };
}
229
/**
 * Run a single task end to end: prepare a workspace, invoke the runner,
 * score the capture, and always remove the workspace afterwards.
 *
 * @param spec - task spec.
 * @param options - `{ runner, pugiBin, env, model?, signal? }`; `model`
 *   and `signal` are only passed to the runner when defined.
 * @returns the scored task result.
 */
export async function runOneTask(spec, options) {
    const workspace = prepareWorkspace(spec);
    try {
        const runnerInput = {
            spec,
            workspaceRoot: workspace.root,
            pugiBin: options.pugiBin,
        };
        if (options.model !== undefined) {
            runnerInput.model = options.model;
        }
        runnerInput.env = options.env;
        if (options.signal !== undefined) {
            runnerInput.signal = options.signal;
        }
        const capture = await options.runner(runnerInput);
        return await runTaskWithCapture(spec, workspace.root, capture);
    }
    finally {
        workspace.cleanup();
    }
}
246
/**
 * Run every selected task sequentially and collect the results.
 *
 * @param input - `{ specs, options }`. Options read here: `pugiBin`,
 *   `runner` (defaults to `subprocessRunner`), `env` (defaults to
 *   `process.env`), `only` (task ids to keep), `model`, `signal`,
 *   `onTaskStart`, `onTaskFinish`.
 * @returns the task results in spec order.
 * @throws Error when an `only` filter is given and matches no task, or
 *   when the OS temp directory no longer exists after the run.
 */
export async function runHarness(input) {
    const { options, specs } = input;
    const runner = options.runner ?? subprocessRunner;
    const env = options.env ?? process.env;
    const onlyFilter = options.only ? new Set(options.only) : null;
    const out = [];
    for (const spec of specs) {
        if (onlyFilter && !onlyFilter.has(spec.id))
            continue;
        options.onTaskStart?.(spec);
        const runOpts = {
            pugiBin: options.pugiBin,
            env,
            runner,
        };
        if (options.model !== undefined) {
            runOpts.model = options.model;
        }
        if (options.signal !== undefined) {
            // runOneTask hands the signal to the runner; without this the
            // caller's abort could never reach the subprocess.
            runOpts.signal = options.signal;
        }
        const result = await runOneTask(spec, runOpts);
        out.push(result);
        options.onTaskFinish?.(result);
    }
    if (onlyFilter && out.length === 0) {
        throw new Error(`eval-v1: --task filter matched zero tasks (asked for ${[...onlyFilter].join(', ')})`);
    }
    // Sanity check only: confirms the OS temp directory itself survived the
    // run. It does not verify that individual workspaces were removed.
    if (!existsSync(tmpdir())) {
        // pathological - tmpdir disappeared. Surface so CI fails loud.
        throw new Error('eval-v1: tmpdir no longer exists post-run');
    }
    return out;
}
280
+ //# sourceMappingURL=runner.js.map
@@ -0,0 +1,68 @@
1
+ /**
2
+ * pugi_score scoring formula for eval-v1.
3
+ *
4
+ * Per-task score (0..150, higher = better):
5
+ *
6
+ * pugi_score = pass_rate * 100 // 0..100
7
+ * + verification_completeness * 50 // 0.. 50
8
+ * - (tokens_used / max_tokens) * 30 // 0..-30
9
+ * - (wall_clock_ms / timeout_ms) * 20 // 0..-20
10
+ *
11
+ * Where:
12
+ * - `pass_rate` is 1.0 if status is `pass`, 0 otherwise.
13
+ * - `verification_completeness` is `passed_checks / total_checks`.
14
+ * - Token and wall-clock penalties are clamped to [0, 1] so a run
15
+ * that exceeds the budget cap caps the penalty (avoid runaway
16
+ * negative scores that would skew the aggregate).
17
+ *
18
+ * Aggregate is the arithmetic mean across all per-task scores. Mean
19
+ * is defensible because every task contributes equally to the
20
+ * benchmark (we are not weighting by difficulty - eval-v2 may add
21
+ * weights once we have a baseline year of data).
22
+ */
23
// Points awarded when the task status is `pass`.
const PASS_WEIGHT = 100;
// Points awarded in proportion to the share of verification checks passed.
const VERIFICATION_WEIGHT = 50;
// Largest deduction for token usage (reached at or above the token cap).
const TOKEN_PENALTY = 30;
// Largest deduction for wall-clock time (reached at or above the timeout).
const WALL_PENALTY = 20;
27
/**
 * Clamp a ratio into [0, 1].
 *
 * Non-finite input (NaN, +Infinity, -Infinity) maps to 1, i.e. the full
 * penalty, so a missing or zero budget never lowers the penalty.
 *
 * @param n - ratio to clamp.
 * @returns `n` limited to the range [0, 1].
 */
function clamp01(n) {
    if (!Number.isFinite(n)) {
        return 1;
    }
    return n < 0 ? 0 : n > 1 ? 1 : n;
}
36
/**
 * Compute the per-task pugi_score.
 *
 * score = pass reward + verification completeness reward
 *         - token penalty - wall-clock penalty,
 * limited to [0, 150] and rounded to two decimals.
 *
 * @param result - task result; reads `status`, `verifications`,
 *   `tokensUsed` and `wallClockMs`.
 * @param spec - task spec; reads `maxTokens` and `timeoutMs`.
 * @returns the score in the range 0..150.
 */
export function computePugiScore(result, spec) {
    const { verifications } = result;
    const passedCount = verifications.reduce((count, check) => (check.passed ? count + 1 : count), 0);
    // An empty check list counts as one check so the division is defined.
    const completeness = passedCount / Math.max(1, verifications.length);
    const passReward = result.status === 'pass' ? PASS_WEIGHT : 0;
    const tokenCost = clamp01(result.tokensUsed / spec.maxTokens) * TOKEN_PENALTY;
    const wallCost = clamp01(result.wallClockMs / spec.timeoutMs) * WALL_PENALTY;
    const raw = passReward + completeness * VERIFICATION_WEIGHT - tokenCost - wallCost;
    // Clamp to [0, 150] so a partial-completeness fail with non-zero
    // penalties never produces a negative aggregate. The penalties are
    // already clamped to [0, 1] individually; this final clamp protects
    // the documented range invariant when verification_completeness is
    // 0 AND budget penalties land.
    const bounded = Math.max(0, Math.min(150, raw));
    return Math.round(bounded * 100) / 100;
}
59
/**
 * Aggregate score across an entire harness run: the arithmetic mean of
 * the per-task `pugiScore` values, rounded to two decimals.
 *
 * @param results - task results, each carrying a numeric `pugiScore`.
 * @returns the mean score, or 0 for an empty run.
 */
export function aggregateScore(results) {
    const count = results.length;
    if (count === 0) {
        return 0;
    }
    let total = 0;
    for (const { pugiScore } of results) {
        total += pugiScore;
    }
    return Math.round((total / count) * 100) / 100;
}
68
+ //# sourceMappingURL=scoring.js.map
@@ -0,0 +1,191 @@
1
+ /**
2
+ * Task loader for pugi-eval-v1.
3
+ *
4
+ * Reads `<root>/eval/v1/tasks/<NN>-<slug>.task.yml`, parses via the
5
+ * embedded minimal YAML parser, validates via Zod, then asserts the
6
+ * filename matches the parsed `id` so a refactor cannot silently
7
+ * decouple the basename from the schema id.
8
+ *
9
+ * The loader also exposes `loadTaskManifest` which is the spec-side
10
+ * gate that pins task content via sha256 and refuses mismatches (the
11
+ * frozen-benchmark invariant from backlog #120).
12
+ */
13
+ import { createHash } from 'node:crypto';
14
+ import { readdirSync, readFileSync } from 'node:fs';
15
+ import { basename, join, resolve } from 'node:path';
16
+ import { z } from 'zod';
17
+ import { parseTaskYaml } from './yaml-parser.js';
18
// Task id: two digits, a dash, then a lowercase slug.
const TASK_ID_RE = /^\d{2}-[a-z][a-z0-9-]*$/;
// Task file name: `<id>.task.yml`; capture group 1 is the id.
const TASK_FILENAME_RE = /^(\d{2}-[a-z][a-z0-9-]*)\.task\.yml$/;
// Optional matching mode shared by the pattern-based checks.
const patternModeSchema = z.enum(['literal', 'regex']).optional();
// Check: a file exists at `path`.
const fileExistsCheckSchema = z
    .object({
    kind: z.literal('file_exists'),
    path: z.string().min(1),
})
    .strict();
// Check: the file at `path` contains `pattern`.
const fileContainsCheckSchema = z
    .object({
    kind: z.literal('file_contains'),
    path: z.string().min(1),
    pattern: z.string().min(1),
    mode: patternModeSchema,
})
    .strict();
// Check: the run output contains `pattern`.
const outputContainsCheckSchema = z
    .object({
    kind: z.literal('output_contains'),
    pattern: z.string().min(1),
    mode: patternModeSchema,
})
    .strict();
// Check: `command` exits with `expectedExitCode`.
const commandExitCodeCheckSchema = z
    .object({
    kind: z.literal('command_exit_code'),
    command: z.string().min(1),
    expectedExitCode: z.number().int(),
    timeoutMs: z.number().int().positive().optional(),
})
    .strict();
// A verification entry is exactly one of the checks above, selected by `kind`.
const verificationSchema = z.discriminatedUnion('kind', [
    fileExistsCheckSchema,
    fileContainsCheckSchema,
    outputContainsCheckSchema,
    commandExitCodeCheckSchema,
]);
// Full task document. Strict: unknown keys are rejected.
const taskSchema = z
    .object({
    id: z.string().regex(TASK_ID_RE, 'id must match <NN>-<slug>'),
    difficulty: z.enum(['simple', 'medium', 'hard']),
    intensity: z.enum(['quick', 'standard', 'deep', 'marathon']),
    command: z.enum(['code', 'fix', 'explain', 'plan', 'build']),
    brief: z.string().min(1),
    fixture: z.record(z.string(), z.string()).optional(),
    verification: z.array(verificationSchema).min(1),
    maxTokens: z.number().int().positive(),
    maxTurns: z.number().int().positive(),
    timeoutMs: z.number().int().positive(),
})
    .strict();
62
/**
 * Default location of the task files: `<packageRoot>/eval/v1/tasks`,
 * resolved to an absolute path.
 *
 * @param packageRoot - root directory of the @pugi/cli workspace.
 * @returns absolute path of the tasks directory.
 */
export function defaultTasksDir(packageRoot) {
    const relativeLocation = join('eval', 'v1', 'tasks');
    return resolve(packageRoot, relativeLocation);
}
70
/**
 * Default location of the task manifest:
 * `<packageRoot>/eval/v1/manifest.json`, resolved to an absolute path.
 *
 * @param packageRoot - root directory of the @pugi/cli workspace.
 * @returns absolute path of the manifest file.
 */
export function defaultManifestPath(packageRoot) {
    const relativeLocation = join('eval', 'v1', 'manifest.json');
    return resolve(packageRoot, relativeLocation);
}
73
/**
 * Default location of the results ledger:
 * `<packageRoot>/eval/v1/results.tsv`, resolved to an absolute path.
 *
 * @param packageRoot - root directory of the @pugi/cli workspace.
 * @returns absolute path of the ledger file.
 */
export function defaultLedgerPath(packageRoot) {
    const relativeLocation = join('eval', 'v1', 'results.tsv');
    return resolve(packageRoot, relativeLocation);
}
76
/**
 * List the task files in a directory.
 *
 * Only entries whose name matches `<NN>-<slug>.task.yml` are returned;
 * everything else in the directory is ignored.
 *
 * @param tasksDir - directory to scan.
 * @returns paths (`tasksDir` joined with the file name), sorted by file name.
 * @throws Error when the directory cannot be read; the underlying
 *   filesystem error is preserved as `cause`.
 */
export function listTaskFiles(tasksDir) {
    let entries;
    try {
        entries = readdirSync(tasksDir);
    }
    catch (err) {
        throw new Error(`eval-v1 tasks directory not found at ${tasksDir}: ${err.message}`, { cause: err });
    }
    return entries
        .filter((name) => TASK_FILENAME_RE.test(name))
        .sort()
        .map((name) => join(tasksDir, name));
}
87
/**
 * Load and validate one task file.
 *
 * The file is read as UTF-8, parsed with the embedded YAML parser,
 * validated against the task schema, and finally its `id` field is
 * compared with the id encoded in the file name.
 *
 * @param path - path of a `<NN>-<slug>.task.yml` file.
 * @returns `{ path, raw, spec }` - the path, the raw file text and the
 *   validated task spec.
 * @throws Error when the file name does not match the task pattern, the
 *   YAML fails to parse, schema validation fails, or the id does not match
 *   the file name. Parse and validation errors keep the underlying error
 *   as `cause`. Filesystem errors from reading the file propagate as-is.
 */
export function loadTaskFile(path) {
    const raw = readFileSync(path, 'utf8');
    const fileName = basename(path);
    const filenameMatch = TASK_FILENAME_RE.exec(fileName);
    if (!filenameMatch) {
        throw new Error(`eval-v1: filename ${fileName} does not match <NN>-<slug>.task.yml`);
    }
    const expectedId = filenameMatch[1];
    let parsed;
    try {
        parsed = parseTaskYaml(raw);
    }
    catch (err) {
        throw new Error(`eval-v1 task ${fileName} failed YAML parse: ${err.message}`, { cause: err });
    }
    const result = taskSchema.safeParse(parsed);
    if (!result.success) {
        throw new Error(`eval-v1 task ${fileName} failed schema validation: ${result.error.message}`, { cause: result.error });
    }
    if (result.data.id !== expectedId) {
        throw new Error(`eval-v1 task ${fileName} id field ${result.data.id} does not match filename ${expectedId}`);
    }
    return { path, raw, spec: result.data };
}
110
/**
 * Load every task file found in a directory.
 *
 * @param tasksDir - directory containing the task files.
 * @returns the loaded tasks, in the order the files were listed.
 * @throws Error when two loaded tasks share the same id, plus anything
 *   thrown while listing or loading the files.
 */
export function loadAllTasks(tasksDir) {
    const loaded = listTaskFiles(tasksDir).map((file) => loadTaskFile(file));
    // Duplicate ids should be impossible given the file name pattern; the
    // check is a guard against future refactors.
    const knownIds = new Set();
    loaded.forEach(({ spec }) => {
        if (knownIds.has(spec.id)) {
            throw new Error(`eval-v1: duplicate task id ${spec.id}`);
        }
        knownIds.add(spec.id);
    });
    return loaded;
}
124
/**
 * Build the manifest entry for one task file.
 *
 * @param path - task file path; the id is taken from its base name.
 * @param raw - file content as a string.
 * @returns `{ id, sha256, byteLength }` where `sha256` is the hex digest
 *   of `raw` and `byteLength` is its UTF-8 size in bytes.
 * @throws Error when the base name does not match the task file pattern.
 */
export function manifestEntryFor(path, raw) {
    const fileName = basename(path);
    const match = TASK_FILENAME_RE.exec(fileName);
    if (match === null) {
        throw new Error(`cannot derive manifest entry: filename ${fileName} does not match`);
    }
    const [, id] = match;
    const hasher = createHash('sha256');
    hasher.update(raw);
    return {
        id,
        sha256: hasher.digest('hex'),
        byteLength: Buffer.byteLength(raw, 'utf8'),
    };
}
133
/**
 * Read and validate the task manifest.
 *
 * The manifest must be a JSON object with `schemaVersion` 1, a
 * `generatedAt` string and at least one entry; each entry carries a task
 * `id`, a 64-character lowercase hex `sha256` and a non-negative integer
 * `byteLength`. Unknown keys are rejected at both levels.
 *
 * @param manifestPath - path of the manifest JSON file.
 * @returns the validated manifest.
 * @throws Error when validation fails. Filesystem and JSON syntax errors
 *   propagate unchanged.
 */
export function readManifest(manifestPath) {
    const document = JSON.parse(readFileSync(manifestPath, 'utf8'));
    const entrySchema = z
        .object({
        id: z.string().regex(TASK_ID_RE),
        sha256: z.string().regex(/^[0-9a-f]{64}$/),
        byteLength: z.number().int().nonnegative(),
    })
        .strict();
    const manifestSchema = z
        .object({
        schemaVersion: z.literal(1),
        generatedAt: z.string(),
        entries: z.array(entrySchema).min(1),
    })
        .strict();
    const outcome = manifestSchema.safeParse(document);
    if (outcome.success) {
        return outcome.data;
    }
    throw new Error(`eval-v1 manifest ${manifestPath} invalid: ${outcome.error.message}`);
}
157
/**
 * Compare the on-disk task files against the committed manifest. Used
 * to enforce the frozen-benchmark invariant.
 *
 * Reported differences, in this order: for each task on disk - missing
 * from the manifest, sha256 mismatch, byteLength mismatch; then each
 * manifest entry that has no task on disk.
 *
 * @param tasks - loaded tasks; reads `path` and `raw` of each.
 * @param manifest - validated manifest; reads `entries`.
 * @returns `{ ok, reasons }` where `ok` is true when `reasons` is empty.
 */
export function diffManifest(tasks, manifest) {
    const onDisk = new Map(tasks.map((task) => {
        const entry = manifestEntryFor(task.path, task.raw);
        return [entry.id, entry];
    }));
    const declared = new Map(manifest.entries.map((entry) => [entry.id, entry]));
    const reasons = [];
    onDisk.forEach((actual, id) => {
        const expected = declared.get(id);
        if (!expected) {
            reasons.push(`task ${id} present on disk but missing from manifest`);
            return;
        }
        if (expected.sha256 !== actual.sha256) {
            reasons.push(`task ${id} sha256 mismatch (disk=${actual.sha256.slice(0, 12)} manifest=${expected.sha256.slice(0, 12)})`);
        }
        if (expected.byteLength !== actual.byteLength) {
            reasons.push(`task ${id} byteLength mismatch (disk=${actual.byteLength} manifest=${expected.byteLength})`);
        }
    });
    for (const id of declared.keys()) {
        if (!onDisk.has(id)) {
            reasons.push(`task ${id} declared in manifest but missing on disk`);
        }
    }
    return { ok: reasons.length === 0, reasons };
}
191
+ //# sourceMappingURL=task-loader.js.map
@@ -0,0 +1,14 @@
1
+ /**
2
+ * pugi-eval-v1 type definitions (backlog #120, Reviewer foundation).
3
+ *
4
+ * Frozen benchmark harness types. The shapes here are stable: every
5
+ * field added later must preserve backward compatibility with the v1
6
+ * `results.tsv` ledger columns and the v1 task YAML schema. Breaking
7
+ * changes ship as `eval-v2`.
8
+ *
9
+ * Why types live in `core/eval/v1/` and not next to the CLI command:
10
+ * the meta-spec, ledger, scoring, and verifier all consume them. CLI
11
+ * command modules stay thin wrappers per the project convention.
12
+ */
13
+ export {};
14
+ //# sourceMappingURL=types.js.map