claude-test-bench 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/README.md +474 -0
- package/dist/bin/ctb.d.ts +3 -0
- package/dist/bin/ctb.d.ts.map +1 -0
- package/dist/bin/ctb.js +107 -0
- package/dist/bin/ctb.js.map +1 -0
- package/dist/server/index.d.ts +13 -0
- package/dist/server/index.d.ts.map +1 -0
- package/dist/server/index.js +72 -0
- package/dist/server/index.js.map +1 -0
- package/dist/server/interfaces/evaluator.d.ts +15 -0
- package/dist/server/interfaces/evaluator.d.ts.map +1 -0
- package/dist/server/interfaces/evaluator.js +2 -0
- package/dist/server/interfaces/evaluator.js.map +1 -0
- package/dist/server/interfaces/logger.d.ts +9 -0
- package/dist/server/interfaces/logger.d.ts.map +1 -0
- package/dist/server/interfaces/logger.js +2 -0
- package/dist/server/interfaces/logger.js.map +1 -0
- package/dist/server/interfaces/runner.d.ts +9 -0
- package/dist/server/interfaces/runner.d.ts.map +1 -0
- package/dist/server/interfaces/runner.js +2 -0
- package/dist/server/interfaces/runner.js.map +1 -0
- package/dist/server/interfaces/storage.d.ts +36 -0
- package/dist/server/interfaces/storage.d.ts.map +1 -0
- package/dist/server/interfaces/storage.js +2 -0
- package/dist/server/interfaces/storage.js.map +1 -0
- package/dist/server/interfaces/workspace.d.ts +9 -0
- package/dist/server/interfaces/workspace.d.ts.map +1 -0
- package/dist/server/interfaces/workspace.js +2 -0
- package/dist/server/interfaces/workspace.js.map +1 -0
- package/dist/server/routes/eval-queue.d.ts +23 -0
- package/dist/server/routes/eval-queue.d.ts.map +1 -0
- package/dist/server/routes/eval-queue.js +45 -0
- package/dist/server/routes/eval-queue.js.map +1 -0
- package/dist/server/routes/evaluations.d.ts +8 -0
- package/dist/server/routes/evaluations.d.ts.map +1 -0
- package/dist/server/routes/evaluations.js +217 -0
- package/dist/server/routes/evaluations.js.map +1 -0
- package/dist/server/routes/providers.d.ts +5 -0
- package/dist/server/routes/providers.d.ts.map +1 -0
- package/dist/server/routes/providers.js +194 -0
- package/dist/server/routes/providers.js.map +1 -0
- package/dist/server/routes/run-queue.d.ts +17 -0
- package/dist/server/routes/run-queue.d.ts.map +1 -0
- package/dist/server/routes/run-queue.js +34 -0
- package/dist/server/routes/run-queue.js.map +1 -0
- package/dist/server/routes/run-sse.d.ts +18 -0
- package/dist/server/routes/run-sse.d.ts.map +1 -0
- package/dist/server/routes/run-sse.js +57 -0
- package/dist/server/routes/run-sse.js.map +1 -0
- package/dist/server/routes/runs.d.ts +9 -0
- package/dist/server/routes/runs.d.ts.map +1 -0
- package/dist/server/routes/runs.js +379 -0
- package/dist/server/routes/runs.js.map +1 -0
- package/dist/server/routes/scenarios.d.ts +5 -0
- package/dist/server/routes/scenarios.d.ts.map +1 -0
- package/dist/server/routes/scenarios.js +209 -0
- package/dist/server/routes/scenarios.js.map +1 -0
- package/dist/server/routes/setups.d.ts +5 -0
- package/dist/server/routes/setups.d.ts.map +1 -0
- package/dist/server/routes/setups.js +194 -0
- package/dist/server/routes/setups.js.map +1 -0
- package/dist/server/services/agent-mapper.d.ts +12 -0
- package/dist/server/services/agent-mapper.d.ts.map +1 -0
- package/dist/server/services/agent-mapper.js +75 -0
- package/dist/server/services/agent-mapper.js.map +1 -0
- package/dist/server/services/env-builder.d.ts +10 -0
- package/dist/server/services/env-builder.d.ts.map +1 -0
- package/dist/server/services/env-builder.js +50 -0
- package/dist/server/services/env-builder.js.map +1 -0
- package/dist/server/services/eval-helpers.d.ts +22 -0
- package/dist/server/services/eval-helpers.d.ts.map +1 -0
- package/dist/server/services/eval-helpers.js +75 -0
- package/dist/server/services/eval-helpers.js.map +1 -0
- package/dist/server/services/eval-parsers-debate-impl.d.ts +11 -0
- package/dist/server/services/eval-parsers-debate-impl.d.ts.map +1 -0
- package/dist/server/services/eval-parsers-debate-impl.js +133 -0
- package/dist/server/services/eval-parsers-debate-impl.js.map +1 -0
- package/dist/server/services/eval-parsers.d.ts +24 -0
- package/dist/server/services/eval-parsers.d.ts.map +1 -0
- package/dist/server/services/eval-parsers.js +153 -0
- package/dist/server/services/eval-parsers.js.map +1 -0
- package/dist/server/services/eval-prompts.d.ts +9 -0
- package/dist/server/services/eval-prompts.d.ts.map +1 -0
- package/dist/server/services/eval-prompts.js +170 -0
- package/dist/server/services/eval-prompts.js.map +1 -0
- package/dist/server/services/evaluator.d.ts +10 -0
- package/dist/server/services/evaluator.d.ts.map +1 -0
- package/dist/server/services/evaluator.js +156 -0
- package/dist/server/services/evaluator.js.map +1 -0
- package/dist/server/services/fs-adapter.d.ts +20 -0
- package/dist/server/services/fs-adapter.d.ts.map +1 -0
- package/dist/server/services/fs-adapter.js +13 -0
- package/dist/server/services/fs-adapter.js.map +1 -0
- package/dist/server/services/instruction-parser.d.ts +26 -0
- package/dist/server/services/instruction-parser.d.ts.map +1 -0
- package/dist/server/services/instruction-parser.js +121 -0
- package/dist/server/services/instruction-parser.js.map +1 -0
- package/dist/server/services/log-rotator.d.ts +20 -0
- package/dist/server/services/log-rotator.d.ts.map +1 -0
- package/dist/server/services/log-rotator.js +60 -0
- package/dist/server/services/log-rotator.js.map +1 -0
- package/dist/server/services/logger.d.ts +15 -0
- package/dist/server/services/logger.d.ts.map +1 -0
- package/dist/server/services/logger.js +69 -0
- package/dist/server/services/logger.js.map +1 -0
- package/dist/server/services/runner.d.ts +12 -0
- package/dist/server/services/runner.d.ts.map +1 -0
- package/dist/server/services/runner.js +161 -0
- package/dist/server/services/runner.js.map +1 -0
- package/dist/server/services/seeder.d.ts +5 -0
- package/dist/server/services/seeder.d.ts.map +1 -0
- package/dist/server/services/seeder.js +79 -0
- package/dist/server/services/seeder.js.map +1 -0
- package/dist/server/services/storage-test-helpers.d.ts +21 -0
- package/dist/server/services/storage-test-helpers.d.ts.map +1 -0
- package/dist/server/services/storage-test-helpers.js +158 -0
- package/dist/server/services/storage-test-helpers.js.map +1 -0
- package/dist/server/services/storage.d.ts +35 -0
- package/dist/server/services/storage.d.ts.map +1 -0
- package/dist/server/services/storage.js +219 -0
- package/dist/server/services/storage.js.map +1 -0
- package/dist/server/services/transcript-formatter.d.ts +18 -0
- package/dist/server/services/transcript-formatter.d.ts.map +1 -0
- package/dist/server/services/transcript-formatter.js +156 -0
- package/dist/server/services/transcript-formatter.js.map +1 -0
- package/dist/server/services/workspace.d.ts +11 -0
- package/dist/server/services/workspace.d.ts.map +1 -0
- package/dist/server/services/workspace.js +113 -0
- package/dist/server/services/workspace.js.map +1 -0
- package/dist/server/types/evaluation.d.ts +108 -0
- package/dist/server/types/evaluation.d.ts.map +1 -0
- package/dist/server/types/evaluation.js +5 -0
- package/dist/server/types/evaluation.js.map +1 -0
- package/dist/server/types/index.d.ts +5 -0
- package/dist/server/types/index.d.ts.map +1 -0
- package/dist/server/types/index.js +5 -0
- package/dist/server/types/index.js.map +1 -0
- package/dist/server/types/provider.d.ts +99 -0
- package/dist/server/types/provider.d.ts.map +1 -0
- package/dist/server/types/provider.js +5 -0
- package/dist/server/types/provider.js.map +1 -0
- package/dist/server/types/run.d.ts +31 -0
- package/dist/server/types/run.d.ts.map +1 -0
- package/dist/server/types/run.js +5 -0
- package/dist/server/types/run.js.map +1 -0
- package/dist/server/types/scenario.d.ts +32 -0
- package/dist/server/types/scenario.d.ts.map +1 -0
- package/dist/server/types/scenario.js +5 -0
- package/dist/server/types/scenario.js.map +1 -0
- package/dist/server/types/setup.d.ts +99 -0
- package/dist/server/types/setup.d.ts.map +1 -0
- package/dist/server/types/setup.js +5 -0
- package/dist/server/types/setup.js.map +1 -0
- package/dist/src/server/index.d.ts +13 -0
- package/dist/src/server/index.d.ts.map +1 -0
- package/dist/src/server/index.js +72 -0
- package/dist/src/server/index.js.map +1 -0
- package/dist/src/server/interfaces/evaluator.d.ts +15 -0
- package/dist/src/server/interfaces/evaluator.d.ts.map +1 -0
- package/dist/src/server/interfaces/evaluator.js +2 -0
- package/dist/src/server/interfaces/evaluator.js.map +1 -0
- package/dist/src/server/interfaces/logger.d.ts +9 -0
- package/dist/src/server/interfaces/logger.d.ts.map +1 -0
- package/dist/src/server/interfaces/logger.js +2 -0
- package/dist/src/server/interfaces/logger.js.map +1 -0
- package/dist/src/server/interfaces/runner.d.ts +9 -0
- package/dist/src/server/interfaces/runner.d.ts.map +1 -0
- package/dist/src/server/interfaces/runner.js +2 -0
- package/dist/src/server/interfaces/runner.js.map +1 -0
- package/dist/src/server/interfaces/storage.d.ts +36 -0
- package/dist/src/server/interfaces/storage.d.ts.map +1 -0
- package/dist/src/server/interfaces/storage.js +2 -0
- package/dist/src/server/interfaces/storage.js.map +1 -0
- package/dist/src/server/interfaces/workspace.d.ts +9 -0
- package/dist/src/server/interfaces/workspace.d.ts.map +1 -0
- package/dist/src/server/interfaces/workspace.js +2 -0
- package/dist/src/server/interfaces/workspace.js.map +1 -0
- package/dist/src/server/routes/eval-queue.d.ts +23 -0
- package/dist/src/server/routes/eval-queue.d.ts.map +1 -0
- package/dist/src/server/routes/eval-queue.js +45 -0
- package/dist/src/server/routes/eval-queue.js.map +1 -0
- package/dist/src/server/routes/evaluations.d.ts +8 -0
- package/dist/src/server/routes/evaluations.d.ts.map +1 -0
- package/dist/src/server/routes/evaluations.js +217 -0
- package/dist/src/server/routes/evaluations.js.map +1 -0
- package/dist/src/server/routes/providers.d.ts +5 -0
- package/dist/src/server/routes/providers.d.ts.map +1 -0
- package/dist/src/server/routes/providers.js +194 -0
- package/dist/src/server/routes/providers.js.map +1 -0
- package/dist/src/server/routes/run-queue.d.ts +17 -0
- package/dist/src/server/routes/run-queue.d.ts.map +1 -0
- package/dist/src/server/routes/run-queue.js +34 -0
- package/dist/src/server/routes/run-queue.js.map +1 -0
- package/dist/src/server/routes/run-sse.d.ts +18 -0
- package/dist/src/server/routes/run-sse.d.ts.map +1 -0
- package/dist/src/server/routes/run-sse.js +57 -0
- package/dist/src/server/routes/run-sse.js.map +1 -0
- package/dist/src/server/routes/runs.d.ts +9 -0
- package/dist/src/server/routes/runs.d.ts.map +1 -0
- package/dist/src/server/routes/runs.js +379 -0
- package/dist/src/server/routes/runs.js.map +1 -0
- package/dist/src/server/routes/scenarios.d.ts +5 -0
- package/dist/src/server/routes/scenarios.d.ts.map +1 -0
- package/dist/src/server/routes/scenarios.js +209 -0
- package/dist/src/server/routes/scenarios.js.map +1 -0
- package/dist/src/server/routes/setups.d.ts +5 -0
- package/dist/src/server/routes/setups.d.ts.map +1 -0
- package/dist/src/server/routes/setups.js +194 -0
- package/dist/src/server/routes/setups.js.map +1 -0
- package/dist/src/server/services/agent-mapper.d.ts +12 -0
- package/dist/src/server/services/agent-mapper.d.ts.map +1 -0
- package/dist/src/server/services/agent-mapper.js +75 -0
- package/dist/src/server/services/agent-mapper.js.map +1 -0
- package/dist/src/server/services/env-builder.d.ts +10 -0
- package/dist/src/server/services/env-builder.d.ts.map +1 -0
- package/dist/src/server/services/env-builder.js +50 -0
- package/dist/src/server/services/env-builder.js.map +1 -0
- package/dist/src/server/services/eval-helpers.d.ts +22 -0
- package/dist/src/server/services/eval-helpers.d.ts.map +1 -0
- package/dist/src/server/services/eval-helpers.js +75 -0
- package/dist/src/server/services/eval-helpers.js.map +1 -0
- package/dist/src/server/services/eval-parsers-debate-impl.d.ts +11 -0
- package/dist/src/server/services/eval-parsers-debate-impl.d.ts.map +1 -0
- package/dist/src/server/services/eval-parsers-debate-impl.js +133 -0
- package/dist/src/server/services/eval-parsers-debate-impl.js.map +1 -0
- package/dist/src/server/services/eval-parsers.d.ts +24 -0
- package/dist/src/server/services/eval-parsers.d.ts.map +1 -0
- package/dist/src/server/services/eval-parsers.js +153 -0
- package/dist/src/server/services/eval-parsers.js.map +1 -0
- package/dist/src/server/services/eval-prompts.d.ts +9 -0
- package/dist/src/server/services/eval-prompts.d.ts.map +1 -0
- package/dist/src/server/services/eval-prompts.js +170 -0
- package/dist/src/server/services/eval-prompts.js.map +1 -0
- package/dist/src/server/services/evaluator.d.ts +10 -0
- package/dist/src/server/services/evaluator.d.ts.map +1 -0
- package/dist/src/server/services/evaluator.js +156 -0
- package/dist/src/server/services/evaluator.js.map +1 -0
- package/dist/src/server/services/fs-adapter.d.ts +20 -0
- package/dist/src/server/services/fs-adapter.d.ts.map +1 -0
- package/dist/src/server/services/fs-adapter.js +13 -0
- package/dist/src/server/services/fs-adapter.js.map +1 -0
- package/dist/src/server/services/instruction-parser.d.ts +26 -0
- package/dist/src/server/services/instruction-parser.d.ts.map +1 -0
- package/dist/src/server/services/instruction-parser.js +121 -0
- package/dist/src/server/services/instruction-parser.js.map +1 -0
- package/dist/src/server/services/log-rotator.d.ts +20 -0
- package/dist/src/server/services/log-rotator.d.ts.map +1 -0
- package/dist/src/server/services/log-rotator.js +60 -0
- package/dist/src/server/services/log-rotator.js.map +1 -0
- package/dist/src/server/services/logger.d.ts +15 -0
- package/dist/src/server/services/logger.d.ts.map +1 -0
- package/dist/src/server/services/logger.js +69 -0
- package/dist/src/server/services/logger.js.map +1 -0
- package/dist/src/server/services/runner.d.ts +12 -0
- package/dist/src/server/services/runner.d.ts.map +1 -0
- package/dist/src/server/services/runner.js +161 -0
- package/dist/src/server/services/runner.js.map +1 -0
- package/dist/src/server/services/seeder.d.ts +5 -0
- package/dist/src/server/services/seeder.d.ts.map +1 -0
- package/dist/src/server/services/seeder.js +79 -0
- package/dist/src/server/services/seeder.js.map +1 -0
- package/dist/src/server/services/storage.d.ts +35 -0
- package/dist/src/server/services/storage.d.ts.map +1 -0
- package/dist/src/server/services/storage.js +219 -0
- package/dist/src/server/services/storage.js.map +1 -0
- package/dist/src/server/services/transcript-formatter.d.ts +18 -0
- package/dist/src/server/services/transcript-formatter.d.ts.map +1 -0
- package/dist/src/server/services/transcript-formatter.js +156 -0
- package/dist/src/server/services/transcript-formatter.js.map +1 -0
- package/dist/src/server/services/workspace.d.ts +11 -0
- package/dist/src/server/services/workspace.d.ts.map +1 -0
- package/dist/src/server/services/workspace.js +113 -0
- package/dist/src/server/services/workspace.js.map +1 -0
- package/dist/src/server/types/evaluation.d.ts +108 -0
- package/dist/src/server/types/evaluation.d.ts.map +1 -0
- package/dist/src/server/types/evaluation.js +5 -0
- package/dist/src/server/types/evaluation.js.map +1 -0
- package/dist/src/server/types/index.d.ts +5 -0
- package/dist/src/server/types/index.d.ts.map +1 -0
- package/dist/src/server/types/index.js +5 -0
- package/dist/src/server/types/index.js.map +1 -0
- package/dist/src/server/types/provider.d.ts +99 -0
- package/dist/src/server/types/provider.d.ts.map +1 -0
- package/dist/src/server/types/provider.js +5 -0
- package/dist/src/server/types/provider.js.map +1 -0
- package/dist/src/server/types/run.d.ts +31 -0
- package/dist/src/server/types/run.d.ts.map +1 -0
- package/dist/src/server/types/run.js +5 -0
- package/dist/src/server/types/run.js.map +1 -0
- package/dist/src/server/types/scenario.d.ts +32 -0
- package/dist/src/server/types/scenario.d.ts.map +1 -0
- package/dist/src/server/types/scenario.js +5 -0
- package/dist/src/server/types/scenario.js.map +1 -0
- package/dist/src/server/types/setup.d.ts +99 -0
- package/dist/src/server/types/setup.d.ts.map +1 -0
- package/dist/src/server/types/setup.js +5 -0
- package/dist/src/server/types/setup.js.map +1 -0
- package/dist/web/assets/index-C4dw8OpW.css +1 -0
- package/dist/web/assets/index-wve8IczO.js +76 -0
- package/dist/web/index.html +15 -0
- package/docs/schemas/provider-api.example.json +16 -0
- package/docs/schemas/provider-oauth.example.json +15 -0
- package/docs/schemas/provider.example.json +16 -0
- package/docs/schemas/scenario-baseline.example.json +35 -0
- package/docs/schemas/scenario-carwash-baseline.example.json +33 -0
- package/docs/schemas/scenario-carwash-with-claude-md.example.json +40 -0
- package/docs/schemas/scenario-golden-rules-baseline.example.json +51 -0
- package/docs/schemas/scenario-golden-rules-with-claude-md.example.json +61 -0
- package/docs/schemas/scenario-negative-analysis-baseline.example.json +34 -0
- package/docs/schemas/scenario-negative-analysis-with-claude-md.example.json +41 -0
- package/docs/schemas/scenario-with-claude-md.example.json +41 -0
- package/docs/schemas/scenario.example.json +33 -0
- package/package.json +92 -0
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import type { IWorkspaceBuilder, WorkspaceResult } from '../interfaces/workspace.js';
|
|
2
|
+
import type { Scenario } from '../types/index.js';
|
|
3
|
+
/**
 * Creates an isolated workspace directory for a run and populates it from a scenario.
 * Only `createWorkspace` is public; the private members appear by name only
 * because declaration output omits their signatures.
 */
export declare class WorkspaceBuilder implements IWorkspaceBuilder {
|
|
4
|
+
/**
 * Creates a `ctb-run-<uuid>` directory under the OS temp directory and writes the
 * scenario's CLAUDE.md files, rules, skills and workspace files into it.
 * If populating fails the directory is removed and the error is rethrown.
 * The resolved result carries the workspace path and a `cleanup` function
 * that recursively deletes the directory.
 */
createWorkspace(scenario: Scenario): Promise<WorkspaceResult>;
|
|
5
|
+
/** Writes `CLAUDE.md` at the root for role 'project', otherwise `.claude/CLAUDE.md`. */
private writeClaudeMdFiles;
|
|
6
|
+
/** Writes each rule to `.claude/rules/<name>.md`. */
private writeRules;
|
|
7
|
+
/** Writes each skill to `.claude/skills/<name>/SKILL.md`. */
private writeSkills;
|
|
8
|
+
/** Writes each workspace file at its relative path, creating parent directories. */
private writeWorkspaceFiles;
|
|
9
|
+
/** Returns the content of `loadFromFile` when it is set, otherwise the inline content. */
private resolveContent;
|
|
10
|
+
}
|
|
11
|
+
//# sourceMappingURL=workspace.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"workspace.d.ts","sourceRoot":"","sources":["../../../src/server/services/workspace.ts"],"names":[],"mappings":"AAQA,OAAO,KAAK,EAAE,iBAAiB,EAAE,eAAe,EAAE,MAAM,4BAA4B,CAAC;AACrF,OAAO,KAAK,EAAE,QAAQ,EAAuD,MAAM,mBAAmB,CAAC;AAkCvG,qBAAa,gBAAiB,YAAW,iBAAiB;IAClD,eAAe,CAAC,QAAQ,EAAE,QAAQ,GAAG,OAAO,CAAC,eAAe,CAAC;YAyBrD,kBAAkB;YAmBlB,UAAU;YAkBV,WAAW;YAoBX,mBAAmB;YAcnB,cAAc;CAS7B"}
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
// ---------------------------------------------------------------------------
|
|
2
|
+
// WorkspaceBuilder — creates isolated temp directories for each run
|
|
3
|
+
// ---------------------------------------------------------------------------
|
|
4
|
+
import os from 'node:os';
|
|
5
|
+
import path from 'node:path';
|
|
6
|
+
import fs from 'node:fs/promises';
|
|
7
|
+
import { randomUUID } from 'node:crypto';
|
|
8
|
+
// ---------------------------------------------------------------------------
|
|
9
|
+
// Path validation
|
|
10
|
+
// ---------------------------------------------------------------------------
|
|
11
|
+
/** Error thrown when a scenario-supplied name or relative path fails validation. */
class PathValidationError extends Error {
    /** @param message Human-readable description of the rejected value. */
    constructor(message) {
        super(message);
        this.name = 'PathValidationError';
    }
}

/**
 * Reject absolute paths and path traversal attempts.
 *
 * A path is accepted only when it is a non-empty, relative string whose
 * normalised form contains no `..` segment. The comparison is done per path
 * segment, so ordinary names that merely start with two dots (`..env`,
 * `src/..hidden/file`) are accepted, while `../x`, `a/../../x` and `..` are
 * rejected.
 *
 * @param value Candidate relative path.
 * @param label Name of the field being validated; used as the message prefix.
 * @throws {PathValidationError} If the path is empty, absolute, or escapes its base.
 */
function assertSafePath(value, label) {
    if (typeof value !== 'string' || value.length === 0) {
        throw new PathValidationError(`${label} must be a non-empty relative path, got: ${String(value)}`);
    }
    if (path.isAbsolute(value)) {
        throw new PathValidationError(`${label} must not be an absolute path: ${value}`);
    }
    // normalize() folds every interior "dir/.." pair, so a '..' segment that
    // survives normalisation is one that climbs out of the base directory.
    const segments = path.normalize(value).split(path.sep);
    if (segments.includes('..')) {
        throw new PathValidationError(`${label} must not contain '..': ${value}`);
    }
}

/**
 * Require a plain, single-segment name (used for file and directory names).
 *
 * Rejects non-strings, the empty string, `.`, anything containing `..`, and
 * anything containing a path separator. Both `/` and `\` are treated as
 * separators on every platform so that a name is judged the same way
 * regardless of where the scenario is executed.
 *
 * @param value Candidate name.
 * @param label Name of the field being validated; used as the message prefix.
 * @throws {PathValidationError} If the value is not a simple name.
 */
function assertSafeName(value, label) {
    const isSimpleName = typeof value === 'string'
        && value.length > 0
        && value !== '.'
        && !value.includes('..')
        && !value.includes('/')
        && !value.includes('\\')
        && !value.includes(path.sep);
    if (!isSimpleName) {
        throw new PathValidationError(`${label} must be a simple name, got: ${value}`);
    }
}
|
|
32
|
+
// ---------------------------------------------------------------------------
|
|
33
|
+
// Implementation
|
|
34
|
+
// ---------------------------------------------------------------------------
|
|
35
|
+
/**
 * Builds a throw-away workspace directory for a single run and fills it with
 * the files a scenario describes.
 *
 * Layout produced inside the workspace root:
 *   CLAUDE.md                        entries whose role is 'project'
 *   .claude/CLAUDE.md                entries with any other role
 *   .claude/rules/<name>.md          one file per rule
 *   .claude/skills/<name>/SKILL.md   one directory per skill
 *   <path>                           workspace files at their relative path
 */
export class WorkspaceBuilder {
    /**
     * Create and populate a fresh workspace.
     *
     * The directory is `ctb-run-<uuid>` under `os.tmpdir()`. If any step of
     * populating it fails, the directory is removed (best effort) and the
     * original error is rethrown.
     *
     * @param scenario Object providing `claudeMdFiles`, `rules`, `skills` and
     *   `workspaceFiles`; a section that is null or undefined is treated as empty.
     * @returns `{ workspacePath, cleanup }`, where `cleanup()` recursively
     *   deletes the workspace directory.
     * @throws Whatever `assertSafeName` / `assertSafePath` throw for an invalid
     *   name or path, a `TypeError` for an entry with neither content nor
     *   `loadFromFile`, and any error from the underlying `fs` calls.
     */
    async createWorkspace(scenario) {
        const workspacePath = path.join(os.tmpdir(), `ctb-run-${randomUUID()}`);
        await fs.mkdir(workspacePath, { recursive: true });
        try {
            // `?? []` lets a scenario omit a section instead of crashing on
            // `.length` / iteration of undefined.
            await this.writeClaudeMdFiles(workspacePath, scenario.claudeMdFiles ?? []);
            await this.writeRules(workspacePath, scenario.rules ?? []);
            await this.writeSkills(workspacePath, scenario.skills ?? []);
            await this.writeWorkspaceFiles(workspacePath, scenario.workspaceFiles ?? []);
        }
        catch (err) {
            // Best-effort cleanup: a failure to remove the directory must not
            // mask the error that caused the build to fail.
            await fs.rm(workspacePath, { recursive: true, force: true }).catch(() => { });
            throw err;
        }
        return {
            workspacePath,
            cleanup: async () => {
                await fs.rm(workspacePath, { recursive: true, force: true });
            },
        };
    }

    /**
     * Write CLAUDE.md files.
     *
     * An entry with role 'project' goes to `<root>/CLAUDE.md`; every other role
     * goes to `<root>/.claude/CLAUDE.md`. Entries are written in order, so a
     * later entry targeting the same file overwrites an earlier one.
     *
     * @param root Workspace root directory.
     * @param entries Items with `role`, `content` and optional `loadFromFile`.
     */
    async writeClaudeMdFiles(root, entries) {
        for (const entry of entries) {
            const content = await this.resolveContent(entry.content, entry.loadFromFile);
            if (entry.role === 'project') {
                await fs.writeFile(path.join(root, 'CLAUDE.md'), content, 'utf-8');
                continue;
            }
            const claudeDir = path.join(root, '.claude');
            await fs.mkdir(claudeDir, { recursive: true });
            await fs.writeFile(path.join(claudeDir, 'CLAUDE.md'), content, 'utf-8');
        }
    }

    /**
     * Write each rule to `<root>/.claude/rules/<name>.md`.
     * Nothing is created when the list is empty.
     *
     * @param root Workspace root directory.
     * @param rules Items with `name`, `content` and optional `loadFromFile`.
     */
    async writeRules(root, rules) {
        if (rules.length === 0) {
            return;
        }
        const rulesDir = path.join(root, '.claude', 'rules');
        await fs.mkdir(rulesDir, { recursive: true });
        for (const rule of rules) {
            // The name becomes a file name, so it must not carry path parts.
            assertSafeName(rule.name, 'rule name');
            const content = await this.resolveContent(rule.content, rule.loadFromFile);
            await fs.writeFile(path.join(rulesDir, `${rule.name}.md`), content, 'utf-8');
        }
    }

    /**
     * Write each skill to `<root>/.claude/skills/<name>/SKILL.md`.
     * Nothing is created when the list is empty.
     *
     * @param root Workspace root directory.
     * @param skills Items with `name`, `content` and optional `loadFromFile`.
     */
    async writeSkills(root, skills) {
        if (skills.length === 0) {
            return;
        }
        const skillsBase = path.join(root, '.claude', 'skills');
        for (const skill of skills) {
            // The name becomes a directory name, so it must not carry path parts.
            assertSafeName(skill.name, 'skill name');
            const skillDir = path.join(skillsBase, skill.name);
            await fs.mkdir(skillDir, { recursive: true });
            const content = await this.resolveContent(skill.content, skill.loadFromFile);
            await fs.writeFile(path.join(skillDir, 'SKILL.md'), content, 'utf-8');
        }
    }

    /**
     * Write arbitrary workspace files at their relative path below `root`,
     * creating parent directories as needed. `file.content` is written as-is;
     * these entries do not go through `resolveContent`.
     *
     * @param root Workspace root directory.
     * @param files Items with `path` (relative) and `content`.
     */
    async writeWorkspaceFiles(root, files) {
        for (const file of files) {
            assertSafePath(file.path, 'workspace file path');
            const target = path.join(root, file.path);
            await fs.mkdir(path.dirname(target), { recursive: true });
            await fs.writeFile(target, file.content, 'utf-8');
        }
    }

    /**
     * Resolve the text of an entry: a truthy `loadFromFile` takes precedence
     * and is read as UTF-8; otherwise the inline content is returned.
     *
     * NOTE(review): `loadFromFile` is read exactly as given. Unlike names and
     * workspace paths it is not validated, so a scenario can reference any file
     * this process can read. Confine it to a trusted base directory if
     * scenarios can come from untrusted sources.
     *
     * @param inlineContent Content embedded in the scenario entry.
     * @param loadFromFile Optional path of a file to read instead.
     * @returns The resolved text.
     * @throws {TypeError} If neither a file path nor inline content is present.
     */
    async resolveContent(inlineContent, loadFromFile) {
        if (loadFromFile) {
            return fs.readFile(loadFromFile, 'utf-8');
        }
        if (inlineContent === undefined || inlineContent === null) {
            // Fail here with a clear message instead of letting fs.writeFile
            // reject the missing data argument later.
            throw new TypeError('Scenario entry has neither inline content nor loadFromFile');
        }
        return inlineContent;
    }
}
|
|
113
|
+
//# sourceMappingURL=workspace.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"workspace.js","sourceRoot":"","sources":["../../../src/server/services/workspace.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,oEAAoE;AACpE,8EAA8E;AAE9E,OAAO,EAAE,MAAM,SAAS,CAAC;AACzB,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,MAAM,kBAAkB,CAAC;AAClC,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAIzC,8EAA8E;AAC9E,kBAAkB;AAClB,8EAA8E;AAE9E,MAAM,mBAAoB,SAAQ,KAAK;IACrC,YAAY,OAAe;QACzB,KAAK,CAAC,OAAO,CAAC,CAAC;QACf,IAAI,CAAC,IAAI,GAAG,qBAAqB,CAAC;IACpC,CAAC;CACF;AAED,yDAAyD;AACzD,SAAS,cAAc,CAAC,KAAa,EAAE,KAAa;IAClD,IAAI,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,EAAE,CAAC;QAC3B,MAAM,IAAI,mBAAmB,CAAC,GAAG,KAAK,kCAAkC,KAAK,EAAE,CAAC,CAAC;IACnF,CAAC;IACD,MAAM,UAAU,GAAG,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;IACzC,IAAI,UAAU,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,UAAU,CAAC,QAAQ,CAAC,GAAG,IAAI,CAAC,GAAG,IAAI,CAAC,EAAE,CAAC;QACxE,MAAM,IAAI,mBAAmB,CAAC,GAAG,KAAK,2BAA2B,KAAK,EAAE,CAAC,CAAC;IAC5E,CAAC;AACH,CAAC;AAED,SAAS,cAAc,CAAC,KAAa,EAAE,KAAa;IAClD,IAAI,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;QAC5E,MAAM,IAAI,mBAAmB,CAAC,GAAG,KAAK,gCAAgC,KAAK,EAAE,CAAC,CAAC;IACjF,CAAC;AACH,CAAC;AAED,8EAA8E;AAC9E,iBAAiB;AACjB,8EAA8E;AAE9E,MAAM,OAAO,gBAAgB;IAC3B,KAAK,CAAC,eAAe,CAAC,QAAkB;QACtC,MAAM,aAAa,GAAG,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,MAAM,EAAE,EAAE,WAAW,UAAU,EAAE,EAAE,CAAC,CAAC;QACxE,MAAM,EAAE,CAAC,KAAK,CAAC,aAAa,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAEnD,IAAI,CAAC;YACH,MAAM,IAAI,CAAC,kBAAkB,CAAC,aAAa,EAAE,QAAQ,CAAC,aAAa,CAAC,CAAC;YACrE,MAAM,IAAI,CAAC,UAAU,CAAC,aAAa,EAAE,QAAQ,CAAC,KAAK,CAAC,CAAC;YACrD,MAAM,IAAI,CAAC,WAAW,CAAC,aAAa,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC;YACvD,MAAM,IAAI,CAAC,mBAAmB,CAAC,aAAa,EAAE,QAAQ,CAAC,cAAc,CAAC,CAAC;QACzE,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,qBAAqB;YACrB,MAAM,EAAE,CAAC,EAAE,CAAC,aAAa,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;YAC7E,MAAM,GAAG,CAAC;QACZ,CAAC;QAED,OAAO;YACL,aAAa;YACb,OAAO,EAAE,KAAK,IAAI,EAAE;gBAClB,MAAM,EAAE,CAAC,EAAE,CAAC,aAAa,EAAE,
EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;YAC/D,CAAC;SACF,CAAC;IACJ,CAAC;IAED,uEAAuE;IAE/D,KAAK,CAAC,kBAAkB,CAC9B,IAAY,EACZ,OAAiC;QAEjC,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;YAC5B,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,cAAc,CAAC,KAAK,CAAC,OAAO,EAAE,KAAK,CAAC,YAAY,CAAC,CAAC;YAE7E,IAAI,KAAK,CAAC,IAAI,KAAK,SAAS,EAAE,CAAC;gBAC7B,MAAM,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,WAAW,CAAC,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC;YACrE,CAAC;iBAAM,CAAC;gBACN,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;gBAC7C,MAAM,EAAE,CAAC,KAAK,CAAC,SAAS,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;gBAC/C,MAAM,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,WAAW,CAAC,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC;YAC1E,CAAC;QACH,CAAC;IACH,CAAC;IAED,uEAAuE;IAE/D,KAAK,CAAC,UAAU,CACtB,IAAY,EACZ,KAA2B;QAE3B,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO;QAE/B,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,SAAS,EAAE,OAAO,CAAC,CAAC;QACrD,MAAM,EAAE,CAAC,KAAK,CAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAE9C,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,cAAc,CAAC,IAAI,CAAC,IAAI,EAAE,WAAW,CAAC,CAAC;YACvC,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,YAAY,CAAC,CAAC;YAC3E,MAAM,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,GAAG,IAAI,CAAC,IAAI,KAAK,CAAC,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC;QAC/E,CAAC;IACH,CAAC;IAED,uEAAuE;IAE/D,KAAK,CAAC,WAAW,CACvB,IAAY,EACZ,MAA6B;QAE7B,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO;QAEhC,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,SAAS,EAAE,QAAQ,CAAC,CAAC;QAExD,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC3B,cAAc,CAAC,KAAK,CAAC,IAAI,EAAE,YAAY,CAAC,CAAC;YACzC,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC;YACnD,MAAM,EAAE,CAAC,KAAK,CAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;YAE9C,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,cAAc,CAAC,KAAK,CAAC,OAAO,EAAE,KAAK,CAAC,YAAY,CAAC,CAAC;YAC7E,MAAM,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,UAAU,CAAC,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC;QACxE,CAAC;IACH,CAAC;IAED,sEAAsE;IAE9D,KAAK,CAAC,mBAAmB,CAC/B,IAAY,EACZ,KAA+B;QAE/B,KAAK
,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,cAAc,CAAC,IAAI,CAAC,IAAI,EAAE,qBAAqB,CAAC,CAAC;YACjD,MAAM,MAAM,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC;YAC1C,MAAM,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;YAC1D,MAAM,EAAE,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;QACpD,CAAC;IACH,CAAC;IAED,sEAAsE;IAE9D,KAAK,CAAC,cAAc,CAC1B,aAAqB,EACrB,YAAqB;QAErB,IAAI,YAAY,EAAE,CAAC;YACjB,OAAO,EAAE,CAAC,QAAQ,CAAC,YAAY,EAAE,OAAO,CAAC,CAAC;QAC5C,CAAC;QACD,OAAO,aAAa,CAAC;IACvB,CAAC;CACF"}
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
import type { ProviderConfig } from './provider.js';
|
|
2
|
+
/** Configuration for an evaluator agent. */
|
|
3
|
+
export interface EvaluatorConfig {
|
|
4
|
+
/** Provider settings for this evaluator; `ProviderConfig` is declared in ./provider.js. */
readonly provider: ProviderConfig;
|
|
5
|
+
/** Role label for this evaluator. NOTE(review): presumably the value reported as `evaluatorRole` in results — confirm. */
readonly role: string;
|
|
6
|
+
}
|
|
7
|
+
/** Request to start an evaluation pipeline. */
|
|
8
|
+
export interface EvaluationRequest {
|
|
9
|
+
/** Identifier of the run to evaluate. NOTE(review): presumably the id of a stored run record — confirm against the caller. */
readonly runId: string;
|
|
10
|
+
/** Evaluator agents taking part; the array is read-only. */
readonly evaluators: readonly EvaluatorConfig[];
|
|
11
|
+
/** Limit on evaluation rounds. NOTE(review): minimum value and whether 0 is allowed are not visible here — confirm. */
readonly maxRounds: number;
|
|
12
|
+
/** Optional budget in USD; the behaviour when omitted or exceeded is not defined in this declaration. */
readonly maxBudgetUsd?: number;
|
|
13
|
+
}
|
|
14
|
+
/** Lifecycle status of an evaluation. */
|
|
15
|
+
// String-literal union of the four states; which transitions between them are allowed is not defined in this declaration.
export type EvaluationStatus = 'pending' | 'running' | 'completed' | 'failed';
|
|
16
|
+
/** How well the answer matches the expected answer. */
|
|
17
|
+
export interface AnswerComparison {
|
|
18
|
+
/** Whether the answer is judged to match the expected answer. */
readonly matches: boolean;
|
|
19
|
+
/** Text accompanying the verdict. NOTE(review): presumably the evaluator's justification — confirm. */
readonly explanation: string;
|
|
20
|
+
/** Numeric similarity. NOTE(review): the scale is not declared here (0–1 vs 0–100?) — confirm. */
readonly similarity: number;
|
|
21
|
+
}
|
|
22
|
+
/** Result for a single critical requirement. */
|
|
23
|
+
export interface CriticalPartResult {
|
|
24
|
+
/** Text of the critical requirement being checked. */
readonly requirement: string;
|
|
25
|
+
/** Whether the requirement was met. */
readonly met: boolean;
|
|
26
|
+
/** Supporting text for the verdict. NOTE(review): presumably quoted or summarised from the run output — confirm. */
readonly evidence: string;
|
|
27
|
+
}
|
|
28
|
+
/** Per-dimension score from one evaluator. */
|
|
29
|
+
export interface IndividualEvaluation {
|
|
30
|
+
/** Role of the evaluator that produced this score. NOTE(review): presumably matches `EvaluatorConfig.role` — confirm. */
readonly evaluatorRole: string;
|
|
31
|
+
/** Name of the scored dimension; the set of valid dimension names is not declared here. */
readonly dimension: string;
|
|
32
|
+
/** Score for the dimension. NOTE(review): range and direction (higher is better?) are not declared here — confirm. */
readonly score: number;
|
|
33
|
+
/** Evaluator's reasoning text for the score. */
readonly reasoning: string;
|
|
34
|
+
}
|
|
35
|
+
/** Did the agent follow the instructions in its CLAUDE.md / rules? */
|
|
36
|
+
export interface InstructionCompliance {
|
|
37
|
+
/** Instructions judged to have been followed. */
readonly followed: readonly string[];
|
|
38
|
+
/** Instructions judged to have been violated. */
readonly violated: readonly string[];
|
|
39
|
+
/** Instructions judged not applicable. NOTE(review): whether the three lists are disjoint is not enforced by this type. */
readonly notApplicable: readonly string[];
|
|
40
|
+
/** Aggregate compliance figure. NOTE(review): scale (ratio vs percentage) and how it is derived are not visible here — confirm. */
readonly overallCompliance: number;
|
|
41
|
+
}
|
|
42
|
+
/** Report on whether defined skills were invoked. */
|
|
43
|
+
export interface SkillUsageReport {
|
|
44
|
+
/** Name of the skill this report is about. */
readonly skillName: string;
|
|
45
|
+
/** Whether the skill was invoked. */
readonly invoked: boolean;
|
|
46
|
+
/** Number of invocations. NOTE(review): presumably 0 whenever `invoked` is false — the type does not enforce it. */
readonly invocationCount: number;
|
|
47
|
+
/** NOTE(review): presumably whether the skill was relevant to the scenario context — confirm the exact meaning. */
readonly contextRelevant: boolean;
|
|
48
|
+
}
|
|
49
|
+
/** Report on whether defined subagents were invoked. */
|
|
50
|
+
export interface SubagentUsageReport {
|
|
51
|
+
/** Name of the subagent this report is about. */
readonly subagentName: string;
|
|
52
|
+
/** Whether the subagent was invoked. */
readonly invoked: boolean;
|
|
53
|
+
/** Number of invocations. NOTE(review): presumably 0 whenever `invoked` is false — the type does not enforce it. */
readonly invocationCount: number;
|
|
54
|
+
/** NOTE(review): presumably whether the subagent was relevant to the scenario context — confirm the exact meaning. */
readonly contextRelevant: boolean;
|
|
55
|
+
}
|
|
56
|
+
/** Aggregated report on how well the setup itself performed. */
|
|
57
|
+
export interface SetupComplianceReport {
|
|
58
|
+
/** Instruction-following summary for the run. */
readonly instructionCompliance: InstructionCompliance;
|
|
59
|
+
/** One report per skill. NOTE(review): presumably one entry per skill defined in the setup — confirm. */
readonly skillUsage: readonly SkillUsageReport[];
|
|
60
|
+
/** One report per subagent. NOTE(review): presumably one entry per subagent defined in the setup — confirm. */
readonly subagentUsage: readonly SubagentUsageReport[];
|
|
61
|
+
}
|
|
62
|
+
/** Ledger tracking cost/usage per evaluator. */
|
|
63
|
+
export interface EvaluatorLedger {
|
|
64
|
+
/** Role of the evaluator this ledger entry belongs to. */
readonly evaluatorRole: string;
|
|
65
|
+
/** Total cost in USD attributed to this evaluator. */
readonly totalCostUsd: number;
|
|
66
|
+
/** Total input tokens. NOTE(review): presumably summed over all rounds — confirm. */
readonly totalTokensIn: number;
|
|
67
|
+
/** Total output tokens. NOTE(review): presumably summed over all rounds — confirm. */
readonly totalTokensOut: number;
|
|
68
|
+
/** Number of rounds this evaluator took part in. */
readonly roundsParticipated: number;
|
|
69
|
+
}
|
|
70
|
+
/** A single round of evaluation (there may be multiple rounds for consensus). */
|
|
71
|
+
export interface EvaluationRound {
|
|
72
|
+
/** Ordinal of the round. NOTE(review): whether numbering starts at 0 or 1 is not visible here — confirm. */
readonly roundNumber: number;
|
|
73
|
+
/** Per-dimension scores collected in this round. */
readonly evaluations: readonly IndividualEvaluation[];
|
|
74
|
+
/** Whether consensus was reached in this round; the consensus criterion is not defined in this declaration. */
readonly consensusReached: boolean;
|
|
75
|
+
/** Timestamp as a string. NOTE(review): presumably ISO 8601 — confirm the format and time zone. */
readonly timestamp: string;
|
|
76
|
+
}
|
|
77
|
+
/** Synthesised final scores across evaluators and rounds. */
|
|
78
|
+
export interface EvaluationSynthesis {
|
|
79
|
+
readonly dimensionScores: Readonly<Record<string, number>>;
|
|
80
|
+
readonly weightedTotal: number;
|
|
81
|
+
readonly confidence: number;
|
|
82
|
+
readonly dissenting: readonly string[];
|
|
83
|
+
}
|
|
84
|
+
/** Overall report on how effective a setup is across evaluations. */
|
|
85
|
+
export interface SetupEffectivenessReport {
|
|
86
|
+
readonly setupId: string;
|
|
87
|
+
readonly averageScore: number;
|
|
88
|
+
readonly scenarioBreakdown: Readonly<Record<string, number>>;
|
|
89
|
+
readonly strengths: readonly string[];
|
|
90
|
+
readonly weaknesses: readonly string[];
|
|
91
|
+
}
|
|
92
|
+
/** The full evaluation record persisted after the pipeline completes. */
|
|
93
|
+
export interface Evaluation {
|
|
94
|
+
readonly id: string;
|
|
95
|
+
readonly runId: string;
|
|
96
|
+
readonly status: EvaluationStatus;
|
|
97
|
+
readonly evaluators: readonly EvaluatorConfig[];
|
|
98
|
+
readonly rounds: readonly EvaluationRound[];
|
|
99
|
+
readonly answerComparison: AnswerComparison;
|
|
100
|
+
readonly criticalResults: readonly CriticalPartResult[];
|
|
101
|
+
readonly setupCompliance: SetupComplianceReport;
|
|
102
|
+
readonly synthesis: EvaluationSynthesis;
|
|
103
|
+
readonly ledger: readonly EvaluatorLedger[];
|
|
104
|
+
readonly totalCostUsd: number;
|
|
105
|
+
readonly createdAt: string;
|
|
106
|
+
readonly updatedAt: string;
|
|
107
|
+
}
|
|
108
|
+
//# sourceMappingURL=evaluation.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"evaluation.d.ts","sourceRoot":"","sources":["../../../src/server/types/evaluation.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,eAAe,CAAC;AAEpD,4CAA4C;AAC5C,MAAM,WAAW,eAAe;IAC9B,QAAQ,CAAC,QAAQ,EAAE,cAAc,CAAC;IAClC,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;CACvB;AAED,+CAA+C;AAC/C,MAAM,WAAW,iBAAiB;IAChC,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,UAAU,EAAE,SAAS,eAAe,EAAE,CAAC;IAChD,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,YAAY,CAAC,EAAE,MAAM,CAAC;CAChC;AAED,yCAAyC;AACzC,MAAM,MAAM,gBAAgB,GAAG,SAAS,GAAG,SAAS,GAAG,WAAW,GAAG,QAAQ,CAAC;AAE9E,uDAAuD;AACvD,MAAM,WAAW,gBAAgB;IAC/B,QAAQ,CAAC,OAAO,EAAE,OAAO,CAAC;IAC1B,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;CAC7B;AAED,gDAAgD;AAChD,MAAM,WAAW,kBAAkB;IACjC,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,GAAG,EAAE,OAAO,CAAC;IACtB,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;CAC3B;AAED,8CAA8C;AAC9C,MAAM,WAAW,oBAAoB;IACnC,QAAQ,CAAC,aAAa,EAAE,MAAM,CAAC;IAC/B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;CAC5B;AAED,sEAAsE;AACtE,MAAM,WAAW,qBAAqB;IACpC,QAAQ,CAAC,QAAQ,EAAE,SAAS,MAAM,EAAE,CAAC;IACrC,QAAQ,CAAC,QAAQ,EAAE,SAAS,MAAM,EAAE,CAAC;IACrC,QAAQ,CAAC,aAAa,EAAE,SAAS,MAAM,EAAE,CAAC;IAC1C,QAAQ,CAAC,iBAAiB,EAAE,MAAM,CAAC;CACpC;AAED,qDAAqD;AACrD,MAAM,WAAW,gBAAgB;IAC/B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,OAAO,EAAE,OAAO,CAAC;IAC1B,QAAQ,CAAC,eAAe,EAAE,MAAM,CAAC;IACjC,QAAQ,CAAC,eAAe,EAAE,OAAO,CAAC;CACnC;AAED,wDAAwD;AACxD,MAAM,WAAW,mBAAmB;IAClC,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC;IAC9B,QAAQ,CAAC,OAAO,EAAE,OAAO,CAAC;IAC1B,QAAQ,CAAC,eAAe,EAAE,MAAM,CAAC;IACjC,QAAQ,CAAC,eAAe,EAAE,OAAO,CAAC;CACnC;AAED,gEAAgE;AAChE,MAAM,WAAW,qBAAqB;IACpC,QAAQ,CAAC,qBAAqB,EAAE,qBAAqB,CAAC;IACtD,QAAQ,CAAC,UAAU,EAAE,SAAS,gBAAgB,EAAE,CAAC;IACjD,QAAQ,CAAC,aAAa,EAAE,SAAS,mBAAmB,EAAE,CAAC;CACxD;AAED,gDAAgD;AAChD,MAAM,WAAW,eAAe;IAC9B,QAAQ,CAAC,aAAa,EAAE,MAAM,CAAC;IAC/B,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC;IAC9B,QAAQ,CAAC,aAAa,EAAE,MAAM,CAAC;IAC/B,QAAQ,CAAC,cAAc,EAAE,MAAM,CAAC;IAChC,Q
AAQ,CAAC,kBAAkB,EAAE,MAAM,CAAC;CACrC;AAED,iFAAiF;AACjF,MAAM,WAAW,eAAe;IAC9B,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,WAAW,EAAE,SAAS,oBAAoB,EAAE,CAAC;IACtD,QAAQ,CAAC,gBAAgB,EAAE,OAAO,CAAC;IACnC,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;CAC5B;AAED,6DAA6D;AAC7D,MAAM,WAAW,mBAAmB;IAClC,QAAQ,CAAC,eAAe,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;IAC3D,QAAQ,CAAC,aAAa,EAAE,MAAM,CAAC;IAC/B,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,UAAU,EAAE,SAAS,MAAM,EAAE,CAAC;CACxC;AAED,qEAAqE;AACrE,MAAM,WAAW,wBAAwB;IACvC,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC;IAC9B,QAAQ,CAAC,iBAAiB,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;IAC7D,QAAQ,CAAC,SAAS,EAAE,SAAS,MAAM,EAAE,CAAC;IACtC,QAAQ,CAAC,UAAU,EAAE,SAAS,MAAM,EAAE,CAAC;CACxC;AAED,yEAAyE;AACzE,MAAM,WAAW,UAAU;IACzB,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,MAAM,EAAE,gBAAgB,CAAC;IAClC,QAAQ,CAAC,UAAU,EAAE,SAAS,eAAe,EAAE,CAAC;IAChD,QAAQ,CAAC,MAAM,EAAE,SAAS,eAAe,EAAE,CAAC;IAC5C,QAAQ,CAAC,gBAAgB,EAAE,gBAAgB,CAAC;IAC5C,QAAQ,CAAC,eAAe,EAAE,SAAS,kBAAkB,EAAE,CAAC;IACxD,QAAQ,CAAC,eAAe,EAAE,qBAAqB,CAAC;IAChD,QAAQ,CAAC,SAAS,EAAE,mBAAmB,CAAC;IACxC,QAAQ,CAAC,MAAM,EAAE,SAAS,eAAe,EAAE,CAAC;IAC5C,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC;IAC9B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;CAC5B"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"evaluation.js","sourceRoot":"","sources":["../../../src/server/types/evaluation.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,mBAAmB;AACnB,8EAA8E"}
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
export type { ApiProviderConfig, ClaudeMdEntry, EffortLevel, McpHttpConfig, McpServerConfig, McpServerEntry, McpSseConfig, McpStdioConfig, OAuthProviderConfig, PermissionMode, Provider, ProviderConfig, RuleEntry, ScoringDimension, SkillEntry, SubagentEntry, ThinkingConfig, } from './provider.js';
|
|
2
|
+
export type { Scenario, ScenarioCategory, WorkspaceFile } from './scenario.js';
|
|
3
|
+
export type { Run, RunStatus, SDKMessageRecord } from './run.js';
|
|
4
|
+
export type { AnswerComparison, CriticalPartResult, Evaluation, EvaluationRequest, EvaluationRound, EvaluationStatus, EvaluationSynthesis, EvaluatorConfig, EvaluatorLedger, IndividualEvaluation, InstructionCompliance, SetupComplianceReport, SetupEffectivenessReport, SkillUsageReport, SubagentUsageReport, } from './evaluation.js';
|
|
5
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/server/types/index.ts"],"names":[],"mappings":"AAIA,YAAY,EACV,iBAAiB,EACjB,aAAa,EACb,WAAW,EACX,aAAa,EACb,eAAe,EACf,cAAc,EACd,YAAY,EACZ,cAAc,EACd,mBAAmB,EACnB,cAAc,EACd,QAAQ,EACR,cAAc,EACd,SAAS,EACT,gBAAgB,EAChB,UAAU,EACV,aAAa,EACb,cAAc,GACf,MAAM,eAAe,CAAC;AAEvB,YAAY,EAAE,QAAQ,EAAE,gBAAgB,EAAE,aAAa,EAAE,MAAM,eAAe,CAAC;AAE/E,YAAY,EAAE,GAAG,EAAE,SAAS,EAAE,gBAAgB,EAAE,MAAM,UAAU,CAAC;AAEjE,YAAY,EACV,gBAAgB,EAChB,kBAAkB,EAClB,UAAU,EACV,iBAAiB,EACjB,eAAe,EACf,gBAAgB,EAChB,mBAAmB,EACnB,eAAe,EACf,eAAe,EACf,oBAAoB,EACpB,qBAAqB,EACrB,qBAAqB,EACrB,wBAAwB,EACxB,gBAAgB,EAChB,mBAAmB,GACpB,MAAM,iBAAiB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/server/types/index.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,yCAAyC;AACzC,8EAA8E"}
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
/** Discriminated union for provider authentication strategies. */
|
|
2
|
+
export type ProviderConfig = ApiProviderConfig | OAuthProviderConfig;
|
|
3
|
+
export interface ApiProviderConfig {
|
|
4
|
+
readonly kind: 'api';
|
|
5
|
+
readonly baseUrl: string;
|
|
6
|
+
readonly apiKey: string;
|
|
7
|
+
readonly model: string;
|
|
8
|
+
}
|
|
9
|
+
export interface OAuthProviderConfig {
|
|
10
|
+
readonly kind: 'oauth';
|
|
11
|
+
readonly oauthToken: string;
|
|
12
|
+
readonly model: string;
|
|
13
|
+
}
|
|
14
|
+
/** Discriminated union for thinking configuration. */
|
|
15
|
+
export type ThinkingConfig = {
|
|
16
|
+
readonly kind: 'adaptive';
|
|
17
|
+
} | {
|
|
18
|
+
readonly kind: 'enabled';
|
|
19
|
+
readonly budgetTokens: number;
|
|
20
|
+
} | {
|
|
21
|
+
readonly kind: 'disabled';
|
|
22
|
+
};
|
|
23
|
+
/** A CLAUDE.md entry that can be inlined or loaded from a file. */
|
|
24
|
+
export interface ClaudeMdEntry {
|
|
25
|
+
readonly role: 'project' | 'user';
|
|
26
|
+
readonly content: string;
|
|
27
|
+
readonly loadFromFile?: string;
|
|
28
|
+
}
|
|
29
|
+
/** A named rule entry. */
|
|
30
|
+
export interface RuleEntry {
|
|
31
|
+
readonly name: string;
|
|
32
|
+
readonly content: string;
|
|
33
|
+
readonly loadFromFile?: string;
|
|
34
|
+
}
|
|
35
|
+
/** A named skill entry. */
|
|
36
|
+
export interface SkillEntry {
|
|
37
|
+
readonly name: string;
|
|
38
|
+
readonly content: string;
|
|
39
|
+
readonly loadFromFile?: string;
|
|
40
|
+
}
|
|
41
|
+
/** A named subagent definition. */
|
|
42
|
+
export interface SubagentEntry {
|
|
43
|
+
readonly name: string;
|
|
44
|
+
readonly description: string;
|
|
45
|
+
readonly prompt: string;
|
|
46
|
+
readonly tools?: readonly string[];
|
|
47
|
+
readonly disallowedTools?: readonly string[];
|
|
48
|
+
readonly model?: string;
|
|
49
|
+
readonly mcpServers?: readonly string[];
|
|
50
|
+
readonly skills?: readonly string[];
|
|
51
|
+
readonly maxTurns?: number;
|
|
52
|
+
readonly loadFromFile?: string;
|
|
53
|
+
}
|
|
54
|
+
/** MCP server transport configs — discriminated on `transport`. */
|
|
55
|
+
export interface McpStdioConfig {
|
|
56
|
+
readonly transport: 'stdio';
|
|
57
|
+
readonly command: string;
|
|
58
|
+
readonly args?: readonly string[];
|
|
59
|
+
readonly env?: Readonly<Record<string, string>>;
|
|
60
|
+
}
|
|
61
|
+
export interface McpHttpConfig {
|
|
62
|
+
readonly transport: 'http';
|
|
63
|
+
readonly url: string;
|
|
64
|
+
readonly headers?: Readonly<Record<string, string>>;
|
|
65
|
+
}
|
|
66
|
+
export interface McpSseConfig {
|
|
67
|
+
readonly transport: 'sse';
|
|
68
|
+
readonly url: string;
|
|
69
|
+
readonly headers?: Readonly<Record<string, string>>;
|
|
70
|
+
}
|
|
71
|
+
export type McpServerConfig = McpStdioConfig | McpHttpConfig | McpSseConfig;
|
|
72
|
+
/** A named MCP server entry. */
|
|
73
|
+
export interface McpServerEntry {
|
|
74
|
+
readonly name: string;
|
|
75
|
+
readonly config: McpServerConfig;
|
|
76
|
+
}
|
|
77
|
+
/** Permission mode passed to the SDK. */
|
|
78
|
+
export type PermissionMode = 'default' | 'acceptEdits' | 'bypassPermissions' | 'plan' | 'dontAsk';
|
|
79
|
+
/** Effort level for Claude. 'none' = not applicable (e.g. non-Anthropic providers). */
|
|
80
|
+
export type EffortLevel = 'none' | 'low' | 'medium' | 'high';
|
|
81
|
+
/** Scoring dimension used to evaluate a run. */
|
|
82
|
+
export interface ScoringDimension {
|
|
83
|
+
readonly name: string;
|
|
84
|
+
readonly weight: number;
|
|
85
|
+
readonly description: string;
|
|
86
|
+
}
|
|
87
|
+
/** A provider configuration that defines how to connect to an LLM provider. */
|
|
88
|
+
export interface Provider {
|
|
89
|
+
readonly id: string;
|
|
90
|
+
readonly name: string;
|
|
91
|
+
readonly description: string;
|
|
92
|
+
readonly provider: ProviderConfig;
|
|
93
|
+
readonly thinking?: ThinkingConfig;
|
|
94
|
+
readonly effort?: EffortLevel;
|
|
95
|
+
readonly timeoutSeconds: number;
|
|
96
|
+
readonly createdAt: string;
|
|
97
|
+
readonly updatedAt: string;
|
|
98
|
+
}
|
|
99
|
+
//# sourceMappingURL=provider.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"provider.d.ts","sourceRoot":"","sources":["../../../src/server/types/provider.ts"],"names":[],"mappings":"AAIA,kEAAkE;AAClE,MAAM,MAAM,cAAc,GAAG,iBAAiB,GAAG,mBAAmB,CAAC;AAErE,MAAM,WAAW,iBAAiB;IAChC,QAAQ,CAAC,IAAI,EAAE,KAAK,CAAC;IACrB,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;CACxB;AAED,MAAM,WAAW,mBAAmB;IAClC,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC;IACvB,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;CACxB;AAED,sDAAsD;AACtD,MAAM,MAAM,cAAc,GACtB;IAAE,QAAQ,CAAC,IAAI,EAAE,UAAU,CAAA;CAAE,GAC7B;IAAE,QAAQ,CAAC,IAAI,EAAE,SAAS,CAAC;IAAC,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAA;CAAE,GAC3D;IAAE,QAAQ,CAAC,IAAI,EAAE,UAAU,CAAA;CAAE,CAAC;AAElC,mEAAmE;AACnE,MAAM,WAAW,aAAa;IAC5B,QAAQ,CAAC,IAAI,EAAE,SAAS,GAAG,MAAM,CAAC;IAClC,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,YAAY,CAAC,EAAE,MAAM,CAAC;CAChC;AAED,0BAA0B;AAC1B,MAAM,WAAW,SAAS;IACxB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,YAAY,CAAC,EAAE,MAAM,CAAC;CAChC;AAED,2BAA2B;AAC3B,MAAM,WAAW,UAAU;IACzB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,YAAY,CAAC,EAAE,MAAM,CAAC;CAChC;AAED,mCAAmC;AACnC,MAAM,WAAW,aAAa;IAC5B,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,KAAK,CAAC,EAAE,SAAS,MAAM,EAAE,CAAC;IACnC,QAAQ,CAAC,eAAe,CAAC,EAAE,SAAS,MAAM,EAAE,CAAC;IAC7C,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,UAAU,CAAC,EAAE,SAAS,MAAM,EAAE,CAAC;IACxC,QAAQ,CAAC,MAAM,CAAC,EAAE,SAAS,MAAM,EAAE,CAAC;IACpC,QAAQ,CAAC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,YAAY,CAAC,EAAE,MAAM,CAAC;CAChC;AAED,mEAAmE;AACnE,MAAM,WAAW,cAAc;IAC7B,QAAQ,CAAC,SAAS,EAAE,OAAO,CAAC;IAC5B,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,IAAI,CAAC,EAAE,SAAS,MAAM,EAAE,CAAC;IAClC,QAAQ,CAAC,GAAG,CAAC,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;CACjD;AAED,MAAM,WAAW,aAAa;IAC5B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,GAAG,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC
,OAAO,CAAC,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;CACrD;AAED,MAAM,WAAW,YAAY;IAC3B,QAAQ,CAAC,SAAS,EAAE,KAAK,CAAC;IAC1B,QAAQ,CAAC,GAAG,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,OAAO,CAAC,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;CACrD;AAED,MAAM,MAAM,eAAe,GAAG,cAAc,GAAG,aAAa,GAAG,YAAY,CAAC;AAE5E,gCAAgC;AAChC,MAAM,WAAW,cAAc;IAC7B,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,MAAM,EAAE,eAAe,CAAC;CAClC;AAED,yCAAyC;AACzC,MAAM,MAAM,cAAc,GACtB,SAAS,GACT,aAAa,GACb,mBAAmB,GACnB,MAAM,GACN,SAAS,CAAC;AAEd,uFAAuF;AACvF,MAAM,MAAM,WAAW,GAAG,MAAM,GAAG,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;AAE7D,gDAAgD;AAChD,MAAM,WAAW,gBAAgB;IAC/B,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;CAC9B;AAED,+EAA+E;AAC/E,MAAM,WAAW,QAAQ;IACvB,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,QAAQ,EAAE,cAAc,CAAC;IAClC,QAAQ,CAAC,QAAQ,CAAC,EAAE,cAAc,CAAC;IACnC,QAAQ,CAAC,MAAM,CAAC,EAAE,WAAW,CAAC;IAC9B,QAAQ,CAAC,cAAc,EAAE,MAAM,CAAC;IAChC,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;CAC5B"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"provider.js","sourceRoot":"","sources":["../../../src/server/types/provider.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,iBAAiB;AACjB,8EAA8E"}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import type { Provider } from './provider.js';
|
|
2
|
+
import type { Scenario } from './scenario.js';
|
|
3
|
+
/** A raw SDK message captured during a run. */
|
|
4
|
+
export interface SDKMessageRecord {
|
|
5
|
+
readonly timestamp: string;
|
|
6
|
+
readonly message: Readonly<Record<string, unknown>>;
|
|
7
|
+
}
|
|
8
|
+
/** Lifecycle status of a run. */
|
|
9
|
+
export type RunStatus = 'pending' | 'running' | 'completed' | 'failed' | 'cancelled';
|
|
10
|
+
/** A single run: one provider + one scenario + captured output. */
|
|
11
|
+
export interface Run {
|
|
12
|
+
readonly id: string;
|
|
13
|
+
readonly providerId: string;
|
|
14
|
+
readonly scenarioId: string;
|
|
15
|
+
readonly status: RunStatus;
|
|
16
|
+
readonly providerSnapshot: Provider;
|
|
17
|
+
readonly scenarioSnapshot: Scenario;
|
|
18
|
+
readonly messages: readonly SDKMessageRecord[];
|
|
19
|
+
readonly resultText: string;
|
|
20
|
+
readonly totalCostUsd: number;
|
|
21
|
+
readonly durationMs: number;
|
|
22
|
+
readonly numTurns: number;
|
|
23
|
+
readonly error?: string;
|
|
24
|
+
readonly reviewerProviderIds?: readonly string[];
|
|
25
|
+
readonly reviewerProviderSnapshots?: readonly Provider[];
|
|
26
|
+
readonly maxEvalRounds?: number;
|
|
27
|
+
readonly evaluationId?: string;
|
|
28
|
+
readonly createdAt: string;
|
|
29
|
+
readonly updatedAt: string;
|
|
30
|
+
}
|
|
31
|
+
//# sourceMappingURL=run.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/server/types/run.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AAC9C,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AAE9C,+CAA+C;AAC/C,MAAM,WAAW,gBAAgB;IAC/B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,OAAO,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC;CACrD;AAED,iCAAiC;AACjC,MAAM,MAAM,SAAS,GAAG,SAAS,GAAG,SAAS,GAAG,WAAW,GAAG,QAAQ,GAAG,WAAW,CAAC;AAErF,mEAAmE;AACnE,MAAM,WAAW,GAAG;IAClB,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,MAAM,EAAE,SAAS,CAAC;IAC3B,QAAQ,CAAC,gBAAgB,EAAE,QAAQ,CAAC;IACpC,QAAQ,CAAC,gBAAgB,EAAE,QAAQ,CAAC;IACpC,QAAQ,CAAC,QAAQ,EAAE,SAAS,gBAAgB,EAAE,CAAC;IAC/C,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC;IAC9B,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;IAC1B,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IAExB,QAAQ,CAAC,mBAAmB,CAAC,EAAE,SAAS,MAAM,EAAE,CAAC;IACjD,QAAQ,CAAC,yBAAyB,CAAC,EAAE,SAAS,QAAQ,EAAE,CAAC;IACzD,QAAQ,CAAC,aAAa,CAAC,EAAE,MAAM,CAAC;IAChC,QAAQ,CAAC,YAAY,CAAC,EAAE,MAAM,CAAC;IAC/B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;CAC5B"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"run.js","sourceRoot":"","sources":["../../../src/server/types/run.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,YAAY;AACZ,8EAA8E"}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import type { ClaudeMdEntry, McpServerEntry, PermissionMode, RuleEntry, ScoringDimension, SkillEntry, SubagentEntry } from './provider.js';
|
|
2
|
+
/** Built-in category labels for scenarios (behavior/planning focused). */
|
|
3
|
+
export type ScenarioCategory = 'planning' | 'instruction-following' | 'reasoning' | 'tool-strategy' | 'error-handling' | 'ambiguity-handling' | 'scope-management' | 'custom';
|
|
4
|
+
/** A file that should exist in the workspace before the scenario runs. */
|
|
5
|
+
export interface WorkspaceFile {
|
|
6
|
+
readonly path: string;
|
|
7
|
+
readonly content: string;
|
|
8
|
+
}
|
|
9
|
+
/** A scenario defines what to test, agent config, and how to grade. */
|
|
10
|
+
export interface Scenario {
|
|
11
|
+
readonly id: string;
|
|
12
|
+
readonly name: string;
|
|
13
|
+
readonly category: ScenarioCategory;
|
|
14
|
+
readonly claudeMdFiles: readonly ClaudeMdEntry[];
|
|
15
|
+
readonly rules: readonly RuleEntry[];
|
|
16
|
+
readonly skills: readonly SkillEntry[];
|
|
17
|
+
readonly subagents: readonly SubagentEntry[];
|
|
18
|
+
readonly mcpServers: readonly McpServerEntry[];
|
|
19
|
+
readonly permissionMode: PermissionMode;
|
|
20
|
+
readonly maxTurns?: number;
|
|
21
|
+
readonly allowedTools?: readonly string[];
|
|
22
|
+
readonly disallowedTools?: readonly string[];
|
|
23
|
+
readonly prompt: string;
|
|
24
|
+
readonly workspaceFiles: readonly WorkspaceFile[];
|
|
25
|
+
readonly expectedAnswer: string;
|
|
26
|
+
readonly criticalRequirements: readonly string[];
|
|
27
|
+
readonly gradingGuidelines: string;
|
|
28
|
+
readonly scoringDimensions: readonly ScoringDimension[];
|
|
29
|
+
readonly createdAt: string;
|
|
30
|
+
readonly updatedAt: string;
|
|
31
|
+
}
|
|
32
|
+
//# sourceMappingURL=scenario.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"scenario.d.ts","sourceRoot":"","sources":["../../../src/server/types/scenario.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EACV,aAAa,EACb,cAAc,EACd,cAAc,EACd,SAAS,EACT,gBAAgB,EAChB,UAAU,EACV,aAAa,EACd,MAAM,eAAe,CAAC;AAEvB,0EAA0E;AAC1E,MAAM,MAAM,gBAAgB,GACxB,UAAU,GACV,uBAAuB,GACvB,WAAW,GACX,eAAe,GACf,gBAAgB,GAChB,oBAAoB,GACpB,kBAAkB,GAClB,QAAQ,CAAC;AAEb,0EAA0E;AAC1E,MAAM,WAAW,aAAa;IAC5B,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;CAC1B;AAED,uEAAuE;AACvE,MAAM,WAAW,QAAQ;IACvB,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,QAAQ,EAAE,gBAAgB,CAAC;IAEpC,QAAQ,CAAC,aAAa,EAAE,SAAS,aAAa,EAAE,CAAC;IACjD,QAAQ,CAAC,KAAK,EAAE,SAAS,SAAS,EAAE,CAAC;IACrC,QAAQ,CAAC,MAAM,EAAE,SAAS,UAAU,EAAE,CAAC;IACvC,QAAQ,CAAC,SAAS,EAAE,SAAS,aAAa,EAAE,CAAC;IAC7C,QAAQ,CAAC,UAAU,EAAE,SAAS,cAAc,EAAE,CAAC;IAC/C,QAAQ,CAAC,cAAc,EAAE,cAAc,CAAC;IACxC,QAAQ,CAAC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,YAAY,CAAC,EAAE,SAAS,MAAM,EAAE,CAAC;IAC1C,QAAQ,CAAC,eAAe,CAAC,EAAE,SAAS,MAAM,EAAE,CAAC;IAE7C,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,cAAc,EAAE,SAAS,aAAa,EAAE,CAAC;IAElD,QAAQ,CAAC,cAAc,EAAE,MAAM,CAAC;IAChC,QAAQ,CAAC,oBAAoB,EAAE,SAAS,MAAM,EAAE,CAAC;IACjD,QAAQ,CAAC,iBAAiB,EAAE,MAAM,CAAC;IACnC,QAAQ,CAAC,iBAAiB,EAAE,SAAS,gBAAgB,EAAE,CAAC;IACxD,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;CAC5B"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"scenario.js","sourceRoot":"","sources":["../../../src/server/types/scenario.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,iBAAiB;AACjB,8EAA8E"}
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
/** Discriminated union for provider authentication strategies. */
|
|
2
|
+
export type ProviderConfig = ApiProviderConfig | OAuthProviderConfig;
|
|
3
|
+
export interface ApiProviderConfig {
|
|
4
|
+
readonly kind: 'api';
|
|
5
|
+
readonly baseUrl: string;
|
|
6
|
+
readonly apiKey: string;
|
|
7
|
+
readonly model: string;
|
|
8
|
+
}
|
|
9
|
+
export interface OAuthProviderConfig {
|
|
10
|
+
readonly kind: 'oauth';
|
|
11
|
+
readonly oauthToken: string;
|
|
12
|
+
readonly model: string;
|
|
13
|
+
}
|
|
14
|
+
/** Discriminated union for thinking configuration. */
|
|
15
|
+
export type ThinkingConfig = {
|
|
16
|
+
readonly kind: 'adaptive';
|
|
17
|
+
} | {
|
|
18
|
+
readonly kind: 'enabled';
|
|
19
|
+
readonly budgetTokens: number;
|
|
20
|
+
} | {
|
|
21
|
+
readonly kind: 'disabled';
|
|
22
|
+
};
|
|
23
|
+
/** A CLAUDE.md entry that can be inlined or loaded from a file. */
|
|
24
|
+
export interface ClaudeMdEntry {
|
|
25
|
+
readonly role: 'project' | 'user';
|
|
26
|
+
readonly content: string;
|
|
27
|
+
readonly loadFromFile?: string;
|
|
28
|
+
}
|
|
29
|
+
/** A named rule entry. */
|
|
30
|
+
export interface RuleEntry {
|
|
31
|
+
readonly name: string;
|
|
32
|
+
readonly content: string;
|
|
33
|
+
readonly loadFromFile?: string;
|
|
34
|
+
}
|
|
35
|
+
/** A named skill entry. */
|
|
36
|
+
export interface SkillEntry {
|
|
37
|
+
readonly name: string;
|
|
38
|
+
readonly content: string;
|
|
39
|
+
readonly loadFromFile?: string;
|
|
40
|
+
}
|
|
41
|
+
/** A named subagent definition. */
|
|
42
|
+
export interface SubagentEntry {
|
|
43
|
+
readonly name: string;
|
|
44
|
+
readonly description: string;
|
|
45
|
+
readonly prompt: string;
|
|
46
|
+
readonly tools?: readonly string[];
|
|
47
|
+
readonly disallowedTools?: readonly string[];
|
|
48
|
+
readonly model?: string;
|
|
49
|
+
readonly mcpServers?: readonly string[];
|
|
50
|
+
readonly skills?: readonly string[];
|
|
51
|
+
readonly maxTurns?: number;
|
|
52
|
+
readonly loadFromFile?: string;
|
|
53
|
+
}
|
|
54
|
+
/** MCP server transport configs — discriminated on `transport`. */
|
|
55
|
+
export interface McpStdioConfig {
|
|
56
|
+
readonly transport: 'stdio';
|
|
57
|
+
readonly command: string;
|
|
58
|
+
readonly args?: readonly string[];
|
|
59
|
+
readonly env?: Readonly<Record<string, string>>;
|
|
60
|
+
}
|
|
61
|
+
export interface McpHttpConfig {
|
|
62
|
+
readonly transport: 'http';
|
|
63
|
+
readonly url: string;
|
|
64
|
+
readonly headers?: Readonly<Record<string, string>>;
|
|
65
|
+
}
|
|
66
|
+
export interface McpSseConfig {
|
|
67
|
+
readonly transport: 'sse';
|
|
68
|
+
readonly url: string;
|
|
69
|
+
readonly headers?: Readonly<Record<string, string>>;
|
|
70
|
+
}
|
|
71
|
+
export type McpServerConfig = McpStdioConfig | McpHttpConfig | McpSseConfig;
|
|
72
|
+
/** A named MCP server entry. */
|
|
73
|
+
export interface McpServerEntry {
|
|
74
|
+
readonly name: string;
|
|
75
|
+
readonly config: McpServerConfig;
|
|
76
|
+
}
|
|
77
|
+
/** Permission mode passed to the SDK. */
|
|
78
|
+
export type PermissionMode = 'default' | 'acceptEdits' | 'bypassPermissions' | 'plan' | 'dontAsk';
|
|
79
|
+
/** Effort level for Claude. 'none' = not applicable (e.g. non-Anthropic providers). */
|
|
80
|
+
export type EffortLevel = 'none' | 'low' | 'medium' | 'high';
|
|
81
|
+
/** Scoring dimension used to evaluate a run. */
|
|
82
|
+
export interface ScoringDimension {
|
|
83
|
+
readonly name: string;
|
|
84
|
+
readonly weight: number;
|
|
85
|
+
readonly description: string;
|
|
86
|
+
}
|
|
87
|
+
/** A complete test setup that defines how to connect to the provider. */
|
|
88
|
+
export interface TestSetup {
|
|
89
|
+
readonly id: string;
|
|
90
|
+
readonly name: string;
|
|
91
|
+
readonly description: string;
|
|
92
|
+
readonly provider: ProviderConfig;
|
|
93
|
+
readonly thinking?: ThinkingConfig;
|
|
94
|
+
readonly effort?: EffortLevel;
|
|
95
|
+
readonly timeoutSeconds: number;
|
|
96
|
+
readonly createdAt: string;
|
|
97
|
+
readonly updatedAt: string;
|
|
98
|
+
}
|
|
99
|
+
//# sourceMappingURL=setup.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"setup.d.ts","sourceRoot":"","sources":["../../../src/server/types/setup.ts"],"names":[],"mappings":"AAIA,kEAAkE;AAClE,MAAM,MAAM,cAAc,GAAG,iBAAiB,GAAG,mBAAmB,CAAC;AAErE,MAAM,WAAW,iBAAiB;IAChC,QAAQ,CAAC,IAAI,EAAE,KAAK,CAAC;IACrB,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;CACxB;AAED,MAAM,WAAW,mBAAmB;IAClC,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC;IACvB,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;CACxB;AAED,sDAAsD;AACtD,MAAM,MAAM,cAAc,GACtB;IAAE,QAAQ,CAAC,IAAI,EAAE,UAAU,CAAA;CAAE,GAC7B;IAAE,QAAQ,CAAC,IAAI,EAAE,SAAS,CAAC;IAAC,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAA;CAAE,GAC3D;IAAE,QAAQ,CAAC,IAAI,EAAE,UAAU,CAAA;CAAE,CAAC;AAElC,mEAAmE;AACnE,MAAM,WAAW,aAAa;IAC5B,QAAQ,CAAC,IAAI,EAAE,SAAS,GAAG,MAAM,CAAC;IAClC,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,YAAY,CAAC,EAAE,MAAM,CAAC;CAChC;AAED,0BAA0B;AAC1B,MAAM,WAAW,SAAS;IACxB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,YAAY,CAAC,EAAE,MAAM,CAAC;CAChC;AAED,2BAA2B;AAC3B,MAAM,WAAW,UAAU;IACzB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,YAAY,CAAC,EAAE,MAAM,CAAC;CAChC;AAED,mCAAmC;AACnC,MAAM,WAAW,aAAa;IAC5B,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,KAAK,CAAC,EAAE,SAAS,MAAM,EAAE,CAAC;IACnC,QAAQ,CAAC,eAAe,CAAC,EAAE,SAAS,MAAM,EAAE,CAAC;IAC7C,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,UAAU,CAAC,EAAE,SAAS,MAAM,EAAE,CAAC;IACxC,QAAQ,CAAC,MAAM,CAAC,EAAE,SAAS,MAAM,EAAE,CAAC;IACpC,QAAQ,CAAC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,YAAY,CAAC,EAAE,MAAM,CAAC;CAChC;AAED,mEAAmE;AACnE,MAAM,WAAW,cAAc;IAC7B,QAAQ,CAAC,SAAS,EAAE,OAAO,CAAC;IAC5B,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,IAAI,CAAC,EAAE,SAAS,MAAM,EAAE,CAAC;IAClC,QAAQ,CAAC,GAAG,CAAC,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;CACjD;AAED,MAAM,WAAW,aAAa;IAC5B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,GAAG,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,OAAO,
CAAC,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;CACrD;AAED,MAAM,WAAW,YAAY;IAC3B,QAAQ,CAAC,SAAS,EAAE,KAAK,CAAC;IAC1B,QAAQ,CAAC,GAAG,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,OAAO,CAAC,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;CACrD;AAED,MAAM,MAAM,eAAe,GAAG,cAAc,GAAG,aAAa,GAAG,YAAY,CAAC;AAE5E,gCAAgC;AAChC,MAAM,WAAW,cAAc;IAC7B,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,MAAM,EAAE,eAAe,CAAC;CAClC;AAED,yCAAyC;AACzC,MAAM,MAAM,cAAc,GACtB,SAAS,GACT,aAAa,GACb,mBAAmB,GACnB,MAAM,GACN,SAAS,CAAC;AAEd,uFAAuF;AACvF,MAAM,MAAM,WAAW,GAAG,MAAM,GAAG,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;AAE7D,gDAAgD;AAChD,MAAM,WAAW,gBAAgB;IAC/B,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;CAC9B;AAED,yEAAyE;AACzE,MAAM,WAAW,SAAS;IACxB,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,QAAQ,EAAE,cAAc,CAAC;IAClC,QAAQ,CAAC,QAAQ,CAAC,EAAE,cAAc,CAAC;IACnC,QAAQ,CAAC,MAAM,CAAC,EAAE,WAAW,CAAC;IAC9B,QAAQ,CAAC,cAAc,EAAE,MAAM,CAAC;IAChC,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;CAC5B"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"setup.js","sourceRoot":"","sources":["../../../src/server/types/setup.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,yBAAyB;AACzB,8EAA8E"}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import express from 'express';
|
|
2
|
+
import type { IStorage } from './interfaces/storage.js';
|
|
3
|
+
import type { ILogger } from './interfaces/logger.js';
|
|
4
|
+
import type { IRunner } from './interfaces/runner.js';
|
|
5
|
+
import type { IEvaluator } from './interfaces/evaluator.js';
|
|
6
|
+
export interface AppDeps {
|
|
7
|
+
storage: IStorage;
|
|
8
|
+
logger: ILogger;
|
|
9
|
+
runner?: IRunner;
|
|
10
|
+
evaluator?: IEvaluator;
|
|
11
|
+
}
|
|
12
|
+
export declare function createApp(deps: AppDeps): express.Express;
|
|
13
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/server/index.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAK9B,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,yBAAyB,CAAC;AACxD,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,wBAAwB,CAAC;AACtD,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,wBAAwB,CAAC;AACtD,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,2BAA2B,CAAC;AAQ5D,MAAM,WAAW,OAAO;IACtB,OAAO,EAAE,QAAQ,CAAC;IAClB,MAAM,EAAE,OAAO,CAAC;IAChB,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,SAAS,CAAC,EAAE,UAAU,CAAC;CACxB;AAED,wBAAgB,SAAS,CAAC,IAAI,EAAE,OAAO,GAAG,OAAO,CAAC,OAAO,CAgExD"}
|