claude-test-bench 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/README.md +474 -0
- package/dist/bin/ctb.d.ts +3 -0
- package/dist/bin/ctb.d.ts.map +1 -0
- package/dist/bin/ctb.js +107 -0
- package/dist/bin/ctb.js.map +1 -0
- package/dist/server/index.d.ts +13 -0
- package/dist/server/index.d.ts.map +1 -0
- package/dist/server/index.js +72 -0
- package/dist/server/index.js.map +1 -0
- package/dist/server/interfaces/evaluator.d.ts +15 -0
- package/dist/server/interfaces/evaluator.d.ts.map +1 -0
- package/dist/server/interfaces/evaluator.js +2 -0
- package/dist/server/interfaces/evaluator.js.map +1 -0
- package/dist/server/interfaces/logger.d.ts +9 -0
- package/dist/server/interfaces/logger.d.ts.map +1 -0
- package/dist/server/interfaces/logger.js +2 -0
- package/dist/server/interfaces/logger.js.map +1 -0
- package/dist/server/interfaces/runner.d.ts +9 -0
- package/dist/server/interfaces/runner.d.ts.map +1 -0
- package/dist/server/interfaces/runner.js +2 -0
- package/dist/server/interfaces/runner.js.map +1 -0
- package/dist/server/interfaces/storage.d.ts +36 -0
- package/dist/server/interfaces/storage.d.ts.map +1 -0
- package/dist/server/interfaces/storage.js +2 -0
- package/dist/server/interfaces/storage.js.map +1 -0
- package/dist/server/interfaces/workspace.d.ts +9 -0
- package/dist/server/interfaces/workspace.d.ts.map +1 -0
- package/dist/server/interfaces/workspace.js +2 -0
- package/dist/server/interfaces/workspace.js.map +1 -0
- package/dist/server/routes/eval-queue.d.ts +23 -0
- package/dist/server/routes/eval-queue.d.ts.map +1 -0
- package/dist/server/routes/eval-queue.js +45 -0
- package/dist/server/routes/eval-queue.js.map +1 -0
- package/dist/server/routes/evaluations.d.ts +8 -0
- package/dist/server/routes/evaluations.d.ts.map +1 -0
- package/dist/server/routes/evaluations.js +217 -0
- package/dist/server/routes/evaluations.js.map +1 -0
- package/dist/server/routes/providers.d.ts +5 -0
- package/dist/server/routes/providers.d.ts.map +1 -0
- package/dist/server/routes/providers.js +194 -0
- package/dist/server/routes/providers.js.map +1 -0
- package/dist/server/routes/run-queue.d.ts +17 -0
- package/dist/server/routes/run-queue.d.ts.map +1 -0
- package/dist/server/routes/run-queue.js +34 -0
- package/dist/server/routes/run-queue.js.map +1 -0
- package/dist/server/routes/run-sse.d.ts +18 -0
- package/dist/server/routes/run-sse.d.ts.map +1 -0
- package/dist/server/routes/run-sse.js +57 -0
- package/dist/server/routes/run-sse.js.map +1 -0
- package/dist/server/routes/runs.d.ts +9 -0
- package/dist/server/routes/runs.d.ts.map +1 -0
- package/dist/server/routes/runs.js +379 -0
- package/dist/server/routes/runs.js.map +1 -0
- package/dist/server/routes/scenarios.d.ts +5 -0
- package/dist/server/routes/scenarios.d.ts.map +1 -0
- package/dist/server/routes/scenarios.js +209 -0
- package/dist/server/routes/scenarios.js.map +1 -0
- package/dist/server/routes/setups.d.ts +5 -0
- package/dist/server/routes/setups.d.ts.map +1 -0
- package/dist/server/routes/setups.js +194 -0
- package/dist/server/routes/setups.js.map +1 -0
- package/dist/server/services/agent-mapper.d.ts +12 -0
- package/dist/server/services/agent-mapper.d.ts.map +1 -0
- package/dist/server/services/agent-mapper.js +75 -0
- package/dist/server/services/agent-mapper.js.map +1 -0
- package/dist/server/services/env-builder.d.ts +10 -0
- package/dist/server/services/env-builder.d.ts.map +1 -0
- package/dist/server/services/env-builder.js +50 -0
- package/dist/server/services/env-builder.js.map +1 -0
- package/dist/server/services/eval-helpers.d.ts +22 -0
- package/dist/server/services/eval-helpers.d.ts.map +1 -0
- package/dist/server/services/eval-helpers.js +75 -0
- package/dist/server/services/eval-helpers.js.map +1 -0
- package/dist/server/services/eval-parsers-debate-impl.d.ts +11 -0
- package/dist/server/services/eval-parsers-debate-impl.d.ts.map +1 -0
- package/dist/server/services/eval-parsers-debate-impl.js +133 -0
- package/dist/server/services/eval-parsers-debate-impl.js.map +1 -0
- package/dist/server/services/eval-parsers.d.ts +24 -0
- package/dist/server/services/eval-parsers.d.ts.map +1 -0
- package/dist/server/services/eval-parsers.js +153 -0
- package/dist/server/services/eval-parsers.js.map +1 -0
- package/dist/server/services/eval-prompts.d.ts +9 -0
- package/dist/server/services/eval-prompts.d.ts.map +1 -0
- package/dist/server/services/eval-prompts.js +170 -0
- package/dist/server/services/eval-prompts.js.map +1 -0
- package/dist/server/services/evaluator.d.ts +10 -0
- package/dist/server/services/evaluator.d.ts.map +1 -0
- package/dist/server/services/evaluator.js +156 -0
- package/dist/server/services/evaluator.js.map +1 -0
- package/dist/server/services/fs-adapter.d.ts +20 -0
- package/dist/server/services/fs-adapter.d.ts.map +1 -0
- package/dist/server/services/fs-adapter.js +13 -0
- package/dist/server/services/fs-adapter.js.map +1 -0
- package/dist/server/services/instruction-parser.d.ts +26 -0
- package/dist/server/services/instruction-parser.d.ts.map +1 -0
- package/dist/server/services/instruction-parser.js +121 -0
- package/dist/server/services/instruction-parser.js.map +1 -0
- package/dist/server/services/log-rotator.d.ts +20 -0
- package/dist/server/services/log-rotator.d.ts.map +1 -0
- package/dist/server/services/log-rotator.js +60 -0
- package/dist/server/services/log-rotator.js.map +1 -0
- package/dist/server/services/logger.d.ts +15 -0
- package/dist/server/services/logger.d.ts.map +1 -0
- package/dist/server/services/logger.js +69 -0
- package/dist/server/services/logger.js.map +1 -0
- package/dist/server/services/runner.d.ts +12 -0
- package/dist/server/services/runner.d.ts.map +1 -0
- package/dist/server/services/runner.js +161 -0
- package/dist/server/services/runner.js.map +1 -0
- package/dist/server/services/seeder.d.ts +5 -0
- package/dist/server/services/seeder.d.ts.map +1 -0
- package/dist/server/services/seeder.js +79 -0
- package/dist/server/services/seeder.js.map +1 -0
- package/dist/server/services/storage-test-helpers.d.ts +21 -0
- package/dist/server/services/storage-test-helpers.d.ts.map +1 -0
- package/dist/server/services/storage-test-helpers.js +158 -0
- package/dist/server/services/storage-test-helpers.js.map +1 -0
- package/dist/server/services/storage.d.ts +35 -0
- package/dist/server/services/storage.d.ts.map +1 -0
- package/dist/server/services/storage.js +219 -0
- package/dist/server/services/storage.js.map +1 -0
- package/dist/server/services/transcript-formatter.d.ts +18 -0
- package/dist/server/services/transcript-formatter.d.ts.map +1 -0
- package/dist/server/services/transcript-formatter.js +156 -0
- package/dist/server/services/transcript-formatter.js.map +1 -0
- package/dist/server/services/workspace.d.ts +11 -0
- package/dist/server/services/workspace.d.ts.map +1 -0
- package/dist/server/services/workspace.js +113 -0
- package/dist/server/services/workspace.js.map +1 -0
- package/dist/server/types/evaluation.d.ts +108 -0
- package/dist/server/types/evaluation.d.ts.map +1 -0
- package/dist/server/types/evaluation.js +5 -0
- package/dist/server/types/evaluation.js.map +1 -0
- package/dist/server/types/index.d.ts +5 -0
- package/dist/server/types/index.d.ts.map +1 -0
- package/dist/server/types/index.js +5 -0
- package/dist/server/types/index.js.map +1 -0
- package/dist/server/types/provider.d.ts +99 -0
- package/dist/server/types/provider.d.ts.map +1 -0
- package/dist/server/types/provider.js +5 -0
- package/dist/server/types/provider.js.map +1 -0
- package/dist/server/types/run.d.ts +31 -0
- package/dist/server/types/run.d.ts.map +1 -0
- package/dist/server/types/run.js +5 -0
- package/dist/server/types/run.js.map +1 -0
- package/dist/server/types/scenario.d.ts +32 -0
- package/dist/server/types/scenario.d.ts.map +1 -0
- package/dist/server/types/scenario.js +5 -0
- package/dist/server/types/scenario.js.map +1 -0
- package/dist/server/types/setup.d.ts +99 -0
- package/dist/server/types/setup.d.ts.map +1 -0
- package/dist/server/types/setup.js +5 -0
- package/dist/server/types/setup.js.map +1 -0
- package/dist/src/server/index.d.ts +13 -0
- package/dist/src/server/index.d.ts.map +1 -0
- package/dist/src/server/index.js +72 -0
- package/dist/src/server/index.js.map +1 -0
- package/dist/src/server/interfaces/evaluator.d.ts +15 -0
- package/dist/src/server/interfaces/evaluator.d.ts.map +1 -0
- package/dist/src/server/interfaces/evaluator.js +2 -0
- package/dist/src/server/interfaces/evaluator.js.map +1 -0
- package/dist/src/server/interfaces/logger.d.ts +9 -0
- package/dist/src/server/interfaces/logger.d.ts.map +1 -0
- package/dist/src/server/interfaces/logger.js +2 -0
- package/dist/src/server/interfaces/logger.js.map +1 -0
- package/dist/src/server/interfaces/runner.d.ts +9 -0
- package/dist/src/server/interfaces/runner.d.ts.map +1 -0
- package/dist/src/server/interfaces/runner.js +2 -0
- package/dist/src/server/interfaces/runner.js.map +1 -0
- package/dist/src/server/interfaces/storage.d.ts +36 -0
- package/dist/src/server/interfaces/storage.d.ts.map +1 -0
- package/dist/src/server/interfaces/storage.js +2 -0
- package/dist/src/server/interfaces/storage.js.map +1 -0
- package/dist/src/server/interfaces/workspace.d.ts +9 -0
- package/dist/src/server/interfaces/workspace.d.ts.map +1 -0
- package/dist/src/server/interfaces/workspace.js +2 -0
- package/dist/src/server/interfaces/workspace.js.map +1 -0
- package/dist/src/server/routes/eval-queue.d.ts +23 -0
- package/dist/src/server/routes/eval-queue.d.ts.map +1 -0
- package/dist/src/server/routes/eval-queue.js +45 -0
- package/dist/src/server/routes/eval-queue.js.map +1 -0
- package/dist/src/server/routes/evaluations.d.ts +8 -0
- package/dist/src/server/routes/evaluations.d.ts.map +1 -0
- package/dist/src/server/routes/evaluations.js +217 -0
- package/dist/src/server/routes/evaluations.js.map +1 -0
- package/dist/src/server/routes/providers.d.ts +5 -0
- package/dist/src/server/routes/providers.d.ts.map +1 -0
- package/dist/src/server/routes/providers.js +194 -0
- package/dist/src/server/routes/providers.js.map +1 -0
- package/dist/src/server/routes/run-queue.d.ts +17 -0
- package/dist/src/server/routes/run-queue.d.ts.map +1 -0
- package/dist/src/server/routes/run-queue.js +34 -0
- package/dist/src/server/routes/run-queue.js.map +1 -0
- package/dist/src/server/routes/run-sse.d.ts +18 -0
- package/dist/src/server/routes/run-sse.d.ts.map +1 -0
- package/dist/src/server/routes/run-sse.js +57 -0
- package/dist/src/server/routes/run-sse.js.map +1 -0
- package/dist/src/server/routes/runs.d.ts +9 -0
- package/dist/src/server/routes/runs.d.ts.map +1 -0
- package/dist/src/server/routes/runs.js +379 -0
- package/dist/src/server/routes/runs.js.map +1 -0
- package/dist/src/server/routes/scenarios.d.ts +5 -0
- package/dist/src/server/routes/scenarios.d.ts.map +1 -0
- package/dist/src/server/routes/scenarios.js +209 -0
- package/dist/src/server/routes/scenarios.js.map +1 -0
- package/dist/src/server/routes/setups.d.ts +5 -0
- package/dist/src/server/routes/setups.d.ts.map +1 -0
- package/dist/src/server/routes/setups.js +194 -0
- package/dist/src/server/routes/setups.js.map +1 -0
- package/dist/src/server/services/agent-mapper.d.ts +12 -0
- package/dist/src/server/services/agent-mapper.d.ts.map +1 -0
- package/dist/src/server/services/agent-mapper.js +75 -0
- package/dist/src/server/services/agent-mapper.js.map +1 -0
- package/dist/src/server/services/env-builder.d.ts +10 -0
- package/dist/src/server/services/env-builder.d.ts.map +1 -0
- package/dist/src/server/services/env-builder.js +50 -0
- package/dist/src/server/services/env-builder.js.map +1 -0
- package/dist/src/server/services/eval-helpers.d.ts +22 -0
- package/dist/src/server/services/eval-helpers.d.ts.map +1 -0
- package/dist/src/server/services/eval-helpers.js +75 -0
- package/dist/src/server/services/eval-helpers.js.map +1 -0
- package/dist/src/server/services/eval-parsers-debate-impl.d.ts +11 -0
- package/dist/src/server/services/eval-parsers-debate-impl.d.ts.map +1 -0
- package/dist/src/server/services/eval-parsers-debate-impl.js +133 -0
- package/dist/src/server/services/eval-parsers-debate-impl.js.map +1 -0
- package/dist/src/server/services/eval-parsers.d.ts +24 -0
- package/dist/src/server/services/eval-parsers.d.ts.map +1 -0
- package/dist/src/server/services/eval-parsers.js +153 -0
- package/dist/src/server/services/eval-parsers.js.map +1 -0
- package/dist/src/server/services/eval-prompts.d.ts +9 -0
- package/dist/src/server/services/eval-prompts.d.ts.map +1 -0
- package/dist/src/server/services/eval-prompts.js +170 -0
- package/dist/src/server/services/eval-prompts.js.map +1 -0
- package/dist/src/server/services/evaluator.d.ts +10 -0
- package/dist/src/server/services/evaluator.d.ts.map +1 -0
- package/dist/src/server/services/evaluator.js +156 -0
- package/dist/src/server/services/evaluator.js.map +1 -0
- package/dist/src/server/services/fs-adapter.d.ts +20 -0
- package/dist/src/server/services/fs-adapter.d.ts.map +1 -0
- package/dist/src/server/services/fs-adapter.js +13 -0
- package/dist/src/server/services/fs-adapter.js.map +1 -0
- package/dist/src/server/services/instruction-parser.d.ts +26 -0
- package/dist/src/server/services/instruction-parser.d.ts.map +1 -0
- package/dist/src/server/services/instruction-parser.js +121 -0
- package/dist/src/server/services/instruction-parser.js.map +1 -0
- package/dist/src/server/services/log-rotator.d.ts +20 -0
- package/dist/src/server/services/log-rotator.d.ts.map +1 -0
- package/dist/src/server/services/log-rotator.js +60 -0
- package/dist/src/server/services/log-rotator.js.map +1 -0
- package/dist/src/server/services/logger.d.ts +15 -0
- package/dist/src/server/services/logger.d.ts.map +1 -0
- package/dist/src/server/services/logger.js +69 -0
- package/dist/src/server/services/logger.js.map +1 -0
- package/dist/src/server/services/runner.d.ts +12 -0
- package/dist/src/server/services/runner.d.ts.map +1 -0
- package/dist/src/server/services/runner.js +161 -0
- package/dist/src/server/services/runner.js.map +1 -0
- package/dist/src/server/services/seeder.d.ts +5 -0
- package/dist/src/server/services/seeder.d.ts.map +1 -0
- package/dist/src/server/services/seeder.js +79 -0
- package/dist/src/server/services/seeder.js.map +1 -0
- package/dist/src/server/services/storage.d.ts +35 -0
- package/dist/src/server/services/storage.d.ts.map +1 -0
- package/dist/src/server/services/storage.js +219 -0
- package/dist/src/server/services/storage.js.map +1 -0
- package/dist/src/server/services/transcript-formatter.d.ts +18 -0
- package/dist/src/server/services/transcript-formatter.d.ts.map +1 -0
- package/dist/src/server/services/transcript-formatter.js +156 -0
- package/dist/src/server/services/transcript-formatter.js.map +1 -0
- package/dist/src/server/services/workspace.d.ts +11 -0
- package/dist/src/server/services/workspace.d.ts.map +1 -0
- package/dist/src/server/services/workspace.js +113 -0
- package/dist/src/server/services/workspace.js.map +1 -0
- package/dist/src/server/types/evaluation.d.ts +108 -0
- package/dist/src/server/types/evaluation.d.ts.map +1 -0
- package/dist/src/server/types/evaluation.js +5 -0
- package/dist/src/server/types/evaluation.js.map +1 -0
- package/dist/src/server/types/index.d.ts +5 -0
- package/dist/src/server/types/index.d.ts.map +1 -0
- package/dist/src/server/types/index.js +5 -0
- package/dist/src/server/types/index.js.map +1 -0
- package/dist/src/server/types/provider.d.ts +99 -0
- package/dist/src/server/types/provider.d.ts.map +1 -0
- package/dist/src/server/types/provider.js +5 -0
- package/dist/src/server/types/provider.js.map +1 -0
- package/dist/src/server/types/run.d.ts +31 -0
- package/dist/src/server/types/run.d.ts.map +1 -0
- package/dist/src/server/types/run.js +5 -0
- package/dist/src/server/types/run.js.map +1 -0
- package/dist/src/server/types/scenario.d.ts +32 -0
- package/dist/src/server/types/scenario.d.ts.map +1 -0
- package/dist/src/server/types/scenario.js +5 -0
- package/dist/src/server/types/scenario.js.map +1 -0
- package/dist/src/server/types/setup.d.ts +99 -0
- package/dist/src/server/types/setup.d.ts.map +1 -0
- package/dist/src/server/types/setup.js +5 -0
- package/dist/src/server/types/setup.js.map +1 -0
- package/dist/web/assets/index-C4dw8OpW.css +1 -0
- package/dist/web/assets/index-wve8IczO.js +76 -0
- package/dist/web/index.html +15 -0
- package/docs/schemas/provider-api.example.json +16 -0
- package/docs/schemas/provider-oauth.example.json +15 -0
- package/docs/schemas/provider.example.json +16 -0
- package/docs/schemas/scenario-baseline.example.json +35 -0
- package/docs/schemas/scenario-carwash-baseline.example.json +33 -0
- package/docs/schemas/scenario-carwash-with-claude-md.example.json +40 -0
- package/docs/schemas/scenario-golden-rules-baseline.example.json +51 -0
- package/docs/schemas/scenario-golden-rules-with-claude-md.example.json +61 -0
- package/docs/schemas/scenario-negative-analysis-baseline.example.json +34 -0
- package/docs/schemas/scenario-negative-analysis-with-claude-md.example.json +41 -0
- package/docs/schemas/scenario-with-claude-md.example.json +41 -0
- package/docs/schemas/scenario.example.json +33 -0
- package/package.json +92 -0
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import type { IndividualEvaluation, InstructionCompliance } from '../types/evaluation.js';
|
|
2
|
+
export { parseSynthesisResponse, parseDebateResponse } from './eval-parsers-debate-impl.js';
|
|
3
|
+
export type { Verdict, DebateParseResult } from './eval-parsers-debate-impl.js';
|
|
4
|
+
/**
 * Structured data extracted from an evaluator's scoring response.
 * parseScoreResponse returns this as a Partial, so any field may be absent.
 */
export interface ScoreParseResult {
|
|
5
|
+
/** Score per dimension name; values taken from JSON are clamped to the 0-10 range. */
readonly scores: Readonly<Record<string, number>>;
|
|
6
|
+
/** Overall closeness, clamped to the 0-1 range when taken from JSON. */
readonly overallCloseness: number;
|
|
7
|
+
/** String entries of the response's `missedCritical` array (non-strings are dropped). */
readonly missedCritical: readonly string[];
|
|
8
|
+
/** String entries of the response's `strengths` array (non-strings are dropped). */
readonly strengths: readonly string[];
|
|
9
|
+
/** String entries of the response's `weaknesses` array (non-strings are dropped). */
readonly weaknesses: readonly string[];
|
|
10
|
+
/** The response's `summary`, kept only when it is a string. */
readonly summary: string;
|
|
11
|
+
}
|
|
12
|
+
/**
 * Parse an evaluator's scoring response.
 * JSON is tried first (the raw text, a fenced code block, or a brace-delimited span);
 * otherwise the text is scanned for "name: number" pairs, in which case only
 * `scores` is populated.
 */
export declare function parseScoreResponse(response: string): Partial<ScoreParseResult>;
|
|
13
|
+
/**
 * Structured data extracted from an evaluator's instruction-compliance response.
 * parseComplianceResponse returns this as a Partial, so any field may be absent.
 */
export interface ComplianceParseResult {
|
|
14
|
+
/** Instructions whose reported status was "followed" (a placeholder entry when derived from plain text). */
readonly followed: readonly string[];
|
|
15
|
+
/** Instructions whose reported status was "violated" (a placeholder entry when derived from plain text). */
readonly violated: readonly string[];
|
|
16
|
+
/** Instructions with any other status; always empty when derived from plain text. */
readonly notApplicable: readonly string[];
|
|
17
|
+
/** Overall compliance, clamped to the 0-1 range when taken from JSON; undefined for the plain-text fallback. */
readonly overallCompliance: number;
|
|
18
|
+
}
|
|
19
|
+
/**
 * Parse an evaluator's instruction-compliance response.
 * JSON is tried first and its `results` entries are bucketed by status;
 * otherwise the text is scanned for compliance keywords and
 * `overallCompliance` is left undefined.
 */
export declare function parseComplianceResponse(response: string): Partial<ComplianceParseResult>;
|
|
20
|
+
/**
 * Convert parsed compliance into our InstructionCompliance type.
 * Missing list fields become empty arrays and a missing overallCompliance becomes 0.
 */
|
|
21
|
+
export declare function toInstructionCompliance(result: Partial<ComplianceParseResult>): InstructionCompliance;
|
|
22
|
+
/**
 * Convert score parse result into IndividualEvaluation entries.
 * One entry is produced per key of `scores`: the score is clamped to the 0-10 range,
 * `role` is recorded as the evaluator role, and the reasoning is looked up in
 * `reasoningMap` by dimension name (empty string when absent).
 */
|
|
23
|
+
export declare function toIndividualEvaluations(scores: Readonly<Record<string, number>>, role: string, reasoningMap: Readonly<Record<string, string>>): IndividualEvaluation[];
|
|
24
|
+
//# sourceMappingURL=eval-parsers.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"eval-parsers.d.ts","sourceRoot":"","sources":["../../../src/server/services/eval-parsers.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EACV,oBAAoB,EACpB,qBAAqB,EACtB,MAAM,wBAAwB,CAAC;AAGhC,OAAO,EAAE,sBAAsB,EAAE,mBAAmB,EAAE,MAAM,+BAA+B,CAAC;AAC5F,YAAY,EAAE,OAAO,EAAE,iBAAiB,EAAE,MAAM,+BAA+B,CAAC;AAMhF,MAAM,WAAW,gBAAgB;IAC/B,QAAQ,CAAC,MAAM,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;IAClD,QAAQ,CAAC,gBAAgB,EAAE,MAAM,CAAC;IAClC,QAAQ,CAAC,cAAc,EAAE,SAAS,MAAM,EAAE,CAAC;IAC3C,QAAQ,CAAC,SAAS,EAAE,SAAS,MAAM,EAAE,CAAC;IACtC,QAAQ,CAAC,UAAU,EAAE,SAAS,MAAM,EAAE,CAAC;IACvC,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;CAC1B;AAED,wBAAgB,kBAAkB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,CAAC,CAa9E;AAMD,MAAM,WAAW,qBAAqB;IACpC,QAAQ,CAAC,QAAQ,EAAE,SAAS,MAAM,EAAE,CAAC;IACrC,QAAQ,CAAC,QAAQ,EAAE,SAAS,MAAM,EAAE,CAAC;IACrC,QAAQ,CAAC,aAAa,EAAE,SAAS,MAAM,EAAE,CAAC;IAC1C,QAAQ,CAAC,iBAAiB,EAAE,MAAM,CAAC;CACpC;AAED,wBAAgB,uBAAuB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,qBAAqB,CAAC,CAMxF;AAED,qEAAqE;AACrE,wBAAgB,uBAAuB,CACrC,MAAM,EAAE,OAAO,CAAC,qBAAqB,CAAC,GACrC,qBAAqB,CAOvB;AAMD,oEAAoE;AACpE,wBAAgB,uBAAuB,CACrC,MAAM,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,EACxC,IAAI,EAAE,MAAM,EACZ,YAAY,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,GAC7C,oBAAoB,EAAE,CAOxB"}
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
// ---------------------------------------------------------------------------
|
|
2
|
+
// Evaluation Response Parsers — extract structured data from LLM responses
|
|
3
|
+
// ---------------------------------------------------------------------------
|
|
4
|
+
// Re-export debate/synthesis parsers so existing imports continue to work
|
|
5
|
+
export { parseSynthesisResponse, parseDebateResponse } from './eval-parsers-debate-impl.js';
|
|
6
|
+
/**
 * Parse an evaluator's scoring response into a (possibly partial) score result.
 *
 * Structured JSON is preferred. When no JSON object can be recovered from the
 * response, the text is scanned for "name: number" pairs instead.
 *
 * @param {string} response Raw evaluator output.
 * @returns {object} Partial score result; with the text fallback only `scores` is set.
 */
export function parseScoreResponse(response) {
    const parsed = tryParseJson(response);
    // JSON.parse also succeeds on bare numbers, quoted strings and arrays. Those
    // carry none of the expected fields, so treating them as a structured result
    // would report an all-empty evaluation instead of trying the text fallback.
    const isJsonObject = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
    if (!isJsonObject) {
        return parseScoreFromText(response);
    }
    return {
        scores: validScores(parsed.scores),
        overallCloseness: clamp01(parsed.overallCloseness),
        missedCritical: toStringArray(parsed.missedCritical),
        strengths: toStringArray(parsed.strengths),
        weaknesses: toStringArray(parsed.weaknesses),
        summary: typeof parsed.summary === 'string' ? parsed.summary : undefined,
    };
}
|
|
20
|
+
/**
 * Parse an evaluator's instruction-compliance response.
 *
 * Structured JSON is preferred. When no JSON object can be recovered from the
 * response, a keyword scan of the raw text is used instead.
 *
 * @param {string} response Raw evaluator output.
 * @returns {object} Partial compliance result.
 */
export function parseComplianceResponse(response) {
    const parsed = tryParseJson(response);
    // Only a JSON object can carry `results` / `overallCompliance`. Bare numbers,
    // strings and arrays are valid JSON too, but must go through the text fallback.
    const isJsonObject = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
    return isJsonObject
        ? categorizeComplianceResults(parsed)
        : parseComplianceFromText(response);
}
|
|
27
|
+
/**
 * Build a complete InstructionCompliance record from a partial parse result.
 * Any list that is missing becomes an empty array; a missing overall
 * compliance value becomes 0.
 */
export function toInstructionCompliance(result) {
    const followed = result.followed ?? [];
    const violated = result.violated ?? [];
    const notApplicable = result.notApplicable ?? [];
    const overallCompliance = result.overallCompliance ?? 0;
    return { followed, violated, notApplicable, overallCompliance };
}
|
|
36
|
+
// ---------------------------------------------------------------------------
|
|
37
|
+
// Helpers
|
|
38
|
+
// ---------------------------------------------------------------------------
|
|
39
|
+
/**
 * Turn a dimension -> score map into a list of IndividualEvaluation entries.
 * Each entry records the evaluator role, the dimension name, the score clamped
 * to the 0-10 range, and the reasoning found for that dimension ('' when none).
 */
export function toIndividualEvaluations(scores, role, reasoningMap) {
    const evaluations = [];
    for (const [dimension, rawScore] of Object.entries(scores)) {
        evaluations.push({
            evaluatorRole: role,
            dimension,
            score: clampScore(rawScore),
            reasoning: reasoningMap[dimension] ?? '',
        });
    }
    return evaluations;
}
|
|
48
|
+
// ---------------------------------------------------------------------------
|
|
49
|
+
// Internal: JSON parsing
|
|
50
|
+
// ---------------------------------------------------------------------------
|
|
51
|
+
/**
 * Parse `candidate` as JSON and return it only when it is a plain JSON object.
 * Invalid JSON, primitives, `null` and arrays all yield `undefined`.
 */
function parseJsonObject(candidate) {
    try {
        const value = JSON.parse(candidate);
        const isObject = value !== null && typeof value === 'object' && !Array.isArray(value);
        return isObject ? value : undefined;
    }
    catch {
        // Not valid JSON: the caller moves on to its next candidate.
        return undefined;
    }
}
/**
 * Find the index of the `}` that closes the `{` at `start`, skipping braces
 * that appear inside double-quoted strings. Returns -1 when it is never closed.
 */
function findBalancedObjectEnd(text, start) {
    let depth = 0;
    let inString = false;
    for (let i = start; i < text.length; i += 1) {
        const ch = text[i];
        if (inString) {
            if (ch === '\\') {
                i += 1; // skip the escaped character so \" does not end the string
            }
            else if (ch === '"') {
                inString = false;
            }
        }
        else if (ch === '"') {
            inString = true;
        }
        else if (ch === '{') {
            depth += 1;
        }
        else if (ch === '}') {
            depth -= 1;
            if (depth === 0) {
                return i;
            }
        }
    }
    return -1;
}
/**
 * Best-effort extraction of a JSON object from an LLM response.
 *
 * Candidates are tried in this order, and the first one that parses to a JSON
 * object wins:
 *   1. the whole text;
 *   2. the contents of every Markdown code fence, not just the first;
 *   3. every balanced `{...}` span, scanning left to right. Prose braces
 *      before the JSON or a stray `}` after it no longer defeat extraction.
 *
 * @param {string} text Raw response text.
 * @returns {object | undefined} The parsed object, or `undefined` when none is found.
 */
function tryParseJson(text) {
    const whole = parseJsonObject(text);
    if (whole) {
        return whole;
    }
    for (const [, fenced] of text.matchAll(/```(?:json)?\s*([\s\S]*?)```/g)) {
        const fromFence = parseJsonObject(fenced);
        if (fromFence) {
            return fromFence;
        }
    }
    for (let start = text.indexOf('{'); start !== -1; start = text.indexOf('{', start + 1)) {
        const end = findBalancedObjectEnd(text, start);
        if (end === -1) {
            continue;
        }
        const fromSpan = parseJsonObject(text.slice(start, end + 1));
        if (fromSpan) {
            return fromSpan;
        }
    }
    return undefined;
}
|
|
78
|
+
// ---------------------------------------------------------------------------
|
|
79
|
+
// Internal: Text fallback parsers
|
|
80
|
+
// ---------------------------------------------------------------------------
|
|
81
|
+
/**
 * Fallback score extraction for responses that contain no usable JSON.
 *
 * Scans the text for "Dimension name: 7", "Dimension name: 7.5/10" and similar
 * pairs. Values above 10 are ignored. When a name occurs more than once, the
 * last occurrence wins.
 *
 * @param {string} text Raw response text.
 * @returns {{ scores: (Object<string, number> | undefined) }} `scores` is
 *   `undefined` when no pair was found.
 */
function parseScoreFromText(text) {
    // The dimension name may contain word characters and spaces but must stay on
    // one line. Allowing line breaks (plain \s) glued a preceding heading line
    // onto the name, e.g. "Summary\nAccuracy".
    const scorePattern = /(\w(?:\w|[^\S\r\n])*?):\s*(\d+(?:\.\d+)?)\s*(?:\/\s*10)?/g;
    const scores = {};
    for (const [, rawName, rawValue] of text.matchAll(scorePattern)) {
        const value = Number.parseFloat(rawValue);
        if (!Number.isNaN(value) && value <= 10) {
            scores[rawName.trim()] = value;
        }
    }
    return { scores: Object.keys(scores).length > 0 ? scores : undefined };
}
|
|
93
|
+
/**
 * Fallback compliance extraction for responses that contain no usable JSON.
 *
 * This is a coarse keyword scan: it can only tell whether the text mentions
 * compliance and/or violation, so each list holds at most one placeholder
 * entry and `overallCompliance` is left undefined.
 *
 * @param {string} text Raw response text.
 * @returns {{ followed: string[], violated: string[], notApplicable: string[], overallCompliance: undefined }}
 */
function parseComplianceFromText(text) {
    const placeholder = '(extracted from text)';
    // "non-compliant", "non compliant" and "noncompliant" all signal a violation.
    const mentionsViolation = /violated|\bnon[-\s]?compliant/i.test(text);
    // The lookbehind keeps the "compliant" inside "non-compliant" from also
    // counting as evidence that instructions were followed.
    const mentionsCompliance = /followed|(?<!\bnon[-\s]?)compliant/i.test(text);
    return {
        followed: mentionsCompliance ? [placeholder] : [],
        violated: mentionsViolation ? [placeholder] : [],
        notApplicable: [],
        overallCompliance: undefined,
    };
}
|
|
102
|
+
// ---------------------------------------------------------------------------
|
|
103
|
+
// Internal: Utilities
|
|
104
|
+
// ---------------------------------------------------------------------------
|
|
105
|
+
/**
 * Keep only the numeric, non-NaN entries of a parsed `scores` value and clamp
 * each of them to the 0-10 range. Anything that is not an object yields {}.
 */
function validScores(scores) {
    const usable = Boolean(scores) && typeof scores === 'object';
    if (!usable) {
        return {};
    }
    const cleaned = {};
    Object.entries(scores).forEach(([name, value]) => {
        const isNumeric = typeof value === 'number' && !isNaN(value);
        if (isNumeric) {
            cleaned[name] = clampScore(value);
        }
    });
    return cleaned;
}
|
|
115
|
+
/**
 * Clamp a value to the closed range [0, 1].
 * `undefined` and anything that coerces to NaN yield 0.
 */
function clamp01(val) {
    if (val !== undefined && !isNaN(val)) {
        const capped = Math.min(1, val);
        return Math.max(0, capped);
    }
    return 0;
}
|
|
120
|
+
/**
 * Clamp a value to the closed range [0, 10].
 * `undefined` and anything that coerces to NaN yield 0.
 */
function clampScore(val) {
    if (val !== undefined && !isNaN(val)) {
        const capped = Math.min(10, val);
        return Math.max(0, capped);
    }
    return 0;
}
|
|
125
|
+
/**
 * Return the string elements of `arr` in their original order.
 * Any non-array input yields an empty array.
 */
function toStringArray(arr) {
    const strings = [];
    if (Array.isArray(arr)) {
        for (const item of arr) {
            if (typeof item === 'string') {
                strings.push(item);
            }
        }
    }
    return strings;
}
|
|
130
|
+
/**
 * Bucket the entries of a parsed compliance response by their status.
 *
 * Each entry of `parsed.results` contributes its `instruction` text (or
 * '(unknown)' when missing) to `followed`, `violated`, or, for any other
 * status, `notApplicable`. The status comparison ignores case and surrounding
 * whitespace. Malformed entries (null, primitives, non-string status) come
 * from untrusted LLM output, so they are routed to `notApplicable` instead of
 * throwing.
 *
 * @param {object} parsed Parsed JSON object from the evaluator response.
 * @returns {{ followed: Array, violated: Array, notApplicable: Array, overallCompliance: number }}
 *   `overallCompliance` is clamped to the 0-1 range.
 */
function categorizeComplianceResults(parsed) {
    const followed = [];
    const violated = [];
    const notApplicable = [];
    const entries = Array.isArray(parsed.results) ? parsed.results : [];
    for (const entry of entries) {
        // Optional chaining tolerates null/undefined entries in the array.
        const instruction = entry?.instruction ?? '(unknown)';
        const rawStatus = entry?.status;
        const status = typeof rawStatus === 'string' ? rawStatus.trim().toLowerCase() : '';
        if (status === 'followed') {
            followed.push(instruction);
        }
        else if (status === 'violated') {
            violated.push(instruction);
        }
        else {
            notApplicable.push(instruction);
        }
    }
    return {
        followed,
        violated,
        notApplicable,
        overallCompliance: clamp01(parsed.overallCompliance),
    };
}
|
|
153
|
+
//# sourceMappingURL=eval-parsers.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"eval-parsers.js","sourceRoot":"","sources":["../../../src/server/services/eval-parsers.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,2EAA2E;AAC3E,8EAA8E;AAO9E,0EAA0E;AAC1E,OAAO,EAAE,sBAAsB,EAAE,mBAAmB,EAAE,MAAM,+BAA+B,CAAC;AAgB5F,MAAM,UAAU,kBAAkB,CAAC,QAAgB;IACjD,MAAM,MAAM,GAAG,YAAY,CAAmB,QAAQ,CAAC,CAAC;IACxD,IAAI,MAAM,EAAE,CAAC;QACX,OAAO;YACL,MAAM,EAAE,WAAW,CAAC,MAAM,CAAC,MAAM,CAAC;YAClC,gBAAgB,EAAE,OAAO,CAAC,MAAM,CAAC,gBAAgB,CAAC;YAClD,cAAc,EAAE,aAAa,CAAC,MAAM,CAAC,cAAc,CAAC;YACpD,SAAS,EAAE,aAAa,CAAC,MAAM,CAAC,SAAS,CAAC;YAC1C,UAAU,EAAE,aAAa,CAAC,MAAM,CAAC,UAAU,CAAC;YAC5C,OAAO,EAAE,OAAO,MAAM,CAAC,OAAO,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS;SACzE,CAAC;IACJ,CAAC;IACD,OAAO,kBAAkB,CAAC,QAAQ,CAAC,CAAC;AACtC,CAAC;AAaD,MAAM,UAAU,uBAAuB,CAAC,QAAgB;IACtD,MAAM,MAAM,GAAG,YAAY,CAAwB,QAAQ,CAAC,CAAC;IAC7D,IAAI,MAAM,EAAE,CAAC;QACX,OAAO,2BAA2B,CAAC,MAAM,CAAC,CAAC;IAC7C,CAAC;IACD,OAAO,uBAAuB,CAAC,QAAQ,CAAC,CAAC;AAC3C,CAAC;AAED,qEAAqE;AACrE,MAAM,UAAU,uBAAuB,CACrC,MAAsC;IAEtC,OAAO;QACL,QAAQ,EAAE,MAAM,CAAC,QAAQ,IAAI,EAAE;QAC/B,QAAQ,EAAE,MAAM,CAAC,QAAQ,IAAI,EAAE;QAC/B,aAAa,EAAE,MAAM,CAAC,aAAa,IAAI,EAAE;QACzC,iBAAiB,EAAE,MAAM,CAAC,iBAAiB,IAAI,CAAC;KACjD,CAAC;AACJ,CAAC;AAED,8EAA8E;AAC9E,UAAU;AACV,8EAA8E;AAE9E,oEAAoE;AACpE,MAAM,UAAU,uBAAuB,CACrC,MAAwC,EACxC,IAAY,EACZ,YAA8C;IAE9C,OAAO,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,SAAS,EAAE,KAAK,CAAC,EAAE,EAAE,CAAC,CAAC;QACzD,aAAa,EAAE,IAAI;QACnB,SAAS;QACT,KAAK,EAAE,UAAU,CAAC,KAAK,CAAC;QACxB,SAAS,EAAE,YAAY,CAAC,SAAS,CAAC,IAAI,EAAE;KACzC,CAAC,CAAC,CAAC;AACN,CAAC;AAED,8EAA8E;AAC9E,yBAAyB;AACzB,8EAA8E;AAE9E,SAAS,YAAY,CAAI,IAAY;IACnC,IAAI,CAAC;QACH,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAM,CAAC;IAC/B,CAAC;IAAC,MAAM,CAAC;QACP,eAAe;IACjB,CAAC;IACD,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,8BAA8B,CAAC,CAAC;IAC7D,IAAI,SAAS,EAAE,CAAC;QACd,IAAI,CAAC;YACH,OAAO,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,CAAM,CAAC;QACvC,CAAC;QAAC,MAAM,CAAC;YACP,eAAe;QACjB,CAAC;IACH,CAAC;IACD,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,aAAa,CAAC,
CAAC;IAC7C,IAAI,UAAU,EAAE,CAAC;QACf,IAAI,CAAC;YACH,OAAO,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC,CAAC,CAAM,CAAC;QACxC,CAAC;QAAC,MAAM,CAAC;YACP,eAAe;QACjB,CAAC;IACH,CAAC;IACD,OAAO,SAAS,CAAC;AACnB,CAAC;AAED,8EAA8E;AAC9E,kCAAkC;AAClC,8EAA8E;AAE9E,SAAS,kBAAkB,CAAC,IAAY;IACtC,MAAM,MAAM,GAA2B,EAAE,CAAC;IAC1C,MAAM,YAAY,GAAG,iDAAiD,CAAC;IACvE,IAAI,KAA6B,CAAC;IAClC,OAAO,CAAC,KAAK,GAAG,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QAClD,MAAM,GAAG,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAC5B,MAAM,GAAG,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;QACjC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,GAAG,IAAI,EAAE;YAAE,MAAM,CAAC,GAAG,CAAC,GAAG,GAAG,CAAC;IAClD,CAAC;IACD,OAAO,EAAE,MAAM,EAAE,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS,EAAE,CAAC;AACzE,CAAC;AAED,SAAS,uBAAuB,CAAC,IAAY;IAC3C,MAAM,QAAQ,GAAa,EAAE,CAAC;IAC9B,MAAM,QAAQ,GAAa,EAAE,CAAC;IAC9B,IAAI,qBAAqB,CAAC,IAAI,CAAC,IAAI,CAAC;QAAE,QAAQ,CAAC,IAAI,CAAC,uBAAuB,CAAC,CAAC;IAC7E,IAAI,yBAAyB,CAAC,IAAI,CAAC,IAAI,CAAC;QAAE,QAAQ,CAAC,IAAI,CAAC,uBAAuB,CAAC,CAAC;IACjF,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,aAAa,EAAE,EAAE,EAAE,iBAAiB,EAAE,SAAS,EAAE,CAAC;AACjF,CAAC;AAoBD,8EAA8E;AAC9E,sBAAsB;AACtB,8EAA8E;AAE9E,SAAS,WAAW,CAClB,MAA0C;IAE1C,IAAI,CAAC,MAAM,IAAI,OAAO,MAAM,KAAK,QAAQ;QAAE,OAAO,EAAE,CAAC;IACrD,MAAM,MAAM,GAA2B,EAAE,CAAC;IAC1C,KAAK,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;QAC5C,IAAI,OAAO,CAAC,KAAK,QAAQ,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;YAAE,MAAM,CAAC,CAAC,CAAC,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;IACpE,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,SAAS,OAAO,CAAC,GAAuB;IACtC,IAAI,GAAG,KAAK,SAAS,IAAI,KAAK,CAAC,GAAG,CAAC;QAAE,OAAO,CAAC,CAAC;IAC9C,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC;AACvC,CAAC;AAED,SAAS,UAAU,CAAC,GAAuB;IACzC,IAAI,GAAG,KAAK,SAAS,IAAI,KAAK,CAAC,GAAG,CAAC;QAAE,OAAO,CAAC,CAAC;IAC9C,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,GAAG,CAAC,CAAC,CAAC;AACxC,CAAC;AAED,SAAS,aAAa,CAAC,GAAY;IACjC,IAAI,CAAC,KAAK,C
AAC,OAAO,CAAC,GAAG,CAAC;QAAE,OAAO,EAAE,CAAC;IACnC,OAAO,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,EAAe,EAAE,CAAC,OAAO,CAAC,KAAK,QAAQ,CAAC,CAAC;AAC/D,CAAC;AAED,SAAS,2BAA2B,CAClC,MAA6B;IAE7B,MAAM,QAAQ,GAAa,EAAE,CAAC;IAC9B,MAAM,QAAQ,GAAa,EAAE,CAAC;IAC9B,MAAM,aAAa,GAAa,EAAE,CAAC;IAEnC,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,EAAE,CAAC;QAClC,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;YAC/B,MAAM,IAAI,GAAG,CAAC,CAAC,WAAW,IAAI,WAAW,CAAC;YAC1C,MAAM,MAAM,GAAG,CAAC,CAAC,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC;YAC9C,IAAI,MAAM,KAAK,UAAU;gBAAE,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;iBAC1C,IAAI,MAAM,KAAK,UAAU;gBAAE,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;;gBAC/C,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAChC,CAAC;IACH,CAAC;IAED,OAAO;QACL,QAAQ;QACR,QAAQ;QACR,aAAa;QACb,iBAAiB,EAAE,OAAO,CAAC,MAAM,CAAC,iBAAiB,CAAC;KACrD,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import type { Scenario, Provider } from '../types/index.js';
|
|
2
|
+
import type { IndividualEvaluation } from '../types/evaluation.js';
|
|
3
|
+
import type { TranscriptSummary } from './transcript-formatter.js';
|
|
4
|
+
import type { InstructionBlock } from './instruction-parser.js';
|
|
5
|
+
/** Builds the scoring prompt: task, expected answer, critical requirements, grading guidelines, scoring dimensions, behavior summary and full transcript, followed by the JSON structure the evaluator must return. */
export declare function buildScorePrompt(transcript: string, scenario: Scenario, summary: TranscriptSummary): string;
|
|
6
|
+
/** Builds the instruction-compliance prompt: lists the scenario's skill and subagent names, the numbered instruction blocks and the transcript, and asks for a followed / violated / not_applicable verdict per instruction. */
export declare function buildCompliancePrompt(transcript: string, scenario: Scenario, instructions: readonly InstructionBlock[]): string;
|
|
7
|
+
/** Builds the prompt for one debate round: shows the evaluator its own previous assessment next to the other evaluators' assessments and asks for a verdict, optional updated scores, critiques and reasoning as JSON. */
export declare function buildDebatePrompt(myPreviousAssessment: string, otherAssessments: readonly string[], roundNumber: number): string;
|
|
8
|
+
/** Builds the final synthesis prompt: summarizes every individual evaluation together with the scenario, its scoring dimensions and the provider name/model, and asks for final dimension scores, a weighted total, a confidence value and dissenting points as JSON. */
export declare function buildSynthesisPrompt(allEvaluations: readonly IndividualEvaluation[], scenario: Scenario, provider: Provider): string;
|
|
9
|
+
//# sourceMappingURL=eval-prompts.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"eval-prompts.d.ts","sourceRoot":"","sources":["../../../src/server/services/eval-prompts.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,QAAQ,EAAoB,QAAQ,EAAE,MAAM,mBAAmB,CAAC;AAC9E,OAAO,KAAK,EAAE,oBAAoB,EAAE,MAAM,wBAAwB,CAAC;AACnE,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AACnE,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,yBAAyB,CAAC;AAMhE,wBAAgB,gBAAgB,CAC9B,UAAU,EAAE,MAAM,EAClB,QAAQ,EAAE,QAAQ,EAClB,OAAO,EAAE,iBAAiB,GACzB,MAAM,CA+CR;AAMD,wBAAgB,qBAAqB,CACnC,UAAU,EAAE,MAAM,EAClB,QAAQ,EAAE,QAAQ,EAClB,YAAY,EAAE,SAAS,gBAAgB,EAAE,GACxC,MAAM,CAmCR;AAMD,wBAAgB,iBAAiB,CAC/B,oBAAoB,EAAE,MAAM,EAC5B,gBAAgB,EAAE,SAAS,MAAM,EAAE,EACnC,WAAW,EAAE,MAAM,GAClB,MAAM,CAyBR;AAMD,wBAAgB,oBAAoB,CAClC,cAAc,EAAE,SAAS,oBAAoB,EAAE,EAC/C,QAAQ,EAAE,QAAQ,EAClB,QAAQ,EAAE,QAAQ,GACjB,MAAM,CAkCR"}
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
// ---------------------------------------------------------------------------
|
|
2
|
+
// Evaluation Prompt Builders
|
|
3
|
+
// ---------------------------------------------------------------------------
|
|
4
|
+
// ---------------------------------------------------------------------------
|
|
5
|
+
// Query 1: Score + answer comparison prompt
|
|
6
|
+
// ---------------------------------------------------------------------------
|
|
7
|
+
/**
 * Builds the "Query 1" prompt: per-dimension scoring plus a comparison of the
 * agent's final answer against the scenario's expected answer.
 *
 * @param {string} transcript - Formatted transcript text, embedded verbatim
 *   under "Full Transcript".
 * @param {object} scenario - Reads `prompt`, `expectedAnswer`,
 *   `criticalRequirements`, `gradingGuidelines` and `scoringDimensions`.
 * @param {object} summary - Reads `toolCallSequence`, `filesRead`,
 *   `filesModified`, `commandFailures` (only its length is reported) and
 *   `askedClarifyingQuestions`.
 * @returns {string} Prompt text ending with the JSON structure the evaluator
 *   is asked to respond with.
 */
export function buildScorePrompt(transcript, scenario, summary) {
    // Renders a list of names, or the literal "none" when the list is empty.
    const listOrNone = (items) => (items.length > 0 ? items.join(', ') : 'none');
    const dimensions = formatDimensions(scenario.scoringDimensions);
    const toolSequence = summary.toolCallSequence.length > 0
        ? `Tool call sequence: ${summary.toolCallSequence.join(' → ')}`
        : 'No tool calls recorded.';
    // A missing expected answer would otherwise be interpolated as the literal
    // text "undefined" (and an empty one would leave a blank section), so use an
    // explicit placeholder, mirroring the grading-guidelines fallback below.
    const expectedAnswer = scenario.expectedAnswer || 'No expected answer provided.';
    return `You are an expert evaluator assessing an AI agent's performance on a task.

## Task Description
${scenario.prompt}

## Expected Answer
${expectedAnswer}

## Critical Requirements
${formatCriticalRequirements(scenario.criticalRequirements)}

## Grading Guidelines
${scenario.gradingGuidelines || 'No specific grading guidelines provided.'}

## Scoring Dimensions
${dimensions}

## Agent Behavior Summary
${toolSequence}
Files read: ${listOrNone(summary.filesRead)}
Files modified: ${listOrNone(summary.filesModified)}
Command failures: ${summary.commandFailures.length}
Asked clarifying questions: ${summary.askedClarifyingQuestions ? 'yes' : 'no'}

## Full Transcript
${transcript}

## Instructions
Evaluate the agent's output. For each scoring dimension, provide a score from 0-10.
Also assess how closely the agent's final answer matches the expected answer (0.0-1.0).
Identify any critical requirements that were missed, as well as strengths and weaknesses.

Respond with valid JSON matching this structure:
{
"scores": { "<dimension_name>": <0-10>, ... },
"overallCloseness": <0.0-1.0>,
"missedCritical": ["<requirement that was not met>", ...],
"strengths": ["<strength>", ...],
"weaknesses": ["<weakness>", ...],
"summary": "<brief overall assessment>"
}`;
}
|
|
54
|
+
// ---------------------------------------------------------------------------
|
|
55
|
+
// Query 2: Instruction compliance prompt
|
|
56
|
+
// ---------------------------------------------------------------------------
|
|
57
|
+
/**
 * Builds the "Query 2" prompt that asks an evaluator whether the agent followed
 * each of its configured instructions.
 *
 * @param {string} transcript - Formatted transcript text, embedded verbatim.
 * @param {object} scenario - Reads the `name` of every entry in `skills` and
 *   `subagents`.
 * @param {ReadonlyArray<object>} instructions - Instruction blocks; `source`
 *   and `text` of each are rendered as a numbered list.
 * @returns {string} Prompt text ending with the expected JSON response shape.
 */
export function buildCompliancePrompt(transcript, scenario, instructions) {
    // Numbered checklist, one line per instruction block.
    const numbered = [];
    for (const [position, block] of instructions.entries()) {
        numbered.push(`${position + 1}. [${block.source}] ${block.text}`);
    }
    const checklist = numbered.join('\n');

    const skillList = scenario.skills.map(({ name }) => name);
    const subagentList = scenario.subagents.map(({ name }) => name);
    const skillsLine = skillList.length > 0 ? skillList.join(', ') : 'none';
    const subagentsLine = subagentList.length > 0 ? subagentList.join(', ') : 'none';

    // Each entry is one blank-line-separated section of the prompt.
    const sections = [
        'You are an expert evaluator checking whether an AI agent followed its configured instructions.',
        `## Agent Configuration\nSkills: ${skillsLine}\nSubagents: ${subagentsLine}`,
        `## Instructions to Check\n${checklist || 'No instructions configured.'}`,
        `## Agent Transcript\n${transcript}`,
        [
            '## Instructions',
            'For each instruction listed above, determine if the agent:',
            '- "followed" it (clear evidence of compliance)',
            '- "violated" it (clear evidence of non-compliance)',
            '- "not_applicable" (instruction was not relevant to this task)',
        ].join('\n'),
        'Also rate overall compliance from 0.0 to 1.0.',
        [
            'Respond with valid JSON:',
            '{',
            '"results": [',
            '{ "instruction": "<instruction text>", "status": "followed|violated|not_applicable", "evidence": "<brief evidence>" }',
            '],',
            '"overallCompliance": <0.0-1.0>',
            '}',
        ].join('\n'),
    ];
    return sections.join('\n\n');
}
|
|
91
|
+
// ---------------------------------------------------------------------------
|
|
92
|
+
// Multi-round debate prompt
|
|
93
|
+
// ---------------------------------------------------------------------------
|
|
94
|
+
/**
 * Builds the prompt for one debate round, in which an evaluator re-examines its
 * own assessment against the assessments of the other evaluators.
 *
 * @param {string} myPreviousAssessment - This evaluator's latest assessment text.
 * @param {ReadonlyArray<string>} otherAssessments - Assessment texts of the
 *   other evaluators; rendered as numbered "Other Evaluator" subsections.
 * @param {number} roundNumber - Round number shown in the opening line.
 * @returns {string} Prompt text ending with the expected JSON response shape.
 */
export function buildDebatePrompt(myPreviousAssessment, otherAssessments, roundNumber) {
    const othersFormatted = otherAssessments
        .map((assessment, index) => `### Other Evaluator ${index + 1}\n${assessment}`)
        .join('\n\n');
    // With no peers the section would be blank even though the instructions ask
    // the model to review it; say so explicitly, as the compliance prompt does
    // for an empty instruction list.
    const othersSection = othersFormatted || 'No other assessments available.';
    return `You are participating in round ${roundNumber} of a multi-evaluator review.

## Your Previous Assessment
${myPreviousAssessment}

## Other Evaluators' Assessments
${othersSection}

## Instructions
Review the other evaluators' assessments and compare them with your own.
Determine if you agree, partially agree, or disagree with the emerging consensus.
Provide your updated scores if you have changed your mind, and explain why.

Respond with valid JSON:
{
"verdict": "AGREE|DISAGREE|PARTIAL",
"updatedScores": { "<dimension_name>": <0-10>, ... },
"critiques": ["<specific point of disagreement>", ...],
"reasoning": "<why you agree/disagree>"
}`;
}
|
|
119
|
+
// ---------------------------------------------------------------------------
|
|
120
|
+
// Synthesis prompt (final aggregation)
|
|
121
|
+
// ---------------------------------------------------------------------------
|
|
122
|
+
/**
 * Builds the final aggregation prompt that asks one evaluator to merge all
 * individual evaluations into final scores.
 *
 * @param {ReadonlyArray<object>} allEvaluations - Individual evaluations; reads
 *   `evaluatorRole`, `dimension`, `score` and (when present) `reasoning`.
 * @param {object} scenario - Reads `name`, `prompt` and `scoringDimensions`.
 * @param {object} provider - Reads `name` and `provider.model`.
 * @returns {string} Prompt text ending with the expected JSON response shape.
 */
export function buildSynthesisPrompt(allEvaluations, scenario, provider) {
    const evalSummaries = allEvaluations
        .map((evaluation) => {
            const headline = `[${evaluation.evaluatorRole}] ${evaluation.dimension}: ${evaluation.score}/10`;
            // Evaluations may carry no reasoning; skip the suffix instead of
            // printing "— undefined" into the prompt.
            return evaluation.reasoning ? `${headline} — ${evaluation.reasoning}` : headline;
        })
        .join('\n');
    const dimensions = formatDimensions(scenario.scoringDimensions);
    return `You are the final synthesizer for a multi-evaluator assessment.

## Scenario
${scenario.name}: ${scenario.prompt}

## Scoring Dimensions & Weights
${dimensions}

## Provider
Name: ${provider.name}
Model: ${provider.provider.model}

## All Individual Evaluations
${evalSummaries || 'No individual evaluations available.'}

## Instructions
Synthesize all evaluations into final scores. Weight each dimension according to the scoring
dimensions defined above. Identify areas of evaluator consensus and disagreement.
Provide a confidence level (0.0-1.0) based on evaluator agreement.

Respond with valid JSON:
{
"dimensionScores": { "<dimension_name>": <0-10>, ... },
"weightedTotal": <weighted average 0-10>,
"confidence": <0.0-1.0>,
"dissenting": ["<areas where evaluators disagreed>", ...]
}`;
}
|
|
155
|
+
// ---------------------------------------------------------------------------
|
|
156
|
+
// Internal helpers
|
|
157
|
+
// ---------------------------------------------------------------------------
|
|
158
|
+
/**
 * Renders scoring dimensions as a bullet list, one
 * "- name (weight: w): description" line per dimension.
 *
 * @param {ReadonlyArray<object>} dims - Dimensions; reads `name`, `weight`
 *   and `description`.
 * @returns {string} Newline-joined bullets, or a fixed fallback sentence when
 *   no dimensions are defined.
 */
function formatDimensions(dims) {
    if (dims.length > 0) {
        const bullets = [];
        for (const { name, weight, description } of dims) {
            bullets.push(`- ${name} (weight: ${weight}): ${description}`);
        }
        return bullets.join('\n');
    }
    return 'No dimensions defined. Use general quality assessment.';
}
|
|
165
|
+
/**
 * Renders critical requirements as a 1-based numbered list.
 *
 * @param {ReadonlyArray<string>} reqs - Requirement texts.
 * @returns {string} Newline-joined "n. text" lines, or "None specified." when
 *   the list is empty.
 */
function formatCriticalRequirements(reqs) {
    const numbered = reqs.map((requirement, position) => `${position + 1}. ${requirement}`);
    return numbered.length === 0 ? 'None specified.' : numbered.join('\n');
}
|
|
170
|
+
//# sourceMappingURL=eval-prompts.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"eval-prompts.js","sourceRoot":"","sources":["../../../src/server/services/eval-prompts.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,6BAA6B;AAC7B,8EAA8E;AAO9E,8EAA8E;AAC9E,4CAA4C;AAC5C,8EAA8E;AAE9E,MAAM,UAAU,gBAAgB,CAC9B,UAAkB,EAClB,QAAkB,EAClB,OAA0B;IAE1B,MAAM,UAAU,GAAG,gBAAgB,CAAC,QAAQ,CAAC,iBAAiB,CAAC,CAAC;IAChE,MAAM,YAAY,GAAG,OAAO,CAAC,gBAAgB,CAAC,MAAM,GAAG,CAAC;QACtD,CAAC,CAAC,uBAAuB,OAAO,CAAC,gBAAgB,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE;QAC/D,CAAC,CAAC,yBAAyB,CAAC;IAE9B,OAAO;;;EAGP,QAAQ,CAAC,MAAM;;;EAGf,QAAQ,CAAC,cAAc;;;EAGvB,0BAA0B,CAAC,QAAQ,CAAC,oBAAoB,CAAC;;;EAGzD,QAAQ,CAAC,iBAAiB,IAAI,0CAA0C;;;EAGxE,UAAU;;;EAGV,YAAY;cACA,OAAO,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM;kBAChE,OAAO,CAAC,aAAa,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM;oBAC1E,OAAO,CAAC,eAAe,CAAC,MAAM;8BACpB,OAAO,CAAC,wBAAwB,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI;;;EAG3E,UAAU;;;;;;;;;;;;;;;EAeV,CAAC;AACH,CAAC;AAED,8EAA8E;AAC9E,yCAAyC;AACzC,8EAA8E;AAE9E,MAAM,UAAU,qBAAqB,CACnC,UAAkB,EAClB,QAAkB,EAClB,YAAyC;IAEzC,MAAM,eAAe,GAAG,YAAY;SACjC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC,IAAI,EAAE,CAAC;SAClD,IAAI,CAAC,IAAI,CAAC,CAAC;IAEd,MAAM,UAAU,GAAG,QAAQ,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;IACtD,MAAM,aAAa,GAAG,QAAQ,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;IAE5D,OAAO;;;UAGC,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM;aACnD,aAAa,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM;;;EAGvE,eAAe,IAAI,6BAA6B;;;EAGhD,UAAU;;;;;;;;;;;;;;;;EAgBV,CAAC;AACH,CAAC;AAED,8EAA8E;AAC9E,4BAA4B;AAC5B,8EAA8E;AAE9E,MAAM,UAAU,iBAAiB,CAC/B,oBAA4B,EAC5B,gBAAmC,EACnC,WAAmB;IAEnB,MAAM,eAAe,GAAG,gBAAgB;SACrC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,uBAAuB,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC;SACn
D,IAAI,CAAC,MAAM,CAAC,CAAC;IAEhB,OAAO,kCAAkC,WAAW;;;EAGpD,oBAAoB;;;EAGpB,eAAe;;;;;;;;;;;;;EAaf,CAAC;AACH,CAAC;AAED,8EAA8E;AAC9E,uCAAuC;AACvC,8EAA8E;AAE9E,MAAM,UAAU,oBAAoB,CAClC,cAA+C,EAC/C,QAAkB,EAClB,QAAkB;IAElB,MAAM,aAAa,GAAG,cAAc;SACjC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC,aAAa,KAAK,CAAC,CAAC,SAAS,KAAK,CAAC,CAAC,KAAK,SAAS,CAAC,CAAC,SAAS,EAAE,CAAC;SACjF,IAAI,CAAC,IAAI,CAAC,CAAC;IAEd,MAAM,UAAU,GAAG,gBAAgB,CAAC,QAAQ,CAAC,iBAAiB,CAAC,CAAC;IAEhE,OAAO;;;EAGP,QAAQ,CAAC,IAAI,KAAK,QAAQ,CAAC,MAAM;;;EAGjC,UAAU;;;QAGJ,QAAQ,CAAC,IAAI;SACZ,QAAQ,CAAC,QAAQ,CAAC,KAAK;;;EAG9B,aAAa;;;;;;;;;;;;;EAab,CAAC;AACH,CAAC;AAED,8EAA8E;AAC9E,mBAAmB;AACnB,8EAA8E;AAE9E,SAAS,gBAAgB,CAAC,IAAiC;IACzD,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,wDAAwD,CAAC;IACvF,OAAO,IAAI;SACR,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,KAAK,CAAC,CAAC,IAAI,aAAa,CAAC,CAAC,MAAM,MAAM,CAAC,CAAC,WAAW,EAAE,CAAC;SACjE,IAAI,CAAC,IAAI,CAAC,CAAC;AAChB,CAAC;AAED,SAAS,0BAA0B,CAAC,IAAuB;IACzD,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,iBAAiB,CAAC;IAChD,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACzD,CAAC"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import type { IEvaluator, EvaluationCallbacks } from '../interfaces/evaluator.js';
|
|
2
|
+
import type { Run, Scenario, Provider, Evaluation, EvaluationRequest } from '../types/index.js';
|
|
3
|
+
/**
 * Evaluator implementation exposed through the IEvaluator interface.
 * Only `evaluateRun` is public; the remaining members are private pipeline
 * steps whose signatures are intentionally omitted from this declaration.
 */
export declare class EvaluationOrchestrator implements IEvaluator {
|
|
4
|
+
/** Evaluates one run against its scenario and resolves with the assembled Evaluation. */
evaluateRun(run: Run, scenario: Scenario, provider: Provider, request: EvaluationRequest, callbacks: EvaluationCallbacks): Promise<Evaluation>;
|
|
5
|
+
private runRound1;
|
|
6
|
+
private runDebateRound;
|
|
7
|
+
private runSynthesis;
|
|
8
|
+
private runQuery;
|
|
9
|
+
}
|
|
10
|
+
//# sourceMappingURL=evaluator.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"evaluator.d.ts","sourceRoot":"","sources":["../../../src/server/services/evaluator.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAAE,UAAU,EAAE,mBAAmB,EAAmB,MAAM,4BAA4B,CAAC;AACnG,OAAO,KAAK,EACV,GAAG,EACH,QAAQ,EACR,QAAQ,EACR,UAAU,EACV,iBAAiB,EAMlB,MAAM,mBAAmB,CAAC;AAyC3B,qBAAa,sBAAuB,YAAW,UAAU;IACjD,WAAW,CACf,GAAG,EAAE,GAAG,EACR,QAAQ,EAAE,QAAQ,EAClB,QAAQ,EAAE,QAAQ,EAClB,OAAO,EAAE,iBAAiB,EAC1B,SAAS,EAAE,mBAAmB,GAC7B,OAAO,CAAC,UAAU,CAAC;YAyER,SAAS;YA6BT,cAAc;YAyBd,YAAY;YAiBZ,QAAQ;CA4CvB"}
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
// ---------------------------------------------------------------------------
|
|
2
|
+
// Evaluator — orchestrates evaluation pipeline via SDK query()
|
|
3
|
+
// ---------------------------------------------------------------------------
|
|
4
|
+
import { query } from '@anthropic-ai/claude-agent-sdk';
|
|
5
|
+
import { buildRunEnv } from './env-builder.js';
|
|
6
|
+
import { formatTranscript } from './transcript-formatter.js';
|
|
7
|
+
import { parseAllInstructions } from './instruction-parser.js';
|
|
8
|
+
import { buildScorePrompt, buildCompliancePrompt, buildDebatePrompt, buildSynthesisPrompt, } from './eval-prompts.js';
|
|
9
|
+
import { parseScoreResponse, parseComplianceResponse, parseSynthesisResponse, parseDebateResponse, toIndividualEvaluations, } from './eval-parsers.js';
|
|
10
|
+
import { checkConsensus, buildAnswerComparison, buildCriticalResults, mergeCompliance, } from './eval-helpers.js';
|
|
11
|
+
// ---------------------------------------------------------------------------
|
|
12
|
+
// Implementation
|
|
13
|
+
// ---------------------------------------------------------------------------
|
|
14
|
+
/**
 * Orchestrates a multi-evaluator assessment of one run: a score + compliance
 * round, optional debate rounds, and a final synthesis. Every step is issued
 * through the SDK `query()` call, and cost/token usage is tracked per evaluator.
 */
export class EvaluationOrchestrator {
    /**
     * Evaluates a run end to end and assembles the final evaluation record.
     *
     * @param {object} run - Reads `id` and `messages`.
     * @param {object} scenario - Reads `claudeMdFiles` and `rules` here; also
     *   forwarded to the prompt builders and result helpers.
     * @param {object} provider - Provider under evaluation; forwarded to the
     *   synthesis prompt.
     * @param {object} request - Reads `evaluators` (each with `role` and
     *   `provider`) and `maxRounds`.
     * @param {object} callbacks - `onStatusChange`, `onProgress` and `onMessage`.
     * @returns {Promise<object>} The evaluation; its `id` is returned as an empty
     *   string (NOTE(review): presumably assigned by the caller — confirm).
     * @throws {Error} When `request.evaluators` is empty or an SDK query ends
     *   with a non-success result.
     */
    async evaluateRun(run, scenario, provider, request, callbacks) {
        // Synthesis is run by the last evaluator; without any evaluator the
        // `synthesizer.role` access below would fail with an opaque TypeError.
        if (!request.evaluators?.length) {
            throw new Error('Evaluation request must include at least one evaluator.');
        }
        callbacks.onStatusChange('running');
        callbacks.onProgress('preparing', 'Formatting transcript and parsing instructions...');
        const { text: transcript, summary } = formatTranscript(run.messages);
        const instructions = parseAllInstructions([
            ...scenario.claudeMdFiles.map((c) => ({ content: c.content, source: `CLAUDE.md (${c.role})` })),
            ...scenario.rules.map((r) => ({ content: r.content, source: `rule:${r.name}` })),
        ]);
        // One mutable accumulator per evaluator, index-aligned with request.evaluators.
        const accumulators = request.evaluators.map((e) => ({
            role: e.role, costUsd: 0, tokensIn: 0, tokensOut: 0, rounds: 0,
            scoreResult: {}, complianceResult: {}, assessmentText: '',
        }));
        // ── Round 1 ──────────────────────────────────────────────────────
        callbacks.onProgress('scoring', `Running score and compliance queries (${request.evaluators.length} evaluator${request.evaluators.length > 1 ? 's' : ''})...`);
        const round1Evals = await this.runRound1(request.evaluators, accumulators, transcript, scenario, provider, instructions, summary, callbacks);
        // With a single allowed round there is nothing to debate, so treat it as consensus.
        const round1Consensus = request.maxRounds <= 1 || checkConsensus(accumulators);
        const rounds = [{
                roundNumber: 1, evaluations: round1Evals,
                consensusReached: round1Consensus, timestamp: new Date().toISOString(),
            }];
        // ── Multi-round debate ───────────────────────────────────────────
        if (request.maxRounds > 1 && !round1Consensus) {
            for (let roundNum = 2; roundNum <= request.maxRounds; roundNum++) {
                callbacks.onProgress('debate', `Debate round ${roundNum} of ${request.maxRounds}...`);
                const debateEvals = await this.runDebateRound(request.evaluators, accumulators, roundNum, callbacks);
                const consensus = checkConsensus(accumulators);
                rounds.push({
                    roundNumber: roundNum, evaluations: debateEvals,
                    consensusReached: consensus, timestamp: new Date().toISOString(),
                });
                if (consensus)
                    break;
            }
        }
        // ── Synthesis ────────────────────────────────────────────────────
        callbacks.onProgress('synthesis', 'Synthesizing final scores and confidence...');
        const latestRound = rounds[rounds.length - 1];
        const latestEvals = latestRound.evaluations;
        // The last configured evaluator doubles as the synthesizer.
        const synthesizer = request.evaluators[request.evaluators.length - 1];
        const synthInfo = { phase: 'synthesis', evaluatorRole: synthesizer.role, roundNumber: rounds.length };
        const synthResult = await this.runSynthesis(synthesizer, latestEvals, scenario, provider, accumulators, callbacks, synthInfo);
        // ── Assemble final evaluation ────────────────────────────────────
        const ledger = accumulators.map((a) => ({
            evaluatorRole: a.role, totalCostUsd: a.costUsd,
            totalTokensIn: a.tokensIn, totalTokensOut: a.tokensOut, roundsParticipated: a.rounds,
        }));
        const now = new Date().toISOString();
        callbacks.onProgress('complete', 'Evaluation finished.');
        callbacks.onStatusChange('completed');
        return {
            id: '', runId: run.id, status: 'completed', evaluators: request.evaluators, rounds,
            answerComparison: buildAnswerComparison(accumulators),
            criticalResults: buildCriticalResults(accumulators, scenario),
            setupCompliance: { instructionCompliance: mergeCompliance(accumulators), skillUsage: [], subagentUsage: [] },
            synthesis: {
                dimensionScores: synthResult.dimensionScores ?? {}, weightedTotal: synthResult.weightedTotal ?? 0,
                confidence: synthResult.confidence ?? 0, dissenting: synthResult.dissenting ?? [],
            },
            ledger, totalCostUsd: ledger.reduce((sum, l) => sum + l.totalCostUsd, 0),
            createdAt: now, updatedAt: now,
        };
    }
    // ─── Round 1: Score + Compliance ─────────────────────────────────────
    /**
     * Runs the score query and then the compliance query for every evaluator
     * (evaluators in parallel, the two queries of one evaluator in sequence) and
     * stores the parsed results on the matching accumulator.
     *
     * @returns {Promise<object[]>} Flattened individual evaluations of all evaluators.
     */
    async runRound1(evaluators, accumulators, transcript, scenario, provider, instructions, summary, callbacks) {
        const promises = evaluators.map(async (evaluator, idx) => {
            const acc = accumulators[idx];
            acc.rounds++;
            const scoreInfo = { phase: 'score', evaluatorRole: evaluator.role, roundNumber: 1 };
            const scoreResp = await this.runQuery(evaluator, buildScorePrompt(transcript, scenario, summary), callbacks, scoreInfo);
            this.recordUsage(acc, scoreResp);
            acc.scoreResult = parseScoreResponse(scoreResp.text);
            // The raw score response is what peers see in a later debate round.
            acc.assessmentText = scoreResp.text;
            const compInfo = { phase: 'compliance', evaluatorRole: evaluator.role, roundNumber: 1 };
            const compResp = await this.runQuery(evaluator, buildCompliancePrompt(transcript, scenario, instructions), callbacks, compInfo);
            this.recordUsage(acc, compResp);
            acc.complianceResult = parseComplianceResponse(compResp.text);
            return toIndividualEvaluations(acc.scoreResult.scores ?? {}, evaluator.role, {});
        });
        return (await Promise.all(promises)).flat();
    }
    // ─── Multi-round debate ──────────────────────────────────────────────
    /**
     * Runs one debate round: each evaluator receives its own latest assessment
     * plus the latest assessments of all other evaluators, and may return
     * updated scores that replace its current ones.
     *
     * @returns {Promise<object[]>} Flattened individual evaluations after the round.
     */
    async runDebateRound(evaluators, accumulators, roundNumber, callbacks) {
        const promises = evaluators.map(async (evaluator, idx) => {
            const acc = accumulators[idx];
            acc.rounds++;
            const others = accumulators.filter((_, i) => i !== idx).map((a) => a.assessmentText);
            const info = { phase: 'debate', evaluatorRole: evaluator.role, roundNumber };
            const resp = await this.runQuery(evaluator, buildDebatePrompt(acc.assessmentText, others, roundNumber), callbacks, info);
            this.recordUsage(acc, resp);
            const result = parseDebateResponse(resp.text);
            // Keep the previous scores when the evaluator did not send any updates.
            if (result.updatedScores && Object.keys(result.updatedScores).length > 0) {
                acc.scoreResult = { ...acc.scoreResult, scores: result.updatedScores };
            }
            acc.assessmentText = resp.text;
            return toIndividualEvaluations(acc.scoreResult.scores ?? {}, evaluator.role, {});
        });
        return (await Promise.all(promises)).flat();
    }
    // ─── Synthesis ───────────────────────────────────────────────────────
    /**
     * Runs the synthesis query with the given evaluator and books its usage on
     * the first accumulator whose role matches the synthesizer's role.
     *
     * @returns {Promise<object>} Parsed synthesis response.
     */
    async runSynthesis(synthesizer, allEvals, scenario, provider, accumulators, callbacks, info) {
        const resp = await this.runQuery(synthesizer, buildSynthesisPrompt(allEvals, scenario, provider), callbacks, info);
        const acc = accumulators.find((a) => a.role === synthesizer.role);
        if (acc)
            this.recordUsage(acc, resp);
        return parseSynthesisResponse(resp.text);
    }
    // ─── Usage bookkeeping ───────────────────────────────────────────────
    /**
     * Adds the cost and token counts of one query response to an accumulator.
     *
     * @param {object} acc - Accumulator with `costUsd`, `tokensIn`, `tokensOut`.
     * @param {object} resp - Value returned by `runQuery`.
     */
    recordUsage(acc, resp) {
        acc.costUsd += resp.costUsd;
        acc.tokensIn += resp.tokensIn;
        acc.tokensOut += resp.tokensOut;
    }
    // ─── SDK query wrapper ───────────────────────────────────────────────
    /**
     * Sends one prompt through the SDK with no tools enabled, forwards every
     * streamed message to `callbacks.onMessage`, and extracts the final result.
     *
     * @param {object} evaluator - Reads `provider` (environment and model).
     * @param {string} prompt - Prompt text to send.
     * @param {object} callbacks - Only `onMessage` is used here.
     * @param {object} info - Phase/evaluator/round descriptor passed to `onMessage`.
     * @returns {Promise<{text: string, costUsd: number, tokensIn: number, tokensOut: number}>}
     *   Result text plus usage; all remain empty/zero if the stream ends without
     *   a result message.
     * @throws {Error} When the result message's subtype is not `success`.
     */
    async runQuery(evaluator, prompt, callbacks, info) {
        const q = query({
            prompt,
            options: {
                env: buildRunEnv(evaluator.provider),
                model: evaluator.provider.model,
                tools: [],
                maxTurns: 3,
                permissionMode: 'dontAsk',
                persistSession: false,
            },
        });
        let resultText = '';
        let costUsd = 0;
        let tokensIn = 0;
        let tokensOut = 0;
        for await (const msg of q) {
            const record = msg;
            const sdkRecord = {
                timestamp: new Date().toISOString(),
                message: record,
            };
            callbacks.onMessage(info, sdkRecord);
            if (record['type'] === 'result') {
                const resultMsg = record;
                costUsd = resultMsg.total_cost_usd ?? 0;
                // NOTE(review): counts only `usage.input_tokens` / `usage.output_tokens`;
                // cache read/creation token fields, if reported, are not included — confirm
                // this is the intended ledger definition.
                tokensIn = resultMsg.usage?.input_tokens ?? 0;
                tokensOut = resultMsg.usage?.output_tokens ?? 0;
                if (resultMsg.subtype !== 'success') {
                    const errorDetail = resultMsg.result ?? 'unknown error';
                    throw new Error(`SDK query failed (subtype: ${resultMsg.subtype}): ${errorDetail}`);
                }
                resultText = resultMsg.result ?? '';
            }
        }
        return { text: resultText, costUsd, tokensIn, tokensOut };
    }
}
|
|
156
|
+
//# sourceMappingURL=evaluator.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"evaluator.js","sourceRoot":"","sources":["../../../src/server/services/evaluator.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,+DAA+D;AAC/D,8EAA8E;AAE9E,OAAO,EAAE,KAAK,EAAE,MAAM,gCAAgC,CAAC;AAcvD,OAAO,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAC/C,OAAO,EAAE,gBAAgB,EAAE,MAAM,2BAA2B,CAAC;AAC7D,OAAO,EAAE,oBAAoB,EAAE,MAAM,yBAAyB,CAAC;AAC/D,OAAO,EACL,gBAAgB,EAChB,qBAAqB,EACrB,iBAAiB,EACjB,oBAAoB,GACrB,MAAM,mBAAmB,CAAC;AAC3B,OAAO,EACL,kBAAkB,EAClB,uBAAuB,EACvB,sBAAsB,EACtB,mBAAmB,EACnB,uBAAuB,GACxB,MAAM,mBAAmB,CAAC;AAC3B,OAAO,EACL,cAAc,EACd,qBAAqB,EACrB,oBAAoB,EACpB,eAAe,GAChB,MAAM,mBAAmB,CAAC;AAe3B,8EAA8E;AAC9E,iBAAiB;AACjB,8EAA8E;AAE9E,MAAM,OAAO,sBAAsB;IACjC,KAAK,CAAC,WAAW,CACf,GAAQ,EACR,QAAkB,EAClB,QAAkB,EAClB,OAA0B,EAC1B,SAA8B;QAE9B,SAAS,CAAC,cAAc,CAAC,SAAS,CAAC,CAAC;QACpC,SAAS,CAAC,UAAU,CAAC,WAAW,EAAE,mDAAmD,CAAC,CAAC;QAEvF,MAAM,EAAE,IAAI,EAAE,UAAU,EAAE,OAAO,EAAE,GAAG,gBAAgB,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QACrE,MAAM,YAAY,GAAG,oBAAoB,CAAC;YACxC,GAAG,QAAQ,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,OAAO,EAAE,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,cAAc,CAAC,CAAC,IAAI,GAAG,EAAE,CAAC,CAAC;YAC/F,GAAG,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,OAAO,EAAE,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;SACjF,CAAC,CAAC;QAEH,MAAM,YAAY,GAA2B,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YAC1E,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,OAAO,EAAE,CAAC,EAAE,QAAQ,EAAE,CAAC,EAAE,SAAS,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC;YAC9D,WAAW,EAAE,EAAE,EAAE,gBAAgB,EAAE,EAAE,EAAE,cAAc,EAAE,EAAE;SAC1D,CAAC,CAAC,CAAC;QAEJ,oEAAoE;QACpE,SAAS,CAAC,UAAU,CAAC,SAAS,EAAE,yCAAyC,OAAO,CAAC,UAAU,CAAC,MAAM,aAAa,OAAO,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC;QAC/J,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,SAAS,CACtC,OAAO,CAAC,UAAU,EAAE,YAAY,EAAE,UAAU,EAAE,QAAQ,EAAE,QAAQ,EAAE,YAAY,EAAE,OAAO,EAAE,SAAS,CACnG,CAAC;QACF,MAAM,eAAe,GAAG,OAAO,CAAC,SAAS,IAAI,CAAC,IAAI,cAAc,CAAC,YAAY,CAAC,CAAC;QAC/E,MAAM,MAAM,GAAsB,CAAC;gBACjC,WAAW,EA
AE,CAAC,EAAE,WAAW,EAAE,WAAW;gBACxC,gBAAgB,EAAE,eAAe,EAAE,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;aACvE,CAAC,CAAC;QAEH,oEAAoE;QACpE,IAAI,OAAO,CAAC,SAAS,GAAG,CAAC,IAAI,CAAC,eAAe,EAAE,CAAC;YAC9C,KAAK,IAAI,QAAQ,GAAG,CAAC,EAAE,QAAQ,IAAI,OAAO,CAAC,SAAS,EAAE,QAAQ,EAAE,EAAE,CAAC;gBACjE,SAAS,CAAC,UAAU,CAAC,QAAQ,EAAE,gBAAgB,QAAQ,OAAO,OAAO,CAAC,SAAS,KAAK,CAAC,CAAC;gBACtF,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,UAAU,EAAE,YAAY,EAAE,QAAQ,EAAE,SAAS,CAAC,CAAC;gBACrG,MAAM,SAAS,GAAG,cAAc,CAAC,YAAY,CAAC,CAAC;gBAC/C,MAAM,CAAC,IAAI,CAAC;oBACV,WAAW,EAAE,QAAQ,EAAE,WAAW,EAAE,WAAW;oBAC/C,gBAAgB,EAAE,SAAS,EAAE,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;iBACjE,CAAC,CAAC;gBACH,IAAI,SAAS;oBAAE,MAAM;YACvB,CAAC;QACH,CAAC;QAED,oEAAoE;QACpE,SAAS,CAAC,UAAU,CAAC,WAAW,EAAE,6CAA6C,CAAC,CAAC;QACjF,MAAM,WAAW,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAC9C,MAAM,WAAW,GAAG,WAAW,CAAC,WAAW,CAAC;QAC5C,MAAM,WAAW,GAAG,OAAO,CAAC,UAAU,CAAC,OAAO,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QACtE,MAAM,SAAS,GAAoB,EAAE,KAAK,EAAE,WAAW,EAAE,aAAa,EAAE,WAAW,CAAC,IAAI,EAAE,WAAW,EAAE,MAAM,CAAC,MAAM,EAAE,CAAC;QACvH,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,YAAY,CAAC,WAAW,EAAE,WAAW,EAAE,QAAQ,EAAE,QAAQ,EAAE,YAAY,EAAE,SAAS,EAAE,SAAS,CAAC,CAAC;QAE9H,oEAAoE;QACpE,MAAM,MAAM,GAAsB,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YACzD,aAAa,EAAE,CAAC,CAAC,IAAI,EAAE,YAAY,EAAE,CAAC,CAAC,OAAO;YAC9C,aAAa,EAAE,CAAC,CAAC,QAAQ,EAAE,cAAc,EAAE,CAAC,CAAC,SAAS,EAAE,kBAAkB,EAAE,CAAC,CAAC,MAAM;SACrF,CAAC,CAAC,CAAC;QACJ,MAAM,GAAG,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QACrC,SAAS,CAAC,UAAU,CAAC,UAAU,EAAE,sBAAsB,CAAC,CAAC;QACzD,SAAS,CAAC,cAAc,CAAC,WAAW,CAAC,CAAC;QAEtC,OAAO;YACL,EAAE,EAAE,EAAE,EAAE,KAAK,EAAE,GAAG,CAAC,EAAE,EAAE,MAAM,EAAE,WAAW,EAAE,UAAU,EAAE,OAAO,CAAC,UAAU,EAAE,MAAM;YAClF,gBAAgB,EAAE,qBAAqB,CAAC,YAAY,CAAC;YACrD,eAAe,EAAE,oBAAoB,CAAC,YAAY,EAAE,QAAQ,CAAC;YAC7D,eAAe,EAAE,EAAE,qBAAqB,EAAE,eAAe,CAAC,YAAY,CAAC,EAAE,UAAU,EAAE,EAAE,EAAE,aAAa,EAAE,EAAE,EAAE;YAC5G,SAAS,EAAE;gBACT,eAAe,EAAE,WAAW,CAAC,eAAe,IAAI,EAAE,EAAE,aAAa,EAAE,WAAW,CAAC,aAAa
,IAAI,CAAC;gBACjG,UAAU,EAAE,WAAW,CAAC,UAAU,IAAI,CAAC,EAAE,UAAU,EAAE,WAAW,CAAC,UAAU,IAAI,EAAE;aAClF;YACD,MAAM,EAAE,YAAY,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,YAAY,EAAE,CAAC,CAAC;YACxE,SAAS,EAAE,GAAG,EAAE,SAAS,EAAE,GAAG;SAC/B,CAAC;IACJ,CAAC;IAED,wEAAwE;IAEhE,KAAK,CAAC,SAAS,CACrB,UAAsC,EACtC,YAAoC,EACpC,UAAkB,EAClB,QAAkB,EAClB,QAAkB,EAClB,YAAqD,EACrD,OAAuD,EACvD,SAA8B;QAE9B,MAAM,QAAQ,GAAG,UAAU,CAAC,GAAG,CAAC,KAAK,EAAE,SAAS,EAAE,GAAG,EAAE,EAAE;YACvD,MAAM,GAAG,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC;YAC9B,GAAG,CAAC,MAAM,EAAE,CAAC;YACb,MAAM,SAAS,GAAoB,EAAE,KAAK,EAAE,OAAO,EAAE,aAAa,EAAE,SAAS,CAAC,IAAI,EAAE,WAAW,EAAE,CAAC,EAAE,CAAC;YACrG,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,SAAS,EAAE,gBAAgB,CAAC,UAAU,EAAE,QAAQ,EAAE,OAAO,CAAC,EAAE,SAAS,EAAE,SAAS,CAAC,CAAC;YACxH,GAAG,CAAC,OAAO,IAAI,SAAS,CAAC,OAAO,CAAC;YACjC,GAAG,CAAC,WAAW,GAAG,kBAAkB,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;YACrD,GAAG,CAAC,cAAc,GAAG,SAAS,CAAC,IAAI,CAAC;YACpC,MAAM,QAAQ,GAAoB,EAAE,KAAK,EAAE,YAAY,EAAE,aAAa,EAAE,SAAS,CAAC,IAAI,EAAE,WAAW,EAAE,CAAC,EAAE,CAAC;YACzG,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,SAAS,EAAE,qBAAqB,CAAC,UAAU,EAAE,QAAQ,EAAE,YAAY,CAAC,EAAE,SAAS,EAAE,QAAQ,CAAC,CAAC;YAChI,GAAG,CAAC,OAAO,IAAI,QAAQ,CAAC,OAAO,CAAC;YAChC,GAAG,CAAC,gBAAgB,GAAG,uBAAuB,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;YAC9D,OAAO,uBAAuB,CAAC,GAAG,CAAC,WAAW,CAAC,MAAM,IAAI,EAAE,EAAE,SAAS,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;QACnF,CAAC,CAAC,CAAC;QACH,OAAO,CAAC,MAAM,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;IAC9C,CAAC;IAED,wEAAwE;IAEhE,KAAK,CAAC,cAAc,CAC1B,UAAsC,EACtC,YAAoC,EACpC,WAAmB,EACnB,SAA8B;QAE9B,MAAM,QAAQ,GAAG,UAAU,CAAC,GAAG,CAAC,KAAK,EAAE,SAAS,EAAE,GAAG,EAAE,EAAE;YACvD,MAAM,GAAG,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC;YAC9B,GAAG,CAAC,MAAM,EAAE,CAAC;YACb,MAAM,MAAM,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC;YACrF,MAAM,IAAI,GAAoB,EAAE,KAAK,EAAE,QAAQ,EAAE,aAAa,EAAE,SAAS,CAAC,IAAI,EAAE,WAAW,EAAE,CAAC;YAC9F,MAAM,IAAI,GAAG,
MAAM,IAAI,CAAC,QAAQ,CAAC,SAAS,EAAE,iBAAiB,CAAC,GAAG,CAAC,cAAc,EAAE,MAAM,EAAE,WAAW,CAAC,EAAE,SAAS,EAAE,IAAI,CAAC,CAAC;YACzH,GAAG,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO,CAAC;YAC5B,MAAM,MAAM,GAAG,mBAAmB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAC9C,IAAI,MAAM,CAAC,aAAa,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACzE,GAAG,CAAC,WAAW,GAAG,EAAE,GAAG,GAAG,CAAC,WAAW,EAAE,MAAM,EAAE,MAAM,CAAC,aAAa,EAAE,CAAC;YACzE,CAAC;YACD,GAAG,CAAC,cAAc,GAAG,IAAI,CAAC,IAAI,CAAC;YAC/B,OAAO,uBAAuB,CAAC,GAAG,CAAC,WAAW,CAAC,MAAM,IAAI,EAAE,EAAE,SAAS,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;QACnF,CAAC,CAAC,CAAC;QACH,OAAO,CAAC,MAAM,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;IAC9C,CAAC;IAED,wEAAwE;IAEhE,KAAK,CAAC,YAAY,CACxB,WAA4B,EAC5B,QAAyC,EACzC,QAAkB,EAClB,QAAkB,EAClB,YAAoC,EACpC,SAA8B,EAC9B,IAAqB;QAErB,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,WAAW,EAAE,oBAAoB,CAAC,QAAQ,EAAE,QAAQ,EAAE,QAAQ,CAAC,EAAE,SAAS,EAAE,IAAI,CAAC,CAAC;QACnH,MAAM,GAAG,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,WAAW,CAAC,IAAI,CAAC,CAAC;QAClE,IAAI,GAAG;YAAE,GAAG,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO,CAAC;QACrC,OAAO,sBAAsB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC3C,CAAC;IAED,wEAAwE;IAEhE,KAAK,CAAC,QAAQ,CACpB,SAA0B,EAC1B,MAAc,EACd,SAA8B,EAC9B,IAAqB;QAErB,MAAM,CAAC,GAAG,KAAK,CAAC;YACd,MAAM;YACN,OAAO,EAAE;gBACP,GAAG,EAAE,WAAW,CAAC,SAAS,CAAC,QAAQ,CAAC;gBACpC,KAAK,EAAE,SAAS,CAAC,QAAQ,CAAC,KAAK;gBAC/B,KAAK,EAAE,EAAE;gBACT,QAAQ,EAAE,CAAC;gBACX,cAAc,EAAE,SAAS;gBACzB,cAAc,EAAE,KAAK;aACtB;SACF,CAAC,CAAC;QAEH,IAAI,UAAU,GAAG,EAAE,CAAC;QACpB,IAAI,OAAO,GAAG,CAAC,CAAC;QAEhB,IAAI,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC,EAAE,CAAC;YAC1B,MAAM,MAAM,GAAG,GAAyC,CAAC;YACzD,MAAM,SAAS,GAAqB;gBAClC,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;gBACnC,OAAO,EAAE,MAAM;aAChB,CAAC;YACF,SAAS,CAAC,SAAS,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;YAErC,IAAI,MAAM,CAAC,MAAM,CAAC,KAAK,QAAQ,EAAE,CAAC;gBAChC,MAAM,SAAS,GAAG,MAAuC,CAAC;gBAC1D,OAAO,GAAG,SAAS,CAAC,cAAc,IAAI,CAAC,CAAC;gBACxC,IAAI,SAAS,CAAC,OAAO,KAAK,SAAS,EAAE,CAAC;oBACpC,MAAM,WAAW,GAAG,SAAS,CAAC,MAAM,IAAI,eAA
e,CAAC;oBACxD,MAAM,IAAI,KAAK,CACb,8BAA8B,SAAS,CAAC,OAAO,MAAM,WAAW,EAAE,CACnE,CAAC;gBACJ,CAAC;gBACD,UAAU,GAAG,SAAS,CAAC,MAAM,IAAI,EAAE,CAAC;YACtC,CAAC;QACH,CAAC;QAED,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,OAAO,EAAE,CAAC;IACvC,CAAC;CACF"}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
 * File system abstraction for testing.
 *
 * Every member returns a Promise. `defaultFs`, declared alongside this
 * interface, is the implementation typed against it.
 */
|
|
2
|
+
export interface FsAdapter {
|
|
3
|
+
/** Make a directory at `dirPath`. `opts.recursive` is required by this signature (not optional). Resolves with no value. */
mkdir(dirPath: string, opts: {
|
|
4
|
+
recursive: boolean;
|
|
5
|
+
}): Promise<void>;
|
|
6
|
+
/** Write the string `data` to `filePath`. `opts` and `opts.mode` are both optional. Resolves with no value. */
writeFile(filePath: string, data: string, opts?: {
|
|
7
|
+
mode?: number;
|
|
8
|
+
}): Promise<void>;
|
|
9
|
+
/** Read `filePath` with the given encoding; resolves with a string (never a Buffer, per this signature). */
readFile(filePath: string, encoding: BufferEncoding): Promise<string>;
|
|
10
|
+
/** List `dirPath`; resolves with an array of strings. */
readdir(dirPath: string): Promise<string[]>;
|
|
11
|
+
/** Remove `filePath`. Resolves with no value. */
unlink(filePath: string): Promise<void>;
|
|
12
|
+
/** Rename `oldPath` to `newPath`. Resolves with no value. */
rename(oldPath: string, newPath: string): Promise<void>;
|
|
13
|
+
/** Resolve with a stats-like object; `isFile()` is the only member this interface requires of it. */
stat(filePath: string): Promise<{
|
|
14
|
+
isFile(): boolean;
|
|
15
|
+
}>;
|
|
16
|
+
/** Resolves with no value. NOTE(review): presumably rejects when `filePath` is not accessible, as fs.access does; confirm for custom adapters. */
access(filePath: string): Promise<void>;
|
|
17
|
+
}
|
|
18
|
+
/**
 * Default adapter that delegates to node:fs/promises.
 * Declared with the FsAdapter type, so it can be supplied wherever an
 * FsAdapter is expected.
 */
|
|
19
|
+
export declare const defaultFs: FsAdapter;
|
|
20
|
+
//# sourceMappingURL=fs-adapter.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fs-adapter.d.ts","sourceRoot":"","sources":["../../../src/server/services/fs-adapter.ts"],"names":[],"mappings":"AAEA,0CAA0C;AAC1C,MAAM,WAAW,SAAS;IACxB,KAAK,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE;QAAE,SAAS,EAAE,OAAO,CAAA;KAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACpE,SAAS,CAAC,QAAQ,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE;QAAE,IAAI,CAAC,EAAE,MAAM,CAAA;KAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACnF,QAAQ,CAAC,QAAQ,EAAE,MAAM,EAAE,QAAQ,EAAE,cAAc,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;IACtE,OAAO,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;IAC5C,MAAM,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACxC,MAAM,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACxD,IAAI,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC;QAAE,MAAM,IAAI,OAAO,CAAA;KAAE,CAAC,CAAC;IACvD,MAAM,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;CACzC;AAED,yDAAyD;AACzD,eAAO,MAAM,SAAS,EAAE,SAUvB,CAAC"}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import fs from 'node:fs/promises';
|
|
2
|
+
/**
 * Default adapter that delegates to node:fs/promises.
 *
 * Each member is an arrow function that forwards its arguments unchanged to
 * the same-named function on the imported `fs` module and returns the
 * resulting promise. Only `mkdir`, `writeFile`, and `readdir` post-process
 * the resolved value; rejections are not caught here.
 */
|
|
3
|
+
export const defaultFs = {
|
|
4
|
+
// The trailing .then discards whatever fs.mkdir resolves with, so callers always get undefined.
mkdir: (dirPath, opts) => fs.mkdir(dirPath, opts).then(() => undefined),
|
|
5
|
+
// Same normalization as mkdir: the resolved value is forced to undefined.
writeFile: (filePath, data, opts) => fs.writeFile(filePath, data, opts).then(() => undefined),
|
|
6
|
+
// `encoding` is passed straight through as fs.readFile's second argument.
readFile: (filePath, encoding) => fs.readFile(filePath, encoding),
|
|
7
|
+
// Every entry is passed through String(), so the resolved array holds strings only.
readdir: (dirPath) => fs.readdir(dirPath).then((entries) => entries.map(String)),
|
|
8
|
+
// Direct pass-through; the promise from fs.unlink is returned as-is.
unlink: (filePath) => fs.unlink(filePath),
|
|
9
|
+
// Direct pass-through; the promise from fs.rename is returned as-is.
rename: (oldPath, newPath) => fs.rename(oldPath, newPath),
|
|
10
|
+
// Direct pass-through; resolves with whatever fs.stat resolves with.
stat: (filePath) => fs.stat(filePath),
|
|
11
|
+
// Direct pass-through; no mode argument is forwarded, so fs.access uses its default check.
access: (filePath) => fs.access(filePath),
|
|
12
|
+
};
|
|
13
|
+
//# sourceMappingURL=fs-adapter.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fs-adapter.js","sourceRoot":"","sources":["../../../src/server/services/fs-adapter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,kBAAkB,CAAC;AAclC,yDAAyD;AACzD,MAAM,CAAC,MAAM,SAAS,GAAc;IAClC,KAAK,EAAE,CAAC,OAAO,EAAE,IAAI,EAAE,EAAE,CAAC,EAAE,CAAC,KAAK,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE,CAAC,SAAS,CAAC;IACvE,SAAS,EAAE,CAAC,QAAQ,EAAE,IAAI,EAAE,IAAI,EAAE,EAAE,CAClC,EAAE,CAAC,SAAS,CAAC,QAAQ,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE,CAAC,SAAS,CAAC;IAC1D,QAAQ,EAAE,CAAC,QAAQ,EAAE,QAAQ,EAAE,EAAE,CAAC,EAAE,CAAC,QAAQ,CAAC,QAAQ,EAAE,QAAQ,CAAC;IACjE,OAAO,EAAE,CAAC,OAAO,EAAE,EAAE,CAAC,EAAE,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;IAChF,MAAM,EAAE,CAAC,QAAQ,EAAE,EAAE,CAAC,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC;IACzC,MAAM,EAAE,CAAC,OAAO,EAAE,OAAO,EAAE,EAAE,CAAC,EAAE,CAAC,MAAM,CAAC,OAAO,EAAE,OAAO,CAAC;IACzD,IAAI,EAAE,CAAC,QAAQ,EAAE,EAAE,CAAC,EAAE,CAAC,IAAI,CAAC,QAAQ,CAAC;IACrC,MAAM,EAAE,CAAC,QAAQ,EAAE,EAAE,CAAC,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC;CAC1C,CAAC"}
|