model-test-bench 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/README.md +389 -0
- package/dist/bin/mtb.d.ts +3 -0
- package/dist/bin/mtb.d.ts.map +1 -0
- package/dist/bin/mtb.js +148 -0
- package/dist/bin/mtb.js.map +1 -0
- package/dist/server/index.d.ts +13 -0
- package/dist/server/index.d.ts.map +1 -0
- package/dist/server/index.js +72 -0
- package/dist/server/index.js.map +1 -0
- package/dist/server/interfaces/evaluator.d.ts +15 -0
- package/dist/server/interfaces/evaluator.d.ts.map +1 -0
- package/dist/server/interfaces/evaluator.js +2 -0
- package/dist/server/interfaces/evaluator.js.map +1 -0
- package/dist/server/interfaces/logger.d.ts +9 -0
- package/dist/server/interfaces/logger.d.ts.map +1 -0
- package/dist/server/interfaces/logger.js +2 -0
- package/dist/server/interfaces/logger.js.map +1 -0
- package/dist/server/interfaces/runner.d.ts +9 -0
- package/dist/server/interfaces/runner.d.ts.map +1 -0
- package/dist/server/interfaces/runner.js +2 -0
- package/dist/server/interfaces/runner.js.map +1 -0
- package/dist/server/interfaces/storage.d.ts +36 -0
- package/dist/server/interfaces/storage.d.ts.map +1 -0
- package/dist/server/interfaces/storage.js +2 -0
- package/dist/server/interfaces/storage.js.map +1 -0
- package/dist/server/routes/eval-queue.d.ts +23 -0
- package/dist/server/routes/eval-queue.d.ts.map +1 -0
- package/dist/server/routes/eval-queue.js +45 -0
- package/dist/server/routes/eval-queue.js.map +1 -0
- package/dist/server/routes/evaluations.d.ts +8 -0
- package/dist/server/routes/evaluations.d.ts.map +1 -0
- package/dist/server/routes/evaluations.js +221 -0
- package/dist/server/routes/evaluations.js.map +1 -0
- package/dist/server/routes/providers.d.ts +5 -0
- package/dist/server/routes/providers.d.ts.map +1 -0
- package/dist/server/routes/providers.js +179 -0
- package/dist/server/routes/providers.js.map +1 -0
- package/dist/server/routes/run-queue.d.ts +17 -0
- package/dist/server/routes/run-queue.d.ts.map +1 -0
- package/dist/server/routes/run-queue.js +34 -0
- package/dist/server/routes/run-queue.js.map +1 -0
- package/dist/server/routes/run-sse.d.ts +18 -0
- package/dist/server/routes/run-sse.d.ts.map +1 -0
- package/dist/server/routes/run-sse.js +57 -0
- package/dist/server/routes/run-sse.js.map +1 -0
- package/dist/server/routes/runs.d.ts +9 -0
- package/dist/server/routes/runs.d.ts.map +1 -0
- package/dist/server/routes/runs.js +380 -0
- package/dist/server/routes/runs.js.map +1 -0
- package/dist/server/routes/scenarios.d.ts +5 -0
- package/dist/server/routes/scenarios.d.ts.map +1 -0
- package/dist/server/routes/scenarios.js +181 -0
- package/dist/server/routes/scenarios.js.map +1 -0
- package/dist/server/services/eval-helpers.d.ts +22 -0
- package/dist/server/services/eval-helpers.d.ts.map +1 -0
- package/dist/server/services/eval-helpers.js +75 -0
- package/dist/server/services/eval-helpers.js.map +1 -0
- package/dist/server/services/eval-parsers-debate-impl.d.ts +11 -0
- package/dist/server/services/eval-parsers-debate-impl.d.ts.map +1 -0
- package/dist/server/services/eval-parsers-debate-impl.js +133 -0
- package/dist/server/services/eval-parsers-debate-impl.js.map +1 -0
- package/dist/server/services/eval-parsers.d.ts +24 -0
- package/dist/server/services/eval-parsers.d.ts.map +1 -0
- package/dist/server/services/eval-parsers.js +153 -0
- package/dist/server/services/eval-parsers.js.map +1 -0
- package/dist/server/services/eval-prompts.d.ts +9 -0
- package/dist/server/services/eval-prompts.d.ts.map +1 -0
- package/dist/server/services/eval-prompts.js +164 -0
- package/dist/server/services/eval-prompts.js.map +1 -0
- package/dist/server/services/evaluator.d.ts +10 -0
- package/dist/server/services/evaluator.d.ts.map +1 -0
- package/dist/server/services/evaluator.js +143 -0
- package/dist/server/services/evaluator.js.map +1 -0
- package/dist/server/services/fs-adapter.d.ts +20 -0
- package/dist/server/services/fs-adapter.d.ts.map +1 -0
- package/dist/server/services/fs-adapter.js +13 -0
- package/dist/server/services/fs-adapter.js.map +1 -0
- package/dist/server/services/instruction-parser.d.ts +26 -0
- package/dist/server/services/instruction-parser.d.ts.map +1 -0
- package/dist/server/services/instruction-parser.js +121 -0
- package/dist/server/services/instruction-parser.js.map +1 -0
- package/dist/server/services/log-rotator.d.ts +20 -0
- package/dist/server/services/log-rotator.d.ts.map +1 -0
- package/dist/server/services/log-rotator.js +60 -0
- package/dist/server/services/log-rotator.js.map +1 -0
- package/dist/server/services/logger.d.ts +15 -0
- package/dist/server/services/logger.d.ts.map +1 -0
- package/dist/server/services/logger.js +69 -0
- package/dist/server/services/logger.js.map +1 -0
- package/dist/server/services/model-factory.d.ts +10 -0
- package/dist/server/services/model-factory.d.ts.map +1 -0
- package/dist/server/services/model-factory.js +33 -0
- package/dist/server/services/model-factory.js.map +1 -0
- package/dist/server/services/runner.d.ts +9 -0
- package/dist/server/services/runner.d.ts.map +1 -0
- package/dist/server/services/runner.js +99 -0
- package/dist/server/services/runner.js.map +1 -0
- package/dist/server/services/seeder.d.ts +5 -0
- package/dist/server/services/seeder.d.ts.map +1 -0
- package/dist/server/services/seeder.js +79 -0
- package/dist/server/services/seeder.js.map +1 -0
- package/dist/server/services/storage-test-helpers.d.ts +15 -0
- package/dist/server/services/storage-test-helpers.d.ts.map +1 -0
- package/dist/server/services/storage-test-helpers.js +151 -0
- package/dist/server/services/storage-test-helpers.js.map +1 -0
- package/dist/server/services/storage.d.ts +35 -0
- package/dist/server/services/storage.d.ts.map +1 -0
- package/dist/server/services/storage.js +219 -0
- package/dist/server/services/storage.js.map +1 -0
- package/dist/server/services/tools.d.ts +6 -0
- package/dist/server/services/tools.d.ts.map +1 -0
- package/dist/server/services/tools.js +94 -0
- package/dist/server/services/tools.js.map +1 -0
- package/dist/server/services/transcript-formatter.d.ts +18 -0
- package/dist/server/services/transcript-formatter.d.ts.map +1 -0
- package/dist/server/services/transcript-formatter.js +227 -0
- package/dist/server/services/transcript-formatter.js.map +1 -0
- package/dist/server/services/update-checker.d.ts +3 -0
- package/dist/server/services/update-checker.d.ts.map +1 -0
- package/dist/server/services/update-checker.js +34 -0
- package/dist/server/services/update-checker.js.map +1 -0
- package/dist/server/types/evaluation.d.ts +94 -0
- package/dist/server/types/evaluation.d.ts.map +1 -0
- package/dist/server/types/evaluation.js +5 -0
- package/dist/server/types/evaluation.js.map +1 -0
- package/dist/server/types/index.d.ts +5 -0
- package/dist/server/types/index.d.ts.map +1 -0
- package/dist/server/types/index.js +5 -0
- package/dist/server/types/index.js.map +1 -0
- package/dist/server/types/provider.d.ts +23 -0
- package/dist/server/types/provider.d.ts.map +1 -0
- package/dist/server/types/provider.js +5 -0
- package/dist/server/types/provider.js.map +1 -0
- package/dist/server/types/run.d.ts +31 -0
- package/dist/server/types/run.d.ts.map +1 -0
- package/dist/server/types/run.js +5 -0
- package/dist/server/types/run.js.map +1 -0
- package/dist/server/types/scenario.d.ts +19 -0
- package/dist/server/types/scenario.d.ts.map +1 -0
- package/dist/server/types/scenario.js +5 -0
- package/dist/server/types/scenario.js.map +1 -0
- package/dist/src/server/index.d.ts +13 -0
- package/dist/src/server/index.d.ts.map +1 -0
- package/dist/src/server/index.js +72 -0
- package/dist/src/server/index.js.map +1 -0
- package/dist/src/server/interfaces/evaluator.d.ts +15 -0
- package/dist/src/server/interfaces/evaluator.d.ts.map +1 -0
- package/dist/src/server/interfaces/evaluator.js +2 -0
- package/dist/src/server/interfaces/evaluator.js.map +1 -0
- package/dist/src/server/interfaces/logger.d.ts +9 -0
- package/dist/src/server/interfaces/logger.d.ts.map +1 -0
- package/dist/src/server/interfaces/logger.js +2 -0
- package/dist/src/server/interfaces/logger.js.map +1 -0
- package/dist/src/server/interfaces/runner.d.ts +9 -0
- package/dist/src/server/interfaces/runner.d.ts.map +1 -0
- package/dist/src/server/interfaces/runner.js +2 -0
- package/dist/src/server/interfaces/runner.js.map +1 -0
- package/dist/src/server/interfaces/storage.d.ts +36 -0
- package/dist/src/server/interfaces/storage.d.ts.map +1 -0
- package/dist/src/server/interfaces/storage.js +2 -0
- package/dist/src/server/interfaces/storage.js.map +1 -0
- package/dist/src/server/routes/eval-queue.d.ts +23 -0
- package/dist/src/server/routes/eval-queue.d.ts.map +1 -0
- package/dist/src/server/routes/eval-queue.js +45 -0
- package/dist/src/server/routes/eval-queue.js.map +1 -0
- package/dist/src/server/routes/evaluations.d.ts +8 -0
- package/dist/src/server/routes/evaluations.d.ts.map +1 -0
- package/dist/src/server/routes/evaluations.js +221 -0
- package/dist/src/server/routes/evaluations.js.map +1 -0
- package/dist/src/server/routes/providers.d.ts +5 -0
- package/dist/src/server/routes/providers.d.ts.map +1 -0
- package/dist/src/server/routes/providers.js +179 -0
- package/dist/src/server/routes/providers.js.map +1 -0
- package/dist/src/server/routes/run-queue.d.ts +17 -0
- package/dist/src/server/routes/run-queue.d.ts.map +1 -0
- package/dist/src/server/routes/run-queue.js +34 -0
- package/dist/src/server/routes/run-queue.js.map +1 -0
- package/dist/src/server/routes/run-sse.d.ts +18 -0
- package/dist/src/server/routes/run-sse.d.ts.map +1 -0
- package/dist/src/server/routes/run-sse.js +57 -0
- package/dist/src/server/routes/run-sse.js.map +1 -0
- package/dist/src/server/routes/runs.d.ts +9 -0
- package/dist/src/server/routes/runs.d.ts.map +1 -0
- package/dist/src/server/routes/runs.js +380 -0
- package/dist/src/server/routes/runs.js.map +1 -0
- package/dist/src/server/routes/scenarios.d.ts +5 -0
- package/dist/src/server/routes/scenarios.d.ts.map +1 -0
- package/dist/src/server/routes/scenarios.js +181 -0
- package/dist/src/server/routes/scenarios.js.map +1 -0
- package/dist/src/server/services/eval-helpers.d.ts +22 -0
- package/dist/src/server/services/eval-helpers.d.ts.map +1 -0
- package/dist/src/server/services/eval-helpers.js +75 -0
- package/dist/src/server/services/eval-helpers.js.map +1 -0
- package/dist/src/server/services/eval-parsers-debate-impl.d.ts +11 -0
- package/dist/src/server/services/eval-parsers-debate-impl.d.ts.map +1 -0
- package/dist/src/server/services/eval-parsers-debate-impl.js +133 -0
- package/dist/src/server/services/eval-parsers-debate-impl.js.map +1 -0
- package/dist/src/server/services/eval-parsers.d.ts +24 -0
- package/dist/src/server/services/eval-parsers.d.ts.map +1 -0
- package/dist/src/server/services/eval-parsers.js +153 -0
- package/dist/src/server/services/eval-parsers.js.map +1 -0
- package/dist/src/server/services/eval-prompts.d.ts +9 -0
- package/dist/src/server/services/eval-prompts.d.ts.map +1 -0
- package/dist/src/server/services/eval-prompts.js +164 -0
- package/dist/src/server/services/eval-prompts.js.map +1 -0
- package/dist/src/server/services/evaluator.d.ts +10 -0
- package/dist/src/server/services/evaluator.d.ts.map +1 -0
- package/dist/src/server/services/evaluator.js +143 -0
- package/dist/src/server/services/evaluator.js.map +1 -0
- package/dist/src/server/services/fs-adapter.d.ts +20 -0
- package/dist/src/server/services/fs-adapter.d.ts.map +1 -0
- package/dist/src/server/services/fs-adapter.js +13 -0
- package/dist/src/server/services/fs-adapter.js.map +1 -0
- package/dist/src/server/services/instruction-parser.d.ts +26 -0
- package/dist/src/server/services/instruction-parser.d.ts.map +1 -0
- package/dist/src/server/services/instruction-parser.js +121 -0
- package/dist/src/server/services/instruction-parser.js.map +1 -0
- package/dist/src/server/services/log-rotator.d.ts +20 -0
- package/dist/src/server/services/log-rotator.d.ts.map +1 -0
- package/dist/src/server/services/log-rotator.js +60 -0
- package/dist/src/server/services/log-rotator.js.map +1 -0
- package/dist/src/server/services/logger.d.ts +15 -0
- package/dist/src/server/services/logger.d.ts.map +1 -0
- package/dist/src/server/services/logger.js +69 -0
- package/dist/src/server/services/logger.js.map +1 -0
- package/dist/src/server/services/model-factory.d.ts +10 -0
- package/dist/src/server/services/model-factory.d.ts.map +1 -0
- package/dist/src/server/services/model-factory.js +33 -0
- package/dist/src/server/services/model-factory.js.map +1 -0
- package/dist/src/server/services/runner.d.ts +9 -0
- package/dist/src/server/services/runner.d.ts.map +1 -0
- package/dist/src/server/services/runner.js +99 -0
- package/dist/src/server/services/runner.js.map +1 -0
- package/dist/src/server/services/seeder.d.ts +5 -0
- package/dist/src/server/services/seeder.d.ts.map +1 -0
- package/dist/src/server/services/seeder.js +79 -0
- package/dist/src/server/services/seeder.js.map +1 -0
- package/dist/src/server/services/storage.d.ts +35 -0
- package/dist/src/server/services/storage.d.ts.map +1 -0
- package/dist/src/server/services/storage.js +219 -0
- package/dist/src/server/services/storage.js.map +1 -0
- package/dist/src/server/services/tools.d.ts +6 -0
- package/dist/src/server/services/tools.d.ts.map +1 -0
- package/dist/src/server/services/tools.js +94 -0
- package/dist/src/server/services/tools.js.map +1 -0
- package/dist/src/server/services/transcript-formatter.d.ts +18 -0
- package/dist/src/server/services/transcript-formatter.d.ts.map +1 -0
- package/dist/src/server/services/transcript-formatter.js +227 -0
- package/dist/src/server/services/transcript-formatter.js.map +1 -0
- package/dist/src/server/services/update-checker.d.ts +3 -0
- package/dist/src/server/services/update-checker.d.ts.map +1 -0
- package/dist/src/server/services/update-checker.js +34 -0
- package/dist/src/server/services/update-checker.js.map +1 -0
- package/dist/src/server/types/evaluation.d.ts +94 -0
- package/dist/src/server/types/evaluation.d.ts.map +1 -0
- package/dist/src/server/types/evaluation.js +5 -0
- package/dist/src/server/types/evaluation.js.map +1 -0
- package/dist/src/server/types/index.d.ts +5 -0
- package/dist/src/server/types/index.d.ts.map +1 -0
- package/dist/src/server/types/index.js +5 -0
- package/dist/src/server/types/index.js.map +1 -0
- package/dist/src/server/types/provider.d.ts +23 -0
- package/dist/src/server/types/provider.d.ts.map +1 -0
- package/dist/src/server/types/provider.js +5 -0
- package/dist/src/server/types/provider.js.map +1 -0
- package/dist/src/server/types/run.d.ts +31 -0
- package/dist/src/server/types/run.d.ts.map +1 -0
- package/dist/src/server/types/run.js +5 -0
- package/dist/src/server/types/run.js.map +1 -0
- package/dist/src/server/types/scenario.d.ts +19 -0
- package/dist/src/server/types/scenario.d.ts.map +1 -0
- package/dist/src/server/types/scenario.js +5 -0
- package/dist/src/server/types/scenario.js.map +1 -0
- package/dist/web/assets/index-AJu1Yn5F.js +70 -0
- package/dist/web/assets/index-C_ioEISr.css +1 -0
- package/dist/web/index.html +15 -0
- package/docs/schemas/provider-api.example.json +12 -0
- package/docs/schemas/provider-openai.example.json +11 -0
- package/docs/schemas/scenario-baseline.example.json +24 -0
- package/docs/schemas/scenario-carwash-baseline.example.json +22 -0
- package/docs/schemas/scenario-carwash-with-system-prompt.example.json +24 -0
- package/docs/schemas/scenario-golden-rules-baseline.example.json +24 -0
- package/docs/schemas/scenario-golden-rules-with-system-prompt.example.json +28 -0
- package/docs/schemas/scenario-negative-analysis-baseline.example.json +23 -0
- package/docs/schemas/scenario-negative-analysis-with-system-prompt.example.json +25 -0
- package/docs/schemas/scenario-with-system-prompt.example.json +25 -0
- package/package.json +97 -0
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import type { Provider } from './provider.js';
|
|
2
|
+
import type { Scenario } from './scenario.js';
|
|
3
|
+
/** A raw message captured during a run (provider-agnostic). */
|
|
4
|
+
export interface SDKMessageRecord {
|
|
5
|
+
/** When the message was captured. String format is not visible here (presumably ISO-8601) — TODO confirm against the producer. */
readonly timestamp: string;
|
|
6
|
+
/** Opaque payload: string-keyed values of unknown type, read-only at the top level. */
readonly message: Readonly<Record<string, unknown>>;
|
|
7
|
+
}
|
|
8
|
+
/** Lifecycle status of a run. */
|
|
9
|
+
export type RunStatus = 'pending' | 'running' | 'completed' | 'failed' | 'cancelled';
|
|
10
|
+
/** A single run: one provider + one scenario + captured output. */
|
|
11
|
+
export interface Run {
|
|
12
|
+
readonly id: string;
|
|
13
|
+
/** Identifier of the provider used; the full object is also stored in `providerSnapshot`. */
readonly providerId: string;
|
|
14
|
+
/** Identifier of the scenario used; the full object is also stored in `scenarioSnapshot`. */
readonly scenarioId: string;
|
|
15
|
+
readonly status: RunStatus;
|
|
16
|
+
/** Complete Provider object embedded in the run (presumably a copy taken at creation time — TODO confirm). */
readonly providerSnapshot: Provider;
|
|
17
|
+
/** Complete Scenario object embedded in the run (presumably a copy taken at creation time — TODO confirm). */
readonly scenarioSnapshot: Scenario;
|
|
18
|
+
/** Raw messages recorded for this run. */
readonly messages: readonly SDKMessageRecord[];
|
|
19
|
+
readonly resultText: string;
|
|
20
|
+
// Units below are taken from the field-name suffixes (Usd, Ms) — NOTE(review): confirm with the runner.
readonly totalCostUsd: number;
|
|
21
|
+
readonly durationMs: number;
|
|
22
|
+
readonly numTurns: number;
|
|
23
|
+
/** Optional error text; presumably set only when the run fails — TODO confirm. */
readonly error?: string;
|
|
24
|
+
// The remaining optional fields relate to evaluation/review of the run; all may be absent.
readonly reviewerProviderIds?: readonly string[];
|
|
25
|
+
readonly reviewerProviderSnapshots?: readonly Provider[];
|
|
26
|
+
readonly maxEvalRounds?: number;
|
|
27
|
+
readonly evaluationId?: string;
|
|
28
|
+
readonly createdAt: string;
|
|
29
|
+
readonly updatedAt: string;
|
|
30
|
+
}
|
|
31
|
+
//# sourceMappingURL=run.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/server/types/run.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AAC9C,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AAE9C,+DAA+D;AAC/D,MAAM,WAAW,gBAAgB;IAC/B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,OAAO,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC;CACrD;AAED,iCAAiC;AACjC,MAAM,MAAM,SAAS,GAAG,SAAS,GAAG,SAAS,GAAG,WAAW,GAAG,QAAQ,GAAG,WAAW,CAAC;AAErF,mEAAmE;AACnE,MAAM,WAAW,GAAG;IAClB,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,MAAM,EAAE,SAAS,CAAC;IAC3B,QAAQ,CAAC,gBAAgB,EAAE,QAAQ,CAAC;IACpC,QAAQ,CAAC,gBAAgB,EAAE,QAAQ,CAAC;IACpC,QAAQ,CAAC,QAAQ,EAAE,SAAS,gBAAgB,EAAE,CAAC;IAC/C,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC;IAC9B,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;IAC1B,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IAExB,QAAQ,CAAC,mBAAmB,CAAC,EAAE,SAAS,MAAM,EAAE,CAAC;IACjD,QAAQ,CAAC,yBAAyB,CAAC,EAAE,SAAS,QAAQ,EAAE,CAAC;IACzD,QAAQ,CAAC,aAAa,CAAC,EAAE,MAAM,CAAC;IAChC,QAAQ,CAAC,YAAY,CAAC,EAAE,MAAM,CAAC;IAC/B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;CAC5B"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"run.js","sourceRoot":"","sources":["../../../src/server/types/run.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,YAAY;AACZ,8EAA8E"}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import type { ScoringDimension } from './provider.js';
|
|
2
|
+
/** Built-in category labels for scenarios (behavior/planning focused). */
|
|
3
|
+
export type ScenarioCategory = 'planning' | 'instruction-following' | 'reasoning' | 'tool-strategy' | 'error-handling' | 'ambiguity-handling' | 'scope-management' | 'custom';
|
|
4
|
+
/** A scenario defines what to test and how to grade. */
|
|
5
|
+
export interface Scenario {
|
|
6
|
+
readonly id: string;
|
|
7
|
+
readonly name: string;
|
|
8
|
+
/** One of the built-in ScenarioCategory labels. */
readonly category: ScenarioCategory;
|
|
9
|
+
// Prompt-related fields; both are required strings (an empty systemPrompt presumably means "none" — TODO confirm).
readonly prompt: string;
|
|
10
|
+
readonly systemPrompt: string;
|
|
11
|
+
/** Tool names as plain strings; how they are resolved is not visible in this file. */
readonly enabledTools: readonly string[];
|
|
12
|
+
// Grading-related fields: reference answer, requirements, free-text guidelines and scoring dimensions.
readonly expectedAnswer: string;
|
|
13
|
+
readonly criticalRequirements: readonly string[];
|
|
14
|
+
readonly gradingGuidelines: string;
|
|
15
|
+
readonly scoringDimensions: readonly ScoringDimension[];
|
|
16
|
+
readonly createdAt: string;
|
|
17
|
+
readonly updatedAt: string;
|
|
18
|
+
}
|
|
19
|
+
//# sourceMappingURL=scenario.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"scenario.d.ts","sourceRoot":"","sources":["../../../src/server/types/scenario.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,eAAe,CAAC;AAEtD,0EAA0E;AAC1E,MAAM,MAAM,gBAAgB,GACxB,UAAU,GACV,uBAAuB,GACvB,WAAW,GACX,eAAe,GACf,gBAAgB,GAChB,oBAAoB,GACpB,kBAAkB,GAClB,QAAQ,CAAC;AAEb,wDAAwD;AACxD,MAAM,WAAW,QAAQ;IACvB,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,QAAQ,EAAE,gBAAgB,CAAC;IAEpC,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC;IAE9B,QAAQ,CAAC,YAAY,EAAE,SAAS,MAAM,EAAE,CAAC;IAEzC,QAAQ,CAAC,cAAc,EAAE,MAAM,CAAC;IAChC,QAAQ,CAAC,oBAAoB,EAAE,SAAS,MAAM,EAAE,CAAC;IACjD,QAAQ,CAAC,iBAAiB,EAAE,MAAM,CAAC;IACnC,QAAQ,CAAC,iBAAiB,EAAE,SAAS,gBAAgB,EAAE,CAAC;IACxD,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;CAC5B"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"scenario.js","sourceRoot":"","sources":["../../../src/server/types/scenario.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,iBAAiB;AACjB,8EAA8E"}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import express from 'express';
|
|
2
|
+
import type { IStorage } from './interfaces/storage.js';
|
|
3
|
+
import type { ILogger } from './interfaces/logger.js';
|
|
4
|
+
import type { IRunner } from './interfaces/runner.js';
|
|
5
|
+
import type { IEvaluator } from './interfaces/evaluator.js';
|
|
6
|
+
/** Services injected into `createApp`. */
export interface AppDeps {
|
|
7
|
+
/** Required; handed to every route factory. */
storage: IStorage;
|
|
8
|
+
/** Required; handed to every route factory and used by the app-level error handler. */
logger: ILogger;
|
|
9
|
+
/** Optional; the `/api/runs` router is only mounted when a runner is supplied. */
runner?: IRunner;
|
|
10
|
+
/** Optional; the `/api/evaluations` router is only mounted when supplied. Also forwarded to the runs router. */
evaluator?: IEvaluator;
|
|
11
|
+
}
|
|
12
|
+
/** Builds and returns the configured Express application from the injected dependencies. */
export declare function createApp(deps: AppDeps): express.Express;
|
|
13
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/server/index.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAK9B,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,yBAAyB,CAAC;AACxD,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,wBAAwB,CAAC;AACtD,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,wBAAwB,CAAC;AACtD,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,2BAA2B,CAAC;AAQ5D,MAAM,WAAW,OAAO;IACtB,OAAO,EAAE,QAAQ,CAAC;IAClB,MAAM,EAAE,OAAO,CAAC;IAChB,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,SAAS,CAAC,EAAE,UAAU,CAAC;CACxB;AAED,wBAAgB,SAAS,CAAC,IAAI,EAAE,OAAO,GAAG,OAAO,CAAC,OAAO,CAgExD"}
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
import express from 'express';
|
|
2
|
+
import fs from 'node:fs';
|
|
3
|
+
import path from 'node:path';
|
|
4
|
+
import { fileURLToPath } from 'node:url';
|
|
5
|
+
import { createProviderRoutes } from './routes/providers.js';
|
|
6
|
+
import { createScenarioRoutes } from './routes/scenarios.js';
|
|
7
|
+
import { createRunRoutes } from './routes/runs.js';
|
|
8
|
+
import { createEvaluationRoutes } from './routes/evaluations.js';
|
|
9
|
+
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
10
|
+
/**
 * Build the Express application.
 *
 * Middleware is registered in this order (order matters to Express):
 * CORS headers, JSON body parser, health check, API routers, static web
 * assets, SPA fallback, and finally the error handler.
 *
 * @param {object} deps - Injected services.
 * @param {object} deps.storage - Storage passed to every route factory.
 * @param {object} deps.logger - Logger passed to route factories and used by the error handler.
 * @param {object} [deps.runner] - When present, `/api/runs` is mounted.
 * @param {object} [deps.evaluator] - When present, `/api/evaluations` is mounted; also forwarded to the runs router.
 * @returns {import('express').Express} The configured app; this function does not start listening.
 */
export function createApp(deps) {
    const { logger } = deps;
    const app = express();

    // ─── CORS ──────────────────────────────────────────────────────────
    // NOTE(review): labelled "for dev mode" upstream, but these headers are
    // applied unconditionally, so every origin is allowed in production too.
    app.use((req, res, next) => {
        res.header('Access-Control-Allow-Origin', '*');
        res.header('Access-Control-Allow-Methods', 'GET, POST, PUT, DELETE, OPTIONS');
        res.header('Access-Control-Allow-Headers', 'Content-Type, Authorization');
        if (req.method === 'OPTIONS') {
            // Answer preflight requests directly; they never reach the routers.
            res.sendStatus(204);
            return;
        }
        next();
    });

    // ─── JSON body parser ──────────────────────────────────────────────
    app.use(express.json({ limit: '10mb' }));

    // ─── Health check ──────────────────────────────────────────────────
    app.get('/api/health', (_req, res) => {
        res.json({ status: 'ok', timestamp: new Date().toISOString() });
    });

    // ─── API routes ────────────────────────────────────────────────────
    app.use('/api/providers', createProviderRoutes(deps.storage, deps.logger));
    app.use('/api/scenarios', createScenarioRoutes(deps.storage, deps.logger));
    if (deps.runner) {
        app.use('/api/runs', createRunRoutes(deps.storage, deps.runner, deps.logger, deps.evaluator));
    }
    if (deps.evaluator) {
        app.use('/api/evaluations', createEvaluationRoutes(deps.storage, deps.evaluator, deps.logger));
    }

    // ─── Static files (production) ─────────────────────────────────────
    // Try multiple resolution paths: dist/server/../web, or cwd/dist/web
    const webDistCandidates = [
        path.resolve(__dirname, '..', 'web'), // from dist/server/ → dist/web/
        path.resolve(process.cwd(), 'dist', 'web'), // from project root
    ];
    // First candidate that exists as a directory; if none does, fall back to
    // the first candidate so the static/SPA handlers simply find nothing.
    const webDistPath = webDistCandidates.find((candidate) => {
        try {
            return fs.statSync(candidate).isDirectory();
        }
        catch {
            return false;
        }
    }) ?? webDistCandidates[0];
    app.use(express.static(webDistPath));

    // SPA fallback: serve index.html for non-API routes
    app.get(/^\/(?!api\/).*/, (_req, res, next) => {
        res.sendFile(path.join(webDistPath, 'index.html'), (err) => {
            if (err) {
                // index.html is unavailable: continue without an error so the
                // request ends in Express's default 404 rather than a 500.
                next();
            }
        });
    });

    // ─── Error handling middleware ─────────────────────────────────────
    // Must keep the four-argument signature: that is how Express recognises
    // an error handler.
    app.use((err, _req, res, next) => {
        // Tolerate non-Error values passed to next(): still log something useful.
        const message = err?.message ?? String(err);
        const stack = err?.stack;

        if (res.headersSent) {
            // The response has already started, so a JSON body can no longer
            // be written. Delegate to Express's default handler, which
            // terminates the connection.
            logger.error('Unhandled error', { error: message, stack });
            next(err);
            return;
        }

        // Errors raised by express.json()/express.static carry an HTTP status
        // (e.g. 400 for malformed JSON, 413 for an oversized body). Those are
        // client errors and must not be reported as server faults.
        const status = err?.status ?? err?.statusCode;
        if (Number.isInteger(status) && status >= 400 && status < 500) {
            logger.warn('Request rejected', { error: message, status });
            res.status(status).json({ error: err.expose ? message : 'Bad request' });
            return;
        }

        logger.error('Unhandled error', { error: message, stack });
        res.status(500).json({ error: 'Internal server error' });
    });

    return app;
}
|
|
72
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/server/index.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAE9B,OAAO,EAAE,MAAM,SAAS,CAAC;AACzB,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AAKzC,OAAO,EAAE,oBAAoB,EAAE,MAAM,uBAAuB,CAAC;AAC7D,OAAO,EAAE,oBAAoB,EAAE,MAAM,uBAAuB,CAAC;AAC7D,OAAO,EAAE,eAAe,EAAE,MAAM,kBAAkB,CAAC;AACnD,OAAO,EAAE,sBAAsB,EAAE,MAAM,yBAAyB,CAAC;AAEjE,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AAS/D,MAAM,UAAU,SAAS,CAAC,IAAa;IACrC,MAAM,EAAE,MAAM,EAAE,GAAG,IAAI,CAAC;IACxB,MAAM,GAAG,GAAG,OAAO,EAAE,CAAC;IAEtB,sEAAsE;IACtE,GAAG,CAAC,GAAG,CAAC,CAAC,IAAa,EAAE,GAAa,EAAE,IAAkB,EAAE,EAAE;QAC3D,GAAG,CAAC,MAAM,CAAC,6BAA6B,EAAE,GAAG,CAAC,CAAC;QAC/C,GAAG,CAAC,MAAM,CAAC,8BAA8B,EAAE,iCAAiC,CAAC,CAAC;QAC9E,GAAG,CAAC,MAAM,CAAC,8BAA8B,EAAE,6BAA6B,CAAC,CAAC;QAC1E,IAAI,IAAI,CAAC,MAAM,KAAK,SAAS,EAAE,CAAC;YAC9B,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC;YACpB,OAAO;QACT,CAAC;QACD,IAAI,EAAE,CAAC;IACT,CAAC,CAAC,CAAC;IAEH,sEAAsE;IACtE,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC;IAEzC,sEAAsE;IACtE,GAAG,CAAC,GAAG,CAAC,aAAa,EAAE,CAAC,IAAa,EAAE,GAAa,EAAE,EAAE;QACtD,GAAG,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,EAAE,CAAC,CAAC;IAClE,CAAC,CAAC,CAAC;IAEH,wEAAwE;IACxE,GAAG,CAAC,GAAG,CAAC,gBAAgB,EAAE,oBAAoB,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC;IAC3E,GAAG,CAAC,GAAG,CAAC,gBAAgB,EAAE,oBAAoB,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC;IAC3E,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;QAChB,GAAG,CAAC,GAAG,CAAC,WAAW,EAAE,eAAe,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC;IAChG,CAAC;IACD,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;QACnB,GAAG,CAAC,GAAG,CAAC,kBAAkB,EAAE,sBAAsB,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC;IACjG,CAAC;IAED,sEAAsE;IACtE,qEAAqE;IACrE,MAAM,iBAAiB,GAAG;QACxB,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,IAAI,EAAE,KAAK,CAAC,EAAY,gCAAgC;QAChF
,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,MAAM,EAAE,KAAK,CAAC,EAAM,oBAAoB;KACrE,CAAC;IACF,MAAM,WAAW,GAAG,iBAAiB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE;QAC/C,IAAI,CAAC;YAAC,OAAO,EAAE,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC;QAAC,CAAC;QAAC,MAAM,CAAC;YAAC,OAAO,KAAK,CAAC;QAAC,CAAC;IACtE,CAAC,CAAC,IAAI,iBAAiB,CAAC,CAAC,CAAC,CAAC;IAC3B,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC;IAErC,oDAAoD;IACpD,GAAG,CAAC,GAAG,CAAC,gBAAgB,EAAE,CAAC,IAAa,EAAE,GAAa,EAAE,IAAkB,EAAE,EAAE;QAC7E,GAAG,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,YAAY,CAAC,EAAE,CAAC,GAAG,EAAE,EAAE;YACzD,IAAI,GAAG,EAAE,CAAC;gBACR,IAAI,EAAE,CAAC;YACT,CAAC;QACH,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,uEAAuE;IACvE,GAAG,CAAC,GAAG,CAAC,CAAC,GAAU,EAAE,IAAa,EAAE,GAAa,EAAE,KAAmB,EAAE,EAAE;QACxE,MAAM,CAAC,KAAK,CAAC,iBAAiB,EAAE;YAC9B,KAAK,EAAE,GAAG,CAAC,OAAO;YAClB,KAAK,EAAE,GAAG,CAAC,KAAK;SACjB,CAAC,CAAC;QACH,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,uBAAuB,EAAE,CAAC,CAAC;IAC3D,CAAC,CAAC,CAAC;IAEH,OAAO,GAAG,CAAC;AACb,CAAC"}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import type { Provider, Scenario, Run, Evaluation, EvaluationRequest, EvaluationStatus, SDKMessageRecord } from '../types/index.js';
|
|
2
|
+
/** Context that accompanies each message delivered through `EvaluationCallbacks.onMessage`. */
export interface EvalMessageInfo {
|
|
3
|
+
/** Which of the four evaluation phases produced the message. */
readonly phase: 'score' | 'compliance' | 'debate' | 'synthesis';
|
|
4
|
+
/** Free-form role label of the evaluator; allowed values are not constrained by this type. */
readonly evaluatorRole: string;
|
|
5
|
+
/** Round counter; whether it is 0- or 1-based is not visible here — TODO confirm. */
readonly roundNumber: number;
|
|
6
|
+
}
|
|
7
|
+
/** Observer hooks supplied to `IEvaluator.evaluateRun`; all three are synchronous (`void`) by signature. */
export interface EvaluationCallbacks {
|
|
8
|
+
/** Receives an EvaluationStatus value. */
onStatusChange(status: EvaluationStatus): void;
|
|
9
|
+
/** Receives a step label and an optional detail string. */
onProgress(step: string, detail?: string): void;
|
|
10
|
+
/** Receives a captured message together with the phase/role/round it belongs to. */
onMessage(info: EvalMessageInfo, message: SDKMessageRecord): void;
|
|
11
|
+
}
|
|
12
|
+
/** Contract for the evaluation service injected into the app. */
export interface IEvaluator {
|
|
13
|
+
/**
 * Evaluates one run against its scenario and provider.
 * Takes the evaluation request plus observer callbacks and resolves with the resulting Evaluation.
 */
evaluateRun(run: Run, scenario: Scenario, provider: Provider, request: EvaluationRequest, callbacks: EvaluationCallbacks): Promise<Evaluation>;
|
|
14
|
+
}
|
|
15
|
+
//# sourceMappingURL=evaluator.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"evaluator.d.ts","sourceRoot":"","sources":["../../../../src/server/interfaces/evaluator.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,QAAQ,EACR,QAAQ,EACR,GAAG,EACH,UAAU,EACV,iBAAiB,EACjB,gBAAgB,EAChB,gBAAgB,EACjB,MAAM,mBAAmB,CAAC;AAE3B,MAAM,WAAW,eAAe;IAC9B,QAAQ,CAAC,KAAK,EAAE,OAAO,GAAG,YAAY,GAAG,QAAQ,GAAG,WAAW,CAAC;IAChE,QAAQ,CAAC,aAAa,EAAE,MAAM,CAAC;IAC/B,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;CAC9B;AAED,MAAM,WAAW,mBAAmB;IAClC,cAAc,CAAC,MAAM,EAAE,gBAAgB,GAAG,IAAI,CAAC;IAC/C,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAChD,SAAS,CAAC,IAAI,EAAE,eAAe,EAAE,OAAO,EAAE,gBAAgB,GAAG,IAAI,CAAC;CACnE;AAED,MAAM,WAAW,UAAU;IACzB,WAAW,CACT,GAAG,EAAE,GAAG,EACR,QAAQ,EAAE,QAAQ,EAClB,QAAQ,EAAE,QAAQ,EAClB,OAAO,EAAE,iBAAiB,EAC1B,SAAS,EAAE,mBAAmB,GAC7B,OAAO,CAAC,UAAU,CAAC,CAAC;CACxB"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"evaluator.js","sourceRoot":"","sources":["../../../../src/server/interfaces/evaluator.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/** Log severity names; each one corresponds to a same-named method on `ILogger`. */
export type LogLevel = 'debug' | 'info' | 'warn' | 'error';
|
|
2
|
+
/** Structured logger contract: one method per LogLevel, each taking a message and optional attributes. */
export interface ILogger {
|
|
3
|
+
debug(msg: string, attrs?: Record<string, unknown>): void;
|
|
4
|
+
info(msg: string, attrs?: Record<string, unknown>): void;
|
|
5
|
+
warn(msg: string, attrs?: Record<string, unknown>): void;
|
|
6
|
+
error(msg: string, attrs?: Record<string, unknown>): void;
|
|
7
|
+
/** Returns another ILogger derived from `attrs` (presumably attaching them to every entry it emits — TODO confirm in the implementation). */
child(attrs: Record<string, unknown>): ILogger;
|
|
8
|
+
}
|
|
9
|
+
//# sourceMappingURL=logger.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"logger.d.ts","sourceRoot":"","sources":["../../../../src/server/interfaces/logger.ts"],"names":[],"mappings":"AAAA,MAAM,MAAM,QAAQ,GAAG,OAAO,GAAG,MAAM,GAAG,MAAM,GAAG,OAAO,CAAC;AAE3D,MAAM,WAAW,OAAO;IACtB,KAAK,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC;IAC1D,IAAI,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC;IACzD,IAAI,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC;IACzD,KAAK,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC;IAC1D,KAAK,CAAC,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,OAAO,CAAC;CAChD"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"logger.js","sourceRoot":"","sources":["../../../../src/server/interfaces/logger.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import type { Provider, Scenario, Run, RunStatus, SDKMessageRecord } from '../types/index.js';
|
|
2
|
+
/**
 * Callbacks handed to IRunner.executeRun.
 *
 * `onMessage` receives an SDKMessageRecord and `onStatusChange` receives a RunStatus.
 * When and how often the runner invokes them is not specified by this declaration.
 */
export interface RunCallbacks {
|
|
3
|
+
onMessage(message: SDKMessageRecord): void;
|
|
4
|
+
onStatusChange(status: RunStatus): void;
|
|
5
|
+
}
|
|
6
|
+
/**
 * Contract for the component that executes a run.
 *
 * `executeRun` takes the provider, the scenario, the run record, a RunCallbacks
 * object and an optional AbortController, and resolves to a Run.
 * NOTE(review): whether the resolved Run is the same object as `run` or a new
 * record, and how the AbortController is honoured, is implementation-defined.
 */
export interface IRunner {
|
|
7
|
+
executeRun(provider: Provider, scenario: Scenario, run: Run, callbacks: RunCallbacks, abortController?: AbortController): Promise<Run>;
|
|
8
|
+
}
|
|
9
|
+
//# sourceMappingURL=runner.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"runner.d.ts","sourceRoot":"","sources":["../../../../src/server/interfaces/runner.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,QAAQ,EAAE,QAAQ,EAAE,GAAG,EAAE,SAAS,EAAE,gBAAgB,EAAE,MAAM,mBAAmB,CAAC;AAE9F,MAAM,WAAW,YAAY;IAC3B,SAAS,CAAC,OAAO,EAAE,gBAAgB,GAAG,IAAI,CAAC;IAC3C,cAAc,CAAC,MAAM,EAAE,SAAS,GAAG,IAAI,CAAC;CACzC;AAED,MAAM,WAAW,OAAO;IACtB,UAAU,CACR,QAAQ,EAAE,QAAQ,EAClB,QAAQ,EAAE,QAAQ,EAClB,GAAG,EAAE,GAAG,EACR,SAAS,EAAE,YAAY,EACvB,eAAe,CAAC,EAAE,eAAe,GAChC,OAAO,CAAC,GAAG,CAAC,CAAC;CACjB"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"runner.js","sourceRoot":"","sources":["../../../../src/server/interfaces/runner.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import type { Provider, Scenario, ScenarioCategory, Run, RunStatus, Evaluation, EvaluationStatus } from '../types/index.js';
|
|
2
|
+
/**
 * Optional criteria accepted by IStorage.listProviders.
 * Both fields are optional, read-only strings; the matching rules (exact match,
 * case sensitivity) are decided by the storage implementation.
 */
export interface ProviderFilter {
|
|
3
|
+
readonly provider?: string;
|
|
4
|
+
readonly model?: string;
|
|
5
|
+
}
|
|
6
|
+
/**
 * Optional criteria accepted by IStorage.listScenarios.
 * The only field is an optional, read-only ScenarioCategory.
 */
export interface ScenarioFilter {
|
|
7
|
+
readonly category?: ScenarioCategory;
|
|
8
|
+
}
|
|
9
|
+
/**
 * Optional criteria accepted by IStorage.listRuns.
 * All fields are optional and read-only: a provider id, a scenario id and a RunStatus.
 * How multiple fields combine is decided by the storage implementation
 * (presumably a logical AND — confirm there).
 */
export interface RunFilter {
|
|
10
|
+
readonly providerId?: string;
|
|
11
|
+
readonly scenarioId?: string;
|
|
12
|
+
readonly status?: RunStatus;
|
|
13
|
+
}
|
|
14
|
+
/**
 * Optional criteria accepted by IStorage.listEvaluations.
 * Both fields are optional and read-only: the id of the evaluated run and an EvaluationStatus.
 */
export interface EvaluationFilter {
|
|
15
|
+
readonly runId?: string;
|
|
16
|
+
readonly status?: EvaluationStatus;
|
|
17
|
+
}
|
|
18
|
+
/**
 * Asynchronous persistence contract for the four record kinds used by the
 * server: Provider, Scenario, Run and Evaluation.
 *
 * Every record kind has the same four operations:
 *  - get*(id)       resolves to the record, or `undefined` (per the return type).
 *  - list*(filter?) resolves to a read-only array; the filter argument is optional.
 *  - save*(record)  resolves to void. NOTE(review): insert-vs-update semantics are
 *                   not stated here; callers in this package save the same id repeatedly.
 *  - delete*(id)    resolves to a boolean, presumably "a record was removed" —
 *                   confirm against the implementation.
 */
export interface IStorage {
|
|
19
|
+
getProvider(id: string): Promise<Provider | undefined>;
|
|
20
|
+
listProviders(filter?: ProviderFilter): Promise<readonly Provider[]>;
|
|
21
|
+
saveProvider(provider: Provider): Promise<void>;
|
|
22
|
+
deleteProvider(id: string): Promise<boolean>;
|
|
23
|
+
getScenario(id: string): Promise<Scenario | undefined>;
|
|
24
|
+
listScenarios(filter?: ScenarioFilter): Promise<readonly Scenario[]>;
|
|
25
|
+
saveScenario(scenario: Scenario): Promise<void>;
|
|
26
|
+
deleteScenario(id: string): Promise<boolean>;
|
|
27
|
+
getRun(id: string): Promise<Run | undefined>;
|
|
28
|
+
listRuns(filter?: RunFilter): Promise<readonly Run[]>;
|
|
29
|
+
saveRun(run: Run): Promise<void>;
|
|
30
|
+
deleteRun(id: string): Promise<boolean>;
|
|
31
|
+
getEvaluation(id: string): Promise<Evaluation | undefined>;
|
|
32
|
+
listEvaluations(filter?: EvaluationFilter): Promise<readonly Evaluation[]>;
|
|
33
|
+
saveEvaluation(evaluation: Evaluation): Promise<void>;
|
|
34
|
+
deleteEvaluation(id: string): Promise<boolean>;
|
|
35
|
+
}
|
|
36
|
+
//# sourceMappingURL=storage.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"storage.d.ts","sourceRoot":"","sources":["../../../../src/server/interfaces/storage.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,QAAQ,EACR,QAAQ,EACR,gBAAgB,EAChB,GAAG,EACH,SAAS,EACT,UAAU,EACV,gBAAgB,EACjB,MAAM,mBAAmB,CAAC;AAE3B,MAAM,WAAW,cAAc;IAC7B,QAAQ,CAAC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,CAAC;CACzB;AAED,MAAM,WAAW,cAAc;IAC7B,QAAQ,CAAC,QAAQ,CAAC,EAAE,gBAAgB,CAAC;CACtC;AAED,MAAM,WAAW,SAAS;IACxB,QAAQ,CAAC,UAAU,CAAC,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,UAAU,CAAC,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,MAAM,CAAC,EAAE,SAAS,CAAC;CAC7B;AAED,MAAM,WAAW,gBAAgB;IAC/B,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,MAAM,CAAC,EAAE,gBAAgB,CAAC;CACpC;AAED,MAAM,WAAW,QAAQ;IAEvB,WAAW,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,QAAQ,GAAG,SAAS,CAAC,CAAC;IACvD,aAAa,CAAC,MAAM,CAAC,EAAE,cAAc,GAAG,OAAO,CAAC,SAAS,QAAQ,EAAE,CAAC,CAAC;IACrE,YAAY,CAAC,QAAQ,EAAE,QAAQ,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAChD,cAAc,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;IAG7C,WAAW,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,QAAQ,GAAG,SAAS,CAAC,CAAC;IACvD,aAAa,CAAC,MAAM,CAAC,EAAE,cAAc,GAAG,OAAO,CAAC,SAAS,QAAQ,EAAE,CAAC,CAAC;IACrE,YAAY,CAAC,QAAQ,EAAE,QAAQ,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAChD,cAAc,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;IAG7C,MAAM,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,GAAG,GAAG,SAAS,CAAC,CAAC;IAC7C,QAAQ,CAAC,MAAM,CAAC,EAAE,SAAS,GAAG,OAAO,CAAC,SAAS,GAAG,EAAE,CAAC,CAAC;IACtD,OAAO,CAAC,GAAG,EAAE,GAAG,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACjC,SAAS,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;IAGxC,aAAa,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,UAAU,GAAG,SAAS,CAAC,CAAC;IAC3D,eAAe,CAAC,MAAM,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAAC,SAAS,UAAU,EAAE,CAAC,CAAC;IAC3E,cAAc,CAAC,UAAU,EAAE,UAAU,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACtD,gBAAgB,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;CAChD"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"storage.js","sourceRoot":"","sources":["../../../../src/server/interfaces/storage.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import type { Evaluation } from '../types/index.js';
|
|
2
|
+
/**
 * Validated evaluator entry from the request body (providerId-based).
 * This is the success result of validateEvalEntry: the id of the provider that
 * will act as evaluator, and the role string assigned to it.
 */
|
|
3
|
+
export interface EvalEntry {
|
|
4
|
+
readonly providerId: string;
|
|
5
|
+
readonly role: string;
|
|
6
|
+
}
|
|
7
|
+
/**
 * Validate an evaluator entry from request body.
 * Returns the parsed EvalEntry on success, or a string describing the
 * validation error; callers distinguish the two with `typeof result === 'string'`.
 */
|
|
8
|
+
export declare function validateEvalEntry(raw: unknown): EvalEntry | string;
|
|
9
|
+
/**
 * Unit of work accepted by EvalQueue.enqueue: the evaluation record the work
 * belongs to, plus the parameterless async function the queue calls to run it.
 */
export interface EvalQueueEntry {
|
|
10
|
+
evaluation: Evaluation;
|
|
11
|
+
execute: () => Promise<void>;
|
|
12
|
+
}
|
|
13
|
+
/**
 * Concurrency-limited queue for evaluation execution.
 *
 * Public surface: an optional `maxConcurrency` constructor argument,
 * `enqueue(entry)`, and the read-only counters `pendingCount` and `activeCount`.
 * The queue array, the active counter, the stored limit and `drain` are private.
 */
export declare class EvalQueue {
|
|
14
|
+
private readonly queue;
|
|
15
|
+
private active;
|
|
16
|
+
private readonly maxConcurrency;
|
|
17
|
+
constructor(maxConcurrency?: number);
|
|
18
|
+
enqueue(entry: EvalQueueEntry): void;
|
|
19
|
+
get pendingCount(): number;
|
|
20
|
+
get activeCount(): number;
|
|
21
|
+
private drain;
|
|
22
|
+
}
|
|
23
|
+
//# sourceMappingURL=eval-queue.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"eval-queue.d.ts","sourceRoot":"","sources":["../../../../src/server/routes/eval-queue.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAMpD,0EAA0E;AAC1E,MAAM,WAAW,SAAS;IACxB,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;CACvB;AAED,2FAA2F;AAC3F,wBAAgB,iBAAiB,CAAC,GAAG,EAAE,OAAO,GAAG,SAAS,GAAG,MAAM,CAMlE;AAMD,MAAM,WAAW,cAAc;IAC7B,UAAU,EAAE,UAAU,CAAC;IACvB,OAAO,EAAE,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC;CAC9B;AAED,qBAAa,SAAS;IACpB,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAwB;IAC9C,OAAO,CAAC,MAAM,CAAK;IACnB,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAS;gBAE5B,cAAc,SAAI;IAI9B,OAAO,CAAC,KAAK,EAAE,cAAc,GAAG,IAAI;IAKpC,IAAI,YAAY,IAAI,MAAM,CAEzB;IAED,IAAI,WAAW,IAAI,MAAM,CAExB;YAEa,KAAK;CAWpB"}
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
// ---------------------------------------------------------------------------
|
|
2
|
+
// EvalQueue — concurrency-limited queue for evaluation execution
|
|
3
|
+
// ---------------------------------------------------------------------------
|
|
4
|
+
/**
 * Validate an evaluator entry from request body.
 *
 * @param {unknown} raw - One element of the request's `evaluators` array.
 * @returns {{ providerId: string, role: string } | string} The parsed entry
 *   (only `providerId` and `role` are kept; other keys are dropped), or an
 *   error message string when the entry is invalid.
 */
export function validateEvalEntry(raw) {
    // Arrays are typeof 'object' too; reject them here so the caller gets the
    // accurate "must be an object" message instead of a misleading role error.
    if (!raw || typeof raw !== 'object' || Array.isArray(raw)) {
        return 'Each evaluator must be an object';
    }
    const { role, providerId } = raw;
    // Falsy check first: empty strings are rejected as well as non-strings.
    if (!role || typeof role !== 'string') {
        return 'Each evaluator must have a string role';
    }
    if (!providerId || typeof providerId !== 'string') {
        return 'Each evaluator must have a string providerId';
    }
    return { providerId, role };
}
|
|
15
|
+
/**
 * Concurrency-limited FIFO queue for evaluation execution.
 *
 * Entries are started in insertion order, at most `maxConcurrency` at a time.
 * A failing entry (rejected promise or synchronous throw from `execute`) is
 * reported and then treated as finished, so it can neither stall the queue nor
 * surface as an unhandled promise rejection.
 */
export class EvalQueue {
    /** Entries waiting to be started, oldest first. */
    queue = [];
    /** Number of entries whose `execute()` has been started and has not settled yet. */
    active = 0;
    /** Upper bound for `active`. */
    maxConcurrency;

    /**
     * @param {number} [maxConcurrency=1] - Maximum number of entries run at once.
     */
    constructor(maxConcurrency = 1) {
        this.maxConcurrency = maxConcurrency;
    }

    /**
     * Append an entry and start it immediately if a slot is free.
     * @param {{ evaluation: unknown, execute: () => Promise<void> }} entry
     * @returns {void}
     */
    enqueue(entry) {
        this.queue.push(entry);
        // drain() contains no await and handles entry failures itself, so the
        // returned promise is intentionally not awaited.
        void this.drain();
    }

    /** Number of entries waiting to be started. */
    get pendingCount() {
        return this.queue.length;
    }

    /** Number of entries currently running. */
    get activeCount() {
        return this.active;
    }

    /**
     * Start queued entries until the concurrency limit is reached or the queue
     * is empty. Re-invoked whenever a running entry settles.
     * @returns {Promise<void>}
     */
    async drain() {
        while (this.active < this.maxConcurrency && this.queue.length > 0) {
            const next = this.queue.shift();
            if (!next) {
                break;
            }
            this.active++;
            let running;
            try {
                // Started synchronously so activeCount is accurate right after enqueue().
                running = Promise.resolve(next.execute());
            } catch (err) {
                // A non-async execute() may throw before returning a promise;
                // route it through the same settle path so the slot is released.
                running = Promise.reject(err);
            }
            running
                .catch((err) => {
                    // Entries are expected to handle their own errors; this is the
                    // last-resort report that keeps the rejection from going unhandled.
                    console.error('EvalQueue entry failed', err);
                })
                .finally(() => {
                    this.active--;
                    void this.drain();
                });
        }
    }
}
|
|
45
|
+
//# sourceMappingURL=eval-queue.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"eval-queue.js","sourceRoot":"","sources":["../../../../src/server/routes/eval-queue.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,iEAAiE;AACjE,8EAA8E;AAc9E,2FAA2F;AAC3F,MAAM,UAAU,iBAAiB,CAAC,GAAY;IAC5C,IAAI,CAAC,GAAG,IAAI,OAAO,GAAG,KAAK,QAAQ;QAAE,OAAO,kCAAkC,CAAC;IAC/E,MAAM,GAAG,GAAG,GAA8B,CAAC;IAC3C,IAAI,CAAC,GAAG,CAAC,IAAI,IAAI,OAAO,GAAG,CAAC,IAAI,KAAK,QAAQ;QAAE,OAAO,wCAAwC,CAAC;IAC/F,IAAI,CAAC,GAAG,CAAC,UAAU,IAAI,OAAO,GAAG,CAAC,UAAU,KAAK,QAAQ;QAAE,OAAO,8CAA8C,CAAC;IACjH,OAAO,EAAE,UAAU,EAAE,GAAG,CAAC,UAAoB,EAAE,IAAI,EAAE,GAAG,CAAC,IAAc,EAAE,CAAC;AAC5E,CAAC;AAWD,MAAM,OAAO,SAAS;IACH,KAAK,GAAqB,EAAE,CAAC;IACtC,MAAM,GAAG,CAAC,CAAC;IACF,cAAc,CAAS;IAExC,YAAY,cAAc,GAAG,CAAC;QAC5B,IAAI,CAAC,cAAc,GAAG,cAAc,CAAC;IACvC,CAAC;IAED,OAAO,CAAC,KAAqB;QAC3B,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACvB,KAAK,IAAI,CAAC,KAAK,EAAE,CAAC;IACpB,CAAC;IAED,IAAI,YAAY;QACd,OAAO,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC;IAC3B,CAAC;IAED,IAAI,WAAW;QACb,OAAO,IAAI,CAAC,MAAM,CAAC;IACrB,CAAC;IAEO,KAAK,CAAC,KAAK;QACjB,OAAO,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,cAAc,IAAI,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAClE,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;YAChC,IAAI,CAAC,IAAI;gBAAE,MAAM;YACjB,IAAI,CAAC,MAAM,EAAE,CAAC;YACd,IAAI,CAAC,OAAO,EAAE,CAAC,OAAO,CAAC,GAAG,EAAE;gBAC1B,IAAI,CAAC,MAAM,EAAE,CAAC;gBACd,KAAK,IAAI,CAAC,KAAK,EAAE,CAAC;YACpB,CAAC,CAAC,CAAC;QACL,CAAC;IACH,CAAC;CACF"}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import { Router } from 'express';
|
|
2
|
+
import type { IStorage } from '../interfaces/storage.js';
|
|
3
|
+
import type { ILogger } from '../interfaces/logger.js';
|
|
4
|
+
import type { IEvaluator } from '../interfaces/evaluator.js';
|
|
5
|
+
import { EvalQueue } from './eval-queue.js';
|
|
6
|
+
export { EvalQueue, validateEvalEntry } from './eval-queue.js';
|
|
7
|
+
/**
 * Build the Express Router for the evaluation endpoints.
 * Takes the storage, evaluator and logger dependencies plus an optional
 * EvalQueue; returns the configured Router.
 */
export declare function createEvaluationRoutes(storage: IStorage, evaluator: IEvaluator, logger: ILogger, queue?: EvalQueue): Router;
|
|
8
|
+
//# sourceMappingURL=evaluations.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"evaluations.d.ts","sourceRoot":"","sources":["../../../../src/server/routes/evaluations.ts"],"names":[],"mappings":"AAIA,OAAO,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AAGjC,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,0BAA0B,CAAC;AACzD,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,yBAAyB,CAAC;AACvD,OAAO,KAAK,EAAE,UAAU,EAAuB,MAAM,4BAA4B,CAAC;AAIlF,OAAO,EAAE,SAAS,EAAqB,MAAM,iBAAiB,CAAC;AAI/D,OAAO,EAAE,SAAS,EAAE,iBAAiB,EAAE,MAAM,iBAAiB,CAAC;AAuB/D,wBAAgB,sBAAsB,CACpC,OAAO,EAAE,QAAQ,EACjB,SAAS,EAAE,UAAU,EACrB,MAAM,EAAE,OAAO,EACf,KAAK,CAAC,EAAE,SAAS,GAChB,MAAM,CAgNR"}
|
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
// ---------------------------------------------------------------------------
|
|
2
|
+
// Evaluation routes — start, list, get, stream evaluations
|
|
3
|
+
// ---------------------------------------------------------------------------
|
|
4
|
+
import { Router } from 'express';
|
|
5
|
+
import { v4 as uuidv4 } from 'uuid';
|
|
6
|
+
import { handleSSEConnection, broadcastSSE, closeSSE } from './run-sse.js';
|
|
7
|
+
import { EvalQueue, validateEvalEntry } from './eval-queue.js';
|
|
8
|
+
// Re-export so existing imports from this module continue to work
|
|
9
|
+
export { EvalQueue, validateEvalEntry } from './eval-queue.js';
|
|
10
|
+
// ---------------------------------------------------------------------------
|
|
11
|
+
// Helpers
|
|
12
|
+
// ---------------------------------------------------------------------------
|
|
13
|
+
/**
 * Read the `id` route parameter from a request.
 *
 * @param {{ params: { id?: string | string[] } }} req - Incoming request.
 * @returns {string | undefined} The parameter itself, or its first element
 *   when the parameter was parsed as an array.
 */
function paramId(req) {
    const { id } = req.params;
    if (Array.isArray(id)) {
        return id[0];
    }
    return id;
}
|
|
17
|
+
/**
 * Produce the list-endpoint view of an evaluation: a shallow copy of the
 * record without its `rounds` property. The input object is not modified.
 *
 * @param {object} evaluation - Full evaluation record.
 * @returns {object} Copy of `evaluation` minus `rounds`.
 */
function evalSummary(evaluation) {
    const { rounds: omittedRounds, ...summary } = evaluation;
    return summary;
}
|
|
22
|
+
// ---------------------------------------------------------------------------
|
|
23
|
+
// Router factory
|
|
24
|
+
// ---------------------------------------------------------------------------
|
|
25
|
+
/**
 * Build the Express router for evaluations.
 *
 * Routes:
 *  - POST /            validate the request, persist a 'pending' evaluation,
 *                      enqueue its execution and answer 202 with its id.
 *  - GET  /            list evaluations (optionally filtered by ?runId=), without rounds.
 *  - GET  /:id         return one full evaluation record.
 *  - GET  /:id/stream  attach the client to the evaluation's SSE stream.
 *
 * @param storage   Storage used to load runs/providers and persist evaluations.
 * @param evaluator Object whose `evaluateRun` performs the evaluation.
 * @param logger    Logger used for error reporting.
 * @param [queue]   Execution queue; defaults to a new EvalQueue with concurrency 1.
 * @returns The configured Router.
 */
export function createEvaluationRoutes(storage, evaluator, logger, queue) {
    const router = Router();
    const evalQueue = queue ?? new EvalQueue(1);
    // Shared registry handed to the run-sse helpers.
    const sseSubscribers = new Map();

    // POST / — start a new evaluation
    router.post('/', async (req, res) => {
        try {
            // req.body is undefined when no body parser ran or the body was empty;
            // treat that as an empty request so the client gets a 400, not a 500.
            const body = req.body ?? {};
            const runId = body.runId;
            if (typeof runId !== 'string' || runId === '') {
                res.status(400).json({ error: 'runId is required' });
                return;
            }
            const run = await storage.getRun(runId);
            if (!run) {
                res.status(404).json({ error: 'Run not found' });
                return;
            }

            // Validate evaluator entries (providerId + role)
            const rawEvaluators = body.evaluators;
            if (!Array.isArray(rawEvaluators) || rawEvaluators.length === 0) {
                res.status(400).json({ error: 'evaluators must be a non-empty array' });
                return;
            }
            const entries = [];
            for (const raw of rawEvaluators) {
                const result = validateEvalEntry(raw);
                if (typeof result === 'string') {
                    res.status(400).json({ error: result });
                    return;
                }
                entries.push(result);
            }

            // Resolve providerIds to real providers
            const evaluators = [];
            for (const entry of entries) {
                const providerRecord = await storage.getProvider(entry.providerId);
                if (!providerRecord) {
                    res.status(404).json({ error: `Provider not found: ${entry.providerId}` });
                    return;
                }
                // NOTE(review): apiKey is copied into `evaluators`, which is stored on the
                // evaluation record below and returned by the GET routes — confirm that
                // persisting/exposing the key there is intended.
                evaluators.push({
                    providerName: providerRecord.providerName,
                    model: providerRecord.model,
                    apiKey: providerRecord.apiKey,
                    baseUrl: providerRecord.baseUrl,
                    role: entry.role,
                });
            }

            // Validate maxRounds: a whole number of rounds in [1, 5]; defaults to 1
            // when the field is absent or not a number.
            const maxRounds = typeof body.maxRounds === 'number' ? body.maxRounds : 1;
            if (!Number.isInteger(maxRounds) || maxRounds < 1 || maxRounds > 5) {
                res.status(400).json({ error: 'maxRounds must be between 1 and 5' });
                return;
            }
            const maxBudgetUsd = typeof body.maxBudgetUsd === 'number' ? body.maxBudgetUsd : undefined;
            const evalRequest = { runId, evaluators, maxRounds, maxBudgetUsd };

            const now = new Date().toISOString();
            // Placeholder record: result fields start empty and are replaced by the
            // evaluator's output once execution finishes.
            const evaluation = {
                id: uuidv4(),
                runId,
                status: 'pending',
                evaluators,
                rounds: [],
                answerComparison: { matches: false, explanation: '', similarity: 0 },
                criticalResults: [],
                setupCompliance: {
                    instructionCompliance: { followed: [], violated: [], notApplicable: [], overallCompliance: 0 },
                },
                synthesis: { dimensionScores: {}, weightedTotal: 0, confidence: 0, dissenting: [] },
                ledger: [],
                totalCostUsd: 0,
                createdAt: now,
                updatedAt: now,
            };
            await storage.saveEvaluation(evaluation);

            // Load related data for the evaluator
            const providerSnapshot = run.providerSnapshot;
            const scenario = run.scenarioSnapshot;

            evalQueue.enqueue({
                evaluation,
                execute: async () => {
                    // Status saves are chained so they reach storage in order, and so the
                    // terminal save below can wait for them. Otherwise a late status save
                    // (a snapshot of the placeholder record) could overwrite the final result.
                    let statusSaves = Promise.resolve();
                    const callbacks = {
                        onStatusChange(status) {
                            const updatedEval = {
                                ...evaluation,
                                status,
                                updatedAt: new Date().toISOString(),
                            };
                            broadcastSSE(evaluation.id, 'message', { type: 'status', status }, sseSubscribers);
                            statusSaves = statusSaves
                                .then(() => storage.saveEvaluation(updatedEval))
                                .catch((saveErr) => {
                                    logger.error('Failed to persist evaluation status change', {
                                        evalId: evaluation.id,
                                        status,
                                        error: String(saveErr),
                                    });
                                });
                        },
                        onProgress(step, detail) {
                            broadcastSSE(evaluation.id, 'message', { type: 'progress', step, detail }, sseSubscribers);
                        },
                        onMessage(info, msg) {
                            broadcastSSE(evaluation.id, 'message', {
                                type: 'sdkMessage',
                                phase: info.phase,
                                evaluatorRole: info.evaluatorRole,
                                roundNumber: info.roundNumber,
                                timestamp: msg.timestamp,
                                message: msg.message,
                            }, sseSubscribers);
                        },
                    };
                    try {
                        const result = await evaluator.evaluateRun(run, scenario, providerSnapshot, evalRequest, callbacks);
                        // Keep the id issued to the client, whatever id the evaluator put on its result.
                        const finalEval = {
                            ...result,
                            id: evaluation.id,
                            updatedAt: new Date().toISOString(),
                        };
                        await statusSaves;
                        await storage.saveEvaluation(finalEval);
                    }
                    catch (err) {
                        logger.error('Evaluation failed', { evalId: evaluation.id, error: String(err) });
                        broadcastSSE(evaluation.id, 'message', { type: 'status', status: 'failed' }, sseSubscribers);
                        const failedEval = {
                            ...evaluation,
                            status: 'failed',
                            updatedAt: new Date().toISOString(),
                        };
                        // statusSaves never rejects (each link has its own catch).
                        await statusSaves;
                        await storage.saveEvaluation(failedEval).catch((saveErr) => {
                            logger.error('Failed to save failed evaluation', {
                                evalId: evaluation.id,
                                error: String(saveErr),
                            });
                        });
                    }
                    finally {
                        closeSSE(evaluation.id, sseSubscribers);
                    }
                },
            });
            res.status(202).json({ id: evaluation.id, status: 'pending' });
        }
        catch (err) {
            logger.error('Failed to start evaluation', { error: String(err) });
            res.status(500).json({ error: 'Failed to start evaluation' });
        }
    });

    // GET / — list evaluations
    router.get('/', async (req, res) => {
        try {
            const filter = {};
            if (typeof req.query.runId === 'string') {
                filter.runId = req.query.runId;
            }
            const evaluations = await storage.listEvaluations(filter);
            res.json(evaluations.map(evalSummary));
        }
        catch (err) {
            logger.error('Failed to list evaluations', { error: String(err) });
            res.status(500).json({ error: 'Failed to list evaluations' });
        }
    });

    // GET /:id — get full evaluation
    router.get('/:id', async (req, res) => {
        try {
            const id = paramId(req);
            const evaluation = await storage.getEvaluation(id);
            if (!evaluation) {
                res.status(404).json({ error: 'Evaluation not found' });
                return;
            }
            res.json(evaluation);
        }
        catch (err) {
            logger.error('Failed to get evaluation', { id: paramId(req), error: String(err) });
            res.status(500).json({ error: 'Failed to get evaluation' });
        }
    });

    // GET /:id/stream — SSE endpoint
    router.get('/:id/stream', async (req, res) => {
        try {
            const id = paramId(req);
            const evaluation = await storage.getEvaluation(id);
            if (!evaluation) {
                res.status(404).json({ error: 'Evaluation not found' });
                return;
            }
            // NOTE(review): an evaluation that already finished gets subscribed too;
            // confirm handleSSEConnection ends such streams, since closeSSE only runs
            // at the end of execution.
            handleSSEConnection(req, res, id, sseSubscribers);
        }
        catch (err) {
            logger.error('Failed to start eval SSE stream', { id: paramId(req), error: String(err) });
            // If the SSE helper already started the response, a JSON error can no
            // longer be sent; writing it would throw inside this handler.
            if (!res.headersSent) {
                res.status(500).json({ error: 'Failed to start stream' });
            }
        }
    });

    return router;
}
|
|
221
|
+
//# sourceMappingURL=evaluations.js.map
|