sammy-sdk 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +75 -0
- package/dist/cli/commands/dev.d.ts +3 -0
- package/dist/cli/commands/dev.d.ts.map +1 -0
- package/dist/cli/commands/dev.js +13 -0
- package/dist/cli/commands/dev.js.map +1 -0
- package/dist/cli/commands/eval.d.ts +3 -0
- package/dist/cli/commands/eval.d.ts.map +1 -0
- package/dist/cli/commands/eval.js +28 -0
- package/dist/cli/commands/eval.js.map +1 -0
- package/dist/cli/commands/generate.d.ts +3 -0
- package/dist/cli/commands/generate.d.ts.map +1 -0
- package/dist/cli/commands/generate.js +10 -0
- package/dist/cli/commands/generate.js.map +1 -0
- package/dist/cli/commands/init.d.ts +3 -0
- package/dist/cli/commands/init.d.ts.map +1 -0
- package/dist/cli/commands/init.js +9 -0
- package/dist/cli/commands/init.js.map +1 -0
- package/dist/cli/index.d.ts +3 -0
- package/dist/cli/index.d.ts.map +1 -0
- package/dist/cli/index.js +17 -0
- package/dist/cli/index.js.map +1 -0
- package/dist/cloud/sammy-cloud.d.ts +10 -0
- package/dist/cloud/sammy-cloud.d.ts.map +1 -0
- package/dist/cloud/sammy-cloud.js +113 -0
- package/dist/cloud/sammy-cloud.js.map +1 -0
- package/dist/dev/chat-ui.d.ts +6 -0
- package/dist/dev/chat-ui.d.ts.map +1 -0
- package/dist/dev/chat-ui.js +95 -0
- package/dist/dev/chat-ui.js.map +1 -0
- package/dist/dev/server.d.ts +5 -0
- package/dist/dev/server.d.ts.map +1 -0
- package/dist/dev/server.js +87 -0
- package/dist/dev/server.js.map +1 -0
- package/dist/dev/watcher.d.ts +2 -0
- package/dist/dev/watcher.d.ts.map +1 -0
- package/dist/dev/watcher.js +19 -0
- package/dist/dev/watcher.js.map +1 -0
- package/dist/discovery/ast/call-route-finder.d.ts +16 -0
- package/dist/discovery/ast/call-route-finder.d.ts.map +1 -0
- package/dist/discovery/ast/call-route-finder.js +106 -0
- package/dist/discovery/ast/call-route-finder.js.map +1 -0
- package/dist/discovery/ast/handler-detector.d.ts +8 -0
- package/dist/discovery/ast/handler-detector.d.ts.map +1 -0
- package/dist/discovery/ast/handler-detector.js +56 -0
- package/dist/discovery/ast/handler-detector.js.map +1 -0
- package/dist/discovery/ast/named-export-finder.d.ts +7 -0
- package/dist/discovery/ast/named-export-finder.d.ts.map +1 -0
- package/dist/discovery/ast/named-export-finder.js +21 -0
- package/dist/discovery/ast/named-export-finder.js.map +1 -0
- package/dist/discovery/ast/param-extractor.d.ts +7 -0
- package/dist/discovery/ast/param-extractor.d.ts.map +1 -0
- package/dist/discovery/ast/param-extractor.js +236 -0
- package/dist/discovery/ast/param-extractor.js.map +1 -0
- package/dist/discovery/ast/project.d.ts +8 -0
- package/dist/discovery/ast/project.d.ts.map +1 -0
- package/dist/discovery/ast/project.js +66 -0
- package/dist/discovery/ast/project.js.map +1 -0
- package/dist/discovery/ast/resolve.d.ts +5 -0
- package/dist/discovery/ast/resolve.d.ts.map +1 -0
- package/dist/discovery/ast/resolve.js +60 -0
- package/dist/discovery/ast/resolve.js.map +1 -0
- package/dist/discovery/ast/side-effect-tracer.d.ts +4 -0
- package/dist/discovery/ast/side-effect-tracer.d.ts.map +1 -0
- package/dist/discovery/ast/side-effect-tracer.js +100 -0
- package/dist/discovery/ast/side-effect-tracer.js.map +1 -0
- package/dist/discovery/ast/source-files.d.ts +3 -0
- package/dist/discovery/ast/source-files.d.ts.map +1 -0
- package/dist/discovery/ast/source-files.js +37 -0
- package/dist/discovery/ast/source-files.js.map +1 -0
- package/dist/discovery/config-generator.d.ts +5 -0
- package/dist/discovery/config-generator.d.ts.map +1 -0
- package/dist/discovery/config-generator.js +71 -0
- package/dist/discovery/config-generator.js.map +1 -0
- package/dist/discovery/extractors/auth-detector.d.ts +3 -0
- package/dist/discovery/extractors/auth-detector.d.ts.map +1 -0
- package/dist/discovery/extractors/auth-detector.js +97 -0
- package/dist/discovery/extractors/auth-detector.js.map +1 -0
- package/dist/discovery/extractors/http-call-extractor.d.ts +5 -0
- package/dist/discovery/extractors/http-call-extractor.d.ts.map +1 -0
- package/dist/discovery/extractors/http-call-extractor.js +122 -0
- package/dist/discovery/extractors/http-call-extractor.js.map +1 -0
- package/dist/discovery/extractors/model-extractor.d.ts +4 -0
- package/dist/discovery/extractors/model-extractor.d.ts.map +1 -0
- package/dist/discovery/extractors/model-extractor.js +256 -0
- package/dist/discovery/extractors/model-extractor.js.map +1 -0
- package/dist/discovery/extractors/nestjs-extractor.d.ts +4 -0
- package/dist/discovery/extractors/nestjs-extractor.d.ts.map +1 -0
- package/dist/discovery/extractors/nestjs-extractor.js +156 -0
- package/dist/discovery/extractors/nestjs-extractor.js.map +1 -0
- package/dist/discovery/extractors/remix-extractor.d.ts +5 -0
- package/dist/discovery/extractors/remix-extractor.d.ts.map +1 -0
- package/dist/discovery/extractors/remix-extractor.js +118 -0
- package/dist/discovery/extractors/remix-extractor.js.map +1 -0
- package/dist/discovery/extractors/route-extractor.d.ts +4 -0
- package/dist/discovery/extractors/route-extractor.d.ts.map +1 -0
- package/dist/discovery/extractors/route-extractor.js +108 -0
- package/dist/discovery/extractors/route-extractor.js.map +1 -0
- package/dist/discovery/extractors/server-action-extractor.d.ts +4 -0
- package/dist/discovery/extractors/server-action-extractor.d.ts.map +1 -0
- package/dist/discovery/extractors/server-action-extractor.js +129 -0
- package/dist/discovery/extractors/server-action-extractor.js.map +1 -0
- package/dist/discovery/extractors/service-detector.d.ts +3 -0
- package/dist/discovery/extractors/service-detector.d.ts.map +1 -0
- package/dist/discovery/extractors/service-detector.js +114 -0
- package/dist/discovery/extractors/service-detector.js.map +1 -0
- package/dist/discovery/extractors/sveltekit-extractor.d.ts +5 -0
- package/dist/discovery/extractors/sveltekit-extractor.d.ts.map +1 -0
- package/dist/discovery/extractors/sveltekit-extractor.js +129 -0
- package/dist/discovery/extractors/sveltekit-extractor.js.map +1 -0
- package/dist/discovery/extractors/trpc-extractor.d.ts +4 -0
- package/dist/discovery/extractors/trpc-extractor.d.ts.map +1 -0
- package/dist/discovery/extractors/trpc-extractor.js +191 -0
- package/dist/discovery/extractors/trpc-extractor.js.map +1 -0
- package/dist/discovery/framework-detector.d.ts +9 -0
- package/dist/discovery/framework-detector.d.ts.map +1 -0
- package/dist/discovery/framework-detector.js +68 -0
- package/dist/discovery/framework-detector.js.map +1 -0
- package/dist/discovery/init.d.ts +4 -0
- package/dist/discovery/init.d.ts.map +1 -0
- package/dist/discovery/init.js +102 -0
- package/dist/discovery/init.js.map +1 -0
- package/dist/discovery/llm-analyzer.d.ts +32 -0
- package/dist/discovery/llm-analyzer.d.ts.map +1 -0
- package/dist/discovery/llm-analyzer.js +162 -0
- package/dist/discovery/llm-analyzer.js.map +1 -0
- package/dist/discovery/orchestrator.d.ts +4 -0
- package/dist/discovery/orchestrator.d.ts.map +1 -0
- package/dist/discovery/orchestrator.js +47 -0
- package/dist/discovery/orchestrator.js.map +1 -0
- package/dist/discovery/scanners/express-scanner.d.ts +3 -0
- package/dist/discovery/scanners/express-scanner.d.ts.map +1 -0
- package/dist/discovery/scanners/express-scanner.js +10 -0
- package/dist/discovery/scanners/express-scanner.js.map +1 -0
- package/dist/discovery/scanners/fastify-scanner.d.ts +3 -0
- package/dist/discovery/scanners/fastify-scanner.d.ts.map +1 -0
- package/dist/discovery/scanners/fastify-scanner.js +10 -0
- package/dist/discovery/scanners/fastify-scanner.js.map +1 -0
- package/dist/discovery/scanners/hono-scanner.d.ts +3 -0
- package/dist/discovery/scanners/hono-scanner.d.ts.map +1 -0
- package/dist/discovery/scanners/hono-scanner.js +10 -0
- package/dist/discovery/scanners/hono-scanner.js.map +1 -0
- package/dist/discovery/scanners/nestjs-scanner.d.ts +3 -0
- package/dist/discovery/scanners/nestjs-scanner.d.ts.map +1 -0
- package/dist/discovery/scanners/nestjs-scanner.js +10 -0
- package/dist/discovery/scanners/nestjs-scanner.js.map +1 -0
- package/dist/discovery/scanners/nextjs-scanner.d.ts +3 -0
- package/dist/discovery/scanners/nextjs-scanner.d.ts.map +1 -0
- package/dist/discovery/scanners/nextjs-scanner.js +15 -0
- package/dist/discovery/scanners/nextjs-scanner.js.map +1 -0
- package/dist/discovery/scanners/registry.d.ts +3 -0
- package/dist/discovery/scanners/registry.d.ts.map +1 -0
- package/dist/discovery/scanners/registry.js +22 -0
- package/dist/discovery/scanners/registry.js.map +1 -0
- package/dist/discovery/scanners/remix-scanner.d.ts +3 -0
- package/dist/discovery/scanners/remix-scanner.d.ts.map +1 -0
- package/dist/discovery/scanners/remix-scanner.js +13 -0
- package/dist/discovery/scanners/remix-scanner.js.map +1 -0
- package/dist/discovery/scanners/sveltekit-scanner.d.ts +3 -0
- package/dist/discovery/scanners/sveltekit-scanner.d.ts.map +1 -0
- package/dist/discovery/scanners/sveltekit-scanner.js +10 -0
- package/dist/discovery/scanners/sveltekit-scanner.js.map +1 -0
- package/dist/discovery/scanners/trpc-scanner.d.ts +3 -0
- package/dist/discovery/scanners/trpc-scanner.d.ts.map +1 -0
- package/dist/discovery/scanners/trpc-scanner.js +21 -0
- package/dist/discovery/scanners/trpc-scanner.js.map +1 -0
- package/dist/discovery/scanners/types.d.ts +18 -0
- package/dist/discovery/scanners/types.d.ts.map +1 -0
- package/dist/discovery/scanners/types.js +2 -0
- package/dist/discovery/scanners/types.js.map +1 -0
- package/dist/discovery/types.d.ts +60 -0
- package/dist/discovery/types.d.ts.map +1 -0
- package/dist/discovery/types.js +2 -0
- package/dist/discovery/types.js.map +1 -0
- package/dist/eval/diagnoser.d.ts +4 -0
- package/dist/eval/diagnoser.d.ts.map +1 -0
- package/dist/eval/diagnoser.js +97 -0
- package/dist/eval/diagnoser.js.map +1 -0
- package/dist/eval/judge.d.ts +8 -0
- package/dist/eval/judge.d.ts.map +1 -0
- package/dist/eval/judge.js +71 -0
- package/dist/eval/judge.js.map +1 -0
- package/dist/eval/loop-guard.d.ts +12 -0
- package/dist/eval/loop-guard.d.ts.map +1 -0
- package/dist/eval/loop-guard.js +45 -0
- package/dist/eval/loop-guard.js.map +1 -0
- package/dist/eval/refiner.d.ts +5 -0
- package/dist/eval/refiner.d.ts.map +1 -0
- package/dist/eval/refiner.js +149 -0
- package/dist/eval/refiner.js.map +1 -0
- package/dist/eval/runner.d.ts +27 -0
- package/dist/eval/runner.d.ts.map +1 -0
- package/dist/eval/runner.js +198 -0
- package/dist/eval/runner.js.map +1 -0
- package/dist/eval/scenario-generator.d.ts +5 -0
- package/dist/eval/scenario-generator.d.ts.map +1 -0
- package/dist/eval/scenario-generator.js +185 -0
- package/dist/eval/scenario-generator.js.map +1 -0
- package/dist/eval/scorer.d.ts +9 -0
- package/dist/eval/scorer.d.ts.map +1 -0
- package/dist/eval/scorer.js +189 -0
- package/dist/eval/scorer.js.map +1 -0
- package/dist/eval/types.d.ts +135 -0
- package/dist/eval/types.d.ts.map +1 -0
- package/dist/eval/types.js +37 -0
- package/dist/eval/types.js.map +1 -0
- package/dist/generator/agent-generator.d.ts +3 -0
- package/dist/generator/agent-generator.d.ts.map +1 -0
- package/dist/generator/agent-generator.js +29 -0
- package/dist/generator/agent-generator.js.map +1 -0
- package/dist/generator/generate.d.ts +5 -0
- package/dist/generator/generate.d.ts.map +1 -0
- package/dist/generator/generate.js +119 -0
- package/dist/generator/generate.js.map +1 -0
- package/dist/generator/handler-generator.d.ts +3 -0
- package/dist/generator/handler-generator.d.ts.map +1 -0
- package/dist/generator/handler-generator.js +66 -0
- package/dist/generator/handler-generator.js.map +1 -0
- package/dist/generator/index-generator.d.ts +3 -0
- package/dist/generator/index-generator.d.ts.map +1 -0
- package/dist/generator/index-generator.js +28 -0
- package/dist/generator/index-generator.js.map +1 -0
- package/dist/generator/merge-logic.d.ts +15 -0
- package/dist/generator/merge-logic.d.ts.map +1 -0
- package/dist/generator/merge-logic.js +52 -0
- package/dist/generator/merge-logic.js.map +1 -0
- package/dist/generator/router-generator.d.ts +3 -0
- package/dist/generator/router-generator.d.ts.map +1 -0
- package/dist/generator/router-generator.js +55 -0
- package/dist/generator/router-generator.js.map +1 -0
- package/dist/generator/schema-generator.d.ts +3 -0
- package/dist/generator/schema-generator.d.ts.map +1 -0
- package/dist/generator/schema-generator.js +58 -0
- package/dist/generator/schema-generator.js.map +1 -0
- package/dist/index.d.ts +4 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +2 -0
- package/dist/index.js.map +1 -0
- package/dist/runtime/agent-orchestrator.d.ts +19 -0
- package/dist/runtime/agent-orchestrator.d.ts.map +1 -0
- package/dist/runtime/agent-orchestrator.js +96 -0
- package/dist/runtime/agent-orchestrator.js.map +1 -0
- package/dist/runtime/agent-runner.d.ts +22 -0
- package/dist/runtime/agent-runner.d.ts.map +1 -0
- package/dist/runtime/agent-runner.js +59 -0
- package/dist/runtime/agent-runner.js.map +1 -0
- package/dist/runtime/config-loader.d.ts +12 -0
- package/dist/runtime/config-loader.d.ts.map +1 -0
- package/dist/runtime/config-loader.js +42 -0
- package/dist/runtime/config-loader.js.map +1 -0
- package/dist/runtime/conversation-manager.d.ts +16 -0
- package/dist/runtime/conversation-manager.d.ts.map +1 -0
- package/dist/runtime/conversation-manager.js +33 -0
- package/dist/runtime/conversation-manager.js.map +1 -0
- package/dist/runtime/sammy.d.ts +17 -0
- package/dist/runtime/sammy.d.ts.map +1 -0
- package/dist/runtime/sammy.js +97 -0
- package/dist/runtime/sammy.js.map +1 -0
- package/dist/runtime/tool-executor.d.ts +14 -0
- package/dist/runtime/tool-executor.d.ts.map +1 -0
- package/dist/runtime/tool-executor.js +58 -0
- package/dist/runtime/tool-executor.js.map +1 -0
- package/dist/runtime/tool-types.d.ts +26 -0
- package/dist/runtime/tool-types.d.ts.map +1 -0
- package/dist/runtime/tool-types.js +2 -0
- package/dist/runtime/tool-types.js.map +1 -0
- package/dist/runtime/types.d.ts +100 -0
- package/dist/runtime/types.d.ts.map +1 -0
- package/dist/runtime/types.js +2 -0
- package/dist/runtime/types.js.map +1 -0
- package/dist/runtime/zod-to-json-schema.d.ts +3 -0
- package/dist/runtime/zod-to-json-schema.d.ts.map +1 -0
- package/dist/runtime/zod-to-json-schema.js +82 -0
- package/dist/runtime/zod-to-json-schema.js.map +1 -0
- package/package.json +82 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"judge.js","sourceRoot":"","sources":["../../src/eval/judge.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,SAAS,CAAC;AAC9B,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAClC,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAIxB,MAAM,aAAa,GAAG,CAAC,CAAC,MAAM,CAAC;IAC7B,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC;IACpC,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC;IACnC,YAAY,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC;IACvC,WAAW,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC;IACtC,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC;IAC/B,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC;IAClC,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE;IACrB,WAAW,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC;CACjC,CAAC,CAAC;AAEH,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,QAAsB,EACtB,aAAqB,EACrB,SAAmE,EACnE,UAAsB,EACtB,KAAuB;IAEvB,MAAM,MAAM,GAAG;;;QAGT,QAAQ,CAAC,IAAI;gBACL,QAAQ,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC;qBAC/D,QAAQ,CAAC,gBAAgB,IAAI,mBAAmB;;;EAGnE,aAAa;;;EAGb,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,KAAK,CAAC,CAAC,IAAI,IAAI,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM;;;;;;;;;;;;;;;;;;;EAmBzG,CAAC;IAED,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,UAAU,CAAC;QACtC,IAAI,EAAE,UAAU,CAAC,KAAK,CAAC,KAAK;QAC5B,OAAO,EAAE,OAAO;QAChB,QAAQ,EAAE;YACR,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,oGAAoG,EAAE;YACjI,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE;SAClC;QACD,WAAW,EAAE,UAAU,CAAC,KAAK,CAAC,WAAW;QACzC,cAAc,EAAE,MAAM;KACvB,CAAC,CAAC;IAEH,IAAI,CAAC;QACH,OAAO,aAAa,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC;IAC3D,CAAC;IAAC,MAAM,CAAC;QACP,OAAO;YACL,SAAS,EAAE,CAAC,EAAE,QAAQ,EAAE,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,WAAW,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC;YACnE,OAAO,EAAE,CAAC
,EAAE,SAAS,EAAE,gCAAgC,EAAE,WAAW,EAAE,EAAE;SACzE,CAAC;IACJ,CAAC;AACH,CAAC;AAED,MAAM,UAAU,WAAW,CACzB,OAAqB,EACrB,QAAsB,EACtB,SAAiB,EACjB,WAAmB;IAEnB,MAAM,GAAG,GAAG,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,QAAQ,EAAE,MAAM,EAAE,gBAAgB,EAAE,aAAa,SAAS,EAAE,CAAC,CAAC;IACjG,EAAE,CAAC,SAAS,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IACvC,EAAE,CAAC,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,QAAQ,CAAC,EAAE,OAAO,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;AAC5F,CAAC"}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import type { LoopState, LoopGuardConfig, EvaluationScorecard, TerminationReason, DimensionName } from "./types.js";
|
|
2
|
+
/**
 * Outcome of a loop-guard check.
 *
 * `shouldContinue` tells the caller whether another iteration may run.
 * `reason` is optional; `checkGuards` sets it only when it stops the loop,
 * naming the guard that fired.
 */
export interface GuardResult {
|
|
3
|
+
shouldContinue: boolean;
|
|
4
|
+
reason?: TerminationReason;
|
|
5
|
+
}
|
|
6
|
+
/**
 * Evaluates the loop's stop conditions (iteration cap, diminishing returns,
 * token budget, escalations with no applied fixes) against `state`.
 *
 * @param state - Current loop state; it is only read.
 * @param config - Thresholds the guards compare against.
 * @returns `{ shouldContinue: true }` when no guard fires, otherwise
 *   `shouldContinue: false` plus the `reason` of the first guard that fired.
 */
export declare function checkGuards(state: LoopState, config: LoopGuardConfig): GuardResult;
|
|
7
|
+
/**
 * Adds every dimension marked `passed` in `scorecard.dimensions` to
 * `state.lockedDimensions`. Mutates `state`; dimensions are never removed here.
 *
 * @param state - Loop state whose `lockedDimensions` set is extended.
 * @param scorecard - Scorecard whose per-dimension `passed` flags are read.
 */
export declare function updateLockedDimensions(state: LoopState, scorecard: EvaluationScorecard): void;
|
|
8
|
+
/**
 * Reports whether a dimension in `state.lockedDimensions` no longer passes in
 * `scorecard`.
 *
 * Always returns `{ regressed: false }` when `config.enableRegressionRollback`
 * is falsy. Locked dimensions missing from the scorecard are ignored.
 *
 * @returns `regressed: true` plus the first failing locked `dimension`,
 *   or `regressed: false` when none is failing.
 */
export declare function checkRegression(state: LoopState, scorecard: EvaluationScorecard, config: LoopGuardConfig): {
|
|
9
|
+
regressed: boolean;
|
|
10
|
+
dimension?: DimensionName;
|
|
11
|
+
};
|
|
12
|
+
//# sourceMappingURL=loop-guard.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"loop-guard.d.ts","sourceRoot":"","sources":["../../src/eval/loop-guard.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,eAAe,EAAE,mBAAmB,EAAE,iBAAiB,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAEpH,MAAM,WAAW,WAAW;IAC1B,cAAc,EAAE,OAAO,CAAC;IACxB,MAAM,CAAC,EAAE,iBAAiB,CAAC;CAC5B;AAED,wBAAgB,WAAW,CAAC,KAAK,EAAE,SAAS,EAAE,MAAM,EAAE,eAAe,GAAG,WAAW,CA4BlF;AAGD,wBAAgB,sBAAsB,CACpC,KAAK,EAAE,SAAS,EAChB,SAAS,EAAE,mBAAmB,GAC7B,IAAI,CAMN;AAGD,wBAAgB,eAAe,CAC7B,KAAK,EAAE,SAAS,EAChB,SAAS,EAAE,mBAAmB,EAC9B,MAAM,EAAE,eAAe,GACtB;IAAE,SAAS,EAAE,OAAO,CAAC;IAAC,SAAS,CAAC,EAAE,aAAa,CAAA;CAAE,CAWnD"}
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
export function checkGuards(state, config) {
|
|
2
|
+
// Guard 1: Max iterations
|
|
3
|
+
if (state.currentIteration >= config.maxIterations) {
|
|
4
|
+
return { shouldContinue: false, reason: "max_iterations" };
|
|
5
|
+
}
|
|
6
|
+
// Guard 2: Diminishing returns
|
|
7
|
+
if (state.scoreHistory.length >= 2) {
|
|
8
|
+
const current = state.scoreHistory[state.scoreHistory.length - 1];
|
|
9
|
+
const previous = state.scoreHistory[state.scoreHistory.length - 2];
|
|
10
|
+
const improvement = (current.overallScore - previous.overallScore) * 100;
|
|
11
|
+
if (improvement < config.minImprovementPercent && improvement >= 0) {
|
|
12
|
+
return { shouldContinue: false, reason: "diminishing_returns" };
|
|
13
|
+
}
|
|
14
|
+
}
|
|
15
|
+
// Guard 5: Token budget
|
|
16
|
+
if (state.tokensUsed >= config.maxTokenBudget) {
|
|
17
|
+
return { shouldContinue: false, reason: "budget_exhausted" };
|
|
18
|
+
}
|
|
19
|
+
// Guard 6: All remaining issues need human intervention
|
|
20
|
+
if (state.escalations.length > 0 && state.appliedFixes.length === 0) {
|
|
21
|
+
return { shouldContinue: false, reason: "all_remaining_need_human" };
|
|
22
|
+
}
|
|
23
|
+
return { shouldContinue: true };
|
|
24
|
+
}
|
|
25
|
+
// Guard 3: Lock passing dimensions
|
|
26
|
+
export function updateLockedDimensions(state, scorecard) {
|
|
27
|
+
for (const [dim, score] of Object.entries(scorecard.dimensions)) {
|
|
28
|
+
if (score.passed) {
|
|
29
|
+
state.lockedDimensions.add(dim);
|
|
30
|
+
}
|
|
31
|
+
}
|
|
32
|
+
}
|
|
33
|
+
// Guard 4: Regression detection
|
|
34
|
+
export function checkRegression(state, scorecard, config) {
|
|
35
|
+
if (!config.enableRegressionRollback)
|
|
36
|
+
return { regressed: false };
|
|
37
|
+
for (const dim of state.lockedDimensions) {
|
|
38
|
+
const current = scorecard.dimensions[dim];
|
|
39
|
+
if (current && !current.passed) {
|
|
40
|
+
return { regressed: true, dimension: dim };
|
|
41
|
+
}
|
|
42
|
+
}
|
|
43
|
+
return { regressed: false };
|
|
44
|
+
}
|
|
45
|
+
//# sourceMappingURL=loop-guard.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"loop-guard.js","sourceRoot":"","sources":["../../src/eval/loop-guard.ts"],"names":[],"mappings":"AAOA,MAAM,UAAU,WAAW,CAAC,KAAgB,EAAE,MAAuB;IACnE,0BAA0B;IAC1B,IAAI,KAAK,CAAC,gBAAgB,IAAI,MAAM,CAAC,aAAa,EAAE,CAAC;QACnD,OAAO,EAAE,cAAc,EAAE,KAAK,EAAE,MAAM,EAAE,gBAAgB,EAAE,CAAC;IAC7D,CAAC;IAED,+BAA+B;IAC/B,IAAI,KAAK,CAAC,YAAY,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;QACnC,MAAM,OAAO,GAAG,KAAK,CAAC,YAAY,CAAC,KAAK,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAClE,MAAM,QAAQ,GAAG,KAAK,CAAC,YAAY,CAAC,KAAK,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QACnE,MAAM,WAAW,GAAG,CAAC,OAAO,CAAC,YAAY,GAAG,QAAQ,CAAC,YAAY,CAAC,GAAG,GAAG,CAAC;QAEzE,IAAI,WAAW,GAAG,MAAM,CAAC,qBAAqB,IAAI,WAAW,IAAI,CAAC,EAAE,CAAC;YACnE,OAAO,EAAE,cAAc,EAAE,KAAK,EAAE,MAAM,EAAE,qBAAqB,EAAE,CAAC;QAClE,CAAC;IACH,CAAC;IAED,wBAAwB;IACxB,IAAI,KAAK,CAAC,UAAU,IAAI,MAAM,CAAC,cAAc,EAAE,CAAC;QAC9C,OAAO,EAAE,cAAc,EAAE,KAAK,EAAE,MAAM,EAAE,kBAAkB,EAAE,CAAC;IAC/D,CAAC;IAED,wDAAwD;IACxD,IAAI,KAAK,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC,IAAI,KAAK,CAAC,YAAY,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACpE,OAAO,EAAE,cAAc,EAAE,KAAK,EAAE,MAAM,EAAE,0BAA0B,EAAE,CAAC;IACvE,CAAC;IAED,OAAO,EAAE,cAAc,EAAE,IAAI,EAAE,CAAC;AAClC,CAAC;AAED,mCAAmC;AACnC,MAAM,UAAU,sBAAsB,CACpC,KAAgB,EAChB,SAA8B;IAE9B,KAAK,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,UAAU,CAAC,EAAE,CAAC;QAChE,IAAI,KAAK,CAAC,MAAM,EAAE,CAAC;YACjB,KAAK,CAAC,gBAAgB,CAAC,GAAG,CAAC,GAAoB,CAAC,CAAC;QACnD,CAAC;IACH,CAAC;AACH,CAAC;AAED,gCAAgC;AAChC,MAAM,UAAU,eAAe,CAC7B,KAAgB,EAChB,SAA8B,EAC9B,MAAuB;IAEvB,IAAI,CAAC,MAAM,CAAC,wBAAwB;QAAE,OAAO,EAAE,SAAS,EAAE,KAAK,EAAE,CAAC;IAElE,KAAK,MAAM,GAAG,IAAI,KAAK,CAAC,gBAAgB,EAAE,CAAC;QACzC,MAAM,OAAO,GAAG,SAAS,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC;QAC1C,IAAI,OAAO,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC;YAC/B,OAAO,EAAE,SAAS,EAAE,IAAI,EAAE,SAAS,EAAE,GAAG,EAAE,CAAC;QAC7C,CAAC;IACH,CAAC;IAED,OAAO,EAAE,SAAS,EAAE,KAAK,EAAE,CAAC;AAC9B,CAAC"}
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
import type { SammyCloudClient } from "../cloud/sammy-cloud.js";
|
|
2
|
+
import type { SammyConfig } from "../runtime/types.js";
|
|
3
|
+
import type { DiagnosisReport, RefinementAction, AutoRefineConfig, LoopState } from "./types.js";
|
|
4
|
+
/**
 * Attempts automatic fixes for the root causes in `diagnosis`, in priority
 * order, skipping root-cause types that `autoRefine` does not enable.
 *
 * Side effects: applied actions are appended to `state.appliedFixes` and to
 * `.sammy/eval/refinement-history.json` under `projectRoot`; individual fixes
 * mutate `config` and rewrite `sammy.config.json`.
 *
 * @returns The actions applied during this call (possibly empty).
 */
export declare function applyRefinements(diagnosis: DiagnosisReport, config: SammyConfig, autoRefine: AutoRefineConfig, cloud: SammyCloudClient, state: LoopState, projectRoot: string): Promise<RefinementAction[]>;
|
|
5
|
+
//# sourceMappingURL=refiner.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"refiner.d.ts","sourceRoot":"","sources":["../../src/eval/refiner.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,yBAAyB,CAAC;AAChE,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC;AACvD,OAAO,KAAK,EACV,eAAe,EAAE,gBAAgB,EAAE,gBAAgB,EAAE,SAAS,EAC/D,MAAM,YAAY,CAAC;AAWpB,wBAAsB,gBAAgB,CACpC,SAAS,EAAE,eAAe,EAC1B,MAAM,EAAE,WAAW,EACnB,UAAU,EAAE,gBAAgB,EAC5B,KAAK,EAAE,gBAAgB,EACvB,KAAK,EAAE,SAAS,EAChB,WAAW,EAAE,MAAM,GAClB,OAAO,CAAC,gBAAgB,EAAE,CAAC,CA+C7B"}
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
import * as fs from "node:fs";
|
|
2
|
+
import * as path from "node:path";
|
|
3
|
+
import crypto from "node:crypto";
|
|
4
|
+
// Root-cause types in the order their fixes are attempted.
// Types that are not listed sort after all listed ones.
const PRIORITY_ORDER = [
    "SCHEMA_MISMATCH",
    "TOOL_DESCRIPTION",
    "PROMPT_CLARITY",
    "ROUTING_AMBIGUITY",
    "ARCHITECTURE",
    "MODEL_CAPABILITY",
];

/**
 * Applies automatic fixes for the root causes of a diagnosis report.
 *
 * Causes are processed in `PRIORITY_ORDER`. A cause is skipped when its type is
 * not enabled in `autoRefine`, when its hash is already in
 * `state.skippedFixes`, or when the same fix has already been applied twice
 * (in which case its hash is added to `state.skippedFixes`).
 *
 * Side effects: every applied action is pushed onto `state.appliedFixes` and
 * appended to `<projectRoot>/.sammy/eval/refinement-history.json`. The fixes
 * themselves mutate `config` (see `applyFix`).
 *
 * @param diagnosis   Report whose `rootCauses` are to be addressed.
 * @param config      Project configuration that fixes are applied to.
 * @param autoRefine  Per-type switches deciding which fixes are allowed.
 * @param cloud       Completion client handed through to `applyFix`.
 * @param state       Loop state (`appliedFixes`, `skippedFixes`, `currentIteration`).
 * @param projectRoot Directory containing the project's `.sammy` folder.
 * @returns The actions applied during this call.
 */
export async function applyRefinements(diagnosis, config, autoRefine, cloud, state, projectRoot) {
    const actions = [];
    const rank = (type) => {
        const idx = PRIORITY_ORDER.indexOf(type);
        return idx === -1 ? 99 : idx;
    };
    // Copy before sorting so the caller's diagnosis is not reordered.
    const sorted = [...diagnosis.rootCauses].sort((a, b) => rank(a.type) - rank(b.type));

    for (const cause of sorted) {
        if (!isAllowed(cause.type, autoRefine))
            continue;

        // Key stored in state.skippedFixes; the format is unchanged so entries
        // recorded by earlier runs keep matching.
        const fixHash = crypto.createHash("sha256")
            .update(`${cause.type}:${cause.tool || ""}:${cause.dimension}:${cause.suggestedFix}`)
            .digest("hex").slice(0, 16);
        if (state.skippedFixes.includes(fixHash))
            continue;

        // Previously the applied fixes were re-hashed with `target.type`
        // ("schema", "model", ...) in the slot where the cause uses its
        // dimension, so the hashes never matched and this cap never fired.
        // Compare the fields both records actually share instead.
        const existingSameFixCount = state.appliedFixes
            .filter((applied) => isSameFix(applied, cause)).length;
        if (existingSameFixCount >= 2) {
            state.skippedFixes.push(fixHash);
            continue;
        }

        const action = await applyFix(cause, config, cloud, state.currentIteration, projectRoot);
        if (action) {
            actions.push(action);
            state.appliedFixes.push(action);
        }
    }

    // Append this run's actions to the on-disk refinement history.
    const historyPath = path.join(projectRoot, ".sammy", "eval", "refinement-history.json");
    const history = loadRefinementHistory(historyPath);
    history.push(...actions);
    fs.mkdirSync(path.dirname(historyPath), { recursive: true });
    fs.writeFileSync(historyPath, JSON.stringify(history, null, 2));
    return actions;
}

/**
 * Tells whether an already-applied action is the same fix as `cause`:
 * same root-cause type and same suggested fix text, and, when the action
 * targeted a tool, the same tool. Agent-targeted actions carry no tool, so
 * they match on type and suggestion alone.
 */
function isSameFix(applied, cause) {
    if (applied.rootCause !== cause.type || applied.reason !== cause.suggestedFix) {
        return false;
    }
    const appliedTool = applied.target?.tool;
    return appliedTool === undefined || appliedTool === cause.tool;
}

/**
 * Reads the refinement history array from disk. A missing, unparsable or
 * non-array file yields an empty history so that a damaged log cannot abort a
 * run whose fixes have already been applied.
 */
function loadRefinementHistory(historyPath) {
    if (!fs.existsSync(historyPath)) {
        return [];
    }
    try {
        const parsed = JSON.parse(fs.readFileSync(historyPath, "utf-8"));
        if (Array.isArray(parsed)) {
            return parsed;
        }
        console.warn(`Ignoring refinement history at ${historyPath}: expected a JSON array.`);
    }
    catch (err) {
        const detail = err instanceof Error ? err.message : String(err);
        console.warn(`Ignoring unreadable refinement history at ${historyPath}: ${detail}`);
    }
    return [];
}
|
|
51
|
+
/**
 * Applies one automatic fix for a diagnosed root cause and persists the
 * updated configuration.
 *
 * Handled types: `SCHEMA_MISMATCH` (tool parameters), `TOOL_DESCRIPTION`,
 * `PROMPT_CLARITY` (agent system prompt) and `MODEL_CAPABILITY` (model tier
 * upgrade fast -> balanced -> powerful). Any other type is ignored.
 *
 * The fix is only applied when the target exists and the model returned usable
 * output: unparsable or non-object JSON for a schema, or blank text for a
 * description or prompt, leaves `config` untouched instead of overwriting the
 * existing value with nothing.
 *
 * @param cause       Root cause (`type`, `tool`, `dimension`, `detail`,
 *                    `suggestedFix`, `scenario`).
 * @param config      Project configuration; mutated in place on success.
 * @param cloud       Client whose `completion()` produces the improved text.
 * @param iteration   Loop iteration recorded on the returned action.
 * @param projectRoot Directory the configuration is saved to.
 * @returns The action record describing the change, or `null` when nothing
 *          was changed.
 */
async function applyFix(cause, config, cloud, iteration, projectRoot) {
    const timestamp = new Date().toISOString();

    // Common shape of every returned action; key order matches the history file.
    const record = (target, before, after) => ({
        iteration,
        timestamp,
        rootCause: cause.type,
        target,
        before,
        after,
        reason: cause.suggestedFix,
        triggeredBy: cause.scenario,
    });

    // First tool named `cause.tool`, searching domains in order.
    const findTool = () => {
        for (const domain of config.domains) {
            const match = domain.tools.find(t => t.name === cause.tool);
            if (match)
                return match;
        }
        return undefined;
    };

    // Trimmed completion text, or "" when the response carries no string content.
    const textOf = (response) => typeof response?.content === "string" ? response.content.trim() : "";

    switch (cause.type) {
        case "SCHEMA_MISMATCH": {
            const tool = findTool();
            if (!tool)
                return null;
            const before = JSON.stringify(tool.parameters);
            // Use LLM to suggest tighter constraints
            const response = await cloud.completion({
                tier: "balanced",
                purpose: "eval",
                messages: [
                    { role: "system", content: "You refine Zod parameter schemas. Return ONLY a JSON object with the improved parameters." },
                    { role: "user", content: `Current parameters: ${before}\nIssue: ${cause.detail}\nSuggestion: ${cause.suggestedFix}\n\nReturn the improved parameters JSON.` },
                ],
                temperature: 0,
                responseFormat: "json",
            });
            let improved;
            try {
                improved = JSON.parse(response.content);
            }
            catch {
                // Model did not return JSON; keep the current schema.
                return null;
            }
            // A JSON primitive or null is not a parameter schema.
            if (improved === null || typeof improved !== "object")
                return null;
            tool.parameters = improved;
            // Saved outside the parse guard so a failed write is not mistaken
            // for "no fix" after the in-memory schema was already replaced.
            saveConfig(config, projectRoot);
            return record({ type: "schema", tool: cause.tool }, before, JSON.stringify(improved));
        }
        case "TOOL_DESCRIPTION": {
            const tool = findTool();
            if (!tool)
                return null;
            const before = tool.description;
            const response = await cloud.completion({
                tier: "balanced",
                purpose: "eval",
                messages: [
                    { role: "system", content: "You improve tool descriptions for AI agents. Return ONLY the improved description text, nothing else." },
                    { role: "user", content: `Current: "${before}"\nIssue: ${cause.detail}\nImprove it to be clearer about when to use this tool and expected inputs.` },
                ],
                temperature: 0,
            });
            // Strip one wrapping quote from each end, as models often quote the text.
            const description = textOf(response).replace(/^["']|["']$/g, "");
            if (!description)
                return null;
            tool.description = description;
            saveConfig(config, projectRoot);
            return record({ type: "tool_description", tool: cause.tool }, before, tool.description);
        }
        case "PROMPT_CLARITY": {
            const agent = config.architecture.agents.find(a => a.name === cause.tool || a.domains.includes(cause.dimension));
            if (!agent)
                return null;
            const before = agent.systemPrompt || "";
            const response = await cloud.completion({
                tier: "balanced",
                purpose: "eval",
                messages: [
                    { role: "system", content: "You improve AI agent system prompts. Return ONLY the improved prompt text." },
                    { role: "user", content: `Current prompt: "${before}"\nIssue: ${cause.detail}\nAdd specific instructions or few-shot examples to address this.` },
                ],
                temperature: 0,
            });
            const prompt = textOf(response);
            if (!prompt)
                return null;
            agent.systemPrompt = prompt;
            saveConfig(config, projectRoot);
            return record({ type: "system_prompt", agent: agent.name }, before, agent.systemPrompt);
        }
        case "MODEL_CAPABILITY": {
            // NOTE(review): this matches when the dimension string contains a domain
            // name, whereas PROMPT_CLARITY requires an exact domain match. Confirm
            // which is intended.
            const agent = config.architecture.agents.find(a => a.name === cause.tool || a.domains.some(d => cause.dimension.includes(d)));
            if (!agent)
                return null;
            const before = agent.model;
            // A Map avoids accidental hits on inherited object keys.
            const upgrades = new Map([["fast", "balanced"], ["balanced", "powerful"]]);
            const newModel = upgrades.get(agent.model);
            if (!newModel)
                return null;
            agent.model = newModel;
            saveConfig(config, projectRoot);
            return record({ type: "model", agent: agent.name }, before, newModel);
        }
        default:
            return null;
    }
}
|
|
135
|
+
/**
 * Looks up whether fixes for a root-cause type are enabled.
 *
 * @param type   Root-cause type name.
 * @param config Auto-refine switches; the flag matching `type` is returned as-is.
 * @returns The value of the matching flag, or `false` for an unknown type.
 */
function isAllowed(type, config) {
    const flagByType = new Map([
        ["SCHEMA_MISMATCH", "schemas"],
        ["TOOL_DESCRIPTION", "toolDescriptions"],
        ["PROMPT_CLARITY", "systemPrompts"],
        ["ROUTING_AMBIGUITY", "routerPrompt"],
        ["ARCHITECTURE", "architecture"],
        ["MODEL_CAPABILITY", "modelUpgrades"],
    ]);
    const flag = flagByType.get(type);
    if (flag === undefined) {
        return false;
    }
    return config[flag];
}
|
|
146
|
+
function saveConfig(config, projectRoot) {
|
|
147
|
+
fs.writeFileSync(path.join(projectRoot, "sammy.config.json"), JSON.stringify(config, null, 2));
|
|
148
|
+
}
|
|
149
|
+
//# sourceMappingURL=refiner.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"refiner.js","sourceRoot":"","sources":["../../src/eval/refiner.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,SAAS,CAAC;AAC9B,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAClC,OAAO,MAAM,MAAM,aAAa,CAAC;AAOjC,MAAM,cAAc,GAAG;IACrB,iBAAiB;IACjB,kBAAkB;IAClB,gBAAgB;IAChB,mBAAmB;IACnB,cAAc;IACd,kBAAkB;CACV,CAAC;AAEX,MAAM,CAAC,KAAK,UAAU,gBAAgB,CACpC,SAA0B,EAC1B,MAAmB,EACnB,UAA4B,EAC5B,KAAuB,EACvB,KAAgB,EAChB,WAAmB;IAEnB,MAAM,OAAO,GAAuB,EAAE,CAAC;IAEvC,+BAA+B;IAC/B,MAAM,MAAM,GAAG,CAAC,GAAG,SAAS,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QACrD,MAAM,IAAI,GAAG,cAAc,CAAC,OAAO,CAAC,CAAC,CAAC,IAAW,CAAC,CAAC;QACnD,MAAM,IAAI,GAAG,cAAc,CAAC,OAAO,CAAC,CAAC,CAAC,IAAW,CAAC,CAAC;QACnD,OAAO,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;IAC/D,CAAC,CAAC,CAAC;IAEH,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,oCAAoC;QACpC,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,IAAI,EAAE,UAAU,CAAC;YAAE,SAAS;QAEjD,+BAA+B;QAC/B,MAAM,OAAO,GAAG,MAAM,CAAC,UAAU,CAAC,QAAQ,CAAC;aACxC,MAAM,CAAC,GAAG,KAAK,CAAC,IAAI,IAAI,KAAK,CAAC,IAAI,IAAI,EAAE,IAAI,KAAK,CAAC,SAAS,IAAI,KAAK,CAAC,YAAY,EAAE,CAAC;aACpF,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QAE9B,IAAI,KAAK,CAAC,YAAY,CAAC,QAAQ,CAAC,OAAO,CAAC;YAAE,SAAS;QAEnD,MAAM,oBAAoB,GAAG,KAAK,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CACzD,MAAM,CAAC,UAAU,CAAC,QAAQ,CAAC;aACxB,MAAM,CAAC,GAAG,CAAC,CAAC,SAAS,IAAI,CAAC,CAAC,MAAM,CAAC,IAAI,IAAI,EAAE,IAAI,CAAC,CAAC,MAAM,CAAC,IAAI,IAAI,CAAC,CAAC,MAAM,EAAE,CAAC;aAC5E,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,KAAK,OAAO,CAC1C,CAAC,MAAM,CAAC;QAET,IAAI,oBAAoB,IAAI,CAAC,EAAE,CAAC;YAC9B,KAAK,CAAC,YAAY,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACjC,SAAS;QACX,CAAC;QAED,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,CAAC,gBAAgB,EAAE,WAAW,CAAC,CAAC;QACzF,IAAI,MAAM,EAAE,CAAC;YACX,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACrB,KAAK,CAAC,YAAY,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAClC,CAAC;IACH,CAAC;IAED,0BAA0B;IA
C1B,MAAM,WAAW,GAAG,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,QAAQ,EAAE,MAAM,EAAE,yBAAyB,CAAC,CAAC;IACxF,MAAM,QAAQ,GAAG,EAAE,CAAC,UAAU,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,YAAY,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IACrG,QAAQ,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,CAAC;IAC1B,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,WAAW,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAC7D,EAAE,CAAC,aAAa,CAAC,WAAW,EAAE,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IAEjE,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,KAAK,UAAU,QAAQ,CACrB,KAAuC,EACvC,MAAmB,EACnB,KAAuB,EACvB,SAAiB,EACjB,WAAmB;IAEnB,MAAM,SAAS,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IAE3C,QAAQ,KAAK,CAAC,IAAI,EAAE,CAAC;QACnB,KAAK,iBAAiB,CAAC,CAAC,CAAC;YACvB,MAAM,MAAM,GAAG,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC;YAClF,MAAM,IAAI,GAAG,MAAM,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,KAAK,CAAC,IAAI,CAAC,CAAC;YAC5D,IAAI,CAAC,IAAI;gBAAE,OAAO,IAAI,CAAC;YAEvB,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;YAC/C,yCAAyC;YACzC,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,UAAU,CAAC;gBACtC,IAAI,EAAE,UAAU;gBAChB,OAAO,EAAE,MAAM;gBACf,QAAQ,EAAE;oBACR,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,2FAA2F,EAAE;oBACxH,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,uBAAuB,MAAM,YAAY,KAAK,CAAC,MAAM,iBAAiB,KAAK,CAAC,YAAY,0CAA0C,EAAE;iBAC9J;gBACD,WAAW,EAAE,CAAC;gBACd,cAAc,EAAE,MAAM;aACvB,CAAC,CAAC;YAEH,IAAI,CAAC;gBACH,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;gBAC9C,IAAI,CAAC,UAAU,GAAG,QAAQ,CAAC;gBAC3B,UAAU,CAAC,MAAM,EAAE,WAAW,CAAC,CAAC;gBAChC,OAAO,EAAE,SAAS,EAAE,SAAS,EAAE,SAAS,EAAE,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,KAAK,CAAC,IAAI,EAAE,EAAE,MAAM,EAAE,KAAK,EAAE,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,EAAE,MAAM,EAAE,KAAK,CAAC,YAAY,EAAE,WAAW,EAAE,KAAK,CAAC,QAAQ,EAAE,CAAC;YACzM,CAAC;YAAC,MAAM,CAAC;gBAAC,OAAO,IAAI,CAAC;YAAC,CAAC;QAC1B,CAAC;QAED,KAAK,kBAAkB,CAAC,CAAC,CAAC;YACxB,MAAM,MAAM,GAAG,MAAM,
CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC;YAClF,MAAM,IAAI,GAAG,MAAM,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,KAAK,CAAC,IAAI,CAAC,CAAC;YAC5D,IAAI,CAAC,IAAI;gBAAE,OAAO,IAAI,CAAC;YAEvB,MAAM,MAAM,GAAG,IAAI,CAAC,WAAW,CAAC;YAChC,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,UAAU,CAAC;gBACtC,IAAI,EAAE,UAAU;gBAChB,OAAO,EAAE,MAAM;gBACf,QAAQ,EAAE;oBACR,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,uGAAuG,EAAE;oBACpI,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,aAAa,MAAM,aAAa,KAAK,CAAC,MAAM,6EAA6E,EAAE;iBACrJ;gBACD,WAAW,EAAE,CAAC;aACf,CAAC,CAAC;YAEH,IAAI,CAAC,WAAW,GAAG,QAAQ,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,OAAO,CAAC,cAAc,EAAE,EAAE,CAAC,CAAC;YACvE,UAAU,CAAC,MAAM,EAAE,WAAW,CAAC,CAAC;YAChC,OAAO,EAAE,SAAS,EAAE,SAAS,EAAE,SAAS,EAAE,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,IAAI,EAAE,kBAAkB,EAAE,IAAI,EAAE,KAAK,CAAC,IAAI,EAAE,EAAE,MAAM,EAAE,KAAK,EAAE,IAAI,CAAC,WAAW,EAAE,MAAM,EAAE,KAAK,CAAC,YAAY,EAAE,WAAW,EAAE,KAAK,CAAC,QAAQ,EAAE,CAAC;QAC3M,CAAC;QAED,KAAK,gBAAgB,CAAC,CAAC,CAAC;YACtB,MAAM,KAAK,GAAG,MAAM,CAAC,YAAY,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,KAAK,CAAC,IAAI,IAAI,CAAC,CAAC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC;YACjH,IAAI,CAAC,KAAK;gBAAE,OAAO,IAAI,CAAC;YAExB,MAAM,MAAM,GAAG,KAAK,CAAC,YAAY,IAAI,EAAE,CAAC;YACxC,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,UAAU,CAAC;gBACtC,IAAI,EAAE,UAAU;gBAChB,OAAO,EAAE,MAAM;gBACf,QAAQ,EAAE;oBACR,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,4EAA4E,EAAE;oBACzG,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,oBAAoB,MAAM,aAAa,KAAK,CAAC,MAAM,mEAAmE,EAAE;iBAClJ;gBACD,WAAW,EAAE,CAAC;aACf,CAAC,CAAC;YAEH,KAAK,CAAC,YAAY,GAAG,QAAQ,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;YAC7C,UAAU,CAAC,MAAM,EAAE,WAAW,CAAC,CAAC;YAChC,OAAO,EAAE,SAAS,EAAE,SAAS,EAAE,SAAS,EAAE,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,IAAI,EAAE,eAAe,EAAE,KAAK,EAAE,KAAK,CAAC,IAAI,EAAE,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,CAAC,YAAY,EAAE,MAAM,EAAE,KAAK,CAAC,YAAY,EAAE,WAAW,EAAE,KAAK,CAAC,QAAQ,EAAE,CAAC;QAC3M,CAAC;QAED,KAAK,kBAAkB,CAAC,CAAC,CAAC;Y
ACxB,MAAM,KAAK,GAAG,MAAM,CAAC,YAAY,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,KAAK,CAAC,IAAI,IAAI,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,KAAK,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YAC9H,IAAI,CAAC,KAAK;gBAAE,OAAO,IAAI,CAAC;YAExB,MAAM,MAAM,GAAG,KAAK,CAAC,KAAK,CAAC;YAC3B,MAAM,QAAQ,GAA2B,EAAE,IAAI,EAAE,UAAU,EAAE,QAAQ,EAAE,UAAU,EAAE,CAAC;YACpF,MAAM,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;YACvC,IAAI,CAAC,QAAQ;gBAAE,OAAO,IAAI,CAAC;YAE3B,KAAK,CAAC,KAAK,GAAG,QAAe,CAAC;YAC9B,UAAU,CAAC,MAAM,EAAE,WAAW,CAAC,CAAC;YAChC,OAAO,EAAE,SAAS,EAAE,SAAS,EAAE,SAAS,EAAE,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,CAAC,IAAI,EAAE,EAAE,MAAM,EAAE,KAAK,EAAE,QAAQ,EAAE,MAAM,EAAE,KAAK,CAAC,YAAY,EAAE,WAAW,EAAE,KAAK,CAAC,QAAQ,EAAE,CAAC;QACzL,CAAC;QAED;YACE,OAAO,IAAI,CAAC;IAChB,CAAC;AACH,CAAC;AAED,SAAS,SAAS,CAAC,IAAY,EAAE,MAAwB;IACvD,QAAQ,IAAI,EAAE,CAAC;QACb,KAAK,iBAAiB,CAAC,CAAC,OAAO,MAAM,CAAC,OAAO,CAAC;QAC9C,KAAK,kBAAkB,CAAC,CAAC,OAAO,MAAM,CAAC,gBAAgB,CAAC;QACxD,KAAK,gBAAgB,CAAC,CAAC,OAAO,MAAM,CAAC,aAAa,CAAC;QACnD,KAAK,mBAAmB,CAAC,CAAC,OAAO,MAAM,CAAC,YAAY,CAAC;QACrD,KAAK,cAAc,CAAC,CAAC,OAAO,MAAM,CAAC,YAAY,CAAC;QAChD,KAAK,kBAAkB,CAAC,CAAC,OAAO,MAAM,CAAC,aAAa,CAAC;QACrD,OAAO,CAAC,CAAC,OAAO,KAAK,CAAC;IACxB,CAAC;AACH,CAAC;AAED,SAAS,UAAU,CAAC,MAAmB,EAAE,WAAmB;IAC1D,EAAE,CAAC,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,mBAAmB,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;AACjG,CAAC"}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
 * Options accepted by `EvalRunner.run`. Every field is optional.
 */
export interface EvalOptions {
    // --- Loop limits -------------------------------------------------------

    /** When truthy, replaces `guards.maxIterations` of the evaluation config. */
    maxIterations?: number;
    /** When truthy, replaces `guards.maxTokenBudget` of the evaluation config. */
    budget?: number;
    /** Iteration number the loop starts counting from; the runner falls back to 1. */
    fromIteration?: number;

    // --- Scope -------------------------------------------------------------

    /** Restricts the evaluation to the agent with this name. */
    agent?: string;
    /**
     * Dimension names to evaluate.
     * NOTE(review): the compiled `run` implementation does not read this
     * field directly — confirm where (or whether) it is consumed.
     */
    dimensions?: string[];

    // --- Modes -------------------------------------------------------------

    /** Stop after scoring instead of diagnosing and refining (reason `no_refine`). */
    noRefine?: boolean;
    /** Diagnose failures but skip applying refinements. */
    dryRun?: boolean;
    /** Return right after scenario generation, without scoring anything. */
    generateOnly?: boolean;
    /** Exit the process once the run finishes: code 0 when passed, 1 otherwise. */
    ci?: boolean;
}
|
|
12
|
+
/**
 * Summary returned by `EvalRunner.run` and written to
 * `.sammy/eval/report.json`.
 */
export interface EvalReport {
    /** Iteration number reached when the loop stopped; 0 for generate-only runs. */
    iterations: number;
    /** True only when the run ended because every dimension passed. */
    passed: boolean;
    /**
     * Why the loop ended, e.g. `all_passed`, `no_refine`, `generate_only`,
     * a reason supplied by the loop guards, or `unknown`.
     */
    terminationReason: string;
    /** Tokens accumulated across all scoring passes. */
    tokensUsed: number;
    /** Number of refinements recorded as applied when the run ended. */
    refinementsApplied: number;
    /** Number of issues escalated for manual fixing. */
    escalations: number;
    /** Score and pass flag of each dimension, taken from the last scorecard. */
    finalScores: {
        [dimension: string]: {
            score: number;
            passed: boolean;
        };
    };
}
|
|
24
|
+
/**
 * Entry point of the evaluation loop.
 */
export declare class EvalRunner {
    /**
     * Runs an evaluation with the given options.
     *
     * @param options - Switches and limits for this run.
     * @returns The final report of the run.
     */
    run(options: EvalOptions): Promise<EvalReport>;
}
|
|
27
|
+
//# sourceMappingURL=runner.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"runner.d.ts","sourceRoot":"","sources":["../../src/eval/runner.ts"],"names":[],"mappings":"AAiBA,MAAM,WAAW,WAAW;IAC1B,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;IACtB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,EAAE,CAAC,EAAE,OAAO,CAAC;IACb,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,MAAM,CAAC,EAAE,OAAO,CAAC;CAClB;AAED,MAAM,WAAW,UAAU;IACzB,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,OAAO,CAAC;IAChB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,UAAU,EAAE,MAAM,CAAC;IACnB,kBAAkB,EAAE,MAAM,CAAC;IAC3B,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,MAAM,CAAC,MAAM,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,OAAO,CAAA;KAAE,CAAC,CAAC;CACjE;AAED,qBAAa,UAAU;IACf,GAAG,CAAC,OAAO,EAAE,WAAW,GAAG,OAAO,CAAC,UAAU,CAAC;CA4NrD"}
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
import * as fs from "node:fs";
|
|
2
|
+
import * as path from "node:path";
|
|
3
|
+
import chalk from "chalk";
|
|
4
|
+
import ora from "ora";
|
|
5
|
+
import { loadRuntimeConfig } from "../runtime/config-loader.js";
|
|
6
|
+
import { ToolExecutor } from "../runtime/tool-executor.js";
|
|
7
|
+
import { generateScenarios } from "./scenario-generator.js";
|
|
8
|
+
import { scoreIteration } from "./scorer.js";
|
|
9
|
+
import { diagnoseFailures } from "./diagnoser.js";
|
|
10
|
+
import { applyRefinements } from "./refiner.js";
|
|
11
|
+
import { checkGuards, updateLockedDimensions, checkRegression } from "./loop-guard.js";
|
|
12
|
+
import { DEFAULT_EVAL_CONFIG, } from "./types.js";
|
|
13
|
+
/**
 * Drives the evaluation loop: score the generated scenarios, diagnose the
 * failures, apply refinements, and repeat until every dimension passes or a
 * loop guard stops the run.
 */
export class EvalRunner {
    /**
     * Runs one evaluation session against the project in the current working
     * directory and writes the result to `.sammy/eval/report.json`.
     *
     * Side effects: prints progress to stdout, may modify project files
     * through `applyRefinements`, and calls `process.exit` when the runtime
     * config cannot be loaded (code 1) or when `options.ci` is set
     * (code 0 on pass, 1 otherwise).
     *
     * @param options Run options (see `EvalOptions` in runner.d.ts).
     * @returns The final report; `iterations` is the number of the last
     *   iteration that was actually scored (0 for generate-only runs).
     */
    async run(options) {
        const projectRoot = process.cwd();
        console.log(chalk.bold("\n Sammy Evaluation\n"));
        // Load runtime config
        const loadSpinner = ora("Loading configuration...").start();
        let runtimeConfig;
        try {
            runtimeConfig = await loadRuntimeConfig({});
            loadSpinner.succeed("Configuration loaded");
        }
        catch (err) {
            // Anything can be thrown; only Error instances are guaranteed a message.
            const message = err instanceof Error ? err.message : String(err);
            loadSpinner.fail(`Failed to load config: ${message}`);
            process.exit(1);
        }
        const config = runtimeConfig.config;
        // The top-level spread is shallow, so `guards` gets its own copy here:
        // the overrides below must not leak into DEFAULT_EVAL_CONFIG or into
        // the loaded project config.
        const evalConfig = {
            ...DEFAULT_EVAL_CONFIG,
            ...config.evaluation,
            guards: {
                ...DEFAULT_EVAL_CONFIG.guards,
                ...config.evaluation?.guards,
            },
        };
        if (options.maxIterations)
            evalConfig.guards.maxIterations = options.maxIterations;
        if (options.budget)
            evalConfig.guards.maxTokenBudget = options.budget;
        // Build agent configs
        const toolExecutor = new ToolExecutor(runtimeConfig.tools, {}, config.settings?.toolTimeout || 30000);
        const agentConfigs = new Map();
        for (const agent of config.architecture.agents) {
            // An agent gets every tool whose domain is one of the agent's domains.
            const agentTools = runtimeConfig.tools.filter(t => agent.domains.some(d => t.domain === d));
            const domainDescs = config.domains
                .filter(d => agent.domains.includes(d.name))
                .map(d => `${d.name}: ${d.description}`)
                .join("; ");
            agentConfigs.set(agent.name, {
                name: agent.name,
                systemPrompt: agent.systemPrompt || `You specialize in: ${domainDescs}. Use the available tools.`,
                tools: agentTools,
                model: agent.model,
                maxToolCalls: agent.maxToolCalls,
            });
        }
        // Filter to specific agent if requested
        if (options.agent) {
            const kept = agentConfigs.get(options.agent);
            if (!kept) {
                // Surface the mismatch instead of silently evaluating nothing.
                const known = [...agentConfigs.keys()].join(", ") || "none";
                console.log(chalk.yellow(` ⚠ Unknown agent "${options.agent}" (configured: ${known}); no agents will be evaluated`));
            }
            agentConfigs.clear();
            if (kept)
                agentConfigs.set(options.agent, kept);
        }
        // Generate scenarios
        const scenarioSpinner = ora("Generating test scenarios...").start();
        const scenarios = await generateScenarios(config, evalConfig, runtimeConfig.cloudClient, projectRoot);
        scenarioSpinner.succeed(`Generated ${scenarios.length} test scenarios`);
        if (options.generateOnly) {
            console.log(chalk.green(`\n ✓ Scenarios saved to .sammy/eval/scenarios/generated.json\n`));
            return {
                iterations: 0, passed: false, terminationReason: "generate_only",
                tokensUsed: 0, refinementsApplied: 0, escalations: 0, finalScores: {},
            };
        }
        // Initialize loop state
        const state = {
            currentIteration: options.fromIteration || 1,
            tokensUsed: 0,
            scoreHistory: [],
            lockedDimensions: new Set(),
            appliedFixes: [],
            skippedFixes: [],
            escalations: [],
            terminationReason: undefined,
        };
        let lastScorecard = null;
        // Number of the last iteration that was actually scored.
        // `state.currentIteration` is advanced before the final guard check,
        // so it can be one past this value when the loop ends.
        let lastIterationRun = 0;
        // Main evaluation loop
        while (true) {
            lastIterationRun = state.currentIteration;
            console.log(chalk.bold(`\n ITERATION ${state.currentIteration} / ${evalConfig.guards.maxIterations}`));
            console.log(" " + "─".repeat(50));
            // Score
            const evalSpinner = ora(`Running ${scenarios.length} scenarios...`).start();
            const { scorecard, tokensUsed } = await scoreIteration(scenarios, agentConfigs, toolExecutor, runtimeConfig.cloudClient, evalConfig, state.currentIteration, projectRoot);
            state.tokensUsed += tokensUsed;
            state.scoreHistory.push(scorecard);
            lastScorecard = scorecard;
            evalSpinner.succeed("Scenarios scored");
            // Print scores
            for (const [dim, score] of Object.entries(scorecard.dimensions)) {
                const icon = score.passed ? chalk.green("✓") : chalk.red("✗");
                const label = dim.padEnd(25);
                // responseQuality is shown on a 0-10 scale; every other
                // dimension is shown as a percentage.
                const pct = dim === "responseQuality"
                    ? `${score.score.toFixed(1)} / 10`
                    : `${(score.score * 100).toFixed(1)}%`;
                const extra = !score.passed ? chalk.dim(` [needs ≥${score.threshold}]`) : "";
                console.log(` ${icon} ${label} ${pct}${extra}`);
            }
            // Check if all pass
            if (scorecard.overallPassed) {
                state.terminationReason = "all_passed";
                break;
            }
            // Check guards
            const guard = checkGuards(state, evalConfig.guards);
            if (!guard.shouldContinue) {
                state.terminationReason = guard.reason;
                break;
            }
            // No-refine mode
            if (options.noRefine) {
                state.terminationReason = "no_refine";
                break;
            }
            // Diagnose
            const diagSpinner = ora("Diagnosing failures...").start();
            const diagnosis = await diagnoseFailures(scorecard, runtimeConfig.cloudClient, evalConfig, state.currentIteration, projectRoot);
            diagSpinner.succeed(`Found ${diagnosis.rootCauses.length} root causes, ${diagnosis.escalations.length} escalations`);
            state.escalations.push(...diagnosis.escalations);
            // Print diagnosis summary
            for (const cause of diagnosis.rootCauses.slice(0, 5)) {
                console.log(chalk.dim(` ├── ${cause.type} — ${cause.detail.slice(0, 60)}`));
            }
            // Refine
            if (!options.dryRun) {
                const refineSpinner = ora("Applying refinements...").start();
                const actions = await applyRefinements(diagnosis, config, evalConfig.autoRefine, runtimeConfig.cloudClient, state, projectRoot);
                refineSpinner.succeed(`Applied ${actions.length} refinements`);
                for (const action of actions) {
                    console.log(chalk.dim(` ├── [${action.target.type}] ${action.reason.slice(0, 50)}`));
                }
            }
            // Update locked dimensions
            updateLockedDimensions(state, scorecard);
            // Check regression
            const regression = checkRegression(state, scorecard, evalConfig.guards);
            if (regression.regressed) {
                console.log(chalk.yellow(` ⚠ Regression detected in ${regression.dimension}`));
                if (state.appliedFixes.length > 0) {
                    console.log(chalk.dim(" Rolling back last refinement..."));
                    // NOTE(review): this only drops the bookkeeping entry; nothing
                    // here restores the config the refinement changed. It also runs
                    // after this iteration's refinements, so the popped entry may
                    // not be the fix that caused the regression — confirm intent.
                    state.appliedFixes.pop();
                }
            }
            state.currentIteration++;
            // Final guard check
            const postGuard = checkGuards(state, evalConfig.guards);
            if (!postGuard.shouldContinue) {
                state.terminationReason = postGuard.reason;
                break;
            }
        }
        // Final report
        const passed = state.terminationReason === "all_passed";
        const report = {
            iterations: lastIterationRun,
            passed,
            terminationReason: state.terminationReason || "unknown",
            tokensUsed: state.tokensUsed,
            refinementsApplied: state.appliedFixes.length,
            escalations: state.escalations.length,
            finalScores: lastScorecard
                ? Object.fromEntries(Object.entries(lastScorecard.dimensions).map(([k, v]) => [k, { score: v.score, passed: v.passed }]))
                : {},
        };
        // Save report
        const reportPath = path.join(projectRoot, ".sammy", "eval", "report.json");
        fs.mkdirSync(path.dirname(reportPath), { recursive: true });
        fs.writeFileSync(reportPath, JSON.stringify(report, null, 2));
        // Print summary
        console.log(chalk.bold(`\n ${"═".repeat(50)}`));
        console.log(chalk.bold(` RESULT: ${passed ? chalk.green("AGENTS READY ✓") : chalk.yellow("NEEDS ATTENTION ⚠")}`));
        console.log();
        console.log(chalk.dim(` Terminated: ${state.terminationReason} (${lastIterationRun} iterations)`));
        console.log(chalk.dim(` Refinements: ${state.appliedFixes.length} applied`));
        console.log(chalk.dim(` Tokens used: ${(state.tokensUsed / 1000).toFixed(0)}K`));
        console.log(chalk.dim(` Escalations: ${state.escalations.length}`));
        console.log(chalk.bold(` ${"═".repeat(50)}\n`));
        if (state.escalations.length > 0) {
            console.log(" Manual fixes needed:");
            for (const esc of state.escalations) {
                console.log(chalk.dim(` ├── ${esc.filePath}: ${esc.detail}`));
                console.log(chalk.dim(` │ Suggestion: ${esc.suggestion}`));
            }
            console.log();
        }
        // CI mode: exit with appropriate code
        if (options.ci) {
            process.exit(passed ? 0 : 1);
        }
        return report;
    }
}
|
|
198
|
+
//# sourceMappingURL=runner.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"runner.js","sourceRoot":"","sources":["../../src/eval/runner.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,SAAS,CAAC;AAC9B,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAClC,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,GAAG,MAAM,KAAK,CAAC;AACtB,OAAO,EAAE,iBAAiB,EAAE,MAAM,6BAA6B,CAAC;AAChE,OAAO,EAAE,YAAY,EAAE,MAAM,6BAA6B,CAAC;AAE3D,OAAO,EAAE,iBAAiB,EAAE,MAAM,yBAAyB,CAAC;AAC5D,OAAO,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAC7C,OAAO,EAAE,gBAAgB,EAAE,MAAM,gBAAgB,CAAC;AAClD,OAAO,EAAE,gBAAgB,EAAE,MAAM,cAAc,CAAC;AAChD,OAAO,EAAE,WAAW,EAAE,sBAAsB,EAAE,eAAe,EAAE,MAAM,iBAAiB,CAAC;AACvF,OAAO,EACL,mBAAmB,GAEpB,MAAM,YAAY,CAAC;AAwBpB,MAAM,OAAO,UAAU;IACrB,KAAK,CAAC,GAAG,CAAC,OAAoB;QAC5B,MAAM,WAAW,GAAG,OAAO,CAAC,GAAG,EAAE,CAAC;QAElC,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,wBAAwB,CAAC,CAAC,CAAC;QAElD,sBAAsB;QACtB,MAAM,WAAW,GAAG,GAAG,CAAC,0BAA0B,CAAC,CAAC,KAAK,EAAE,CAAC;QAC5D,IAAI,aAAa,CAAC;QAClB,IAAI,CAAC;YACH,aAAa,GAAG,MAAM,iBAAiB,CAAC,EAAE,CAAC,CAAC;YAC5C,WAAW,CAAC,OAAO,CAAC,sBAAsB,CAAC,CAAC;QAC9C,CAAC;QAAC,OAAO,GAAQ,EAAE,CAAC;YAClB,WAAW,CAAC,IAAI,CAAC,0BAA0B,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC;YAC1D,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QAED,MAAM,MAAM,GAAG,aAAa,CAAC,MAAM,CAAC;QACpC,MAAM,UAAU,GAAe;YAC7B,GAAG,mBAAmB;YACtB,GAAI,MAAc,CAAC,UAAU;SAC9B,CAAC;QAEF,IAAI,OAAO,CAAC,aAAa;YAAE,UAAU,CAAC,MAAM,CAAC,aAAa,GAAG,OAAO,CAAC,aAAa,CAAC;QACnF,IAAI,OAAO,CAAC,MAAM;YAAE,UAAU,CAAC,MAAM,CAAC,cAAc,GAAG,OAAO,CAAC,MAAM,CAAC;QAEtE,sBAAsB;QACtB,MAAM,YAAY,GAAG,IAAI,YAAY,CAAC,aAAa,CAAC,KAAK,EAAE,EAAE,EAAE,MAAM,CAAC,QAAQ,EAAE,WAAW,IAAI,KAAK,CAAC,CAAC;QACtG,MAAM,YAAY,GAAG,IAAI,GAAG,EAA0B,CAAC;QAEvD,KAAK,MAAM,KAAK,IAAI,MAAM,CAAC,YAAY,CAAC,MAAM,EAAE,CAAC;YAC/C,MAAM,UAAU,GAAG,aAAa,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAChD,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC,CACxC,CAAC;YACF,MAAM,WAAW,GAAG,MAAM,CAAC,OAAO;iBAC/B,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;iBAC3C,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,IAAI,KAAK,CAAC,CAA
C,WAAW,EAAE,CAAC;iBACvC,IAAI,CAAC,IAAI,CAAC,CAAC;YAEd,YAAY,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,EAAE;gBAC3B,IAAI,EAAE,KAAK,CAAC,IAAI;gBAChB,YAAY,EAAE,KAAK,CAAC,YAAY,IAAI,sBAAsB,WAAW,4BAA4B;gBACjG,KAAK,EAAE,UAAU;gBACjB,KAAK,EAAE,KAAK,CAAC,KAAK;gBAClB,YAAY,EAAE,KAAK,CAAC,YAAY;aACjC,CAAC,CAAC;QACL,CAAC;QAED,wCAAwC;QACxC,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;YAClB,MAAM,IAAI,GAAG,YAAY,CAAC,GAAG,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;YAC7C,YAAY,CAAC,KAAK,EAAE,CAAC;YACrB,IAAI,IAAI;gBAAE,YAAY,CAAC,GAAG,CAAC,OAAO,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC;QAClD,CAAC;QAED,qBAAqB;QACrB,MAAM,eAAe,GAAG,GAAG,CAAC,8BAA8B,CAAC,CAAC,KAAK,EAAE,CAAC;QACpE,MAAM,SAAS,GAAG,MAAM,iBAAiB,CAAC,MAAM,EAAE,UAAU,EAAE,aAAa,CAAC,WAAW,EAAE,WAAW,CAAC,CAAC;QACtG,eAAe,CAAC,OAAO,CAAC,aAAa,SAAS,CAAC,MAAM,iBAAiB,CAAC,CAAC;QAExE,IAAI,OAAO,CAAC,YAAY,EAAE,CAAC;YACzB,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC,iEAAiE,CAAC,CAAC,CAAC;YAC5F,OAAO;gBACL,UAAU,EAAE,CAAC,EAAE,MAAM,EAAE,KAAK,EAAE,iBAAiB,EAAE,eAAe;gBAChE,UAAU,EAAE,CAAC,EAAE,kBAAkB,EAAE,CAAC,EAAE,WAAW,EAAE,CAAC,EAAE,WAAW,EAAE,EAAE;aACtE,CAAC;QACJ,CAAC;QAED,wBAAwB;QACxB,MAAM,KAAK,GAAc;YACvB,gBAAgB,EAAE,OAAO,CAAC,aAAa,IAAI,CAAC;YAC5C,UAAU,EAAE,CAAC;YACb,YAAY,EAAE,EAAE;YAChB,gBAAgB,EAAE,IAAI,GAAG,EAAE;YAC3B,YAAY,EAAE,EAAE;YAChB,YAAY,EAAE,EAAE;YAChB,WAAW,EAAE,EAAE;SAChB,CAAC;QAEF,IAAI,aAAa,GAA+B,IAAI,CAAC;QAErD,uBAAuB;QACvB,OAAO,IAAI,EAAE,CAAC;YACZ,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,iBAAiB,KAAK,CAAC,gBAAgB,MAAM,UAAU,CAAC,MAAM,CAAC,aAAa,EAAE,CAAC,CAAC,CAAC;YACxG,OAAO,CAAC,GAAG,CAAC,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;YAEnC,QAAQ;YACR,MAAM,WAAW,GAAG,GAAG,CAAC,WAAW,SAAS,CAAC,MAAM,eAAe,CAAC,CAAC,KAAK,EAAE,CAAC;YAC5E,MAAM,EAAE,SAAS,EAAE,UAAU,EAAE,GAAG,MAAM,cAAc,CACpD,SAAS,EAAE,YAAY,EAAE,YAAY,EAAE,aAAa,CAAC,WAAW,EAChE,UAAU,EAAE,KAAK,CAAC,gBAAgB,EAAE,WAAW,CAChD,CAAC;YACF,KAAK,CAAC,UAAU,IAAI,UAAU,CAAC;YAC/B,KAAK,CAAC,YAAY,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YACnC,aAAa,GAAG,SAAS,CAAC;YAC1B,WAAW,CAAC,OAAO,CAAC,kBAAkB,CAAC,CAAC;YAExC,eAAe;YACf,KAAK,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OA
AO,CAAC,SAAS,CAAC,UAAU,CAAC,EAAE,CAAC;gBAChE,MAAM,IAAI,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;gBAC9D,MAAM,KAAK,GAAG,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;gBAC7B,MAAM,GAAG,GAAG,GAAG,KAAK,iBAAiB;oBACnC,CAAC,CAAC,GAAG,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO;oBAClC,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,KAAK,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC;gBACzC,MAAM,KAAK,GAAG,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,YAAY,KAAK,CAAC,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;gBAC7E,OAAO,CAAC,GAAG,CAAC,KAAK,IAAI,IAAI,KAAK,IAAI,GAAG,GAAG,KAAK,EAAE,CAAC,CAAC;YACnD,CAAC;YAED,oBAAoB;YACpB,IAAI,SAAS,CAAC,aAAa,EAAE,CAAC;gBAC5B,KAAK,CAAC,iBAAiB,GAAG,YAAY,CAAC;gBACvC,MAAM;YACR,CAAC;YAED,eAAe;YACf,MAAM,KAAK,GAAG,WAAW,CAAC,KAAK,EAAE,UAAU,CAAC,MAAM,CAAC,CAAC;YACpD,IAAI,CAAC,KAAK,CAAC,cAAc,EAAE,CAAC;gBAC1B,KAAK,CAAC,iBAAiB,GAAG,KAAK,CAAC,MAAM,CAAC;gBACvC,MAAM;YACR,CAAC;YAED,iBAAiB;YACjB,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC;gBACrB,KAAK,CAAC,iBAAiB,GAAG,WAAW,CAAC;gBACtC,MAAM;YACR,CAAC;YAED,WAAW;YACX,MAAM,WAAW,GAAG,GAAG,CAAC,wBAAwB,CAAC,CAAC,KAAK,EAAE,CAAC;YAC1D,MAAM,SAAS,GAAG,MAAM,gBAAgB,CAAC,SAAS,EAAE,aAAa,CAAC,WAAW,EAAE,UAAU,EAAE,KAAK,CAAC,gBAAgB,EAAE,WAAW,CAAC,CAAC;YAChI,WAAW,CAAC,OAAO,CAAC,SAAS,SAAS,CAAC,UAAU,CAAC,MAAM,iBAAiB,SAAS,CAAC,WAAW,CAAC,MAAM,cAAc,CAAC,CAAC;YAErH,KAAK,CAAC,WAAW,CAAC,IAAI,CAAC,GAAG,SAAS,CAAC,WAAW,CAAC,CAAC;YAEjD,0BAA0B;YAC1B,KAAK,MAAM,KAAK,IAAI,SAAS,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC;gBACrD,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,SAAS,KAAK,CAAC,IAAI,MAAM,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC;YAC/E,CAAC;YAED,SAAS;YACT,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC;gBACpB,MAAM,aAAa,GAAG,GAAG,CAAC,yBAAyB,CAAC,CAAC,KAAK,EAAE,CAAC;gBAC7D,MAAM,OAAO,GAAG,MAAM,gBAAgB,CACpC,SAAS,EAAE,MAAM,EAAE,UAAU,CAAC,UAAU,EACxC,aAAa,CAAC,WAAW,EAAE,KAAK,EAAE,WAAW,CAC9C,CAAC;gBACF,aAAa,CAAC,OAAO,CAAC,WAAW,OAAO,CAAC,MAAM,cAAc,CAAC,CAAC;gBAE/D,KAAK,MAAM,MAAM,IAAI,OAAO,EAA
E,CAAC;oBAC7B,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,UAAU,MAAM,CAAC,MAAM,CAAC,IAAI,KAAK,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC;gBACxF,CAAC;YACH,CAAC;YAED,2BAA2B;YAC3B,sBAAsB,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;YAEzC,mBAAmB;YACnB,MAAM,UAAU,GAAG,eAAe,CAAC,KAAK,EAAE,SAAS,EAAE,UAAU,CAAC,MAAM,CAAC,CAAC;YACxE,IAAI,UAAU,CAAC,SAAS,EAAE,CAAC;gBACzB,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,CAAC,8BAA8B,UAAU,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC;gBAChF,IAAI,KAAK,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBAClC,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,mCAAmC,CAAC,CAAC,CAAC;oBAC5D,KAAK,CAAC,YAAY,CAAC,GAAG,EAAE,CAAC;gBAC3B,CAAC;YACH,CAAC;YAED,KAAK,CAAC,gBAAgB,EAAE,CAAC;YAEzB,oBAAoB;YACpB,MAAM,SAAS,GAAG,WAAW,CAAC,KAAK,EAAE,UAAU,CAAC,MAAM,CAAC,CAAC;YACxD,IAAI,CAAC,SAAS,CAAC,cAAc,EAAE,CAAC;gBAC9B,KAAK,CAAC,iBAAiB,GAAG,SAAS,CAAC,MAAM,CAAC;gBAC3C,MAAM;YACR,CAAC;QACH,CAAC;QAED,eAAe;QACf,MAAM,MAAM,GAAG,KAAK,CAAC,iBAAiB,KAAK,YAAY,CAAC;QACxD,MAAM,MAAM,GAAe;YACzB,UAAU,EAAE,KAAK,CAAC,gBAAgB;YAClC,MAAM;YACN,iBAAiB,EAAE,KAAK,CAAC,iBAAiB,IAAI,SAAS;YACvD,UAAU,EAAE,KAAK,CAAC,UAAU;YAC5B,kBAAkB,EAAE,KAAK,CAAC,YAAY,CAAC,MAAM;YAC7C,WAAW,EAAE,KAAK,CAAC,WAAW,CAAC,MAAM;YACrC,WAAW,EAAE,aAAa;gBACxB,CAAC,CAAC,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,OAAO,CAAC,aAAa,CAAC,UAAU,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,KAAK,EAAE,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;gBACzH,CAAC,CAAC,EAAE;SACP,CAAC;QAEF,cAAc;QACd,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,QAAQ,EAAE,MAAM,EAAE,aAAa,CAAC,CAAC;QAC3E,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAC5D,EAAE,CAAC,aAAa,CAAC,UAAU,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;QAE9D,gBAAgB;QAChB,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC;QACjD,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,aAAa,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,mBAAmB,CAAC,EAAE,
CAAC,CAAC,CAAC;QACnH,OAAO,CAAC,GAAG,EAAE,CAAC;QACd,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,oBAAoB,KAAK,CAAC,iBAAiB,KAAK,KAAK,CAAC,gBAAgB,cAAc,CAAC,CAAC,CAAC;QAC7G,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,oBAAoB,KAAK,CAAC,YAAY,CAAC,MAAM,UAAU,CAAC,CAAC,CAAC;QAChF,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,oBAAoB,CAAC,KAAK,CAAC,UAAU,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QACpF,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,oBAAoB,KAAK,CAAC,WAAW,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;QACvE,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC;QAEjD,IAAI,KAAK,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACjC,OAAO,CAAC,GAAG,CAAC,wBAAwB,CAAC,CAAC;YACtC,KAAK,MAAM,GAAG,IAAI,KAAK,CAAC,WAAW,EAAE,CAAC;gBACpC,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,SAAS,GAAG,CAAC,QAAQ,KAAK,GAAG,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;gBAC/D,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,qBAAqB,GAAG,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC;YAChE,CAAC;YACD,OAAO,CAAC,GAAG,EAAE,CAAC;QAChB,CAAC;QAED,sCAAsC;QACtC,IAAI,OAAO,CAAC,EAAE,EAAE,CAAC;YACf,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAC/B,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;CACF"}
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
import type { SammyConfig } from "../runtime/types.js";
|
|
2
|
+
import type { SammyCloudClient } from "../cloud/sammy-cloud.js";
|
|
3
|
+
import type { EvalScenario, EvalConfig } from "./types.js";
|
|
4
|
+
/**
 * Produces the evaluation scenarios for a project.
 *
 * NOTE(review): only the signature is visible here. The eval runner reports
 * the scenarios as saved under `.sammy/eval/scenarios/generated.json` after
 * calling this function — confirm that this function performs the write.
 *
 * @param config - The project's Sammy configuration.
 * @param evalConfig - Evaluation settings for the run.
 * @param cloud - Cloud client handed in by the caller.
 * @param projectRoot - Root directory of the project being evaluated.
 * @returns The list of scenarios to evaluate.
 */
export declare function generateScenarios(
    config: SammyConfig,
    evalConfig: EvalConfig,
    cloud: SammyCloudClient,
    projectRoot: string,
): Promise<EvalScenario[]>;
|
|
5
|
+
//# sourceMappingURL=scenario-generator.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"scenario-generator.d.ts","sourceRoot":"","sources":["../../src/eval/scenario-generator.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC;AACvD,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,yBAAyB,CAAC;AAChE,OAAO,KAAK,EAAE,YAAY,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAiB3D,wBAAsB,iBAAiB,CACrC,MAAM,EAAE,WAAW,EACnB,UAAU,EAAE,UAAU,EACtB,KAAK,EAAE,gBAAgB,EACvB,WAAW,EAAE,MAAM,GAClB,OAAO,CAAC,YAAY,EAAE,CAAC,CAiLzB"}
|