@mcoda/codali 0.1.66

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (241)
  1. package/CHANGELOG.md +4 -0
  2. package/LICENSE +21 -0
  3. package/README.md +111 -0
  4. package/dist/agents/AgentProtocol.d.ts +287 -0
  5. package/dist/agents/AgentProtocol.d.ts.map +1 -0
  6. package/dist/agents/AgentProtocol.js +365 -0
  7. package/dist/agents/AgentResolver.d.ts +23 -0
  8. package/dist/agents/AgentResolver.d.ts.map +1 -0
  9. package/dist/agents/AgentResolver.js +77 -0
  10. package/dist/agents/PhaseAgentSelector.d.ts +23 -0
  11. package/dist/agents/PhaseAgentSelector.d.ts.map +1 -0
  12. package/dist/agents/PhaseAgentSelector.js +287 -0
  13. package/dist/cli/EvalCommand.d.ts +37 -0
  14. package/dist/cli/EvalCommand.d.ts.map +1 -0
  15. package/dist/cli/EvalCommand.js +333 -0
  16. package/dist/cli/FeedbackCommand.d.ts +22 -0
  17. package/dist/cli/FeedbackCommand.d.ts.map +1 -0
  18. package/dist/cli/FeedbackCommand.js +163 -0
  19. package/dist/cli/RunCommand.d.ts +78 -0
  20. package/dist/cli/RunCommand.d.ts.map +1 -0
  21. package/dist/cli/RunCommand.js +2261 -0
  22. package/dist/cli.d.ts +3 -0
  23. package/dist/cli.d.ts.map +1 -0
  24. package/dist/cli.js +109 -0
  25. package/dist/cognitive/ArchitectPlanner.d.ts +107 -0
  26. package/dist/cognitive/ArchitectPlanner.d.ts.map +1 -0
  27. package/dist/cognitive/ArchitectPlanner.js +1726 -0
  28. package/dist/cognitive/BuilderOutputParser.d.ts +25 -0
  29. package/dist/cognitive/BuilderOutputParser.d.ts.map +1 -0
  30. package/dist/cognitive/BuilderOutputParser.js +164 -0
  31. package/dist/cognitive/BuilderRunner.d.ts +76 -0
  32. package/dist/cognitive/BuilderRunner.d.ts.map +1 -0
  33. package/dist/cognitive/BuilderRunner.js +1159 -0
  34. package/dist/cognitive/ContextAssembler.d.ts +91 -0
  35. package/dist/cognitive/ContextAssembler.d.ts.map +1 -0
  36. package/dist/cognitive/ContextAssembler.js +4547 -0
  37. package/dist/cognitive/ContextBudget.d.ts +19 -0
  38. package/dist/cognitive/ContextBudget.d.ts.map +1 -0
  39. package/dist/cognitive/ContextBudget.js +35 -0
  40. package/dist/cognitive/ContextFileLoader.d.ts +30 -0
  41. package/dist/cognitive/ContextFileLoader.d.ts.map +1 -0
  42. package/dist/cognitive/ContextFileLoader.js +307 -0
  43. package/dist/cognitive/ContextManager.d.ts +47 -0
  44. package/dist/cognitive/ContextManager.d.ts.map +1 -0
  45. package/dist/cognitive/ContextManager.js +272 -0
  46. package/dist/cognitive/ContextRedactor.d.ts +18 -0
  47. package/dist/cognitive/ContextRedactor.d.ts.map +1 -0
  48. package/dist/cognitive/ContextRedactor.js +53 -0
  49. package/dist/cognitive/ContextSelector.d.ts +22 -0
  50. package/dist/cognitive/ContextSelector.d.ts.map +1 -0
  51. package/dist/cognitive/ContextSelector.js +431 -0
  52. package/dist/cognitive/ContextSerializer.d.ts +8 -0
  53. package/dist/cognitive/ContextSerializer.d.ts.map +1 -0
  54. package/dist/cognitive/ContextSerializer.js +882 -0
  55. package/dist/cognitive/ContextStore.d.ts +27 -0
  56. package/dist/cognitive/ContextStore.d.ts.map +1 -0
  57. package/dist/cognitive/ContextStore.js +79 -0
  58. package/dist/cognitive/ContextSummarizer.d.ts +16 -0
  59. package/dist/cognitive/ContextSummarizer.d.ts.map +1 -0
  60. package/dist/cognitive/ContextSummarizer.js +45 -0
  61. package/dist/cognitive/CostEstimator.d.ts +31 -0
  62. package/dist/cognitive/CostEstimator.d.ts.map +1 -0
  63. package/dist/cognitive/CostEstimator.js +66 -0
  64. package/dist/cognitive/CriticEvaluator.d.ts +32 -0
  65. package/dist/cognitive/CriticEvaluator.d.ts.map +1 -0
  66. package/dist/cognitive/CriticEvaluator.js +297 -0
  67. package/dist/cognitive/EvidenceGate.d.ts +9 -0
  68. package/dist/cognitive/EvidenceGate.d.ts.map +1 -0
  69. package/dist/cognitive/EvidenceGate.js +75 -0
  70. package/dist/cognitive/GoldenExampleIndexer.d.ts +12 -0
  71. package/dist/cognitive/GoldenExampleIndexer.d.ts.map +1 -0
  72. package/dist/cognitive/GoldenExampleIndexer.js +34 -0
  73. package/dist/cognitive/GoldenSetStore.d.ts +33 -0
  74. package/dist/cognitive/GoldenSetStore.d.ts.map +1 -0
  75. package/dist/cognitive/GoldenSetStore.js +159 -0
  76. package/dist/cognitive/IntentSignals.d.ts +7 -0
  77. package/dist/cognitive/IntentSignals.d.ts.map +1 -0
  78. package/dist/cognitive/IntentSignals.js +285 -0
  79. package/dist/cognitive/LearningGovernance.d.ts +100 -0
  80. package/dist/cognitive/LearningGovernance.d.ts.map +1 -0
  81. package/dist/cognitive/LearningGovernance.js +276 -0
  82. package/dist/cognitive/MemoryWriteback.d.ts +64 -0
  83. package/dist/cognitive/MemoryWriteback.d.ts.map +1 -0
  84. package/dist/cognitive/MemoryWriteback.js +287 -0
  85. package/dist/cognitive/PatchApplier.d.ts +49 -0
  86. package/dist/cognitive/PatchApplier.d.ts.map +1 -0
  87. package/dist/cognitive/PatchApplier.js +199 -0
  88. package/dist/cognitive/PatchInterpreter.d.ts +35 -0
  89. package/dist/cognitive/PatchInterpreter.d.ts.map +1 -0
  90. package/dist/cognitive/PatchInterpreter.js +100 -0
  91. package/dist/cognitive/PatchOutputNormalizer.d.ts +7 -0
  92. package/dist/cognitive/PatchOutputNormalizer.d.ts.map +1 -0
  93. package/dist/cognitive/PatchOutputNormalizer.js +59 -0
  94. package/dist/cognitive/PostMortemAnalyzer.d.ts +17 -0
  95. package/dist/cognitive/PostMortemAnalyzer.d.ts.map +1 -0
  96. package/dist/cognitive/PostMortemAnalyzer.js +131 -0
  97. package/dist/cognitive/PreferenceExtraction.d.ts +3 -0
  98. package/dist/cognitive/PreferenceExtraction.d.ts.map +1 -0
  99. package/dist/cognitive/PreferenceExtraction.js +85 -0
  100. package/dist/cognitive/Prompts.d.ts +15 -0
  101. package/dist/cognitive/Prompts.d.ts.map +1 -0
  102. package/dist/cognitive/Prompts.js +326 -0
  103. package/dist/cognitive/ProviderRouting.d.ts +16 -0
  104. package/dist/cognitive/ProviderRouting.d.ts.map +1 -0
  105. package/dist/cognitive/ProviderRouting.js +24 -0
  106. package/dist/cognitive/QueryExtraction.d.ts +12 -0
  107. package/dist/cognitive/QueryExtraction.d.ts.map +1 -0
  108. package/dist/cognitive/QueryExtraction.js +262 -0
  109. package/dist/cognitive/RunHistoryIndexer.d.ts +13 -0
  110. package/dist/cognitive/RunHistoryIndexer.d.ts.map +1 -0
  111. package/dist/cognitive/RunHistoryIndexer.js +125 -0
  112. package/dist/cognitive/SmartPipeline.d.ts +92 -0
  113. package/dist/cognitive/SmartPipeline.d.ts.map +1 -0
  114. package/dist/cognitive/SmartPipeline.js +4804 -0
  115. package/dist/cognitive/Types.d.ts +474 -0
  116. package/dist/cognitive/Types.d.ts.map +1 -0
  117. package/dist/cognitive/Types.js +7 -0
  118. package/dist/cognitive/ValidationRunner.d.ts +57 -0
  119. package/dist/cognitive/ValidationRunner.d.ts.map +1 -0
  120. package/dist/cognitive/ValidationRunner.js +515 -0
  121. package/dist/config/Config.d.ts +249 -0
  122. package/dist/config/Config.d.ts.map +1 -0
  123. package/dist/config/Config.js +200 -0
  124. package/dist/config/ConfigLoader.d.ts +56 -0
  125. package/dist/config/ConfigLoader.d.ts.map +1 -0
  126. package/dist/config/ConfigLoader.js +1246 -0
  127. package/dist/docdex/DocdexClient.d.ts +159 -0
  128. package/dist/docdex/DocdexClient.d.ts.map +1 -0
  129. package/dist/docdex/DocdexClient.js +838 -0
  130. package/dist/eval/EvalRunner.d.ts +35 -0
  131. package/dist/eval/EvalRunner.d.ts.map +1 -0
  132. package/dist/eval/EvalRunner.js +38 -0
  133. package/dist/eval/EvalTaskExecutor.d.ts +81 -0
  134. package/dist/eval/EvalTaskExecutor.d.ts.map +1 -0
  135. package/dist/eval/EvalTaskExecutor.js +371 -0
  136. package/dist/eval/GateEvaluator.d.ts +31 -0
  137. package/dist/eval/GateEvaluator.d.ts.map +1 -0
  138. package/dist/eval/GateEvaluator.js +134 -0
  139. package/dist/eval/MetricTypes.d.ts +28 -0
  140. package/dist/eval/MetricTypes.d.ts.map +1 -0
  141. package/dist/eval/MetricTypes.js +1 -0
  142. package/dist/eval/MetricsAggregator.d.ts +4 -0
  143. package/dist/eval/MetricsAggregator.d.ts.map +1 -0
  144. package/dist/eval/MetricsAggregator.js +97 -0
  145. package/dist/eval/RegressionComparator.d.ts +29 -0
  146. package/dist/eval/RegressionComparator.d.ts.map +1 -0
  147. package/dist/eval/RegressionComparator.js +155 -0
  148. package/dist/eval/ReportInputAdapter.d.ts +52 -0
  149. package/dist/eval/ReportInputAdapter.d.ts.map +1 -0
  150. package/dist/eval/ReportInputAdapter.js +229 -0
  151. package/dist/eval/ReportSerializer.d.ts +32 -0
  152. package/dist/eval/ReportSerializer.d.ts.map +1 -0
  153. package/dist/eval/ReportSerializer.js +33 -0
  154. package/dist/eval/ReportStore.d.ts +18 -0
  155. package/dist/eval/ReportStore.d.ts.map +1 -0
  156. package/dist/eval/ReportStore.js +96 -0
  157. package/dist/eval/SuiteLoader.d.ts +12 -0
  158. package/dist/eval/SuiteLoader.d.ts.map +1 -0
  159. package/dist/eval/SuiteLoader.js +51 -0
  160. package/dist/eval/SuiteSchema.d.ts +56 -0
  161. package/dist/eval/SuiteSchema.d.ts.map +1 -0
  162. package/dist/eval/SuiteSchema.js +357 -0
  163. package/dist/index.d.ts +11 -0
  164. package/dist/index.d.ts.map +1 -0
  165. package/dist/index.js +5 -0
  166. package/dist/providers/CodexCliProvider.d.ts +8 -0
  167. package/dist/providers/CodexCliProvider.d.ts.map +1 -0
  168. package/dist/providers/CodexCliProvider.js +282 -0
  169. package/dist/providers/OllamaRemoteProvider.d.ts +8 -0
  170. package/dist/providers/OllamaRemoteProvider.d.ts.map +1 -0
  171. package/dist/providers/OllamaRemoteProvider.js +300 -0
  172. package/dist/providers/OpenAiCompatibleProvider.d.ts +8 -0
  173. package/dist/providers/OpenAiCompatibleProvider.d.ts.map +1 -0
  174. package/dist/providers/OpenAiCompatibleProvider.js +192 -0
  175. package/dist/providers/ProviderRegistry.d.ts +12 -0
  176. package/dist/providers/ProviderRegistry.d.ts.map +1 -0
  177. package/dist/providers/ProviderRegistry.js +28 -0
  178. package/dist/providers/ProviderTypes.d.ts +81 -0
  179. package/dist/providers/ProviderTypes.d.ts.map +1 -0
  180. package/dist/providers/ProviderTypes.js +1 -0
  181. package/dist/runtime/CodaliRuntime.d.ts +189 -0
  182. package/dist/runtime/CodaliRuntime.d.ts.map +1 -0
  183. package/dist/runtime/CodaliRuntime.js +1435 -0
  184. package/dist/runtime/DeepInvestigationErrors.d.ts +39 -0
  185. package/dist/runtime/DeepInvestigationErrors.d.ts.map +1 -0
  186. package/dist/runtime/DeepInvestigationErrors.js +57 -0
  187. package/dist/runtime/RunContext.d.ts +27 -0
  188. package/dist/runtime/RunContext.d.ts.map +1 -0
  189. package/dist/runtime/RunContext.js +51 -0
  190. package/dist/runtime/RunLogQuery.d.ts +48 -0
  191. package/dist/runtime/RunLogQuery.d.ts.map +1 -0
  192. package/dist/runtime/RunLogQuery.js +36 -0
  193. package/dist/runtime/RunLogReader.d.ts +19 -0
  194. package/dist/runtime/RunLogReader.d.ts.map +1 -0
  195. package/dist/runtime/RunLogReader.js +361 -0
  196. package/dist/runtime/RunLogger.d.ts +71 -0
  197. package/dist/runtime/RunLogger.d.ts.map +1 -0
  198. package/dist/runtime/RunLogger.js +100 -0
  199. package/dist/runtime/RunTelemetryTypes.d.ts +117 -0
  200. package/dist/runtime/RunTelemetryTypes.d.ts.map +1 -0
  201. package/dist/runtime/RunTelemetryTypes.js +299 -0
  202. package/dist/runtime/Runner.d.ts +66 -0
  203. package/dist/runtime/Runner.d.ts.map +1 -0
  204. package/dist/runtime/Runner.js +215 -0
  205. package/dist/runtime/StoragePaths.d.ts +3 -0
  206. package/dist/runtime/StoragePaths.d.ts.map +1 -0
  207. package/dist/runtime/StoragePaths.js +19 -0
  208. package/dist/runtime/WorkspaceLock.d.ts +30 -0
  209. package/dist/runtime/WorkspaceLock.d.ts.map +1 -0
  210. package/dist/runtime/WorkspaceLock.js +141 -0
  211. package/dist/session/InstructionLoader.d.ts +14 -0
  212. package/dist/session/InstructionLoader.d.ts.map +1 -0
  213. package/dist/session/InstructionLoader.js +107 -0
  214. package/dist/session/SessionStore.d.ts +81 -0
  215. package/dist/session/SessionStore.d.ts.map +1 -0
  216. package/dist/session/SessionStore.js +244 -0
  217. package/dist/subagents/SubagentOrchestrator.d.ts +68 -0
  218. package/dist/subagents/SubagentOrchestrator.d.ts.map +1 -0
  219. package/dist/subagents/SubagentOrchestrator.js +150 -0
  220. package/dist/tools/ToolRegistry.d.ts +9 -0
  221. package/dist/tools/ToolRegistry.d.ts.map +1 -0
  222. package/dist/tools/ToolRegistry.js +293 -0
  223. package/dist/tools/ToolTypes.d.ts +66 -0
  224. package/dist/tools/ToolTypes.d.ts.map +1 -0
  225. package/dist/tools/ToolTypes.js +40 -0
  226. package/dist/tools/diff/DiffTool.d.ts +3 -0
  227. package/dist/tools/diff/DiffTool.d.ts.map +1 -0
  228. package/dist/tools/diff/DiffTool.js +34 -0
  229. package/dist/tools/docdex/DocdexTools.d.ts +4 -0
  230. package/dist/tools/docdex/DocdexTools.d.ts.map +1 -0
  231. package/dist/tools/docdex/DocdexTools.js +490 -0
  232. package/dist/tools/filesystem/FileTools.d.ts +3 -0
  233. package/dist/tools/filesystem/FileTools.d.ts.map +1 -0
  234. package/dist/tools/filesystem/FileTools.js +141 -0
  235. package/dist/tools/search/SearchTool.d.ts +3 -0
  236. package/dist/tools/search/SearchTool.d.ts.map +1 -0
  237. package/dist/tools/search/SearchTool.js +46 -0
  238. package/dist/tools/shell/ShellTool.d.ts +3 -0
  239. package/dist/tools/shell/ShellTool.d.ts.map +1 -0
  240. package/dist/tools/shell/ShellTool.js +104 -0
  241. package/package.json +44 -0
@@ -0,0 +1,35 @@
1
+ import type { EvalTaskExecution } from "./EvalTaskExecutor.js";
2
+ import type { EvalTaskDefinition } from "./SuiteSchema.js";
3
/**
 * Minimal executor contract accepted by the `EvalRunner` constructor
 * (its `executor` parameter).
 */
export interface EvalTaskExecutorLike {
    /** Executes one suite task and resolves with its execution record. */
    executeTask(task: EvalTaskDefinition): Promise<EvalTaskExecution>;
}
6
/** Aggregate counts reported in `EvalRunResult.summary`. */
export interface EvalRunSummary {
    /** Number of task results collected. */
    total: number;
    /** Number of results whose `task_passed` flag is truthy. */
    passed: number;
    /** `total` minus `passed`. */
    failed: number;
    /** Number of results carrying a truthy `execution_error`. */
    execution_errors: number;
}
12
/** Result object resolved by `EvalRunner.run()`. */
export interface EvalRunResult {
    /** Fixed to the literal `1`. */
    schema_version: 1;
    /** Suite identifier as passed to the `EvalRunner` constructor. */
    suite_id: string;
    /** Suite fingerprint as passed to the `EvalRunner` constructor. */
    suite_fingerprint: string;
    /** ISO-8601 timestamp sampled before the first task is executed. */
    started_at: string;
    /** ISO-8601 timestamp sampled after the last task has finished. */
    ended_at: string;
    /** Difference between the two timestamps, in milliseconds. */
    duration_ms: number;
    /** One entry per task, in the order the tasks were supplied. */
    task_results: EvalTaskExecution[];
    /** Counts derived from `task_results`. */
    summary: EvalRunSummary;
}
22
/**
 * Runs the tasks of one eval suite through an executor and reports the
 * aggregated result.
 */
export declare class EvalRunner {
    private readonly suiteId;
    private readonly suiteFingerprint;
    private readonly tasks;
    private readonly executor;
    /**
     * @param params Suite identity (`suite_id`, `suite_fingerprint`), the
     *   ordered `tasks` to run, and the `executor` that runs each of them.
     */
    constructor(params: {
        suite_id: string;
        suite_fingerprint: string;
        tasks: EvalTaskDefinition[];
        executor: EvalTaskExecutorLike;
    });
    /**
     * Executes the tasks one after another (never concurrently) and resolves
     * with the per-task results plus summary counts.
     */
    run(): Promise<EvalRunResult>;
}
35
+ //# sourceMappingURL=EvalRunner.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"EvalRunner.d.ts","sourceRoot":"","sources":["../../src/eval/EvalRunner.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,uBAAuB,CAAC;AAC/D,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAC;AAE3D,MAAM,WAAW,oBAAoB;IACnC,WAAW,CAAC,IAAI,EAAE,kBAAkB,GAAG,OAAO,CAAC,iBAAiB,CAAC,CAAC;CACnE;AAED,MAAM,WAAW,cAAc;IAC7B,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,gBAAgB,EAAE,MAAM,CAAC;CAC1B;AAED,MAAM,WAAW,aAAa;IAC5B,cAAc,EAAE,CAAC,CAAC;IAClB,QAAQ,EAAE,MAAM,CAAC;IACjB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,iBAAiB,EAAE,CAAC;IAClC,OAAO,EAAE,cAAc,CAAC;CACzB;AAED,qBAAa,UAAU;IACrB,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;IAEjC,OAAO,CAAC,QAAQ,CAAC,gBAAgB,CAAS;IAE1C,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAuB;IAE7C,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAuB;gBAEpC,MAAM,EAAE;QAClB,QAAQ,EAAE,MAAM,CAAC;QACjB,iBAAiB,EAAE,MAAM,CAAC;QAC1B,KAAK,EAAE,kBAAkB,EAAE,CAAC;QAC5B,QAAQ,EAAE,oBAAoB,CAAC;KAChC;IAOK,GAAG,IAAI,OAAO,CAAC,aAAa,CAAC;CA8BpC"}
@@ -0,0 +1,38 @@
1
+ export class EvalRunner {
2
+ constructor(params) {
3
+ this.suiteId = params.suite_id;
4
+ this.suiteFingerprint = params.suite_fingerprint;
5
+ this.tasks = params.tasks;
6
+ this.executor = params.executor;
7
+ }
8
+ async run() {
9
+ const startedAtMs = Date.now();
10
+ const startedAt = new Date(startedAtMs).toISOString();
11
+ const taskResults = [];
12
+ for (const task of this.tasks) {
13
+ // Keep task ordering deterministic by executing suites sequentially.
14
+ // This also keeps report comparisons stable across repeated runs.
15
+ // eslint-disable-next-line no-await-in-loop
16
+ taskResults.push(await this.executor.executeTask(task));
17
+ }
18
+ const endedAtMs = Date.now();
19
+ const executionErrors = taskResults.filter((result) => Boolean(result.execution_error)).length;
20
+ const passed = taskResults.filter((result) => result.task_passed).length;
21
+ const summary = {
22
+ total: taskResults.length,
23
+ passed,
24
+ failed: taskResults.length - passed,
25
+ execution_errors: executionErrors,
26
+ };
27
+ return {
28
+ schema_version: 1,
29
+ suite_id: this.suiteId,
30
+ suite_fingerprint: this.suiteFingerprint,
31
+ started_at: startedAt,
32
+ ended_at: new Date(endedAtMs).toISOString(),
33
+ duration_ms: endedAtMs - startedAtMs,
34
+ task_results: taskResults,
35
+ summary,
36
+ };
37
+ }
38
+ }
@@ -0,0 +1,81 @@
1
+ import type { VerificationOutcome } from "../cognitive/Types.js";
2
+ import type { SafetyTelemetryEventData } from "../runtime/RunLogger.js";
3
+ import { type NormalizedRunRecord } from "./ReportInputAdapter.js";
4
+ import type { EvalTaskDefinition } from "./SuiteSchema.js";
5
/**
 * Run metadata the executor parses from the CLI's stderr: the JSON payload of
 * the last line that starts with `CODALI_RUN_META `.
 *
 * When parsed, string fields whose payload value is not a string are left
 * `undefined`.
 */
export interface EvalRunMeta {
    runId?: string;
    /** `null` when the payload carries no string fingerprint. */
    fingerprint?: string | null;
    logPath?: string;
    outputLogPath?: string;
    /** String entries of the payload's `touchedFiles` array; empty when absent. */
    touchedFiles: string[];
    command?: string;
    commandRunId?: string;
    jobId?: string;
    project?: string;
    taskId?: string;
    taskKey?: string;
    agentId?: string;
    agentSlug?: string;
    /** `null` when the payload's `workflow` is not a plain (non-array) object. */
    workflow?: Record<string, unknown> | null;
}
21
/** Outcome of a single assertion evaluated for a task. */
export interface EvalAssertionResult {
    /**
     * Assertion identifier, e.g. `assert_expect_success`. Some assertions use
     * a `_missing` variant of the code when the measured value was `null`.
     */
    code: string;
    /** Whether the assertion held. */
    passed: boolean;
    /**
     * Fixed description of the assertion; the text does not vary with
     * `passed`.
     */
    message: string;
    /** Value the task definition asked for. */
    expected?: unknown;
    /** Value that was observed (may be `null` when unavailable). */
    actual?: unknown;
}
28
/**
 * Per-task execution record resolved by `executeTask`.
 *
 * NOTE(review): most fields are populated in the part of the executor that
 * builds the return value; the per-field notes below describe only what the
 * declared types state, or are explicitly hedged — confirm against the
 * implementation.
 */
export interface EvalTaskExecution {
    task_id: string;
    title: string;
    command: string;
    mode: "success" | "failure";
    started_at: string;
    ended_at: string;
    duration_ms: number;
    /** `null` when no numeric exit status is available. */
    exit_code: number | null;
    // presumably true only for exit code 0 without an execution error — verify
    run_succeeded: boolean;
    task_passed: boolean;
    first_pass: boolean | null;
    patch_apply_success: boolean | null;
    verification_outcome: VerificationOutcome | null;
    verification_passed: boolean | null;
    hallucination_detected: boolean | null;
    scope_violation_detected: boolean | null;
    latency_ms: number | null;
    tokens_used: number | null;
    cost_usd: number | null;
    assertion_results: EvalAssertionResult[];
    stdout: string;
    stderr: string;
    // presumably the full argv used to spawn the CLI — verify
    command_line: string[];
    run_meta?: EvalRunMeta;
    run_summary?: Record<string, unknown>;
    normalized_run?: NormalizedRunRecord;
    safety_events: SafetyTelemetryEventData[];
    /** Present when the task could not be executed normally. */
    execution_error?: string;
}
58
/** Configuration for `EvalTaskExecutor`. */
export interface EvalTaskExecutorOptions {
    /** Passed as `--workspace-root` and used as the child process's working directory. */
    workspace_root: string;
    /** Suite directory handed to task-file resolution. */
    suite_dir: string;
    /**
     * CLI script to run. When omitted, `process.argv[1]` is used, and failing
     * that a `cli.js` located one directory above the executor module's
     * directory.
     */
    cli_entry?: string;
    /** Forwarded as `--provider`. */
    provider?: string;
    /** Forwarded as `--model`. */
    model?: string;
    /**
     * Forwarded as `--api-key`.
     * NOTE(review): the key becomes part of the child's argv; confirm whether
     * the recorded command line is persisted anywhere it should be redacted.
     */
    api_key?: string;
    /** Forwarded as `--base-url`. */
    base_url?: string;
    /** Forwarded as `--agent`. */
    agent?: string;
    /** Forwarded as `--agent-id`. */
    agent_id?: string;
    /** Forwarded as `--agent-slug`. */
    agent_slug?: string;
    /** Forwarded as `--profile`. */
    workflow_profile?: string;
    /** Adds `--smart` only when exactly `true`. */
    smart?: boolean;
    /** Adds `--no-deep-investigation` when truthy. */
    no_deep_investigation?: boolean;
    /** Child process timeout in milliseconds; defaults to 20 minutes. */
    timeout_ms?: number;
    /** Extra environment variables layered over `process.env` for the child. */
    extra_env?: NodeJS.ProcessEnv;
    /** Log directory given to the run-log reader; defaults to `DEFAULT_LOG_DIR`. */
    log_dir?: string;
}
76
/**
 * Executes eval tasks by spawning the CLI as a child process and collecting
 * its output and run logs.
 */
export declare class EvalTaskExecutor {
    private readonly options;
    /** @param options Executor configuration; stored as-is. */
    constructor(options: EvalTaskExecutorOptions);
    /**
     * Runs a single task and resolves with its execution record.
     * The child process is spawned synchronously (`spawnSync`), so the calling
     * thread blocks until the CLI exits or the timeout elapses.
     */
    executeTask(task: EvalTaskDefinition): Promise<EvalTaskExecution>;
}
81
+ //# sourceMappingURL=EvalTaskExecutor.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"EvalTaskExecutor.d.ts","sourceRoot":"","sources":["../../src/eval/EvalTaskExecutor.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,uBAAuB,CAAC;AAGjE,OAAO,KAAK,EAAE,wBAAwB,EAAE,MAAM,yBAAyB,CAAC;AACxE,OAAO,EAEL,KAAK,mBAAmB,EACzB,MAAM,yBAAyB,CAAC;AACjC,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAC;AAG3D,MAAM,WAAW,WAAW;IAC1B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC5B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC;CAC3C;AAED,MAAM,WAAW,mBAAmB;IAClC,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,OAAO,CAAC;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,MAAM,CAAC,EAAE,OAAO,CAAC;CAClB;AAED,MAAM,WAAW,iBAAiB;IAChC,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,SAAS,GAAG,SAAS,CAAC;IAC5B,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,aAAa,EAAE,OAAO,CAAC;IACvB,WAAW,EAAE,OAAO,CAAC;IACrB,UAAU,EAAE,OAAO,GAAG,IAAI,CAAC;IAC3B,mBAAmB,EAAE,OAAO,GAAG,IAAI,CAAC;IACpC,oBAAoB,EAAE,mBAAmB,GAAG,IAAI,CAAC;IACjD,mBAAmB,EAAE,OAAO,GAAG,IAAI,CAAC;IACpC,sBAAsB,EAAE,OAAO,GAAG,IAAI,CAAC;IACvC,wBAAwB,EAAE,OAAO,GAAG,IAAI,CAAC;IACzC,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,iBAAiB,EAAE,mBAAmB,EAAE,CAAC;IACzC,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,QAAQ,CAAC,EAAE,WAAW,CAAC;IACvB,WAAW,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACtC,cAAc,CAAC,EAAE,mBAAmB,CAAC;IACrC,aAAa,EAAE,wBAAwB,EAAE,CAAC;IAC1C,eAAe,CAAC,EAAE,MAAM,CAAC;CAC1B;AAED,MAAM,WAAW,uBAAuB;IACtC,cAAc,EAAE,MAAM,CAAC;IACv
B,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,qBAAqB,CAAC,EAAE,OAAO,CAAC;IAChC,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,SAAS,CAAC,EAAE,MAAM,CAAC,UAAU,CAAC;IAC9B,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AA+ND,qBAAa,gBAAgB;IAC3B,OAAO,CAAC,QAAQ,CAAC,OAAO,CAA0B;gBAEtC,OAAO,EAAE,uBAAuB;IAItC,WAAW,CAAC,IAAI,EAAE,kBAAkB,GAAG,OAAO,CAAC,iBAAiB,CAAC;CAmKxE"}
@@ -0,0 +1,371 @@
1
+ import { spawnSync } from "node:child_process";
2
+ import { readFile } from "node:fs/promises";
3
+ import path from "node:path";
4
+ import process from "node:process";
5
+ import { fileURLToPath } from "node:url";
6
+ import { DEFAULT_LOG_DIR } from "../config/Config.js";
7
+ import { RunLogReader } from "../runtime/RunLogReader.js";
8
+ import { adaptRunSummaryForReport, } from "./ReportInputAdapter.js";
9
+ import { resolveTaskFilePath } from "./SuiteLoader.js";
10
+ const asNumber = (value) => typeof value === "number" && Number.isFinite(value) ? value : null;
11
+ const asRecord = (value) => {
12
+ if (!value || typeof value !== "object" || Array.isArray(value))
13
+ return undefined;
14
+ return value;
15
+ };
16
+ const parseRunMeta = (stderr) => {
17
+ const lines = stderr.split(/\r?\n/).map((line) => line.trim()).filter(Boolean);
18
+ for (let index = lines.length - 1; index >= 0; index -= 1) {
19
+ const line = lines[index];
20
+ if (!line.startsWith("CODALI_RUN_META "))
21
+ continue;
22
+ const payload = line.slice("CODALI_RUN_META ".length);
23
+ try {
24
+ const parsed = JSON.parse(payload);
25
+ const touched = parsed.touchedFiles;
26
+ const touchedFiles = Array.isArray(touched)
27
+ ? touched.filter((entry) => typeof entry === "string")
28
+ : [];
29
+ return {
30
+ runId: typeof parsed.runId === "string" ? parsed.runId : undefined,
31
+ fingerprint: typeof parsed.fingerprint === "string" ? parsed.fingerprint : null,
32
+ logPath: typeof parsed.logPath === "string" ? parsed.logPath : undefined,
33
+ outputLogPath: typeof parsed.outputLogPath === "string" ? parsed.outputLogPath : undefined,
34
+ touchedFiles,
35
+ command: typeof parsed.command === "string" ? parsed.command : undefined,
36
+ commandRunId: typeof parsed.commandRunId === "string" ? parsed.commandRunId : undefined,
37
+ jobId: typeof parsed.jobId === "string" ? parsed.jobId : undefined,
38
+ project: typeof parsed.project === "string" ? parsed.project : undefined,
39
+ taskId: typeof parsed.taskId === "string" ? parsed.taskId : undefined,
40
+ taskKey: typeof parsed.taskKey === "string" ? parsed.taskKey : undefined,
41
+ agentId: typeof parsed.agentId === "string" ? parsed.agentId : undefined,
42
+ agentSlug: typeof parsed.agentSlug === "string" ? parsed.agentSlug : undefined,
43
+ workflow: asRecord(parsed.workflow) ?? null,
44
+ };
45
+ }
46
+ catch {
47
+ return undefined;
48
+ }
49
+ }
50
+ return undefined;
51
+ };
52
+ const readRunLog = async (logPath) => {
53
+ if (!logPath)
54
+ return { run_failed_reasons: [] };
55
+ let content = "";
56
+ try {
57
+ content = await readFile(logPath, "utf8");
58
+ }
59
+ catch {
60
+ return { run_failed_reasons: [] };
61
+ }
62
+ let runSummary;
63
+ const runFailedReasons = [];
64
+ const lines = content.split("\n").filter(Boolean);
65
+ for (const line of lines) {
66
+ let parsed;
67
+ try {
68
+ parsed = JSON.parse(line);
69
+ }
70
+ catch {
71
+ continue;
72
+ }
73
+ const type = typeof parsed.type === "string" ? parsed.type : "";
74
+ const data = asRecord(parsed.data) ?? {};
75
+ if (type === "run_summary") {
76
+ runSummary = data;
77
+ continue;
78
+ }
79
+ if (type !== "run_failed")
80
+ continue;
81
+ const reasonsValue = data.reasons;
82
+ if (Array.isArray(reasonsValue)) {
83
+ for (const reason of reasonsValue) {
84
+ if (typeof reason === "string" && reason.trim())
85
+ runFailedReasons.push(reason.trim());
86
+ }
87
+ }
88
+ const stage = typeof data.stage === "string" ? data.stage.trim() : "";
89
+ if (stage)
90
+ runFailedReasons.push(stage);
91
+ }
92
+ return { run_summary: runSummary, run_failed_reasons: runFailedReasons };
93
+ };
94
+ const resolveCliEntry = (provided) => {
95
+ if (provided)
96
+ return provided;
97
+ if (process.argv[1])
98
+ return process.argv[1];
99
+ const current = fileURLToPath(import.meta.url);
100
+ return path.resolve(current, "..", "..", "cli.js");
101
+ };
102
/**
 * Reports whether stderr or any failure reason mentions a patch failure.
 * Matching is a case-insensitive substring search.
 *
 * @param stderr Captured stderr text.
 * @param reasons Failure reasons collected from the run log.
 * @returns `true` when any known patch-failure marker occurs.
 */
const hasPatchFailure = (stderr, reasons) => {
    const markers = [
        "patch_apply_failed",
        "patch_scope_violation",
        "patch_search",
        "patch_rollback",
        "search block",
    ];
    const haystack = [stderr, ...reasons].join("\n").toLowerCase();
    return markers.some((marker) => haystack.includes(marker));
};
110
/**
 * Heuristically reports whether stderr or any failure reason contains a
 * hallucination signal (case-insensitive): words starting with "hallucinat",
 * "unknown symbol", "unknown file", "non-existent" (with `-`, `_` or space),
 * `ENOENT`, or "no such file or directory".
 *
 * @param stderr Captured stderr text.
 * @param reasons Failure reasons collected from the run log.
 * @returns `true` when a signal is found.
 */
const detectHallucination = (stderr, reasons) => {
    const signals = /\bhallucinat|\bunknown symbol\b|\bunknown file\b|non[-_ ]existent|ENOENT|no such file or directory/i;
    const combined = [stderr, ...reasons].join("\n");
    return signals.test(combined);
};
114
/**
 * Reports whether the run violated its allowed scope.
 *
 * A safety event with code `scope_violation` is decisive on its own.
 * Otherwise stderr and the failure reasons are searched (case-insensitive)
 * for `scope_violation` as a whole word, `patch_outside_allowed_scope` or
 * `patch_outside_workspace`.
 *
 * NOTE(review): `_` is a word character, so `patch_scope_violation` does not
 * match `\bscope_violation\b` — confirm that this is intended.
 *
 * @param stderr Captured stderr text.
 * @param reasons Failure reasons collected from the run log.
 * @param safetyEvents Safety telemetry events of the run.
 * @returns `true` when a scope violation is indicated.
 */
const detectScopeViolation = (stderr, reasons, safetyEvents) => {
    const flaggedByTelemetry = safetyEvents.some((event) => event.code === "scope_violation");
    if (flaggedByTelemetry)
        return true;
    const textSignals = /\bscope_violation\b|patch_outside_allowed_scope|patch_outside_workspace/i;
    const combined = [stderr, ...reasons].join("\n");
    return textSignals.test(combined);
};
120
+ const buildAssertions = (params) => {
121
+ const assertions = [];
122
+ const expectedSuccess = params.task.assertions.expect_success;
123
+ assertions.push({
124
+ code: "assert_expect_success",
125
+ passed: params.runSucceeded === expectedSuccess,
126
+ message: "Run success matched expectation.",
127
+ expected: expectedSuccess,
128
+ actual: params.runSucceeded,
129
+ });
130
+ if (params.task.assertions.expect_exit_code !== undefined) {
131
+ assertions.push({
132
+ code: "assert_expect_exit_code",
133
+ passed: params.exitCode === params.task.assertions.expect_exit_code,
134
+ message: "Exit code matched expectation.",
135
+ expected: params.task.assertions.expect_exit_code,
136
+ actual: params.exitCode,
137
+ });
138
+ }
139
+ if (params.task.assertions.expect_patch_apply !== undefined) {
140
+ const actual = params.patchApplySuccess;
141
+ assertions.push({
142
+ code: actual === null ? "assert_expect_patch_apply_missing" : "assert_expect_patch_apply",
143
+ passed: actual !== null && actual === params.task.assertions.expect_patch_apply,
144
+ message: "Patch apply outcome matched expectation.",
145
+ expected: params.task.assertions.expect_patch_apply,
146
+ actual,
147
+ });
148
+ }
149
+ if (params.task.assertions.expect_verification !== undefined
150
+ && params.task.assertions.expect_verification !== "any") {
151
+ const actual = params.verificationOutcome;
152
+ assertions.push({
153
+ code: actual === null ? "assert_expect_verification_missing" : "assert_expect_verification",
154
+ passed: actual !== null && actual === params.task.assertions.expect_verification,
155
+ message: "Verification outcome matched expectation.",
156
+ expected: params.task.assertions.expect_verification,
157
+ actual,
158
+ });
159
+ }
160
+ if (params.task.assertions.max_latency_ms !== undefined) {
161
+ const actual = params.latencyMs;
162
+ assertions.push({
163
+ code: actual === null ? "assert_max_latency_missing" : "assert_max_latency",
164
+ passed: actual !== null && actual <= params.task.assertions.max_latency_ms,
165
+ message: "Latency stayed within threshold.",
166
+ expected: params.task.assertions.max_latency_ms,
167
+ actual,
168
+ });
169
+ }
170
+ if (params.task.assertions.max_cost_usd !== undefined) {
171
+ const actual = params.costUsd;
172
+ assertions.push({
173
+ code: actual === null ? "assert_max_cost_missing" : "assert_max_cost",
174
+ passed: actual !== null && actual <= params.task.assertions.max_cost_usd,
175
+ message: "Cost stayed within threshold.",
176
+ expected: params.task.assertions.max_cost_usd,
177
+ actual,
178
+ });
179
+ }
180
+ if (!params.task.assertions.allow_hallucination) {
181
+ assertions.push({
182
+ code: "assert_no_hallucination",
183
+ passed: params.hallucinationDetected !== true,
184
+ message: "No hallucination signals were detected.",
185
+ expected: false,
186
+ actual: params.hallucinationDetected,
187
+ });
188
+ }
189
+ if (!params.task.assertions.allow_scope_violation) {
190
+ assertions.push({
191
+ code: "assert_no_scope_violation",
192
+ passed: params.scopeViolationDetected !== true,
193
+ message: "No scope-violation signals were detected.",
194
+ expected: false,
195
+ actual: params.scopeViolationDetected,
196
+ });
197
+ }
198
+ return assertions;
199
+ };
200
/**
 * Runs a single eval task by invoking the CLI entry point as a child process
 * and turning the process result plus the run's log artifacts into a flat
 * task-result record.
 */
export class EvalTaskExecutor {
    /**
     * @param options Executor configuration. Fields read by this class:
     *   `cli_entry`, `workspace_root`, `suite_dir`, `log_dir`, `timeout_ms`,
     *   `extra_env`, and the pass-through CLI settings `provider`, `model`,
     *   `api_key`, `base_url`, `agent`, `agent_id`, `agent_slug`,
     *   `workflow_profile`, `smart`, `no_deep_investigation`.
     */
    constructor(options) {
        this.options = options;
    }
    /**
     * Executes `task` and returns its result record.
     *
     * The child process is started with `spawnSync`, so this method blocks the
     * event loop until the child exits or the timeout elapses, even though the
     * method itself is `async` (log files are read asynchronously afterwards).
     *
     * @param task Task definition; this method reads `command`, `args`,
     *   `inline_task`, `id`, `title`, `mode` and passes the whole object to
     *   `resolveTaskFilePath` and `buildAssertions`.
     * @returns A record with timing, exit status, derived signals
     *   (first-pass, patch apply, verification, hallucination, scope
     *   violation), usage/cost numbers, assertion results and the raw
     *   stdout/stderr. Spawn failures are reported in `execution_error`
     *   rather than thrown.
     */
    async executeTask(task) {
        const startedAtMs = Date.now();
        const startedAt = new Date(startedAtMs).toISOString();
        const cliEntry = resolveCliEntry(this.options.cli_entry);
        const commandArgs = [task.command, "--workspace-root", this.options.workspace_root];
        // Value-carrying flags are forwarded only when the option is truthy;
        // the order matches the order the flags were originally appended in.
        const valueFlags = [
            ["--provider", this.options.provider],
            ["--model", this.options.model],
            ["--api-key", this.options.api_key],
            ["--base-url", this.options.base_url],
            ["--agent", this.options.agent],
            ["--agent-id", this.options.agent_id],
            ["--agent-slug", this.options.agent_slug],
            ["--profile", this.options.workflow_profile],
        ];
        for (const [flag, value] of valueFlags) {
            if (value)
                commandArgs.push(flag, value);
        }
        if (this.options.smart === true)
            commandArgs.push("--smart");
        if (this.options.no_deep_investigation)
            commandArgs.push("--no-deep-investigation");
        if (task.args.length > 0)
            commandArgs.push(...task.args);
        // A task file takes precedence over an inline task string.
        const taskFilePath = resolveTaskFilePath(task, this.options.suite_dir, this.options.workspace_root);
        if (taskFilePath) {
            commandArgs.push("--task", taskFilePath);
        }
        else if (task.inline_task) {
            commandArgs.push(task.inline_task);
        }
        // The recorded command line is returned to the caller, so the value
        // following `--api-key` is masked here. The real key is still passed
        // to the child process below.
        const commandLine = [process.execPath, cliEntry, ...commandArgs].map((arg, index, all) => (index > 0 && all[index - 1] === "--api-key" ? "[REDACTED]" : arg));
        // spawnSync's default maxBuffer is 1 MiB; when exceeded the child is
        // killed and its output truncated, which would also cut off the run
        // metadata parsed from stderr. Use a much larger explicit limit.
        const maxBufferBytes = 64 * 1024 * 1024;
        let stdout = "";
        let stderr = "";
        let exitCode = null;
        let executionError;
        try {
            const result = spawnSync(process.execPath, [cliEntry, ...commandArgs], {
                cwd: this.options.workspace_root,
                encoding: "utf8",
                env: {
                    ...process.env,
                    ...(this.options.extra_env ?? {}),
                },
                timeout: this.options.timeout_ms ?? 20 * 60 * 1000,
                maxBuffer: maxBufferBytes,
            });
            stdout = (result.stdout ?? "").toString();
            stderr = (result.stderr ?? "").toString();
            exitCode = typeof result.status === "number" ? result.status : null;
            if (result.error)
                executionError = String(result.error);
            if (result.signal && exitCode === null) {
                // Keep the `terminated_by_signal:` prefix, but do not discard
                // the underlying cause (e.g. a timeout or buffer overflow
                // reported through `result.error`).
                executionError = result.error
                    ? `terminated_by_signal:${result.signal} (${String(result.error)})`
                    : `terminated_by_signal:${result.signal}`;
            }
        }
        catch (error) {
            executionError = error instanceof Error ? error.message : String(error);
        }
        // Run metadata is parsed from the child's stderr; the log path and run
        // id found there drive all subsequent log lookups.
        const runMeta = parseRunMeta(stderr);
        const runLog = await readRunLog(runMeta?.logPath);
        const reader = new RunLogReader(this.options.workspace_root, this.options.log_dir ?? DEFAULT_LOG_DIR);
        const safetyEvents = runMeta?.runId
            ? await reader.getSafetyEvents(runMeta.runId)
            : [];
        const verificationReports = runMeta?.runId
            ? await reader.getVerificationReports(runMeta.runId)
            : [];
        // Only the most recent verification report is considered.
        const latestVerification = verificationReports.length
            ? verificationReports[verificationReports.length - 1]
            : undefined;
        const runSummary = runLog.run_summary;
        const normalizedRun = adaptRunSummaryForReport({
            runSummary,
            runId: runMeta?.runId,
            taskId: task.id,
            verificationOutcome: latestVerification?.outcome ?? null,
            touchedFiles: runMeta?.touchedFiles ?? [],
        });
        const runSucceeded = exitCode === 0 && !executionError;
        // First pass: at most one recorded attempt; without an attempt count,
        // fall back to whether the run succeeded at all.
        const firstPass = (() => {
            const smartRuntime = asRecord(runSummary?.smartRuntime);
            const attempts = asNumber(smartRuntime?.attempts);
            if (attempts !== null)
                return attempts <= 1;
            return runSucceeded;
        })();
        // Tri-state: true when files were touched, false when a patch failure
        // was detected, null when neither signal is present.
        const patchApplySuccess = (() => {
            if (runMeta?.touchedFiles?.length)
                return true;
            if (hasPatchFailure(stderr, runLog.run_failed_reasons))
                return false;
            return null;
        })();
        // Resolution order: normalized run -> latest verification report ->
        // raw run summary (accepted only for the three known outcome values).
        const verificationOutcome = (() => {
            const normalizedOutcome = normalizedRun.verification_outcome;
            if (normalizedOutcome)
                return normalizedOutcome;
            if (latestVerification?.outcome)
                return latestVerification.outcome;
            const verification = asRecord(runSummary?.verification);
            const outcome = verification?.outcome;
            if (outcome === "verified_passed"
                || outcome === "verified_failed"
                || outcome === "unverified_with_reason") {
                return outcome;
            }
            return null;
        })();
        const verificationPassed = verificationOutcome === null ? null : verificationOutcome === "verified_passed";
        const hallucinationDetected = detectHallucination(stderr, runLog.run_failed_reasons);
        const scopeViolationDetected = detectScopeViolation(stderr, runLog.run_failed_reasons, safetyEvents);
        // Prefer durations reported by the run itself; wall-clock time since
        // start is the last resort.
        const latencyMs = normalizedRun.duration_ms ?? asNumber(runSummary?.durationMs) ?? (Date.now() - startedAtMs);
        const usage = asRecord(runSummary?.usage);
        const tokensUsed = normalizedRun.usage_tokens_total
            ?? asNumber(usage?.totalTokens)
            ?? (() => {
                // Sum input and output tokens; a zero total is reported as null.
                const input = asNumber(usage?.inputTokens) ?? 0;
                const output = asNumber(usage?.outputTokens) ?? 0;
                return input + output > 0 ? input + output : null;
            })();
        const costUsd = normalizedRun.cost_usd ?? asNumber(runSummary?.actualCost);
        const assertionResults = buildAssertions({
            task,
            runSucceeded,
            exitCode,
            patchApplySuccess,
            verificationOutcome,
            latencyMs,
            costUsd,
            hallucinationDetected,
            scopeViolationDetected,
        });
        // The task passes only when every assertion passes.
        const taskPassed = assertionResults.every((assertion) => assertion.passed);
        const endedAtMs = Date.now();
        return {
            task_id: task.id,
            title: task.title,
            command: task.command,
            mode: task.mode,
            started_at: startedAt,
            ended_at: new Date(endedAtMs).toISOString(),
            duration_ms: endedAtMs - startedAtMs,
            exit_code: exitCode,
            run_succeeded: runSucceeded,
            task_passed: taskPassed,
            first_pass: firstPass,
            patch_apply_success: patchApplySuccess,
            verification_outcome: verificationOutcome,
            verification_passed: verificationPassed,
            hallucination_detected: hallucinationDetected,
            scope_violation_detected: scopeViolationDetected,
            latency_ms: latencyMs,
            tokens_used: tokensUsed,
            cost_usd: costUsd,
            assertion_results: assertionResults,
            stdout,
            stderr,
            command_line: commandLine,
            run_meta: runMeta,
            run_summary: runSummary,
            normalized_run: normalizedRun,
            safety_events: safetyEvents,
            execution_error: executionError,
        };
    }
}
@@ -0,0 +1,31 @@
1
+ import type { EvalMetrics } from "./MetricTypes.js";
2
+ import type { EvalRegressionComparison } from "./RegressionComparator.js";
3
/**
 * Numeric gate limits. This is the type of the `thresholds` field accepted by
 * `evaluateGates`, echoed in `EvalGateResult.thresholds`, and returned by
 * `resolveGateThresholds`.
 *
 * NOTE(review): the `_max` / `_min` suffixes suggest upper and lower bounds,
 * and the values look like rates; the actual comparisons live in the
 * implementation file — confirm units and direction there.
 */
+ export interface EvalGateThresholds {
4
+ patch_apply_drop_max: number;
5
+ verification_pass_rate_min: number;
6
+ hallucination_rate_max: number;
7
+ scope_violation_rate_max: number;
8
+ }
9
/**
 * One entry of `EvalGateResult.failures`.
 *
 * `actual` may be `null`; `baseline` and `delta` are optional and may also be
 * `null`.
 *
 * NOTE(review): presumably `baseline` / `delta` are only populated for gates
 * that compare against a previous run (see the optional `comparison` input of
 * `evaluateGates`) — confirm in the implementation.
 */
+ export interface EvalGateFailure {
10
+ code: string;
11
+ metric: string;
12
+ message: string;
13
+ threshold: number;
14
+ actual: number | null;
15
+ baseline?: number | null;
16
+ delta?: number | null;
17
+ }
18
/**
 * Return type of `evaluateGates`.
 *
 * `schema_version` is the literal `1`. `thresholds` carries an
 * `EvalGateThresholds` value and `failures` a list of `EvalGateFailure`
 * entries.
 *
 * NOTE(review): presumably `passed` is true exactly when `failures` is empty —
 * confirm in the implementation.
 */
+ export interface EvalGateResult {
19
+ schema_version: 1;
20
+ passed: boolean;
21
+ thresholds: EvalGateThresholds;
22
+ failures: EvalGateFailure[];
23
+ }
24
/**
 * Exported constant holding a complete `EvalGateThresholds` value.
 * NOTE(review): the name suggests these are the fallback thresholds; the
 * concrete numbers are defined in the implementation file, not here.
 */
+ export declare const DEFAULT_EVAL_GATE_THRESHOLDS: EvalGateThresholds;
25
/**
 * Builds a complete `EvalGateThresholds` from any number of partial sources;
 * each source may be `undefined`.
 * NOTE(review): precedence between sources and the fallback for missing
 * fields are not visible in this declaration — confirm in the implementation.
 */
+ export declare const resolveGateThresholds: (...sources: Array<Partial<EvalGateThresholds> | undefined>) => EvalGateThresholds;
26
/**
 * Evaluates gates for a set of eval metrics and returns an `EvalGateResult`.
 *
 * Takes a single params object: `metrics` and `thresholds` are required,
 * `comparison` (an `EvalRegressionComparison`) is optional.
 *
 * NOTE(review): which gates depend on `comparison` being present is not
 * visible in this declaration — confirm in the implementation.
 */
+ export declare const evaluateGates: (params: {
27
+ metrics: EvalMetrics;
28
+ thresholds: EvalGateThresholds;
29
+ comparison?: EvalRegressionComparison;
30
+ }) => EvalGateResult;
31
+ //# sourceMappingURL=GateEvaluator.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"GateEvaluator.d.ts","sourceRoot":"","sources":["../../src/eval/GateEvaluator.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AACpD,OAAO,KAAK,EAAE,wBAAwB,EAAE,MAAM,2BAA2B,CAAC;AAE1E,MAAM,WAAW,kBAAkB;IACjC,oBAAoB,EAAE,MAAM,CAAC;IAC7B,0BAA0B,EAAE,MAAM,CAAC;IACnC,sBAAsB,EAAE,MAAM,CAAC;IAC/B,wBAAwB,EAAE,MAAM,CAAC;CAClC;AAED,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CACvB;AAED,MAAM,WAAW,cAAc;IAC7B,cAAc,EAAE,CAAC,CAAC;IAClB,MAAM,EAAE,OAAO,CAAC;IAChB,UAAU,EAAE,kBAAkB,CAAC;IAC/B,QAAQ,EAAE,eAAe,EAAE,CAAC;CAC7B;AAED,eAAO,MAAM,4BAA4B,EAAE,kBAK1C,CAAC;AAiBF,eAAO,MAAM,qBAAqB,GAChC,GAAG,SAAS,KAAK,CAAC,OAAO,CAAC,kBAAkB,CAAC,GAAG,SAAS,CAAC,KACzD,kBAuCF,CAAC;AAgBF,eAAO,MAAM,aAAa,GAAI,QAAQ;IACpC,OAAO,EAAE,WAAW,CAAC;IACrB,UAAU,EAAE,kBAAkB,CAAC;IAC/B,UAAU,CAAC,EAAE,wBAAwB,CAAC;CACvC,KAAG,cAmFH,CAAC"}