@mcoda/mswarm 0.1.57 → 0.1.60

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (250) hide show
  1. package/README.md +19 -0
  2. package/dist/codali-executor.d.ts +266 -0
  3. package/dist/codali-executor.d.ts.map +1 -0
  4. package/dist/codali-executor.js +227 -0
  5. package/dist/codali-executor.js.map +1 -0
  6. package/dist/runtime.d.ts +36 -1
  7. package/dist/runtime.d.ts.map +1 -1
  8. package/dist/runtime.js +219 -30
  9. package/dist/runtime.js.map +1 -1
  10. package/dist/server.d.ts.map +1 -1
  11. package/dist/server.js +54 -0
  12. package/dist/server.js.map +1 -1
  13. package/dist/vendor/codali/agents/AgentProtocol.d.ts +287 -0
  14. package/dist/vendor/codali/agents/AgentProtocol.d.ts.map +1 -0
  15. package/dist/vendor/codali/agents/AgentProtocol.js +365 -0
  16. package/dist/vendor/codali/agents/AgentResolver.d.ts +23 -0
  17. package/dist/vendor/codali/agents/AgentResolver.d.ts.map +1 -0
  18. package/dist/vendor/codali/agents/AgentResolver.js +77 -0
  19. package/dist/vendor/codali/agents/PhaseAgentSelector.d.ts +23 -0
  20. package/dist/vendor/codali/agents/PhaseAgentSelector.d.ts.map +1 -0
  21. package/dist/vendor/codali/agents/PhaseAgentSelector.js +287 -0
  22. package/dist/vendor/codali/cli/EvalCommand.d.ts +37 -0
  23. package/dist/vendor/codali/cli/EvalCommand.d.ts.map +1 -0
  24. package/dist/vendor/codali/cli/EvalCommand.js +333 -0
  25. package/dist/vendor/codali/cli/FeedbackCommand.d.ts +22 -0
  26. package/dist/vendor/codali/cli/FeedbackCommand.d.ts.map +1 -0
  27. package/dist/vendor/codali/cli/FeedbackCommand.js +163 -0
  28. package/dist/vendor/codali/cli/RunCommand.d.ts +78 -0
  29. package/dist/vendor/codali/cli/RunCommand.d.ts.map +1 -0
  30. package/dist/vendor/codali/cli/RunCommand.js +2261 -0
  31. package/dist/vendor/codali/cli.d.ts +3 -0
  32. package/dist/vendor/codali/cli.d.ts.map +1 -0
  33. package/dist/vendor/codali/cli.js +109 -0
  34. package/dist/vendor/codali/cognitive/ArchitectPlanner.d.ts +107 -0
  35. package/dist/vendor/codali/cognitive/ArchitectPlanner.d.ts.map +1 -0
  36. package/dist/vendor/codali/cognitive/ArchitectPlanner.js +1726 -0
  37. package/dist/vendor/codali/cognitive/BuilderOutputParser.d.ts +25 -0
  38. package/dist/vendor/codali/cognitive/BuilderOutputParser.d.ts.map +1 -0
  39. package/dist/vendor/codali/cognitive/BuilderOutputParser.js +164 -0
  40. package/dist/vendor/codali/cognitive/BuilderRunner.d.ts +76 -0
  41. package/dist/vendor/codali/cognitive/BuilderRunner.d.ts.map +1 -0
  42. package/dist/vendor/codali/cognitive/BuilderRunner.js +1159 -0
  43. package/dist/vendor/codali/cognitive/ContextAssembler.d.ts +91 -0
  44. package/dist/vendor/codali/cognitive/ContextAssembler.d.ts.map +1 -0
  45. package/dist/vendor/codali/cognitive/ContextAssembler.js +4547 -0
  46. package/dist/vendor/codali/cognitive/ContextBudget.d.ts +19 -0
  47. package/dist/vendor/codali/cognitive/ContextBudget.d.ts.map +1 -0
  48. package/dist/vendor/codali/cognitive/ContextBudget.js +35 -0
  49. package/dist/vendor/codali/cognitive/ContextFileLoader.d.ts +30 -0
  50. package/dist/vendor/codali/cognitive/ContextFileLoader.d.ts.map +1 -0
  51. package/dist/vendor/codali/cognitive/ContextFileLoader.js +307 -0
  52. package/dist/vendor/codali/cognitive/ContextManager.d.ts +47 -0
  53. package/dist/vendor/codali/cognitive/ContextManager.d.ts.map +1 -0
  54. package/dist/vendor/codali/cognitive/ContextManager.js +272 -0
  55. package/dist/vendor/codali/cognitive/ContextRedactor.d.ts +18 -0
  56. package/dist/vendor/codali/cognitive/ContextRedactor.d.ts.map +1 -0
  57. package/dist/vendor/codali/cognitive/ContextRedactor.js +53 -0
  58. package/dist/vendor/codali/cognitive/ContextSelector.d.ts +22 -0
  59. package/dist/vendor/codali/cognitive/ContextSelector.d.ts.map +1 -0
  60. package/dist/vendor/codali/cognitive/ContextSelector.js +431 -0
  61. package/dist/vendor/codali/cognitive/ContextSerializer.d.ts +8 -0
  62. package/dist/vendor/codali/cognitive/ContextSerializer.d.ts.map +1 -0
  63. package/dist/vendor/codali/cognitive/ContextSerializer.js +882 -0
  64. package/dist/vendor/codali/cognitive/ContextStore.d.ts +27 -0
  65. package/dist/vendor/codali/cognitive/ContextStore.d.ts.map +1 -0
  66. package/dist/vendor/codali/cognitive/ContextStore.js +79 -0
  67. package/dist/vendor/codali/cognitive/ContextSummarizer.d.ts +16 -0
  68. package/dist/vendor/codali/cognitive/ContextSummarizer.d.ts.map +1 -0
  69. package/dist/vendor/codali/cognitive/ContextSummarizer.js +45 -0
  70. package/dist/vendor/codali/cognitive/CostEstimator.d.ts +31 -0
  71. package/dist/vendor/codali/cognitive/CostEstimator.d.ts.map +1 -0
  72. package/dist/vendor/codali/cognitive/CostEstimator.js +66 -0
  73. package/dist/vendor/codali/cognitive/CriticEvaluator.d.ts +32 -0
  74. package/dist/vendor/codali/cognitive/CriticEvaluator.d.ts.map +1 -0
  75. package/dist/vendor/codali/cognitive/CriticEvaluator.js +297 -0
  76. package/dist/vendor/codali/cognitive/EvidenceGate.d.ts +9 -0
  77. package/dist/vendor/codali/cognitive/EvidenceGate.d.ts.map +1 -0
  78. package/dist/vendor/codali/cognitive/EvidenceGate.js +75 -0
  79. package/dist/vendor/codali/cognitive/GoldenExampleIndexer.d.ts +12 -0
  80. package/dist/vendor/codali/cognitive/GoldenExampleIndexer.d.ts.map +1 -0
  81. package/dist/vendor/codali/cognitive/GoldenExampleIndexer.js +34 -0
  82. package/dist/vendor/codali/cognitive/GoldenSetStore.d.ts +33 -0
  83. package/dist/vendor/codali/cognitive/GoldenSetStore.d.ts.map +1 -0
  84. package/dist/vendor/codali/cognitive/GoldenSetStore.js +159 -0
  85. package/dist/vendor/codali/cognitive/IntentSignals.d.ts +7 -0
  86. package/dist/vendor/codali/cognitive/IntentSignals.d.ts.map +1 -0
  87. package/dist/vendor/codali/cognitive/IntentSignals.js +285 -0
  88. package/dist/vendor/codali/cognitive/LearningGovernance.d.ts +100 -0
  89. package/dist/vendor/codali/cognitive/LearningGovernance.d.ts.map +1 -0
  90. package/dist/vendor/codali/cognitive/LearningGovernance.js +276 -0
  91. package/dist/vendor/codali/cognitive/MemoryWriteback.d.ts +64 -0
  92. package/dist/vendor/codali/cognitive/MemoryWriteback.d.ts.map +1 -0
  93. package/dist/vendor/codali/cognitive/MemoryWriteback.js +287 -0
  94. package/dist/vendor/codali/cognitive/PatchApplier.d.ts +49 -0
  95. package/dist/vendor/codali/cognitive/PatchApplier.d.ts.map +1 -0
  96. package/dist/vendor/codali/cognitive/PatchApplier.js +199 -0
  97. package/dist/vendor/codali/cognitive/PatchInterpreter.d.ts +35 -0
  98. package/dist/vendor/codali/cognitive/PatchInterpreter.d.ts.map +1 -0
  99. package/dist/vendor/codali/cognitive/PatchInterpreter.js +100 -0
  100. package/dist/vendor/codali/cognitive/PatchOutputNormalizer.d.ts +7 -0
  101. package/dist/vendor/codali/cognitive/PatchOutputNormalizer.d.ts.map +1 -0
  102. package/dist/vendor/codali/cognitive/PatchOutputNormalizer.js +59 -0
  103. package/dist/vendor/codali/cognitive/PostMortemAnalyzer.d.ts +17 -0
  104. package/dist/vendor/codali/cognitive/PostMortemAnalyzer.d.ts.map +1 -0
  105. package/dist/vendor/codali/cognitive/PostMortemAnalyzer.js +131 -0
  106. package/dist/vendor/codali/cognitive/PreferenceExtraction.d.ts +3 -0
  107. package/dist/vendor/codali/cognitive/PreferenceExtraction.d.ts.map +1 -0
  108. package/dist/vendor/codali/cognitive/PreferenceExtraction.js +85 -0
  109. package/dist/vendor/codali/cognitive/Prompts.d.ts +15 -0
  110. package/dist/vendor/codali/cognitive/Prompts.d.ts.map +1 -0
  111. package/dist/vendor/codali/cognitive/Prompts.js +326 -0
  112. package/dist/vendor/codali/cognitive/ProviderRouting.d.ts +16 -0
  113. package/dist/vendor/codali/cognitive/ProviderRouting.d.ts.map +1 -0
  114. package/dist/vendor/codali/cognitive/ProviderRouting.js +24 -0
  115. package/dist/vendor/codali/cognitive/QueryExtraction.d.ts +12 -0
  116. package/dist/vendor/codali/cognitive/QueryExtraction.d.ts.map +1 -0
  117. package/dist/vendor/codali/cognitive/QueryExtraction.js +262 -0
  118. package/dist/vendor/codali/cognitive/RunHistoryIndexer.d.ts +13 -0
  119. package/dist/vendor/codali/cognitive/RunHistoryIndexer.d.ts.map +1 -0
  120. package/dist/vendor/codali/cognitive/RunHistoryIndexer.js +125 -0
  121. package/dist/vendor/codali/cognitive/SmartPipeline.d.ts +92 -0
  122. package/dist/vendor/codali/cognitive/SmartPipeline.d.ts.map +1 -0
  123. package/dist/vendor/codali/cognitive/SmartPipeline.js +4804 -0
  124. package/dist/vendor/codali/cognitive/Types.d.ts +474 -0
  125. package/dist/vendor/codali/cognitive/Types.d.ts.map +1 -0
  126. package/dist/vendor/codali/cognitive/Types.js +7 -0
  127. package/dist/vendor/codali/cognitive/ValidationRunner.d.ts +57 -0
  128. package/dist/vendor/codali/cognitive/ValidationRunner.d.ts.map +1 -0
  129. package/dist/vendor/codali/cognitive/ValidationRunner.js +515 -0
  130. package/dist/vendor/codali/config/Config.d.ts +249 -0
  131. package/dist/vendor/codali/config/Config.d.ts.map +1 -0
  132. package/dist/vendor/codali/config/Config.js +200 -0
  133. package/dist/vendor/codali/config/ConfigLoader.d.ts +56 -0
  134. package/dist/vendor/codali/config/ConfigLoader.d.ts.map +1 -0
  135. package/dist/vendor/codali/config/ConfigLoader.js +1246 -0
  136. package/dist/vendor/codali/docdex/DocdexClient.d.ts +113 -0
  137. package/dist/vendor/codali/docdex/DocdexClient.d.ts.map +1 -0
  138. package/dist/vendor/codali/docdex/DocdexClient.js +524 -0
  139. package/dist/vendor/codali/eval/EvalRunner.d.ts +35 -0
  140. package/dist/vendor/codali/eval/EvalRunner.d.ts.map +1 -0
  141. package/dist/vendor/codali/eval/EvalRunner.js +38 -0
  142. package/dist/vendor/codali/eval/EvalTaskExecutor.d.ts +81 -0
  143. package/dist/vendor/codali/eval/EvalTaskExecutor.d.ts.map +1 -0
  144. package/dist/vendor/codali/eval/EvalTaskExecutor.js +371 -0
  145. package/dist/vendor/codali/eval/GateEvaluator.d.ts +31 -0
  146. package/dist/vendor/codali/eval/GateEvaluator.d.ts.map +1 -0
  147. package/dist/vendor/codali/eval/GateEvaluator.js +134 -0
  148. package/dist/vendor/codali/eval/MetricTypes.d.ts +28 -0
  149. package/dist/vendor/codali/eval/MetricTypes.d.ts.map +1 -0
  150. package/dist/vendor/codali/eval/MetricTypes.js +1 -0
  151. package/dist/vendor/codali/eval/MetricsAggregator.d.ts +4 -0
  152. package/dist/vendor/codali/eval/MetricsAggregator.d.ts.map +1 -0
  153. package/dist/vendor/codali/eval/MetricsAggregator.js +97 -0
  154. package/dist/vendor/codali/eval/RegressionComparator.d.ts +29 -0
  155. package/dist/vendor/codali/eval/RegressionComparator.d.ts.map +1 -0
  156. package/dist/vendor/codali/eval/RegressionComparator.js +155 -0
  157. package/dist/vendor/codali/eval/ReportInputAdapter.d.ts +52 -0
  158. package/dist/vendor/codali/eval/ReportInputAdapter.d.ts.map +1 -0
  159. package/dist/vendor/codali/eval/ReportInputAdapter.js +229 -0
  160. package/dist/vendor/codali/eval/ReportSerializer.d.ts +32 -0
  161. package/dist/vendor/codali/eval/ReportSerializer.d.ts.map +1 -0
  162. package/dist/vendor/codali/eval/ReportSerializer.js +33 -0
  163. package/dist/vendor/codali/eval/ReportStore.d.ts +18 -0
  164. package/dist/vendor/codali/eval/ReportStore.d.ts.map +1 -0
  165. package/dist/vendor/codali/eval/ReportStore.js +96 -0
  166. package/dist/vendor/codali/eval/SuiteLoader.d.ts +12 -0
  167. package/dist/vendor/codali/eval/SuiteLoader.d.ts.map +1 -0
  168. package/dist/vendor/codali/eval/SuiteLoader.js +51 -0
  169. package/dist/vendor/codali/eval/SuiteSchema.d.ts +56 -0
  170. package/dist/vendor/codali/eval/SuiteSchema.d.ts.map +1 -0
  171. package/dist/vendor/codali/eval/SuiteSchema.js +357 -0
  172. package/dist/vendor/codali/index.d.ts +11 -0
  173. package/dist/vendor/codali/index.d.ts.map +1 -0
  174. package/dist/vendor/codali/index.js +5 -0
  175. package/dist/vendor/codali/providers/CodexCliProvider.d.ts +8 -0
  176. package/dist/vendor/codali/providers/CodexCliProvider.d.ts.map +1 -0
  177. package/dist/vendor/codali/providers/CodexCliProvider.js +282 -0
  178. package/dist/vendor/codali/providers/OllamaRemoteProvider.d.ts +8 -0
  179. package/dist/vendor/codali/providers/OllamaRemoteProvider.d.ts.map +1 -0
  180. package/dist/vendor/codali/providers/OllamaRemoteProvider.js +300 -0
  181. package/dist/vendor/codali/providers/OpenAiCompatibleProvider.d.ts +8 -0
  182. package/dist/vendor/codali/providers/OpenAiCompatibleProvider.d.ts.map +1 -0
  183. package/dist/vendor/codali/providers/OpenAiCompatibleProvider.js +192 -0
  184. package/dist/vendor/codali/providers/ProviderRegistry.d.ts +12 -0
  185. package/dist/vendor/codali/providers/ProviderRegistry.d.ts.map +1 -0
  186. package/dist/vendor/codali/providers/ProviderRegistry.js +28 -0
  187. package/dist/vendor/codali/providers/ProviderTypes.d.ts +81 -0
  188. package/dist/vendor/codali/providers/ProviderTypes.d.ts.map +1 -0
  189. package/dist/vendor/codali/providers/ProviderTypes.js +1 -0
  190. package/dist/vendor/codali/runtime/CodaliRuntime.d.ts +183 -0
  191. package/dist/vendor/codali/runtime/CodaliRuntime.d.ts.map +1 -0
  192. package/dist/vendor/codali/runtime/CodaliRuntime.js +1363 -0
  193. package/dist/vendor/codali/runtime/DeepInvestigationErrors.d.ts +39 -0
  194. package/dist/vendor/codali/runtime/DeepInvestigationErrors.d.ts.map +1 -0
  195. package/dist/vendor/codali/runtime/DeepInvestigationErrors.js +57 -0
  196. package/dist/vendor/codali/runtime/RunContext.d.ts +27 -0
  197. package/dist/vendor/codali/runtime/RunContext.d.ts.map +1 -0
  198. package/dist/vendor/codali/runtime/RunContext.js +51 -0
  199. package/dist/vendor/codali/runtime/RunLogQuery.d.ts +48 -0
  200. package/dist/vendor/codali/runtime/RunLogQuery.d.ts.map +1 -0
  201. package/dist/vendor/codali/runtime/RunLogQuery.js +36 -0
  202. package/dist/vendor/codali/runtime/RunLogReader.d.ts +19 -0
  203. package/dist/vendor/codali/runtime/RunLogReader.d.ts.map +1 -0
  204. package/dist/vendor/codali/runtime/RunLogReader.js +361 -0
  205. package/dist/vendor/codali/runtime/RunLogger.d.ts +71 -0
  206. package/dist/vendor/codali/runtime/RunLogger.d.ts.map +1 -0
  207. package/dist/vendor/codali/runtime/RunLogger.js +100 -0
  208. package/dist/vendor/codali/runtime/RunTelemetryTypes.d.ts +117 -0
  209. package/dist/vendor/codali/runtime/RunTelemetryTypes.d.ts.map +1 -0
  210. package/dist/vendor/codali/runtime/RunTelemetryTypes.js +299 -0
  211. package/dist/vendor/codali/runtime/Runner.d.ts +66 -0
  212. package/dist/vendor/codali/runtime/Runner.d.ts.map +1 -0
  213. package/dist/vendor/codali/runtime/Runner.js +215 -0
  214. package/dist/vendor/codali/runtime/StoragePaths.d.ts +3 -0
  215. package/dist/vendor/codali/runtime/StoragePaths.d.ts.map +1 -0
  216. package/dist/vendor/codali/runtime/StoragePaths.js +19 -0
  217. package/dist/vendor/codali/runtime/WorkspaceLock.d.ts +30 -0
  218. package/dist/vendor/codali/runtime/WorkspaceLock.d.ts.map +1 -0
  219. package/dist/vendor/codali/runtime/WorkspaceLock.js +141 -0
  220. package/dist/vendor/codali/session/InstructionLoader.d.ts +14 -0
  221. package/dist/vendor/codali/session/InstructionLoader.d.ts.map +1 -0
  222. package/dist/vendor/codali/session/InstructionLoader.js +107 -0
  223. package/dist/vendor/codali/session/SessionStore.d.ts +81 -0
  224. package/dist/vendor/codali/session/SessionStore.d.ts.map +1 -0
  225. package/dist/vendor/codali/session/SessionStore.js +244 -0
  226. package/dist/vendor/codali/subagents/SubagentOrchestrator.d.ts +68 -0
  227. package/dist/vendor/codali/subagents/SubagentOrchestrator.d.ts.map +1 -0
  228. package/dist/vendor/codali/subagents/SubagentOrchestrator.js +150 -0
  229. package/dist/vendor/codali/tools/ToolRegistry.d.ts +9 -0
  230. package/dist/vendor/codali/tools/ToolRegistry.d.ts.map +1 -0
  231. package/dist/vendor/codali/tools/ToolRegistry.js +263 -0
  232. package/dist/vendor/codali/tools/ToolTypes.d.ts +66 -0
  233. package/dist/vendor/codali/tools/ToolTypes.d.ts.map +1 -0
  234. package/dist/vendor/codali/tools/ToolTypes.js +32 -0
  235. package/dist/vendor/codali/tools/diff/DiffTool.d.ts +3 -0
  236. package/dist/vendor/codali/tools/diff/DiffTool.d.ts.map +1 -0
  237. package/dist/vendor/codali/tools/diff/DiffTool.js +34 -0
  238. package/dist/vendor/codali/tools/docdex/DocdexTools.d.ts +4 -0
  239. package/dist/vendor/codali/tools/docdex/DocdexTools.d.ts.map +1 -0
  240. package/dist/vendor/codali/tools/docdex/DocdexTools.js +453 -0
  241. package/dist/vendor/codali/tools/filesystem/FileTools.d.ts +3 -0
  242. package/dist/vendor/codali/tools/filesystem/FileTools.d.ts.map +1 -0
  243. package/dist/vendor/codali/tools/filesystem/FileTools.js +141 -0
  244. package/dist/vendor/codali/tools/search/SearchTool.d.ts +3 -0
  245. package/dist/vendor/codali/tools/search/SearchTool.d.ts.map +1 -0
  246. package/dist/vendor/codali/tools/search/SearchTool.js +46 -0
  247. package/dist/vendor/codali/tools/shell/ShellTool.d.ts +3 -0
  248. package/dist/vendor/codali/tools/shell/ShellTool.d.ts.map +1 -0
  249. package/dist/vendor/codali/tools/shell/ShellTool.js +104 -0
  250. package/package.json +5 -3
@@ -0,0 +1,28 @@
1
+ export interface RateMetric {
2
+ numerator: number;
3
+ denominator: number;
4
+ missing: number;
5
+ value: number | null;
6
+ }
7
+ export interface PercentileMetric {
8
+ sample_size: number;
9
+ missing: number;
10
+ median: number | null;
11
+ p95: number | null;
12
+ }
13
+ export interface EvalMetrics {
14
+ schema_version: 1;
15
+ generated_at: string;
16
+ task_count: number;
17
+ m001_task_success_rate: RateMetric;
18
+ m002_first_pass_success_rate: RateMetric;
19
+ m003_patch_apply_success_rate: RateMetric;
20
+ m004_verification_pass_rate: RateMetric;
21
+ m005_hallucination_rate: RateMetric;
22
+ m006_scope_violation_rate: RateMetric;
23
+ m007_latency_ms: PercentileMetric;
24
+ m008_success_tokens: PercentileMetric;
25
+ m008_success_cost_usd: PercentileMetric;
26
+ }
27
+ export type EvalMetricKey = "m001_task_success_rate" | "m002_first_pass_success_rate" | "m003_patch_apply_success_rate" | "m004_verification_pass_rate" | "m005_hallucination_rate" | "m006_scope_violation_rate" | "m007_latency_ms.median" | "m007_latency_ms.p95" | "m008_success_tokens.median" | "m008_success_tokens.p95" | "m008_success_cost_usd.median" | "m008_success_cost_usd.p95";
28
+ //# sourceMappingURL=MetricTypes.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"MetricTypes.d.ts","sourceRoot":"","sources":["../../src/eval/MetricTypes.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,UAAU;IACzB,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;CACtB;AAED,MAAM,WAAW,gBAAgB;IAC/B,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IACtB,GAAG,EAAE,MAAM,GAAG,IAAI,CAAC;CACpB;AAED,MAAM,WAAW,WAAW;IAC1B,cAAc,EAAE,CAAC,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;IACnB,sBAAsB,EAAE,UAAU,CAAC;IACnC,4BAA4B,EAAE,UAAU,CAAC;IACzC,6BAA6B,EAAE,UAAU,CAAC;IAC1C,2BAA2B,EAAE,UAAU,CAAC;IACxC,uBAAuB,EAAE,UAAU,CAAC;IACpC,yBAAyB,EAAE,UAAU,CAAC;IACtC,eAAe,EAAE,gBAAgB,CAAC;IAClC,mBAAmB,EAAE,gBAAgB,CAAC;IACtC,qBAAqB,EAAE,gBAAgB,CAAC;CACzC;AAED,MAAM,MAAM,aAAa,GACrB,wBAAwB,GACxB,8BAA8B,GAC9B,+BAA+B,GAC/B,6BAA6B,GAC7B,yBAAyB,GACzB,2BAA2B,GAC3B,wBAAwB,GACxB,qBAAqB,GACrB,4BAA4B,GAC5B,yBAAyB,GACzB,8BAA8B,GAC9B,2BAA2B,CAAC"}
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,4 @@
1
+ import type { EvalMetrics } from "./MetricTypes.js";
2
+ import type { EvalRunResult } from "./EvalRunner.js";
3
+ export declare const aggregateMetrics: (run: EvalRunResult) => EvalMetrics;
4
+ //# sourceMappingURL=MetricsAggregator.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"MetricsAggregator.d.ts","sourceRoot":"","sources":["../../src/eval/MetricsAggregator.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,WAAW,EAAgC,MAAM,kBAAkB,CAAC;AAClF,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AA0ErD,eAAO,MAAM,gBAAgB,GAAI,KAAK,aAAa,KAAG,WA0BrD,CAAC"}
@@ -0,0 +1,97 @@
1
+ const computeRate = (samples) => {
2
+ let numerator = 0;
3
+ let denominator = 0;
4
+ let missing = 0;
5
+ for (const sample of samples) {
6
+ if (sample === null) {
7
+ missing += 1;
8
+ continue;
9
+ }
10
+ denominator += 1;
11
+ if (sample)
12
+ numerator += 1;
13
+ }
14
+ return {
15
+ numerator,
16
+ denominator,
17
+ missing,
18
+ value: denominator > 0 ? numerator / denominator : null,
19
+ };
20
+ };
21
+ const percentile = (values, fraction) => {
22
+ if (values.length === 0)
23
+ return null;
24
+ const sorted = [...values].sort((left, right) => left - right);
25
+ if (sorted.length === 1)
26
+ return sorted[0];
27
+ const index = (sorted.length - 1) * fraction;
28
+ const lower = Math.floor(index);
29
+ const upper = Math.ceil(index);
30
+ const lowerValue = sorted[lower];
31
+ const upperValue = sorted[upper];
32
+ if (lower === upper)
33
+ return lowerValue;
34
+ const weight = index - lower;
35
+ return lowerValue + (upperValue - lowerValue) * weight;
36
+ };
37
+ const computePercentiles = (samples) => {
38
+ const values = samples.filter((entry) => entry !== null);
39
+ return {
40
+ sample_size: values.length,
41
+ missing: samples.length - values.length,
42
+ median: percentile(values, 0.5),
43
+ p95: percentile(values, 0.95),
44
+ };
45
+ };
46
+ const toFirstPassSample = (result) => {
47
+ if (!result.task_passed)
48
+ return false;
49
+ if (result.first_pass === null)
50
+ return null;
51
+ return result.first_pass;
52
+ };
53
+ const toPatchApplySample = (result) => {
54
+ if (result.patch_apply_success !== null)
55
+ return result.patch_apply_success;
56
+ const hasPatchExpectation = result.assertion_results.some((assertion) => assertion.code === "assert_expect_patch_apply");
57
+ if (!hasPatchExpectation)
58
+ return null;
59
+ return false;
60
+ };
61
+ const toVerificationSample = (result) => result.verification_passed;
62
+ const toHallucinationSample = (result) => {
63
+ if (result.hallucination_detected === null)
64
+ return null;
65
+ return result.hallucination_detected;
66
+ };
67
+ const toScopeViolationSample = (result) => {
68
+ if (result.scope_violation_detected === null)
69
+ return null;
70
+ return result.scope_violation_detected;
71
+ };
72
+ export const aggregateMetrics = (run) => {
73
+ const taskSuccess = run.task_results.map((result) => result.task_passed);
74
+ const firstPass = run.task_results.map((result) => toFirstPassSample(result));
75
+ const patchApply = run.task_results.map((result) => toPatchApplySample(result));
76
+ const verificationPass = run.task_results.map((result) => toVerificationSample(result));
77
+ const hallucinationRate = run.task_results.map((result) => toHallucinationSample(result));
78
+ const scopeViolationRate = run.task_results.map((result) => toScopeViolationSample(result));
79
+ const latency = run.task_results.map((result) => result.latency_ms);
80
+ const successfulTasks = run.task_results.filter((result) => result.task_passed);
81
+ const successfulTokens = successfulTasks.map((result) => result.tokens_used);
82
+ const successfulCost = successfulTasks.map((result) => result.cost_usd);
83
+ return {
84
+ schema_version: 1,
85
+ generated_at: new Date().toISOString(),
86
+ task_count: run.summary.total,
87
+ m001_task_success_rate: computeRate(taskSuccess.map((value) => value)),
88
+ m002_first_pass_success_rate: computeRate(firstPass),
89
+ m003_patch_apply_success_rate: computeRate(patchApply),
90
+ m004_verification_pass_rate: computeRate(verificationPass),
91
+ m005_hallucination_rate: computeRate(hallucinationRate),
92
+ m006_scope_violation_rate: computeRate(scopeViolationRate),
93
+ m007_latency_ms: computePercentiles(latency),
94
+ m008_success_tokens: computePercentiles(successfulTokens),
95
+ m008_success_cost_usd: computePercentiles(successfulCost),
96
+ };
97
+ };
@@ -0,0 +1,29 @@
1
+ import type { EvalMetricKey, EvalMetrics } from "./MetricTypes.js";
2
+ export interface EvalMetricDelta {
3
+ key: EvalMetricKey;
4
+ unit: "ratio" | "ms" | "tokens" | "usd";
5
+ higher_is_better: boolean;
6
+ baseline: number | null;
7
+ current: number | null;
8
+ delta: number | null;
9
+ direction: "up" | "down" | "flat" | "unknown";
10
+ regression: boolean;
11
+ improved: boolean;
12
+ }
13
+ export interface EvalRegressionComparison {
14
+ schema_version: 1;
15
+ status: "baseline_missing" | "compared";
16
+ baseline_report_id?: string;
17
+ baseline_created_at?: string;
18
+ deltas: EvalMetricDelta[];
19
+ regression_count: number;
20
+ improved_count: number;
21
+ unchanged_count: number;
22
+ }
23
+ export declare const compareAgainstBaseline: (params: {
24
+ current: EvalMetrics;
25
+ baseline?: EvalMetrics;
26
+ baseline_report_id?: string;
27
+ baseline_created_at?: string;
28
+ }) => EvalRegressionComparison;
29
+ //# sourceMappingURL=RegressionComparator.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"RegressionComparator.d.ts","sourceRoot":"","sources":["../../src/eval/RegressionComparator.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAEnE,MAAM,WAAW,eAAe;IAC9B,GAAG,EAAE,aAAa,CAAC;IACnB,IAAI,EAAE,OAAO,GAAG,IAAI,GAAG,QAAQ,GAAG,KAAK,CAAC;IACxC,gBAAgB,EAAE,OAAO,CAAC;IAC1B,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IACvB,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;IACrB,SAAS,EAAE,IAAI,GAAG,MAAM,GAAG,MAAM,GAAG,SAAS,CAAC;IAC9C,UAAU,EAAE,OAAO,CAAC;IACpB,QAAQ,EAAE,OAAO,CAAC;CACnB;AAED,MAAM,WAAW,wBAAwB;IACvC,cAAc,EAAE,CAAC,CAAC;IAClB,MAAM,EAAE,kBAAkB,GAAG,UAAU,CAAC;IACxC,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAC7B,MAAM,EAAE,eAAe,EAAE,CAAC;IAC1B,gBAAgB,EAAE,MAAM,CAAC;IACzB,cAAc,EAAE,MAAM,CAAC;IACvB,eAAe,EAAE,MAAM,CAAC;CACzB;AA0HD,eAAO,MAAM,sBAAsB,GAAI,QAAQ;IAC7C,OAAO,EAAE,WAAW,CAAC;IACrB,QAAQ,CAAC,EAAE,WAAW,CAAC;IACvB,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,mBAAmB,CAAC,EAAE,MAAM,CAAC;CAC9B,KAAG,wBAiDH,CAAC"}
@@ -0,0 +1,155 @@
1
+ const EPSILON = 1e-12;
2
+ const flattenMetrics = (metrics) => [
3
+ {
4
+ key: "m001_task_success_rate",
5
+ value: metrics.m001_task_success_rate.value,
6
+ unit: "ratio",
7
+ higher_is_better: true,
8
+ },
9
+ {
10
+ key: "m002_first_pass_success_rate",
11
+ value: metrics.m002_first_pass_success_rate.value,
12
+ unit: "ratio",
13
+ higher_is_better: true,
14
+ },
15
+ {
16
+ key: "m003_patch_apply_success_rate",
17
+ value: metrics.m003_patch_apply_success_rate.value,
18
+ unit: "ratio",
19
+ higher_is_better: true,
20
+ },
21
+ {
22
+ key: "m004_verification_pass_rate",
23
+ value: metrics.m004_verification_pass_rate.value,
24
+ unit: "ratio",
25
+ higher_is_better: true,
26
+ },
27
+ {
28
+ key: "m005_hallucination_rate",
29
+ value: metrics.m005_hallucination_rate.value,
30
+ unit: "ratio",
31
+ higher_is_better: false,
32
+ },
33
+ {
34
+ key: "m006_scope_violation_rate",
35
+ value: metrics.m006_scope_violation_rate.value,
36
+ unit: "ratio",
37
+ higher_is_better: false,
38
+ },
39
+ {
40
+ key: "m007_latency_ms.median",
41
+ value: metrics.m007_latency_ms.median,
42
+ unit: "ms",
43
+ higher_is_better: false,
44
+ },
45
+ {
46
+ key: "m007_latency_ms.p95",
47
+ value: metrics.m007_latency_ms.p95,
48
+ unit: "ms",
49
+ higher_is_better: false,
50
+ },
51
+ {
52
+ key: "m008_success_tokens.median",
53
+ value: metrics.m008_success_tokens.median,
54
+ unit: "tokens",
55
+ higher_is_better: false,
56
+ },
57
+ {
58
+ key: "m008_success_tokens.p95",
59
+ value: metrics.m008_success_tokens.p95,
60
+ unit: "tokens",
61
+ higher_is_better: false,
62
+ },
63
+ {
64
+ key: "m008_success_cost_usd.median",
65
+ value: metrics.m008_success_cost_usd.median,
66
+ unit: "usd",
67
+ higher_is_better: false,
68
+ },
69
+ {
70
+ key: "m008_success_cost_usd.p95",
71
+ value: metrics.m008_success_cost_usd.p95,
72
+ unit: "usd",
73
+ higher_is_better: false,
74
+ },
75
+ ];
76
+ const compareMetric = (current, baseline) => {
77
+ if (current.value === null || baseline.value === null) {
78
+ return {
79
+ key: current.key,
80
+ unit: current.unit,
81
+ higher_is_better: current.higher_is_better,
82
+ baseline: baseline.value,
83
+ current: current.value,
84
+ delta: null,
85
+ direction: "unknown",
86
+ regression: false,
87
+ improved: false,
88
+ };
89
+ }
90
+ const delta = current.value - baseline.value;
91
+ const direction = Math.abs(delta) <= EPSILON ? "flat" : (delta > 0 ? "up" : "down");
92
+ const regression = current.higher_is_better ? delta < -EPSILON : delta > EPSILON;
93
+ const improved = current.higher_is_better ? delta > EPSILON : delta < -EPSILON;
94
+ return {
95
+ key: current.key,
96
+ unit: current.unit,
97
+ higher_is_better: current.higher_is_better,
98
+ baseline: baseline.value,
99
+ current: current.value,
100
+ delta: Math.abs(delta) <= EPSILON ? 0 : delta,
101
+ direction,
102
+ regression,
103
+ improved,
104
+ };
105
+ };
106
+ export const compareAgainstBaseline = (params) => {
107
+ const currentEntries = flattenMetrics(params.current);
108
+ if (!params.baseline) {
109
+ return {
110
+ schema_version: 1,
111
+ status: "baseline_missing",
112
+ baseline_report_id: params.baseline_report_id,
113
+ baseline_created_at: params.baseline_created_at,
114
+ deltas: currentEntries.map((entry) => ({
115
+ key: entry.key,
116
+ unit: entry.unit,
117
+ higher_is_better: entry.higher_is_better,
118
+ baseline: null,
119
+ current: entry.value,
120
+ delta: null,
121
+ direction: "unknown",
122
+ regression: false,
123
+ improved: false,
124
+ })),
125
+ regression_count: 0,
126
+ improved_count: 0,
127
+ unchanged_count: currentEntries.length,
128
+ };
129
+ }
130
+ const baselineEntries = flattenMetrics(params.baseline);
131
+ const baselineByKey = new Map();
132
+ for (const entry of baselineEntries) {
133
+ baselineByKey.set(entry.key, entry);
134
+ }
135
+ const deltas = [];
136
+ for (const entry of currentEntries) {
137
+ const baselineEntry = baselineByKey.get(entry.key);
138
+ if (!baselineEntry)
139
+ continue;
140
+ deltas.push(compareMetric(entry, baselineEntry));
141
+ }
142
+ const regressionCount = deltas.filter((entry) => entry.regression).length;
143
+ const improvedCount = deltas.filter((entry) => entry.improved).length;
144
+ const unchangedCount = deltas.length - regressionCount - improvedCount;
145
+ return {
146
+ schema_version: 1,
147
+ status: "compared",
148
+ baseline_report_id: params.baseline_report_id,
149
+ baseline_created_at: params.baseline_created_at,
150
+ deltas,
151
+ regression_count: regressionCount,
152
+ improved_count: improvedCount,
153
+ unchanged_count: unchangedCount,
154
+ };
155
+ };
@@ -0,0 +1,52 @@
1
+ import type { VerificationOutcome } from "../cognitive/Types.js";
2
+ export type NormalizedRunStatus = "pass" | "fail" | "degraded" | "unknown";
3
+ export type NormalizedPhaseStatus = "available" | "missing" | "degraded";
4
+ export interface NormalizedArtifactReference {
5
+ phase: string;
6
+ kind: string;
7
+ status: "present" | "missing";
8
+ path: string | null;
9
+ reason_code: string | null;
10
+ }
11
+ export interface NormalizedPhaseOutcome {
12
+ phase: string;
13
+ status: NormalizedPhaseStatus;
14
+ duration_ms: number | null;
15
+ provider: string | null;
16
+ model: string | null;
17
+ input_tokens: number | null;
18
+ output_tokens: number | null;
19
+ total_tokens: number | null;
20
+ cost_usd: number | null;
21
+ cost_source: string | null;
22
+ missing_usage_reason: string | null;
23
+ missing_cost_reason: string | null;
24
+ }
25
+ export interface NormalizedRunRecord {
26
+ schema_version: 1;
27
+ run_id: string | null;
28
+ task_id: string | null;
29
+ fingerprint: string | null;
30
+ duration_ms: number | null;
31
+ final_status: NormalizedRunStatus;
32
+ failure_class: string | null;
33
+ reason_codes: string[];
34
+ retryable: boolean | null;
35
+ verification_outcome: VerificationOutcome | null;
36
+ touched_files: string[];
37
+ artifact_references: NormalizedArtifactReference[];
38
+ missing_artifacts: string[];
39
+ phase_outcomes: NormalizedPhaseOutcome[];
40
+ usage_tokens_total: number | null;
41
+ cost_usd: number | null;
42
+ missing_data_markers: string[];
43
+ }
44
+ export interface AdaptRunSummaryInput {
45
+ runSummary?: unknown;
46
+ runId?: string;
47
+ taskId?: string;
48
+ verificationOutcome?: VerificationOutcome | null;
49
+ touchedFiles?: string[];
50
+ }
51
+ export declare const adaptRunSummaryForReport: (input?: AdaptRunSummaryInput) => NormalizedRunRecord;
52
+ //# sourceMappingURL=ReportInputAdapter.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ReportInputAdapter.d.ts","sourceRoot":"","sources":["../../src/eval/ReportInputAdapter.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,uBAAuB,CAAC;AAEjE,MAAM,MAAM,mBAAmB,GAAG,MAAM,GAAG,MAAM,GAAG,UAAU,GAAG,SAAS,CAAC;AAC3E,MAAM,MAAM,qBAAqB,GAAG,WAAW,GAAG,SAAS,GAAG,UAAU,CAAC;AAEzE,MAAM,WAAW,2BAA2B;IAC1C,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,SAAS,GAAG,SAAS,CAAC;IAC9B,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IACpB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;CAC5B;AAED,MAAM,WAAW,sBAAsB;IACrC,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,qBAAqB,CAAC;IAC9B,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;IACrB,YAAY,EAAE,MAAM,GAAG,IAAI,CAAC;IAC5B,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7B,YAAY,EAAE,MAAM,GAAG,IAAI,CAAC;IAC5B,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,oBAAoB,EAAE,MAAM,GAAG,IAAI,CAAC;IACpC,mBAAmB,EAAE,MAAM,GAAG,IAAI,CAAC;CACpC;AAED,MAAM,WAAW,mBAAmB;IAClC,cAAc,EAAE,CAAC,CAAC;IAClB,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IACtB,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IACvB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,YAAY,EAAE,mBAAmB,CAAC;IAClC,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7B,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,SAAS,EAAE,OAAO,GAAG,IAAI,CAAC;IAC1B,oBAAoB,EAAE,mBAAmB,GAAG,IAAI,CAAC;IACjD,aAAa,EAAE,MAAM,EAAE,CAAC;IACxB,mBAAmB,EAAE,2BAA2B,EAAE,CAAC;IACnD,iBAAiB,EAAE,MAAM,EAAE,CAAC;IAC5B,cAAc,EAAE,sBAAsB,EAAE,CAAC;IACzC,kBAAkB,EAAE,MAAM,GAAG,IAAI,CAAC;IAClC,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,oBAAoB,EAAE,MAAM,EAAE,CAAC;CAChC;AAED,MAAM,WAAW,oBAAoB;IACnC,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,mBAAmB,CAAC,EAAE,mBAAmB,GAAG,IAAI,CAAC;IACjD,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;CACzB;AAmMD,eAAO,MAAM,wBAAwB,GAAI,QAAO,oBAAyB,KAAG,mBAsE3E,CAAC"}
@@ -0,0 +1,229 @@
1
/**
 * Narrows an arbitrary value to a plain, non-array object.
 *
 * @param {unknown} value Candidate value of any type.
 * @returns The same reference when `value` is a non-null, non-array object;
 *          otherwise `undefined`.
 */
const asRecord = (value) => {
    const isObjectLike = Boolean(value) && typeof value === "object";
    return isObjectLike && !Array.isArray(value) ? value : undefined;
};
6
/**
 * Extracts a trimmed, non-empty string from an arbitrary value.
 *
 * @param {unknown} value Candidate value of any type.
 * @returns The trimmed text, or `undefined` when `value` is not a string or
 *          contains only whitespace.
 */
const asString = (value) => {
    const text = typeof value === "string" ? value.trim() : "";
    return text.length > 0 ? text : undefined;
};
12
/**
 * Accepts only genuine booleans.
 *
 * The original carried a separate `value === null` branch that returned the
 * same `null` as the fall-through; that redundant branch is removed here.
 *
 * @param {unknown} value Candidate value of any type.
 * @returns `value` itself when it is `true` or `false`; `null` for everything
 *          else (including `null`, `undefined`, and truthy/falsy non-booleans).
 */
const asBoolean = (value) => (typeof value === "boolean" ? value : null);
19
/**
 * Accepts only finite numbers.
 *
 * `Number.isFinite` performs no coercion, so it already rejects every
 * non-number as well as `NaN` and the infinities.
 *
 * @param {unknown} value Candidate value of any type.
 * @returns `value` when it is a finite number; otherwise `null`.
 */
const asNumber = (value) => (Number.isFinite(value) ? value : null);
24
/**
 * Builds a sorted list of the distinct, non-blank strings found in `value`.
 *
 * Non-string entries are ignored, strings are trimmed before comparison, and
 * the result is ordered with `String.prototype.localeCompare`.
 *
 * @param {unknown} value Expected to be an array; anything else yields `[]`.
 * @returns {string[]} Trimmed, de-duplicated, sorted strings.
 */
const uniqueSortedStrings = (value) => {
    if (!Array.isArray(value))
        return [];
    const seen = new Set();
    const kept = [];
    for (const item of value) {
        if (typeof item !== "string")
            continue;
        const cleaned = item.trim();
        if (cleaned.length === 0 || seen.has(cleaned))
            continue;
        seen.add(cleaned);
        kept.push(cleaned);
    }
    return kept.sort((a, b) => a.localeCompare(b));
};
32
/**
 * Maps a raw status onto the run-status vocabulary.
 *
 * @param {unknown} value Raw status value.
 * @returns `"pass"`, `"fail"` or `"degraded"` when `value` is exactly one of
 *          those strings; `"unknown"` for anything else.
 */
const normalizeRunStatus = (value) => {
    switch (value) {
        case "pass":
        case "fail":
        case "degraded":
            return value;
        default:
            return "unknown";
    }
};
37
/**
 * Maps a raw status onto the phase-status vocabulary.
 *
 * `"missing"` is both a recognized value and the fallback, so only the two
 * other recognized values need an explicit check.
 *
 * @param {unknown} value Raw status value.
 * @returns `"available"` or `"degraded"` when `value` is exactly that string;
 *          `"missing"` for anything else.
 */
const normalizePhaseStatus = (value) => {
    const passesThrough = value === "available" || value === "degraded";
    return passesThrough ? value : "missing";
};
42
/**
 * Accepts only the recognized verification outcome strings.
 *
 * @param {unknown} value Raw outcome value.
 * @returns `value` when it is exactly `"verified_passed"`, `"verified_failed"`
 *          or `"unverified_with_reason"`; otherwise `null`.
 */
const normalizeVerificationOutcome = (value) => {
    const recognized = ["verified_passed", "verified_failed", "unverified_with_reason"];
    return recognized.includes(value) ? value : null;
};
50
/**
 * Converts a raw `artifact_references` array into a clean, sorted list.
 *
 * Entries that are not plain objects, or that lack a non-blank `phase` or
 * `kind`, are dropped. `status` becomes `"missing"` only when the source says
 * exactly `"missing"`; every other value is reported as `"present"`.
 *
 * @param {unknown} value Expected to be an array; anything else yields `[]`.
 * @returns Normalized references ordered by `phase:kind:path`.
 */
const normalizeArtifactReferences = (value) => {
    if (!Array.isArray(value))
        return [];
    // A null path sorts as the empty string.
    const sortKey = (ref) => `${ref.phase}:${ref.kind}:${ref.path ?? ""}`;
    const references = value.flatMap((candidate) => {
        const fields = asRecord(candidate);
        const phase = asString(fields?.phase);
        const kind = asString(fields?.kind);
        if (!fields || !phase || !kind)
            return [];
        const status = fields.status === "missing" ? "missing" : "present";
        const path = asString(fields.path) ?? null;
        const reason_code = asString(fields.reason_code) ?? null;
        return [{ phase, kind, status, path, reason_code }];
    });
    return references.sort((a, b) => sortKey(a).localeCompare(sortKey(b)));
};
72
/**
 * Converts a raw `phase_telemetry` array into flat per-phase rows.
 *
 * Entries that are not plain objects, or that lack a non-blank `phase`, are
 * dropped. Token and cost figures come from the nested `usage` and `cost`
 * objects; each is `null` when it is not a finite number.
 *
 * @param {unknown} value Expected to be an array; anything else yields `[]`.
 * @returns Telemetry rows ordered by phase name.
 */
const normalizePhaseTelemetry = (value) => {
    if (!Array.isArray(value))
        return [];
    const toRow = (candidate) => {
        const fields = asRecord(candidate);
        const phase = asString(fields?.phase);
        if (!fields || !phase)
            return null;
        const usage = asRecord(fields.usage);
        const cost = asRecord(fields.cost);
        const input_tokens = asNumber(usage?.input_tokens);
        const output_tokens = asNumber(usage?.output_tokens);
        // Prefer the reported total; otherwise derive it when at least one side is known.
        let total_tokens = asNumber(usage?.total_tokens);
        if (total_tokens === null && (input_tokens !== null || output_tokens !== null)) {
            total_tokens = (input_tokens ?? 0) + (output_tokens ?? 0);
        }
        return {
            phase,
            duration_ms: asNumber(fields.duration_ms),
            provider: asString(fields.provider) ?? null,
            model: asString(fields.model) ?? null,
            input_tokens,
            output_tokens,
            total_tokens,
            cost_usd: asNumber(cost?.usd),
            cost_source: asString(cost?.source) ?? null,
            missing_usage_reason: asString(fields.missing_usage_reason) ?? null,
            missing_cost_reason: asString(fields.missing_cost_reason) ?? null,
        };
    };
    return value
        .map((candidate) => toRow(candidate))
        .filter((row) => row !== null)
        .sort((a, b) => a.phase.localeCompare(b.phase));
};
107
/**
 * Translates a quality-dimension key into the phase name used in outcomes.
 *
 * @param {unknown} key Quality-dimension key.
 * @returns `"plan"`, `"retrieve"`, `"act"` or `"verify"` for the keys
 *          `"plan"`, `"retrieval"`, `"patch"` and `"verification"`
 *          respectively; `undefined` for any other key.
 */
const phaseKeyToSummaryPhase = (key) => {
    switch (key) {
        case "plan":
            return "plan";
        case "retrieval":
            return "retrieve";
        case "patch":
            return "act";
        case "verification":
            return "verify";
        default:
            return undefined;
    }
};
118
/**
 * Combines per-phase status (from `quality_dimensions`) with telemetry rows.
 *
 * Status resolution, in order:
 *   1. recognized `quality_dimensions` keys, normalized via normalizePhaseStatus;
 *   2. any telemetry phase without a status is recorded as "missing";
 *   3. if nothing was found at all, the four default phases are emitted as "missing".
 * When several telemetry rows share a phase, the last one supplies the figures.
 *
 * @param runSummary Run summary object; only `quality_dimensions` is read.
 * @param telemetry  Rows as produced by normalizePhaseTelemetry.
 * @returns One outcome per phase, ordered by phase name.
 */
const buildPhaseOutcomes = (runSummary, telemetry) => {
    const statusByPhase = new Map();
    const dimensions = asRecord(runSummary.quality_dimensions) ?? {};
    Object.entries(dimensions).forEach(([dimension, rawStatus]) => {
        const phase = phaseKeyToSummaryPhase(dimension);
        if (phase)
            statusByPhase.set(phase, normalizePhaseStatus(rawStatus));
    });
    const telemetryByPhase = new Map();
    for (const row of telemetry) {
        telemetryByPhase.set(row.phase, row);
        if (!statusByPhase.has(row.phase))
            statusByPhase.set(row.phase, "missing");
    }
    if (statusByPhase.size === 0) {
        ["plan", "retrieve", "act", "verify"].forEach((phase) => {
            statusByPhase.set(phase, "missing");
        });
    }
    const toOutcome = ([phase, status]) => {
        const row = telemetryByPhase.get(phase);
        return {
            phase,
            status,
            duration_ms: row?.duration_ms ?? null,
            provider: row?.provider ?? null,
            model: row?.model ?? null,
            input_tokens: row?.input_tokens ?? null,
            output_tokens: row?.output_tokens ?? null,
            total_tokens: row?.total_tokens ?? null,
            cost_usd: row?.cost_usd ?? null,
            cost_source: row?.cost_source ?? null,
            missing_usage_reason: row?.missing_usage_reason ?? null,
            missing_cost_reason: row?.missing_cost_reason ?? null,
        };
    };
    return Array.from(statusByPhase.entries())
        .map((entry) => toOutcome(entry))
        .sort((a, b) => a.phase.localeCompare(b.phase));
};
163
/**
 * Adds up the non-null entries of a list.
 *
 * @param values Array whose entries are numbers or `null`.
 * @returns The sum of the non-null entries, or `null` when there are none
 *          (so "no data" stays distinguishable from a total of zero).
 */
const sumNullable = (values) => {
    let total = 0;
    let counted = 0;
    values.forEach((item) => {
        if (item === null)
            return;
        total += item;
        counted += 1;
    });
    return counted === 0 ? null : total;
};
169
/**
 * Flattens an untyped run summary into a normalized record for reporting.
 *
 * Nothing here throws on missing data: absent values become `null` (or empty
 * arrays) and each gap is named in `missing_data_markers`.
 *
 * Changes relative to the previous implementation:
 *   - Identifier fallbacks (`run_id`, `task_id`, `failure_class`) now pick the
 *     first candidate that is a non-blank string. Previously `a ?? b ?? c`
 *     stopped at the first non-nullish candidate, so a blank or non-string
 *     `run_id` hid a perfectly good `runId` / `input.runId`.
 *   - `input.touchedFiles` goes through the same trim / de-duplicate / filter
 *     step as the summary's list. Previously it was merged raw, so blank or
 *     untrimmed entries leaked through and a non-string entry made the sort throw.
 *   - The run id and the top-level token figures are computed once and reused.
 *
 * @param input Optional adapter input: `runSummary`, `runId`, `taskId`,
 *              `verificationOutcome`, `touchedFiles`.
 * @returns The normalized run record (`schema_version` is always 1).
 */
export const adaptRunSummaryForReport = (input = {}) => {
    // Returns the first candidate that asString accepts, or null when none qualifies.
    const firstString = (...candidates) => {
        for (const candidate of candidates) {
            const text = asString(candidate);
            if (text !== undefined)
                return text;
        }
        return null;
    };
    const asArray = (candidate) => (Array.isArray(candidate) ? candidate : []);
    const runSummary = asRecord(input.runSummary);
    const finalDisposition = asRecord(runSummary?.final_disposition);
    const artifactReferences = normalizeArtifactReferences(runSummary?.artifact_references);
    const phaseTelemetry = normalizePhaseTelemetry(runSummary?.phase_telemetry);
    const phaseOutcomes = buildPhaseOutcomes(runSummary ?? {}, phaseTelemetry);
    // Token usage: prefer the top-level figures, fall back to the per-phase totals.
    const topLevelUsage = asRecord(runSummary?.usage);
    const topLevelInputTokens = asNumber(topLevelUsage?.inputTokens);
    const topLevelOutputTokens = asNumber(topLevelUsage?.outputTokens);
    const topLevelTotalTokens = asNumber(topLevelUsage?.totalTokens)
        ?? (topLevelInputTokens !== null || topLevelOutputTokens !== null
            ? (topLevelInputTokens ?? 0) + (topLevelOutputTokens ?? 0)
            : null);
    const usageTokensTotal = topLevelTotalTokens
        ?? sumNullable(phaseOutcomes.map((phase) => phase.total_tokens));
    // Cost: prefer the summary's actualCost, fall back to the per-phase costs.
    const costUsd = asNumber(runSummary?.actualCost)
        ?? sumNullable(phaseOutcomes.map((phase) => phase.cost_usd));
    // An explicit missing_artifacts list wins; otherwise derive it from the references.
    const missingArtifacts = uniqueSortedStrings(runSummary?.missing_artifacts
        ?? artifactReferences
            .filter((entry) => entry.status === "missing")
            .map((entry) => `${entry.phase}:${entry.kind}`));
    // The caller-supplied outcome takes precedence over the one stored in the summary.
    const verificationRecord = asRecord(runSummary?.verification);
    const verificationOutcome = normalizeVerificationOutcome(input.verificationOutcome)
        ?? normalizeVerificationOutcome(verificationRecord?.outcome);
    const runId = firstString(runSummary?.run_id, runSummary?.runId, input.runId);
    const taskId = firstString(runSummary?.task_id, runSummary?.taskId, input.taskId);
    const markers = [];
    if (!runSummary)
        markers.push("run_summary_missing");
    if (runId === null)
        markers.push("run_id_missing");
    if (!finalDisposition)
        markers.push("final_disposition_missing");
    if (!phaseTelemetry.length)
        markers.push("phase_telemetry_missing");
    if (verificationOutcome === null)
        markers.push("verification_outcome_missing");
    if (usageTokensTotal === null)
        markers.push("usage_tokens_missing");
    if (costUsd === null)
        markers.push("cost_missing");
    const touchedFiles = uniqueSortedStrings([
        ...asArray(runSummary?.touchedFiles),
        ...asArray(input.touchedFiles),
    ]);
    return {
        schema_version: 1,
        run_id: runId,
        task_id: taskId,
        fingerprint: asString(runSummary?.fingerprint) ?? null,
        duration_ms: asNumber(runSummary?.durationMs),
        final_status: normalizeRunStatus(finalDisposition?.status),
        failure_class: firstString(finalDisposition?.failure_class, finalDisposition?.failureClass),
        reason_codes: uniqueSortedStrings(finalDisposition?.reason_codes ?? finalDisposition?.reasons),
        retryable: asBoolean(finalDisposition?.retryable),
        verification_outcome: verificationOutcome,
        touched_files: touchedFiles,
        artifact_references: artifactReferences,
        missing_artifacts: missingArtifacts,
        phase_outcomes: phaseOutcomes,
        usage_tokens_total: usageTokensTotal,
        cost_usd: costUsd,
        missing_data_markers: markers.sort((left, right) => left.localeCompare(right)),
    };
};
@@ -0,0 +1,32 @@
1
+ import type { EvalGateResult } from "./GateEvaluator.js";
2
+ import type { EvalMetrics } from "./MetricTypes.js";
3
+ import type { EvalRegressionComparison } from "./RegressionComparator.js";
4
+ import type { EvalRunResult } from "./EvalRunner.js";
5
+ export interface EvalReport { // Report shape accepted by serializeEvalReport and returned by parseEvalReport.
6
+ schema_version: 1; // Pinned to the literal 1.
7
+ report_id: string;
8
+ created_at: string; // NOTE(review): presumably a timestamp string; the format is not visible here — confirm.
9
+ suite: { // Identity of the evaluated suite.
10
+ suite_id: string;
11
+ suite_name: string;
12
+ suite_path: string;
13
+ suite_fingerprint: string;
14
+ task_count: number;
15
+ };
16
+ summary: { // Aggregate flags and task counters for the run.
17
+ exit_code: number; // NOTE(review): presumably the exit code of the eval command — confirm.
18
+ passed: boolean;
19
+ gate_passed: boolean;
20
+ task_total: number;
21
+ task_passed: number;
22
+ task_failed: number;
23
+ execution_errors: number;
24
+ };
25
+ run: EvalRunResult; // Type imported from ./EvalRunner.js.
26
+ metrics: EvalMetrics; // Type imported from ./MetricTypes.js.
27
+ regression: EvalRegressionComparison; // Type imported from ./RegressionComparator.js.
28
+ gates: EvalGateResult; // Type imported from ./GateEvaluator.js.
29
+ }
30
+ export declare const serializeEvalReport: (report: EvalReport, pretty?: boolean) => string; // Turns a report into a string; NOTE(review): `pretty` presumably toggles indented output — the implementation is not visible here, confirm.
31
+ export declare const parseEvalReport: (content: string) => EvalReport; // Turns report text back into an EvalReport; NOTE(review): validation and error behaviour are not visible in this declaration — confirm against the implementation.
32
+ //# sourceMappingURL=ReportSerializer.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ReportSerializer.d.ts","sourceRoot":"","sources":["../../src/eval/ReportSerializer.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AACzD,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AACpD,OAAO,KAAK,EAAE,wBAAwB,EAAE,MAAM,2BAA2B,CAAC;AAC1E,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAErD,MAAM,WAAW,UAAU;IACzB,cAAc,EAAE,CAAC,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,KAAK,EAAE;QACL,QAAQ,EAAE,MAAM,CAAC;QACjB,UAAU,EAAE,MAAM,CAAC;QACnB,UAAU,EAAE,MAAM,CAAC;QACnB,iBAAiB,EAAE,MAAM,CAAC;QAC1B,UAAU,EAAE,MAAM,CAAC;KACpB,CAAC;IACF,OAAO,EAAE;QACP,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,OAAO,CAAC;QAChB,WAAW,EAAE,OAAO,CAAC;QACrB,UAAU,EAAE,MAAM,CAAC;QACnB,WAAW,EAAE,MAAM,CAAC;QACpB,WAAW,EAAE,MAAM,CAAC;QACpB,gBAAgB,EAAE,MAAM,CAAC;KAC1B,CAAC;IACF,GAAG,EAAE,aAAa,CAAC;IACnB,OAAO,EAAE,WAAW,CAAC;IACrB,UAAU,EAAE,wBAAwB,CAAC;IACrC,KAAK,EAAE,cAAc,CAAC;CACvB;AAOD,eAAO,MAAM,mBAAmB,GAAI,QAAQ,UAAU,EAAE,gBAAa,KAAG,MAKvE,CAAC;AAEF,eAAO,MAAM,eAAe,GAAI,SAAS,MAAM,KAAG,UAkBjD,CAAC"}