@netlify/axis 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (230)
  1. package/README.md +977 -0
  2. package/dist/adapters/base/acp-adapter.d.ts +44 -0
  3. package/dist/adapters/base/acp-adapter.d.ts.map +1 -0
  4. package/dist/adapters/base/acp-adapter.js +559 -0
  5. package/dist/adapters/base/acp-adapter.js.map +1 -0
  6. package/dist/adapters/base/agent-adapter.d.ts +132 -0
  7. package/dist/adapters/base/agent-adapter.d.ts.map +1 -0
  8. package/dist/adapters/base/agent-adapter.js +212 -0
  9. package/dist/adapters/base/agent-adapter.js.map +1 -0
  10. package/dist/adapters/claude-code.d.ts +3 -0
  11. package/dist/adapters/claude-code.d.ts.map +1 -0
  12. package/dist/adapters/claude-code.js +138 -0
  13. package/dist/adapters/claude-code.js.map +1 -0
  14. package/dist/adapters/claude-sdk.d.ts +11 -0
  15. package/dist/adapters/claude-sdk.d.ts.map +1 -0
  16. package/dist/adapters/claude-sdk.js +46 -0
  17. package/dist/adapters/claude-sdk.js.map +1 -0
  18. package/dist/adapters/codex.d.ts +3 -0
  19. package/dist/adapters/codex.d.ts.map +1 -0
  20. package/dist/adapters/codex.js +183 -0
  21. package/dist/adapters/codex.js.map +1 -0
  22. package/dist/adapters/gemini-acp.d.ts +11 -0
  23. package/dist/adapters/gemini-acp.d.ts.map +1 -0
  24. package/dist/adapters/gemini-acp.js +60 -0
  25. package/dist/adapters/gemini-acp.js.map +1 -0
  26. package/dist/adapters/gemini.d.ts +3 -0
  27. package/dist/adapters/gemini.d.ts.map +1 -0
  28. package/dist/adapters/gemini.js +222 -0
  29. package/dist/adapters/gemini.js.map +1 -0
  30. package/dist/adapters/goose.d.ts +3 -0
  31. package/dist/adapters/goose.d.ts.map +1 -0
  32. package/dist/adapters/goose.js +9 -0
  33. package/dist/adapters/goose.js.map +1 -0
  34. package/dist/adapters/registry.d.ts +7 -0
  35. package/dist/adapters/registry.d.ts.map +1 -0
  36. package/dist/adapters/registry.js +37 -0
  37. package/dist/adapters/registry.js.map +1 -0
  38. package/dist/adapters/utils/mcp.d.ts +23 -0
  39. package/dist/adapters/utils/mcp.d.ts.map +1 -0
  40. package/dist/adapters/utils/mcp.js +114 -0
  41. package/dist/adapters/utils/mcp.js.map +1 -0
  42. package/dist/adapters/utils/resolve.d.ts +20 -0
  43. package/dist/adapters/utils/resolve.d.ts.map +1 -0
  44. package/dist/adapters/utils/resolve.js +48 -0
  45. package/dist/adapters/utils/resolve.js.map +1 -0
  46. package/dist/adapters/utils/skills.d.ts +17 -0
  47. package/dist/adapters/utils/skills.d.ts.map +1 -0
  48. package/dist/adapters/utils/skills.js +52 -0
  49. package/dist/adapters/utils/skills.js.map +1 -0
  50. package/dist/adapters/utils/token-estimator.d.ts +21 -0
  51. package/dist/adapters/utils/token-estimator.d.ts.map +1 -0
  52. package/dist/adapters/utils/token-estimator.js +37 -0
  53. package/dist/adapters/utils/token-estimator.js.map +1 -0
  54. package/dist/baselines/diff.d.ts +9 -0
  55. package/dist/baselines/diff.d.ts.map +1 -0
  56. package/dist/baselines/diff.js +83 -0
  57. package/dist/baselines/diff.js.map +1 -0
  58. package/dist/baselines/index.d.ts +3 -0
  59. package/dist/baselines/index.d.ts.map +1 -0
  60. package/dist/baselines/index.js +3 -0
  61. package/dist/baselines/index.js.map +1 -0
  62. package/dist/baselines/store.d.ts +19 -0
  63. package/dist/baselines/store.d.ts.map +1 -0
  64. package/dist/baselines/store.js +104 -0
  65. package/dist/baselines/store.js.map +1 -0
  66. package/dist/cli.d.ts +3 -0
  67. package/dist/cli.d.ts.map +1 -0
  68. package/dist/cli.js +487 -0
  69. package/dist/cli.js.map +1 -0
  70. package/dist/config/loader.d.ts +8 -0
  71. package/dist/config/loader.d.ts.map +1 -0
  72. package/dist/config/loader.js +99 -0
  73. package/dist/config/loader.js.map +1 -0
  74. package/dist/config/validator.d.ts +11 -0
  75. package/dist/config/validator.d.ts.map +1 -0
  76. package/dist/config/validator.js +203 -0
  77. package/dist/config/validator.js.map +1 -0
  78. package/dist/docs-site/_astro/cli.DDWZtG0-.css +1 -0
  79. package/dist/docs-site/cli/index.html +18 -0
  80. package/dist/docs-site/configuration/index.html +121 -0
  81. package/dist/docs-site/content-assets.mjs +1 -0
  82. package/dist/docs-site/content-modules.mjs +1 -0
  83. package/dist/docs-site/data-store.json +9 -0
  84. package/dist/docs-site/index.html +69 -0
  85. package/dist/docs-site/quickstart/index.html +59 -0
  86. package/dist/docs-site/running/index.html +87 -0
  87. package/dist/docs-site/scoring/index.html +135 -0
  88. package/dist/index.d.ts +19 -0
  89. package/dist/index.d.ts.map +1 -0
  90. package/dist/index.js +15 -0
  91. package/dist/index.js.map +1 -0
  92. package/dist/report-ui/index.html +291 -0
  93. package/dist/report-ui/mock-data.json +298 -0
  94. package/dist/reports/html.d.ts +7 -0
  95. package/dist/reports/html.d.ts.map +1 -0
  96. package/dist/reports/html.js +27 -0
  97. package/dist/reports/html.js.map +1 -0
  98. package/dist/reports/reader.d.ts +21 -0
  99. package/dist/reports/reader.d.ts.map +1 -0
  100. package/dist/reports/reader.js +110 -0
  101. package/dist/reports/reader.js.map +1 -0
  102. package/dist/reports/writer.d.ts +14 -0
  103. package/dist/reports/writer.d.ts.map +1 -0
  104. package/dist/reports/writer.js +106 -0
  105. package/dist/reports/writer.js.map +1 -0
  106. package/dist/runner/lifecycle.d.ts +10 -0
  107. package/dist/runner/lifecycle.d.ts.map +1 -0
  108. package/dist/runner/lifecycle.js +58 -0
  109. package/dist/runner/lifecycle.js.map +1 -0
  110. package/dist/runner/runner.d.ts +34 -0
  111. package/dist/runner/runner.d.ts.map +1 -0
  112. package/dist/runner/runner.js +330 -0
  113. package/dist/runner/runner.js.map +1 -0
  114. package/dist/scoring/category-score.d.ts +52 -0
  115. package/dist/scoring/category-score.d.ts.map +1 -0
  116. package/dist/scoring/category-score.js +157 -0
  117. package/dist/scoring/category-score.js.map +1 -0
  118. package/dist/scoring/composite.d.ts +5 -0
  119. package/dist/scoring/composite.d.ts.map +1 -0
  120. package/dist/scoring/composite.js +24 -0
  121. package/dist/scoring/composite.js.map +1 -0
  122. package/dist/scoring/deep-eval.d.ts +25 -0
  123. package/dist/scoring/deep-eval.d.ts.map +1 -0
  124. package/dist/scoring/deep-eval.js +382 -0
  125. package/dist/scoring/deep-eval.js.map +1 -0
  126. package/dist/scoring/goal-achievement.d.ts +5 -0
  127. package/dist/scoring/goal-achievement.d.ts.map +1 -0
  128. package/dist/scoring/goal-achievement.js +241 -0
  129. package/dist/scoring/goal-achievement.js.map +1 -0
  130. package/dist/scoring/index.d.ts +22 -0
  131. package/dist/scoring/index.d.ts.map +1 -0
  132. package/dist/scoring/index.js +115 -0
  133. package/dist/scoring/index.js.map +1 -0
  134. package/dist/scoring/parse-json.d.ts +6 -0
  135. package/dist/scoring/parse-json.d.ts.map +1 -0
  136. package/dist/scoring/parse-json.js +18 -0
  137. package/dist/scoring/parse-json.js.map +1 -0
  138. package/dist/scoring/sparse-index.d.ts +15 -0
  139. package/dist/scoring/sparse-index.d.ts.map +1 -0
  140. package/dist/scoring/sparse-index.js +338 -0
  141. package/dist/scoring/sparse-index.js.map +1 -0
  142. package/dist/scoring/triage.d.ts +15 -0
  143. package/dist/scoring/triage.d.ts.map +1 -0
  144. package/dist/scoring/triage.js +204 -0
  145. package/dist/scoring/triage.js.map +1 -0
  146. package/dist/skills/resolver.d.ts +19 -0
  147. package/dist/skills/resolver.d.ts.map +1 -0
  148. package/dist/skills/resolver.js +95 -0
  149. package/dist/skills/resolver.js.map +1 -0
  150. package/dist/transcript/categorize.d.ts +24 -0
  151. package/dist/transcript/categorize.d.ts.map +1 -0
  152. package/dist/transcript/categorize.js +233 -0
  153. package/dist/transcript/categorize.js.map +1 -0
  154. package/dist/transcript/classify.d.ts +7 -0
  155. package/dist/transcript/classify.d.ts.map +1 -0
  156. package/dist/transcript/classify.js +32 -0
  157. package/dist/transcript/classify.js.map +1 -0
  158. package/dist/transcript/extract.d.ts +24 -0
  159. package/dist/transcript/extract.d.ts.map +1 -0
  160. package/dist/transcript/extract.js +266 -0
  161. package/dist/transcript/extract.js.map +1 -0
  162. package/dist/transcript/index.d.ts +3 -0
  163. package/dist/transcript/index.d.ts.map +1 -0
  164. package/dist/transcript/index.js +2 -0
  165. package/dist/transcript/index.js.map +1 -0
  166. package/dist/transcript/normalize.d.ts +15 -0
  167. package/dist/transcript/normalize.d.ts.map +1 -0
  168. package/dist/transcript/normalize.js +160 -0
  169. package/dist/transcript/normalize.js.map +1 -0
  170. package/dist/transcript/types.d.ts +92 -0
  171. package/dist/transcript/types.d.ts.map +1 -0
  172. package/dist/transcript/types.js +2 -0
  173. package/dist/transcript/types.js.map +1 -0
  174. package/dist/transcript/urls.d.ts +10 -0
  175. package/dist/transcript/urls.d.ts.map +1 -0
  176. package/dist/transcript/urls.js +31 -0
  177. package/dist/transcript/urls.js.map +1 -0
  178. package/dist/types/agent.d.ts +80 -0
  179. package/dist/types/agent.d.ts.map +1 -0
  180. package/dist/types/agent.js +2 -0
  181. package/dist/types/agent.js.map +1 -0
  182. package/dist/types/baseline.d.ts +65 -0
  183. package/dist/types/baseline.d.ts.map +1 -0
  184. package/dist/types/baseline.js +2 -0
  185. package/dist/types/baseline.js.map +1 -0
  186. package/dist/types/config.d.ts +76 -0
  187. package/dist/types/config.d.ts.map +1 -0
  188. package/dist/types/config.js +2 -0
  189. package/dist/types/config.js.map +1 -0
  190. package/dist/types/index.d.ts +8 -0
  191. package/dist/types/index.d.ts.map +1 -0
  192. package/dist/types/index.js +8 -0
  193. package/dist/types/index.js.map +1 -0
  194. package/dist/types/output.d.ts +70 -0
  195. package/dist/types/output.d.ts.map +1 -0
  196. package/dist/types/output.js +15 -0
  197. package/dist/types/output.js.map +1 -0
  198. package/dist/types/report.d.ts +37 -0
  199. package/dist/types/report.d.ts.map +1 -0
  200. package/dist/types/report.js +2 -0
  201. package/dist/types/report.js.map +1 -0
  202. package/dist/types/scenario.d.ts +23 -0
  203. package/dist/types/scenario.d.ts.map +1 -0
  204. package/dist/types/scenario.js +2 -0
  205. package/dist/types/scenario.js.map +1 -0
  206. package/dist/types/scoring.d.ts +176 -0
  207. package/dist/types/scoring.d.ts.map +1 -0
  208. package/dist/types/scoring.js +2 -0
  209. package/dist/types/scoring.js.map +1 -0
  210. package/dist/ui/AnimatedTokens.d.ts +29 -0
  211. package/dist/ui/AnimatedTokens.d.ts.map +1 -0
  212. package/dist/ui/AnimatedTokens.js +53 -0
  213. package/dist/ui/AnimatedTokens.js.map +1 -0
  214. package/dist/ui/App.d.ts +6 -0
  215. package/dist/ui/App.d.ts.map +1 -0
  216. package/dist/ui/App.js +16 -0
  217. package/dist/ui/App.js.map +1 -0
  218. package/dist/ui/LiveDuration.d.ts +20 -0
  219. package/dist/ui/LiveDuration.d.ts.map +1 -0
  220. package/dist/ui/LiveDuration.js +31 -0
  221. package/dist/ui/LiveDuration.js.map +1 -0
  222. package/dist/ui/LiveStatus.d.ts +7 -0
  223. package/dist/ui/LiveStatus.d.ts.map +1 -0
  224. package/dist/ui/LiveStatus.js +52 -0
  225. package/dist/ui/LiveStatus.js.map +1 -0
  226. package/dist/ui/format.d.ts +29 -0
  227. package/dist/ui/format.d.ts.map +1 -0
  228. package/dist/ui/format.js +514 -0
  229. package/dist/ui/format.js.map +1 -0
  230. package/package.json +65 -0
@@ -0,0 +1,31 @@
1
+ /** Match HTTP(S) URLs in arbitrary text (case-insensitive). A match stops at whitespace, a quote, `<`, `>`, `)`, `]`, `}` or `,`. */
2
+ const URL_REGEX = /https?:\/\/[^\s"'<>)\]},]+/gi;
3
+ /**
4
+ * Extract the hostname from a URL string. Returns null if the string cannot be parsed as a URL.
5
+ */
6
+ export function extractDomain(url) {
7
+ try {
8
+ return new URL(url).hostname;
9
+ }
10
+ catch {
11
+ return null;
12
+ }
13
+ }
14
+ /**
15
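+ * Extract HTTP(S) URLs from a string. Returns one entry per distinct URL string, in order of first appearance, each paired with its parsed domain (null if the URL fails to parse).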
16
+ */
17
+ export function extractUrls(text) {
18
+ const matches = text.match(URL_REGEX);
19
+ if (!matches)
20
+ return [];
21
+ const seen = new Set();
22
+ const results = [];
23
+ for (const url of matches) {
24
+ if (seen.has(url))
25
+ continue;
26
+ seen.add(url);
27
+ results.push({ url, domain: extractDomain(url) });
28
+ }
29
+ return results;
30
+ }
31
+ //# sourceMappingURL=urls.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"urls.js","sourceRoot":"","sources":["../../src/transcript/urls.ts"],"names":[],"mappings":"AAEA,4CAA4C;AAC5C,MAAM,SAAS,GAAG,8BAA8B,CAAC;AAEjD;;GAEG;AACH,MAAM,UAAU,aAAa,CAAC,GAAW;IACvC,IAAI,CAAC;QACH,OAAO,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;IAC/B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,WAAW,CAAC,IAAY;IACtC,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;IACtC,IAAI,CAAC,OAAO;QAAE,OAAO,EAAE,CAAC;IAExB,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAC/B,MAAM,OAAO,GAAmB,EAAE,CAAC;IAEnC,KAAK,MAAM,GAAG,IAAI,OAAO,EAAE,CAAC;QAC1B,IAAI,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;YAAE,SAAS;QAC5B,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QACd,OAAO,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,MAAM,EAAE,aAAa,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IACpD,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC"}
@@ -0,0 +1,80 @@
1
+ import type { Scenario } from "./scenario.js";
2
+ import type { AgentConfig, McpServerConfig, ResolvedSkill } from "./config.js";
3
+ import type { Logger } from "./output.js";
4
+ import type { TranscriptAnalysis } from "../transcript/types.js";
5
+ export interface AgentAdapter {
6
+ readonly name: string;
7
+ run(input: AgentInput): Promise<AgentOutput>;
8
+ /**
9
+ * Returns adapter-specific environment overrides for workspace isolation.
10
+ * Called by the runner and merged into the job env after universal isolation
11
+ * (HOME, env filtering) is applied.
12
+ */
13
+ isolationEnv?(workspace: string): Record<string, string>;
14
+ /**
15
+ * Returns environment variable names required for the adapter to function
16
+ * (e.g. API keys). The runner validates these are present before launching
17
+ * any jobs and fails early with a clear error message.
18
+ */
19
+ requiredEnv?(): string[];
20
+ /**
21
+ * Resolves and validates the CLI binary for this adapter.
22
+ * Called once during runner pre-flight before any jobs run.
23
+ * If the CLI is not globally installed, falls back to npx.
24
+ */
25
+ ensureInstalled?(logger: Logger): Promise<void>;
26
+ }
27
+ export interface AgentInput {
28
+ prompt: string;
29
+ config: AgentConfig;
30
+ scenario: Scenario;
31
+ workingDirectory: string;
32
+ /** Filtered environment variables for the agent process. If omitted, inherits parent env. */
33
+ env?: Record<string, string>;
34
+ /** Register a cleanup function to be called on process signal (SIGINT/SIGTERM). */
35
+ registerCleanup?: (fn: () => void) => void;
36
+ /** When true, adapters capture raw stdout lines in AgentOutput.rawOutput. */
37
+ captureRawOutput?: boolean;
38
+ /** MCP servers to configure for this agent run (from top-level config). */
39
+ mcpServers?: Record<string, McpServerConfig>;
40
+ /** Resolved skills to install for this agent run. */
41
+ resolvedSkills?: ResolvedSkill[];
42
+ /**
43
+ * Invoked by the adapter with a conservative, monotonically-increasing
44
+ * estimate of tokens consumed so far. Used to drive the live UI counter.
45
+ * Estimates are derived from streamed assistant text, intentionally kept
46
+ * below the true count so the UI never has to reverse.
47
+ */
48
+ onTokenProgress?: (estimatedTokens: number) => void;
49
+ }
50
+ export interface AgentOutput {
51
+ transcript: TranscriptEntry[];
52
+ result: string | null;
53
+ metadata: AgentMetadata;
54
+ /** Raw stdout lines from the agent process (populated when captureRawOutput is set). */
55
+ rawOutput?: string[];
56
+ /** Per-entry extracted signals and aggregate analysis. Populated during scoring. */
57
+ transcriptAnalysis?: TranscriptAnalysis;
58
+ }
59
+ export interface AgentMetadata {
60
+ startTime: string;
61
+ endTime: string;
62
+ durationMs: number;
63
+ tokenUsage?: TokenUsage;
64
+ totalCostUsd?: number;
65
+ exitCode: number;
66
+ sessionId?: string;
67
+ /** Human-readable error description when the agent fails. */
68
+ error?: string;
69
+ }
70
+ export interface TokenUsage {
71
+ input: number;
72
+ output: number;
73
+ cacheReadInput?: number;
74
+ }
75
+ export interface TranscriptEntry {
76
+ type: "assistant" | "user" | "tool_use" | "tool_result" | "system" | "error";
77
+ timestamp: string;
78
+ content: Record<string, unknown>;
79
+ }
80
+ //# sourceMappingURL=agent.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"agent.d.ts","sourceRoot":"","sources":["../../src/types/agent.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AAC9C,OAAO,KAAK,EAAE,WAAW,EAAE,eAAe,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAC/E,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAC1C,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,wBAAwB,CAAC;AAEjE,MAAM,WAAW,YAAY;IAC3B,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,GAAG,CAAC,KAAK,EAAE,UAAU,GAAG,OAAO,CAAC,WAAW,CAAC,CAAC;IAC7C;;;;OAIG;IACH,YAAY,CAAC,CAAC,SAAS,EAAE,MAAM,GAAG,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACzD;;;;OAIG;IACH,WAAW,CAAC,IAAI,MAAM,EAAE,CAAC;IACzB;;;;OAIG;IACH,eAAe,CAAC,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;CACjD;AAED,MAAM,WAAW,UAAU;IACzB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,WAAW,CAAC;IACpB,QAAQ,EAAE,QAAQ,CAAC;IACnB,gBAAgB,EAAE,MAAM,CAAC;IACzB,6FAA6F;IAC7F,GAAG,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC7B,mFAAmF;IACnF,eAAe,CAAC,EAAE,CAAC,EAAE,EAAE,MAAM,IAAI,KAAK,IAAI,CAAC;IAC3C,6EAA6E;IAC7E,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAC3B,2EAA2E;IAC3E,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,eAAe,CAAC,CAAC;IAC7C,qDAAqD;IACrD,cAAc,CAAC,EAAE,aAAa,EAAE,CAAC;IACjC;;;;;OAKG;IACH,eAAe,CAAC,EAAE,CAAC,eAAe,EAAE,MAAM,KAAK,IAAI,CAAC;CACrD;AAED,MAAM,WAAW,WAAW;IAC1B,UAAU,EAAE,eAAe,EAAE,CAAC;IAC9B,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IACtB,QAAQ,EAAE,aAAa,CAAC;IACxB,wFAAwF;IACxF,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IACrB,oFAAoF;IACpF,kBAAkB,CAAC,EAAE,kBAAkB,CAAC;CACzC;AAED,MAAM,WAAW,aAAa;IAC5B,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,UAAU,CAAC;IACxB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,6DAA6D;IAC7D,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,UAAU;IACzB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AAED,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,WAAW,GAAG,MAAM,GAAG,UAAU,GAAG,aAAa,GAAG,QAAQ,GAAG,OAAO,CAAC;IAC7E,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CAClC"}
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=agent.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"agent.js","sourceRoot":"","sources":["../../src/types/agent.ts"],"names":[],"mappings":""}
@@ -0,0 +1,65 @@
1
+ /** Snapshot of a single scenario×agent score for baseline comparison. */
2
+ export interface BaselineEntry {
3
+ axisScore: number;
4
+ goalAchievement: number;
5
+ environment: number;
6
+ service: number;
7
+ agent: number;
8
+ durationMs: number;
9
+ tokens: number;
10
+ fromReportId: string;
11
+ timestamp: string;
12
+ }
13
+ /** Scenario key → agent name → baseline entry. */
14
+ export type BaselineResults = Record<string, Record<string, BaselineEntry>>;
15
+ /** A named baseline — accumulated collection of score snapshots. */
16
+ export interface Baseline {
17
+ name: string;
18
+ createdAt: string;
19
+ updatedAt: string;
20
+ results: BaselineResults;
21
+ }
22
+ /** A single row in a baseline diff comparison. */
23
+ export interface BaselineDiffEntry {
24
+ scenarioKey: string;
25
+ agentName: string;
26
+ baseline: number;
27
+ current: number;
28
+ delta: number;
29
+ categories: {
30
+ goalAchievement: {
31
+ baseline: number;
32
+ current: number;
33
+ delta: number;
34
+ };
35
+ environment: {
36
+ baseline: number;
37
+ current: number;
38
+ delta: number;
39
+ };
40
+ service: {
41
+ baseline: number;
42
+ current: number;
43
+ delta: number;
44
+ };
45
+ agent: {
46
+ baseline: number;
47
+ current: number;
48
+ delta: number;
49
+ };
50
+ };
51
+ }
52
+ /** Result of comparing a report against a baseline. */
53
+ export interface BaselineDiff {
54
+ baselineName: string;
55
+ reportId: string;
56
+ entries: BaselineDiffEntry[];
57
+ summary: {
58
+ improved: number;
59
+ regressed: number;
60
+ unchanged: number;
61
+ /** Scenarios in the report that don't exist in the baseline. */
62
+ newScenarios: number;
63
+ };
64
+ }
65
+ //# sourceMappingURL=baseline.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"baseline.d.ts","sourceRoot":"","sources":["../../src/types/baseline.ts"],"names":[],"mappings":"AAAA,yEAAyE;AACzE,MAAM,WAAW,aAAa;IAC5B,SAAS,EAAE,MAAM,CAAC;IAClB,eAAe,EAAE,MAAM,CAAC;IACxB,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,CAAC;IACf,YAAY,EAAE,MAAM,CAAC;IACrB,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,kDAAkD;AAClD,MAAM,MAAM,eAAe,GAAG,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,aAAa,CAAC,CAAC,CAAC;AAE5E,oEAAoE;AACpE,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,eAAe,CAAC;CAC1B;AAED,kDAAkD;AAClD,MAAM,WAAW,iBAAiB;IAChC,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE;QACV,eAAe,EAAE;YAAE,QAAQ,EAAE,MAAM,CAAC;YAAC,OAAO,EAAE,MAAM,CAAC;YAAC,KAAK,EAAE,MAAM,CAAA;SAAE,CAAC;QACtE,WAAW,EAAE;YAAE,QAAQ,EAAE,MAAM,CAAC;YAAC,OAAO,EAAE,MAAM,CAAC;YAAC,KAAK,EAAE,MAAM,CAAA;SAAE,CAAC;QAClE,OAAO,EAAE;YAAE,QAAQ,EAAE,MAAM,CAAC;YAAC,OAAO,EAAE,MAAM,CAAC;YAAC,KAAK,EAAE,MAAM,CAAA;SAAE,CAAC;QAC9D,KAAK,EAAE;YAAE,QAAQ,EAAE,MAAM,CAAC;YAAC,OAAO,EAAE,MAAM,CAAC;YAAC,KAAK,EAAE,MAAM,CAAA;SAAE,CAAC;KAC7D,CAAC;CACH;AAED,uDAAuD;AACvD,MAAM,WAAW,YAAY;IAC3B,YAAY,EAAE,MAAM,CAAC;IACrB,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,iBAAiB,EAAE,CAAC;IAC7B,OAAO,EAAE;QACP,QAAQ,EAAE,MAAM,CAAC;QACjB,SAAS,EAAE,MAAM,CAAC;QAClB,SAAS,EAAE,MAAM,CAAC;QAClB,gEAAgE;QAChE,YAAY,EAAE,MAAM,CAAC;KACtB,CAAC;CACH"}
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=baseline.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"baseline.js","sourceRoot":"","sources":["../../src/types/baseline.ts"],"names":[],"mappings":""}
@@ -0,0 +1,76 @@
1
+ export interface AxisConfig {
2
+ scenarios: string;
3
+ agents: (string | AgentConfig)[];
4
+ defaults?: DefaultsConfig;
5
+ /** Custom adapter modules. Keys are adapter names, values are paths (relative to config) to JS/TS modules that export an AgentAdapter. */
6
+ adapters?: Record<string, string>;
7
+ /** Environment variables to pass through to agent processes. System vars (PATH, HOME, etc.) are always included. */
8
+ env?: string[];
9
+ /** MCP servers available to all agents during execution. */
10
+ mcp_servers?: Record<string, McpServerConfig>;
11
+ /** Skills available to all agents. Merged with per-agent skills. Each entry is a local path, GitHub shorthand (owner/repo), or GitHub URL. */
12
+ skills?: SkillSource[];
13
+ }
14
+ /** A skill source entry — either a simple string or an object with metadata. */
15
+ export type SkillSource = string | SkillSourceConfig;
16
+ /** Skill source with optional metadata. */
17
+ export interface SkillSourceConfig {
18
+ /** Local path, GitHub shorthand (owner/repo), or GitHub URL. */
19
+ source: string;
20
+ /** Override the AXIS scoring category for interactions driven by this skill. */
21
+ axisCategory?: "environment" | "service";
22
+ }
23
+ /** A skill resolved from its source reference to an on-disk directory. */
24
+ export interface ResolvedSkill {
25
+ /** Skill name (derived from directory name). */
26
+ name: string;
27
+ /** Absolute path to the skill directory containing SKILL.md. */
28
+ path: string;
29
+ /** AXIS scoring category override, if specified in config. */
30
+ axisCategory?: "environment" | "service";
31
+ }
32
+ export interface AgentConfig {
33
+ adapter: string;
34
+ /** Executable command for custom adapters (e.g. "codex", "aider", "./my-agent.sh"). */
35
+ command?: string;
36
+ scenarios?: string[];
37
+ skills?: SkillSource[];
38
+ model?: string;
39
+ /** Adapter-specific CLI flags. Keys are flag names (without --), values are flag values (true for boolean flags). */
40
+ flags?: Record<string, string | boolean>;
41
+ }
42
+ export interface DefaultsConfig {
43
+ scoring_weights?: ScoringWeights;
44
+ /** Maximum number of parallel jobs. Defaults to unlimited (all jobs run simultaneously). */
45
+ concurrency?: number;
46
+ }
47
+ export interface ScoringWeights {
48
+ goal_achievement: number;
49
+ environment: number;
50
+ service: number;
51
+ agent: number;
52
+ }
53
+ export type McpServerConfig = McpStdioServer | McpHttpServer;
54
+ export interface McpStdioServer {
55
+ /** Spawn a local process. */
56
+ type: "stdio";
57
+ /** Command to run. */
58
+ command: string;
59
+ /** Command arguments. */
60
+ args?: string[];
61
+ /** Environment variables for the server process. */
62
+ env?: Record<string, string>;
63
+ /** Override the AXIS scoring category for all tools from this server. Defaults to "service". */
64
+ axisCategory?: "environment" | "service";
65
+ }
66
+ export interface McpHttpServer {
67
+ /** Connect to a remote MCP server over HTTP. */
68
+ type: "http";
69
+ /** URL of the MCP server endpoint. */
70
+ url: string;
71
+ /** HTTP headers (e.g. Authorization). */
72
+ headers?: Record<string, string>;
73
+ /** Override the AXIS scoring category for all tools from this server. Defaults to "service". */
74
+ axisCategory?: "environment" | "service";
75
+ }
76
+ //# sourceMappingURL=config.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"config.d.ts","sourceRoot":"","sources":["../../src/types/config.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,UAAU;IACzB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,CAAC,MAAM,GAAG,WAAW,CAAC,EAAE,CAAC;IACjC,QAAQ,CAAC,EAAE,cAAc,CAAC;IAC1B,0IAA0I;IAC1I,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAClC,oHAAoH;IACpH,GAAG,CAAC,EAAE,MAAM,EAAE,CAAC;IACf,4DAA4D;IAC5D,WAAW,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,eAAe,CAAC,CAAC;IAC9C,8IAA8I;IAC9I,MAAM,CAAC,EAAE,WAAW,EAAE,CAAC;CACxB;AAED,gFAAgF;AAChF,MAAM,MAAM,WAAW,GAAG,MAAM,GAAG,iBAAiB,CAAC;AAErD,2CAA2C;AAC3C,MAAM,WAAW,iBAAiB;IAChC,gEAAgE;IAChE,MAAM,EAAE,MAAM,CAAC;IACf,gFAAgF;IAChF,YAAY,CAAC,EAAE,aAAa,GAAG,SAAS,CAAC;CAC1C;AAED,0EAA0E;AAC1E,MAAM,WAAW,aAAa;IAC5B,gDAAgD;IAChD,IAAI,EAAE,MAAM,CAAC;IACb,gEAAgE;IAChE,IAAI,EAAE,MAAM,CAAC;IACb,8DAA8D;IAC9D,YAAY,CAAC,EAAE,aAAa,GAAG,SAAS,CAAC;CAC1C;AAED,MAAM,WAAW,WAAW;IAC1B,OAAO,EAAE,MAAM,CAAC;IAChB,uFAAuF;IACvF,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IACrB,MAAM,CAAC,EAAE,WAAW,EAAE,CAAC;IACvB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,qHAAqH;IACrH,KAAK,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,CAAC;CAC1C;AAED,MAAM,WAAW,cAAc;IAC7B,eAAe,CAAC,EAAE,cAAc,CAAC;IACjC,4FAA4F;IAC5F,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,cAAc;IAC7B,gBAAgB,EAAE,MAAM,CAAC;IACzB,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,MAAM,eAAe,GAAG,cAAc,GAAG,aAAa,CAAC;AAE7D,MAAM,WAAW,cAAc;IAC7B,6BAA6B;IAC7B,IAAI,EAAE,OAAO,CAAC;IACd,sBAAsB;IACtB,OAAO,EAAE,MAAM,CAAC;IAChB,yBAAyB;IACzB,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;IAChB,oDAAoD;IACpD,GAAG,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC7B,gGAAgG;IAChG,YAAY,CAAC,EAAE,aAAa,GAAG,SAAS,CAAC;CAC1C;AAED,MAAM,WAAW,aAAa;IAC5B,gDAAgD;IAChD,IAAI,EAAE,MAAM,CAAC;IACb,sCAAsC;IACtC,GAAG,EAAE,MAAM,CAAC;IACZ,yCAAyC;IACzC,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,gGAAgG;IAChG,YAAY,CAAC,EAAE,aAAa,GAAG,SAAS,CAAC;CAC1C"}
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=config.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"config.js","sourceRoot":"","sources":["../../src/types/config.ts"],"names":[],"mappings":""}
@@ -0,0 +1,8 @@
1
+ export * from "./config.js";
2
+ export * from "./scenario.js";
3
+ export * from "./agent.js";
4
+ export * from "./output.js";
5
+ export * from "./scoring.js";
6
+ export * from "./report.js";
7
+ export * from "./baseline.js";
8
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/types/index.ts"],"names":[],"mappings":"AAAA,cAAc,aAAa,CAAC;AAC5B,cAAc,eAAe,CAAC;AAC9B,cAAc,YAAY,CAAC;AAC3B,cAAc,aAAa,CAAC;AAC5B,cAAc,cAAc,CAAC;AAC7B,cAAc,aAAa,CAAC;AAC5B,cAAc,eAAe,CAAC"}
@@ -0,0 +1,8 @@
1
+ export * from "./config.js";
2
+ export * from "./scenario.js";
3
+ export * from "./agent.js";
4
+ export * from "./output.js";
5
+ export * from "./scoring.js";
6
+ export * from "./report.js";
7
+ export * from "./baseline.js";
8
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/types/index.ts"],"names":[],"mappings":"AAAA,cAAc,aAAa,CAAC;AAC5B,cAAc,eAAe,CAAC;AAC9B,cAAc,YAAY,CAAC;AAC3B,cAAc,aAAa,CAAC;AAC5B,cAAc,cAAc,CAAC;AAC7B,cAAc,aAAa,CAAC;AAC5B,cAAc,eAAe,CAAC"}
@@ -0,0 +1,70 @@
1
+ import type { AgentOutput } from "./agent.js";
2
+ import type { AgentConfig } from "./config.js";
3
+ import type { RubricCriterion } from "./scenario.js";
4
+ import type { ScoredRunResult } from "./scoring.js";
5
+ export interface RunOutput {
6
+ version: string;
7
+ timestamp: string;
8
+ durationMs: number;
9
+ results: RunResult[];
10
+ summary: RunSummary;
11
+ }
12
+ /** Shared fields for all run results (scored and unscored). */
13
+ export interface BaseRunResult {
14
+ scenarioKey: string;
15
+ scenarioName: string;
16
+ agentName: string;
17
+ prompt: string;
18
+ rubric: string | RubricCriterion[];
19
+ agentConfig: AgentConfig;
20
+ output: AgentOutput;
21
+ }
22
+ export interface RunResult extends BaseRunResult {
23
+ }
24
+ export interface RunSummary {
25
+ total: number;
26
+ completed: number;
27
+ failed: number;
28
+ }
29
+ export type JobStatus = "pending" | "setup" | "running" | "teardown" | "done" | "failed" | "scoring";
30
+ export interface JobState {
31
+ scenarioKey: string;
32
+ agentName: string;
33
+ status: JobStatus;
34
+ durationMs?: number;
35
+ axisScore?: number;
36
+ /**
37
+ * Live running token estimate for the agent (monotonically non-decreasing).
38
+ * Sourced from streamed assistant text during execution and snapped to the
39
+ * real `metadata.tokenUsage` total at completion. Intentionally conservative
40
+ * so the UI can animate count-up without ever having to reverse.
41
+ */
42
+ liveTokens?: number;
43
+ /**
44
+ * True once `liveTokens` has been replaced with the authoritative total
45
+ * from `metadata.tokenUsage` (input + output + cacheReadInput). The UI
46
+ * uses this to drop the `~` approximation prefix once the animation
47
+ * catches up to the real value.
48
+ */
49
+ tokensFinal?: boolean;
50
+ /**
51
+ * Wall-clock time, in milliseconds since the Unix epoch, when the agent transitioned to `running`. Used by the
52
+ * live UI to tick an elapsed-duration counter before the job finishes (once
53
+ * finished, `durationMs` takes over as the authoritative value).
54
+ */
55
+ runStartedAt?: number;
56
+ }
57
+ export interface Logger {
58
+ info(message: string): void;
59
+ error(message: string): void;
60
+ /** Detailed per-step logging. Only called when verbose mode is enabled. */
61
+ verbose?(message: string): void;
62
+ /** Called when a job's status changes. Used for live-updating displays. */
63
+ onJobUpdate?(jobs: JobState[]): void;
64
+ }
65
+ export declare const silentLogger: Logger;
66
+ /** Type guard: true when the run result has a `score` property that is neither null nor undefined. */
67
+ export declare function isScoredResult(result: BaseRunResult): result is ScoredRunResult;
68
+ /** Format an unknown error value into a message string: `err.message` for Error instances, `String(err)` for anything else. */
69
+ export declare function formatError(err: unknown): string;
70
+ //# sourceMappingURL=output.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"output.d.ts","sourceRoot":"","sources":["../../src/types/output.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AAC9C,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAC/C,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AACrD,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,cAAc,CAAC;AAEpD,MAAM,WAAW,SAAS;IACxB,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,SAAS,EAAE,CAAC;IACrB,OAAO,EAAE,UAAU,CAAC;CACrB;AAED,+DAA+D;AAC/D,MAAM,WAAW,aAAa;IAC5B,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,GAAG,eAAe,EAAE,CAAC;IACnC,WAAW,EAAE,WAAW,CAAC;IACzB,MAAM,EAAE,WAAW,CAAC;CACrB;AAED,MAAM,WAAW,SAAU,SAAQ,aAAa;CAAG;AAEnD,MAAM,WAAW,UAAU;IACzB,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,MAAM,SAAS,GAAG,SAAS,GAAG,OAAO,GAAG,SAAS,GAAG,UAAU,GAAG,MAAM,GAAG,QAAQ,GAAG,SAAS,CAAC;AAErG,MAAM,WAAW,QAAQ;IACvB,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,SAAS,CAAC;IAClB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;;;OAKG;IACH,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB;;;;;OAKG;IACH,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB;;;;OAIG;IACH,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,WAAW,MAAM;IACrB,IAAI,CAAC,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IAC5B,KAAK,CAAC,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7B,2EAA2E;IAC3E,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IAChC,2EAA2E;IAC3E,WAAW,CAAC,CAAC,IAAI,EAAE,QAAQ,EAAE,GAAG,IAAI,CAAC;CACtC;AAED,eAAO,MAAM,YAAY,EAAE,MAG1B,CAAC;AAEF,0DAA0D;AAC1D,wBAAgB,cAAc,CAAC,MAAM,EAAE,aAAa,GAAG,MAAM,IAAI,eAAe,CAE/E;AAED,2DAA2D;AAC3D,wBAAgB,WAAW,CAAC,GAAG,EAAE,OAAO,GAAG,MAAM,CAGhD"}
@@ -0,0 +1,15 @@
1
/** Logger whose `info` and `error` methods accept a call and do nothing. */
export const silentLogger = {
    info: () => { },
    error: () => { },
};
5
/**
 * Type guard: checks if a run result has been scored.
 *
 * A result counts as scored when it is an object whose `score` property is
 * neither `null` nor `undefined`.
 *
 * @param result - The run result to inspect.
 * @returns `true` when `result.score` is present and non-nullish, otherwise `false`.
 */
export function isScoredResult(result) {
    // The `in` operator throws a TypeError on null, undefined and primitives;
    // a guard should answer `false` for such values instead of throwing.
    if (result == null || typeof result !== "object")
        return false;
    return result.score != null;
}
9
/**
 * Format an unknown error value into a message string.
 *
 * - `Error` instances yield their `message`.
 * - Primitives (and functions) are converted with `String()`.
 * - Other objects that carry a string `message` property yield that property.
 * - Remaining objects use their own string conversion; when that only gives
 *   the generic "[object Object]" (or throws, as null-prototype objects do),
 *   a JSON rendering is returned instead so the content is not lost.
 *
 * @param err - Any thrown or rejected value.
 * @returns A message describing `err`.
 */
export function formatError(err) {
    if (err instanceof Error)
        return err.message;
    if (typeof err !== "object" || err === null)
        return String(err);
    // Error-like objects, e.g. errors deserialized from JSON.
    if (typeof err.message === "string")
        return err.message;
    const generic = "[object Object]";
    let text = generic;
    try {
        text = String(err);
    }
    catch {
        // Objects without a usable toString (e.g. Object.create(null)) land here.
    }
    // A custom toString (Date, arrays, user classes) is more useful than JSON.
    if (text !== generic)
        return text;
    try {
        const json = JSON.stringify(err);
        if (typeof json === "string")
            return json;
    }
    catch {
        // Circular structures or BigInt members cannot be serialized.
    }
    return generic;
}
15
+ //# sourceMappingURL=output.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"output.js","sourceRoot":"","sources":["../../src/types/output.ts"],"names":[],"mappings":"AAuEA,MAAM,CAAC,MAAM,YAAY,GAAW;IAClC,IAAI,KAAI,CAAC;IACT,KAAK,KAAI,CAAC;CACX,CAAC;AAEF,0DAA0D;AAC1D,MAAM,UAAU,cAAc,CAAC,MAAqB;IAClD,OAAO,OAAO,IAAI,MAAM,IAAI,MAAM,CAAC,KAAK,IAAI,IAAI,CAAC;AACnD,CAAC;AAED,2DAA2D;AAC3D,MAAM,UAAU,WAAW,CAAC,GAAY;IACtC,IAAI,GAAG,YAAY,KAAK;QAAE,OAAO,GAAG,CAAC,OAAO,CAAC;IAC7C,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC;AACrB,CAAC"}
@@ -0,0 +1,37 @@
1
+ import type { TokenUsage } from "./agent.js";
2
+ import type { AgentConfig } from "./config.js";
3
+ import type { ScoreResult } from "./scoring.js";
4
+ import type { RunSummary } from "./output.js";
5
+ import type { ScoredSummary } from "./scoring.js";
6
+ import type { RubricCriterion } from "./scenario.js";
7
+ /** Lightweight report manifest — no transcripts, just summary data. */
8
+ export interface ReportManifest {
9
+ version: string; // NOTE(review): not visible here whether this is the package version or a manifest schema version — confirm
10
+ reportId: string; // identifier of this report
11
+ timestamp: string; // presumably an ISO-8601 date string — TODO confirm the format against the writer
12
+ durationMs: number; // presumably the total run duration in milliseconds
13
+ summary: ScoredSummary | RunSummary; // either summary shape is allowed; presumably ScoredSummary when scoring ran — confirm
14
+ results: ReportResultEntry[]; // summary entries, one per ReportResultEntry (transcripts are not included)
15
+ }
16
+ /**
+  * Summary of a single scenario×agent result (no transcript).
+  * The full result lives in the separate file referenced by `file`.
+  */
17
+ export interface ReportResultEntry {
18
+ scenarioKey: string; // presumably the Scenario.key of the scenario that was run — confirm against the report writer
19
+ scenarioName: string; // presumably the Scenario.name of that scenario
20
+ agentName: string; // name of the agent this entry belongs to
21
+ durationMs: number; // presumably the duration of this single run in milliseconds
22
+ exitCode: number; // NOTE(review): presumably the agent process exit code — confirm which values mean failure
23
+ tokenUsage?: TokenUsage; // optional; absent when no usage data is attached
24
+ totalCostUsd?: number; // optional; presumably the cost in US dollars, per the field name
25
+ score?: ScoreResult; // optional; presumably only present once the result has been scored
26
+ /** Human-readable error description when the agent fails. */
27
+ error?: string;
28
+ /** Relative path to the full result file within the report directory. */
29
+ file: string;
30
+ /** The prompt given to the agent. */
31
+ prompt?: string;
32
+ /** Rubric used for scoring. */
33
+ rubric?: string | RubricCriterion[];
34
+ /** Agent configuration. */
35
+ agentConfig?: AgentConfig;
36
+ }
37
+ //# sourceMappingURL=report.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"report.d.ts","sourceRoot":"","sources":["../../src/types/report.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAC7C,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAC/C,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,cAAc,CAAC;AAChD,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAC9C,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC;AAClD,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AAErD,uEAAuE;AACvE,MAAM,WAAW,cAAc;IAC7B,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,aAAa,GAAG,UAAU,CAAC;IACpC,OAAO,EAAE,iBAAiB,EAAE,CAAC;CAC9B;AAED,iEAAiE;AACjE,MAAM,WAAW,iBAAiB;IAChC,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,CAAC,EAAE,UAAU,CAAC;IACxB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,6DAA6D;IAC7D,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,yEAAyE;IACzE,IAAI,EAAE,MAAM,CAAC;IACb,qCAAqC;IACrC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,+BAA+B;IAC/B,MAAM,CAAC,EAAE,MAAM,GAAG,eAAe,EAAE,CAAC;IACpC,2BAA2B;IAC3B,WAAW,CAAC,EAAE,WAAW,CAAC;CAC3B"}
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=report.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"report.js","sourceRoot":"","sources":["../../src/types/report.ts"],"names":[],"mappings":""}
@@ -0,0 +1,23 @@
1
+ import type { SkillSource } from "./config.js";
2
+ export interface Scenario { // shape of one scenario; `key`'s comment says it derives from a .json file path, so presumably loaded from such a file
3
+ /** Stable identifier derived from file path relative to scenarios root, sans .json */
4
+ key: string;
5
+ name: string; // presumably a display name, as opposed to the stable `key`
6
+ setup?: LifecycleAction[]; // optional; presumably actions run before the agent is prompted — confirm against the runner
7
+ prompt: string; // required prompt text; presumably what is given to the agent
8
+ rubric: string | RubricCriterion[]; // either free-form text or a list of criteria
9
+ teardown?: LifecycleAction[]; // optional; presumably actions run after the scenario finishes — confirm against the runner
10
+ /** When set, only these agents run this scenario (overrides the global agents list). */
11
+ agents?: string[];
12
+ /** Skills specific to this scenario, merged with top-level and per-agent skills. */
13
+ skills?: SkillSource[];
14
+ }
15
+ export interface LifecycleAction { // element type of Scenario.setup and Scenario.teardown
16
+ action: "run_script"; // the only action kind this declaration allows
17
+ command: string; // presumably the script or command to execute — confirm how the runner invokes it
18
+ }
19
+ export interface RubricCriterion { // one entry of a list-form rubric (see the `rubric` fields typed `string | RubricCriterion[]`)
20
+ check: string; // presumably the text of the criterion to evaluate
21
+ weight?: number; // optional; NOTE(review): the default used when omitted is not visible in this declaration
22
+ }
23
+ //# sourceMappingURL=scenario.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"scenario.d.ts","sourceRoot":"","sources":["../../src/types/scenario.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAE/C,MAAM,WAAW,QAAQ;IACvB,sFAAsF;IACtF,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,eAAe,EAAE,CAAC;IAC1B,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,GAAG,eAAe,EAAE,CAAC;IACnC,QAAQ,CAAC,EAAE,eAAe,EAAE,CAAC;IAC7B,wFAAwF;IACxF,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC;IAClB,oFAAoF;IACpF,MAAM,CAAC,EAAE,WAAW,EAAE,CAAC;CACxB;AAED,MAAM,WAAW,eAAe;IAC9B,MAAM,EAAE,YAAY,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,eAAe;IAC9B,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB"}
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=scenario.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"scenario.js","sourceRoot":"","sources":["../../src/types/scenario.ts"],"names":[],"mappings":""}