@netlify/axis 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +977 -0
- package/dist/adapters/base/acp-adapter.d.ts +44 -0
- package/dist/adapters/base/acp-adapter.d.ts.map +1 -0
- package/dist/adapters/base/acp-adapter.js +559 -0
- package/dist/adapters/base/acp-adapter.js.map +1 -0
- package/dist/adapters/base/agent-adapter.d.ts +132 -0
- package/dist/adapters/base/agent-adapter.d.ts.map +1 -0
- package/dist/adapters/base/agent-adapter.js +212 -0
- package/dist/adapters/base/agent-adapter.js.map +1 -0
- package/dist/adapters/claude-code.d.ts +3 -0
- package/dist/adapters/claude-code.d.ts.map +1 -0
- package/dist/adapters/claude-code.js +138 -0
- package/dist/adapters/claude-code.js.map +1 -0
- package/dist/adapters/claude-sdk.d.ts +11 -0
- package/dist/adapters/claude-sdk.d.ts.map +1 -0
- package/dist/adapters/claude-sdk.js +46 -0
- package/dist/adapters/claude-sdk.js.map +1 -0
- package/dist/adapters/codex.d.ts +3 -0
- package/dist/adapters/codex.d.ts.map +1 -0
- package/dist/adapters/codex.js +183 -0
- package/dist/adapters/codex.js.map +1 -0
- package/dist/adapters/gemini-acp.d.ts +11 -0
- package/dist/adapters/gemini-acp.d.ts.map +1 -0
- package/dist/adapters/gemini-acp.js +60 -0
- package/dist/adapters/gemini-acp.js.map +1 -0
- package/dist/adapters/gemini.d.ts +3 -0
- package/dist/adapters/gemini.d.ts.map +1 -0
- package/dist/adapters/gemini.js +222 -0
- package/dist/adapters/gemini.js.map +1 -0
- package/dist/adapters/goose.d.ts +3 -0
- package/dist/adapters/goose.d.ts.map +1 -0
- package/dist/adapters/goose.js +9 -0
- package/dist/adapters/goose.js.map +1 -0
- package/dist/adapters/registry.d.ts +7 -0
- package/dist/adapters/registry.d.ts.map +1 -0
- package/dist/adapters/registry.js +37 -0
- package/dist/adapters/registry.js.map +1 -0
- package/dist/adapters/utils/mcp.d.ts +23 -0
- package/dist/adapters/utils/mcp.d.ts.map +1 -0
- package/dist/adapters/utils/mcp.js +114 -0
- package/dist/adapters/utils/mcp.js.map +1 -0
- package/dist/adapters/utils/resolve.d.ts +20 -0
- package/dist/adapters/utils/resolve.d.ts.map +1 -0
- package/dist/adapters/utils/resolve.js +48 -0
- package/dist/adapters/utils/resolve.js.map +1 -0
- package/dist/adapters/utils/skills.d.ts +17 -0
- package/dist/adapters/utils/skills.d.ts.map +1 -0
- package/dist/adapters/utils/skills.js +52 -0
- package/dist/adapters/utils/skills.js.map +1 -0
- package/dist/adapters/utils/token-estimator.d.ts +21 -0
- package/dist/adapters/utils/token-estimator.d.ts.map +1 -0
- package/dist/adapters/utils/token-estimator.js +37 -0
- package/dist/adapters/utils/token-estimator.js.map +1 -0
- package/dist/baselines/diff.d.ts +9 -0
- package/dist/baselines/diff.d.ts.map +1 -0
- package/dist/baselines/diff.js +83 -0
- package/dist/baselines/diff.js.map +1 -0
- package/dist/baselines/index.d.ts +3 -0
- package/dist/baselines/index.d.ts.map +1 -0
- package/dist/baselines/index.js +3 -0
- package/dist/baselines/index.js.map +1 -0
- package/dist/baselines/store.d.ts +19 -0
- package/dist/baselines/store.d.ts.map +1 -0
- package/dist/baselines/store.js +104 -0
- package/dist/baselines/store.js.map +1 -0
- package/dist/cli.d.ts +3 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +487 -0
- package/dist/cli.js.map +1 -0
- package/dist/config/loader.d.ts +8 -0
- package/dist/config/loader.d.ts.map +1 -0
- package/dist/config/loader.js +99 -0
- package/dist/config/loader.js.map +1 -0
- package/dist/config/validator.d.ts +11 -0
- package/dist/config/validator.d.ts.map +1 -0
- package/dist/config/validator.js +203 -0
- package/dist/config/validator.js.map +1 -0
- package/dist/docs-site/_astro/cli.DDWZtG0-.css +1 -0
- package/dist/docs-site/cli/index.html +18 -0
- package/dist/docs-site/configuration/index.html +121 -0
- package/dist/docs-site/content-assets.mjs +1 -0
- package/dist/docs-site/content-modules.mjs +1 -0
- package/dist/docs-site/data-store.json +9 -0
- package/dist/docs-site/index.html +69 -0
- package/dist/docs-site/quickstart/index.html +59 -0
- package/dist/docs-site/running/index.html +87 -0
- package/dist/docs-site/scoring/index.html +135 -0
- package/dist/index.d.ts +19 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +15 -0
- package/dist/index.js.map +1 -0
- package/dist/report-ui/index.html +291 -0
- package/dist/report-ui/mock-data.json +298 -0
- package/dist/reports/html.d.ts +7 -0
- package/dist/reports/html.d.ts.map +1 -0
- package/dist/reports/html.js +27 -0
- package/dist/reports/html.js.map +1 -0
- package/dist/reports/reader.d.ts +21 -0
- package/dist/reports/reader.d.ts.map +1 -0
- package/dist/reports/reader.js +110 -0
- package/dist/reports/reader.js.map +1 -0
- package/dist/reports/writer.d.ts +14 -0
- package/dist/reports/writer.d.ts.map +1 -0
- package/dist/reports/writer.js +106 -0
- package/dist/reports/writer.js.map +1 -0
- package/dist/runner/lifecycle.d.ts +10 -0
- package/dist/runner/lifecycle.d.ts.map +1 -0
- package/dist/runner/lifecycle.js +58 -0
- package/dist/runner/lifecycle.js.map +1 -0
- package/dist/runner/runner.d.ts +34 -0
- package/dist/runner/runner.d.ts.map +1 -0
- package/dist/runner/runner.js +330 -0
- package/dist/runner/runner.js.map +1 -0
- package/dist/scoring/category-score.d.ts +52 -0
- package/dist/scoring/category-score.d.ts.map +1 -0
- package/dist/scoring/category-score.js +157 -0
- package/dist/scoring/category-score.js.map +1 -0
- package/dist/scoring/composite.d.ts +5 -0
- package/dist/scoring/composite.d.ts.map +1 -0
- package/dist/scoring/composite.js +24 -0
- package/dist/scoring/composite.js.map +1 -0
- package/dist/scoring/deep-eval.d.ts +25 -0
- package/dist/scoring/deep-eval.d.ts.map +1 -0
- package/dist/scoring/deep-eval.js +382 -0
- package/dist/scoring/deep-eval.js.map +1 -0
- package/dist/scoring/goal-achievement.d.ts +5 -0
- package/dist/scoring/goal-achievement.d.ts.map +1 -0
- package/dist/scoring/goal-achievement.js +241 -0
- package/dist/scoring/goal-achievement.js.map +1 -0
- package/dist/scoring/index.d.ts +22 -0
- package/dist/scoring/index.d.ts.map +1 -0
- package/dist/scoring/index.js +115 -0
- package/dist/scoring/index.js.map +1 -0
- package/dist/scoring/parse-json.d.ts +6 -0
- package/dist/scoring/parse-json.d.ts.map +1 -0
- package/dist/scoring/parse-json.js +18 -0
- package/dist/scoring/parse-json.js.map +1 -0
- package/dist/scoring/sparse-index.d.ts +15 -0
- package/dist/scoring/sparse-index.d.ts.map +1 -0
- package/dist/scoring/sparse-index.js +338 -0
- package/dist/scoring/sparse-index.js.map +1 -0
- package/dist/scoring/triage.d.ts +15 -0
- package/dist/scoring/triage.d.ts.map +1 -0
- package/dist/scoring/triage.js +204 -0
- package/dist/scoring/triage.js.map +1 -0
- package/dist/skills/resolver.d.ts +19 -0
- package/dist/skills/resolver.d.ts.map +1 -0
- package/dist/skills/resolver.js +95 -0
- package/dist/skills/resolver.js.map +1 -0
- package/dist/transcript/categorize.d.ts +24 -0
- package/dist/transcript/categorize.d.ts.map +1 -0
- package/dist/transcript/categorize.js +233 -0
- package/dist/transcript/categorize.js.map +1 -0
- package/dist/transcript/classify.d.ts +7 -0
- package/dist/transcript/classify.d.ts.map +1 -0
- package/dist/transcript/classify.js +32 -0
- package/dist/transcript/classify.js.map +1 -0
- package/dist/transcript/extract.d.ts +24 -0
- package/dist/transcript/extract.d.ts.map +1 -0
- package/dist/transcript/extract.js +266 -0
- package/dist/transcript/extract.js.map +1 -0
- package/dist/transcript/index.d.ts +3 -0
- package/dist/transcript/index.d.ts.map +1 -0
- package/dist/transcript/index.js +2 -0
- package/dist/transcript/index.js.map +1 -0
- package/dist/transcript/normalize.d.ts +15 -0
- package/dist/transcript/normalize.d.ts.map +1 -0
- package/dist/transcript/normalize.js +160 -0
- package/dist/transcript/normalize.js.map +1 -0
- package/dist/transcript/types.d.ts +92 -0
- package/dist/transcript/types.d.ts.map +1 -0
- package/dist/transcript/types.js +2 -0
- package/dist/transcript/types.js.map +1 -0
- package/dist/transcript/urls.d.ts +10 -0
- package/dist/transcript/urls.d.ts.map +1 -0
- package/dist/transcript/urls.js +31 -0
- package/dist/transcript/urls.js.map +1 -0
- package/dist/types/agent.d.ts +80 -0
- package/dist/types/agent.d.ts.map +1 -0
- package/dist/types/agent.js +2 -0
- package/dist/types/agent.js.map +1 -0
- package/dist/types/baseline.d.ts +65 -0
- package/dist/types/baseline.d.ts.map +1 -0
- package/dist/types/baseline.js +2 -0
- package/dist/types/baseline.js.map +1 -0
- package/dist/types/config.d.ts +76 -0
- package/dist/types/config.d.ts.map +1 -0
- package/dist/types/config.js +2 -0
- package/dist/types/config.js.map +1 -0
- package/dist/types/index.d.ts +8 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +8 -0
- package/dist/types/index.js.map +1 -0
- package/dist/types/output.d.ts +70 -0
- package/dist/types/output.d.ts.map +1 -0
- package/dist/types/output.js +15 -0
- package/dist/types/output.js.map +1 -0
- package/dist/types/report.d.ts +37 -0
- package/dist/types/report.d.ts.map +1 -0
- package/dist/types/report.js +2 -0
- package/dist/types/report.js.map +1 -0
- package/dist/types/scenario.d.ts +23 -0
- package/dist/types/scenario.d.ts.map +1 -0
- package/dist/types/scenario.js +2 -0
- package/dist/types/scenario.js.map +1 -0
- package/dist/types/scoring.d.ts +176 -0
- package/dist/types/scoring.d.ts.map +1 -0
- package/dist/types/scoring.js +2 -0
- package/dist/types/scoring.js.map +1 -0
- package/dist/ui/AnimatedTokens.d.ts +29 -0
- package/dist/ui/AnimatedTokens.d.ts.map +1 -0
- package/dist/ui/AnimatedTokens.js +53 -0
- package/dist/ui/AnimatedTokens.js.map +1 -0
- package/dist/ui/App.d.ts +6 -0
- package/dist/ui/App.d.ts.map +1 -0
- package/dist/ui/App.js +16 -0
- package/dist/ui/App.js.map +1 -0
- package/dist/ui/LiveDuration.d.ts +20 -0
- package/dist/ui/LiveDuration.d.ts.map +1 -0
- package/dist/ui/LiveDuration.js +31 -0
- package/dist/ui/LiveDuration.js.map +1 -0
- package/dist/ui/LiveStatus.d.ts +7 -0
- package/dist/ui/LiveStatus.d.ts.map +1 -0
- package/dist/ui/LiveStatus.js +52 -0
- package/dist/ui/LiveStatus.js.map +1 -0
- package/dist/ui/format.d.ts +29 -0
- package/dist/ui/format.d.ts.map +1 -0
- package/dist/ui/format.js +514 -0
- package/dist/ui/format.js.map +1 -0
- package/package.json +65 -0
|
@@ -0,0 +1,330 @@
|
|
|
1
|
+
import * as fs from "node:fs";
|
|
2
|
+
import * as os from "node:os";
|
|
3
|
+
import * as path from "node:path";
|
|
4
|
+
import { loadConfig, discoverScenarios } from "../config/loader.js";
|
|
5
|
+
import { getAdapter, registerAdapter } from "../adapters/registry.js";
|
|
6
|
+
import { executeLifecycleActions } from "./lifecycle.js";
|
|
7
|
+
import { silentLogger as defaultLogger, formatError } from "../types/output.js";
|
|
8
|
+
import { resolveSkills, skillSourceString } from "../skills/resolver.js";
|
|
9
|
+
/** Host environment variables that every isolated job environment inherits. */
const SYSTEM_VARS = [
    "PATH",
    "USER",
    "SHELL",
    "LANG",
    "TERM",
    "TMPDIR",
];
/** Variables forwarded to jobs when the config does not provide its own `env` list. */
const DEFAULT_PASS_ENV = [
    "ANTHROPIC_API_KEY",
    "CODEX_API_KEY",
    "GEMINI_API_KEY",
];
|
|
13
|
+
/**
 * Discover and execute every agent/scenario job described by the config.
 *
 * Phases, in order: load config, register custom adapters, build the job
 * list, run adapter pre-flight checks, resolve skills once, then execute the
 * jobs through `runWithConcurrency`.
 *
 * Options read by this function:
 * - `logger`          logger to use; falls back to the silent default logger.
 * - `configPath`      forwarded to `loadConfig`.
 * - `agentFilter`     when non-empty, only agents with these names are run.
 * - `scenarioFilter`  when non-empty, only scenarios with these keys are run.
 * - `refreshSkills`   forwarded to `resolveSkills` as `refresh`.
 * - `concurrency`     maximum simultaneous jobs; defaults to `Infinity`.
 * - `registerCleanup` forwarded to each job for signal-time cleanup.
 * - `debug`           when truthy, adapters are asked to capture raw output.
 * - `onResult`        awaited with each job result before that job's teardown.
 *
 * @param options run options (all optional).
 * @returns the object produced by `buildOutput` for the collected results.
 * @throws Error when a custom adapter module has no usable export, or when an
 *   adapter's required environment variables are missing. Errors from jobs
 *   and from `onResult` propagate to the caller.
 */
export async function run(options = {}) {
    const logger = options.logger ?? defaultLogger;
    const runStart = Date.now();
    const { config, configDir } = await loadConfig(options.configPath);
    // --- Load custom adapters from config ---
    if (config.adapters) {
        // Imported lazily: only needed when the config declares custom adapters.
        const { pathToFileURL } = await import("node:url");
        for (const [name, modulePath] of Object.entries(config.adapters)) {
            const absPath = path.resolve(configDir, modulePath);
            // import() takes URL-style specifiers. A raw absolute path fails on
            // Windows (drive letter parsed as a scheme) and is mis-parsed when it
            // contains '#', '?' or '%', so convert it to a file: URL first.
            const mod = await import(pathToFileURL(absPath).href);
            const adapter = mod.default ?? mod.adapter;
            if (!adapter || typeof adapter.run !== "function") {
                throw new Error(`Custom adapter "${name}" at ${modulePath} must export a valid AgentAdapter ` +
                    `(as default export or named "adapter" export).`);
            }
            registerAdapter(name, adapter);
        }
    }
    // --- Discovery phase ---
    const jobs = [];
    const agents = normalizeAgents(config.agents);
    for (const { name: agentName, config: agentConfig } of agents) {
        if (options.agentFilter?.length && !options.agentFilter.includes(agentName)) {
            continue;
        }
        const scenarios = await discoverScenarios(configDir, config.scenarios, agentConfig.scenarios);
        const filteredScenarios = options.scenarioFilter?.length
            ? scenarios.filter((s) => options.scenarioFilter.includes(s.key))
            : scenarios;
        for (const scenario of filteredScenarios) {
            // Scenario-level agent override: if set, only listed agents run this scenario
            if (scenario.agents && !scenario.agents.includes(agentName)) {
                continue;
            }
            jobs.push({ index: jobs.length, agentName, agentConfig, scenario, configDir, axisConfig: config });
        }
    }
    if (jobs.length === 0) {
        logger.info("No jobs discovered.");
        return buildOutput(runStart, []);
    }
    // --- Initialize job state tracker ---
    // One entry per job, at the same index as the job itself.
    const jobStates = jobs.map((job) => ({
        scenarioKey: job.scenario.key,
        agentName: job.agentName,
        status: "pending",
    }));
    const updateStatus = (index, status, durationMs) => {
        const patch = { status, durationMs };
        // Stamp the start time on the first transition into "running" so the
        // live UI can tick an elapsed-duration counter.
        if (status === "running" && jobStates[index].runStartedAt === undefined) {
            patch.runStartedAt = Date.now();
        }
        jobStates[index] = { ...jobStates[index], ...patch };
        logger.onJobUpdate?.(jobStates);
    };
    /**
     * Monotonic live-token bump — drops any non-increasing estimates. Setting
     * `final` true stamps `tokensFinal` so the UI knows the number is now the
     * authoritative total (from `metadata.tokenUsage`), not an estimate.
     */
    const updateTokens = (index, tokens, final = false) => {
        const prev = jobStates[index].liveTokens ?? 0;
        const grew = tokens > prev;
        const newlyFinal = final && !jobStates[index].tokensFinal;
        if (!grew && !newlyFinal)
            return;
        jobStates[index] = {
            ...jobStates[index],
            liveTokens: grew ? tokens : prev,
            ...(newlyFinal ? { tokensFinal: true } : {}),
        };
        logger.onJobUpdate?.(jobStates);
    };
    // Build filtered environment once for all jobs
    const jobEnv = buildJobEnv(config);
    // --- Validate required env vars and resolve CLI binaries for each adapter ---
    // This runs BEFORE the initial onJobUpdate so that any logger.info calls
    // from ensureInstalled (e.g. npx fallback messages) don't interfere with
    // ink's cursor tracking when it starts rendering the live display.
    const checkedAdapters = new Set();
    for (const job of jobs) {
        if (checkedAdapters.has(job.agentConfig.adapter))
            continue;
        checkedAdapters.add(job.agentConfig.adapter);
        const adapter = getAdapter(job.agentConfig.adapter);
        const required = adapter.requiredEnv?.() ?? [];
        const missing = required.filter((key) => !jobEnv[key]);
        if (missing.length > 0) {
            throw new Error(`The "${job.agentConfig.adapter}" adapter requires environment variable${missing.length > 1 ? "s" : ""} ${missing.join(", ")} ` +
                `but ${missing.length > 1 ? "they are" : "it is"} not set. ` +
                `Add ${missing.length > 1 ? "them" : "it"} to your shell environment or to the "env" array in axis.config.json.`);
        }
        // Resolve CLI binary (direct or npx fallback)
        if (adapter.ensureInstalled) {
            await adapter.ensureInstalled(logger);
        }
    }
    // --- Resolve skills (once, before any jobs start) ---
    // Keyed by source string; the first entry seen for a source wins.
    const allSkillEntries = new Map();
    const rememberSkill = (entry) => {
        const key = skillSourceString(entry);
        if (!allSkillEntries.has(key))
            allSkillEntries.set(key, entry);
    };
    for (const entry of config.skills ?? []) {
        rememberSkill(entry);
    }
    for (const job of jobs) {
        for (const entry of job.agentConfig.skills ?? []) {
            rememberSkill(entry);
        }
        for (const entry of job.scenario.skills ?? []) {
            rememberSkill(entry);
        }
    }
    const resolvedSkillMap = new Map();
    if (allSkillEntries.size > 0) {
        const entries = [...allSkillEntries.values()];
        const resolved = await resolveSkills({
            sources: entries,
            configDir,
            cacheDir: path.join(configDir, ".axis", "skills-cache"),
            logger,
            refresh: options.refreshSkills,
        });
        // NOTE(review): pairs keys with results by position, which assumes
        // resolveSkills returns one result per source in input order — confirm.
        const keys = [...allSkillEntries.keys()];
        for (let i = 0; i < keys.length; i++) {
            resolvedSkillMap.set(keys[i], resolved[i]);
        }
    }
    // Emit initial state after pre-flight so ink's first render is clean
    logger.onJobUpdate?.(jobStates);
    // --- Execute jobs with concurrency control ---
    const concurrency = options.concurrency ?? Infinity;
    const tasks = jobs.map((job) => async () => {
        const { result, cleanup } = await executeJob(job, jobEnv, logger, updateStatus, updateTokens, resolvedSkillMap, options.registerCleanup, options.debug);
        try {
            // Allow external processing (e.g. scoring/verification) before teardown.
            // If onResult returns a Promise, we await it so the judge can verify
            // results before teardown scripts destroy resources.
            if (options.onResult) {
                await options.onResult(result);
            }
        }
        finally {
            await cleanup();
        }
        return result;
    });
    const results = await runWithConcurrency(tasks, concurrency);
    return buildOutput(runStart, results);
}
|
|
167
|
+
/**
 * Run a single agent/scenario job inside a freshly created temp workspace.
 *
 * On success the workspace is left in place and a `cleanup` callback is
 * returned, so the caller can inspect results before teardown. On any failure
 * (including a failing setup action) the job is marked "failed", cleanup runs
 * immediately, and the error is rethrown.
 *
 * @param job              job descriptor (`index`, `agentName`, `agentConfig`, `scenario`, `axisConfig`).
 * @param env              base environment shared by all jobs.
 * @param logger           logger; `verbose` is optional, `error` is used for teardown failures.
 * @param updateStatus     `(index, status, durationMs?)` status reporter.
 * @param updateTokens     `(index, tokens, final?)` live token reporter.
 * @param resolvedSkillMap map from skill source string to its resolved skill.
 * @param registerCleanup  optional hook that registers a callback to run on process signal.
 * @param debug            when truthy, the adapter is asked to capture raw output.
 * @returns `{ result, cleanup }`; `cleanup` runs teardown actions and removes the workspace.
 */
async function executeJob(job, env, logger, updateStatus, updateTokens, resolvedSkillMap, registerCleanup, debug) {
    const { index, agentName, agentConfig, scenario, axisConfig } = job;
    const label = `${scenario.key} (${agentName})`;
    const jobStart = Date.now();
    // Look the adapter up before creating anything on disk, so a throwing
    // lookup cannot leave a temp directory behind.
    const adapter = getAdapter(agentConfig.adapter);
    // Create isolated workspace and point HOME there so agents
    // don't pick up the user's global settings (e.g. ~/.claude/).
    const workspace = createWorkspace();
    const adapterIsolation = adapter.isolationEnv?.(workspace) ?? {};
    // Spread order matters: shared env overrides adapter isolation values,
    // and HOME is always forced to the workspace.
    const jobEnv = { ...adapterIsolation, ...env, HOME: workspace };
    logger.verbose?.(`[${label}] Workspace: ${workspace}`);
    // Register workspace for cleanup on process signal (Ctrl-C)
    registerCleanup?.(() => {
        try {
            fs.rmSync(workspace, { recursive: true, force: true });
        }
        catch {
            /* best-effort */
        }
    });
    const cleanup = async () => {
        if (scenario.teardown?.length) {
            logger.verbose?.(`[${label}] Running teardown...`);
            // Teardown failures are logged, never thrown, so the workspace is
            // still removed below.
            await executeLifecycleActions(scenario.teardown, workspace, jobEnv).catch((teardownErr) => {
                logger.error(`[${label}] Teardown failed: ${formatError(teardownErr)}`);
            });
        }
        try {
            fs.rmSync(workspace, { recursive: true, force: true });
            logger.verbose?.(`[${label}] Cleaned up workspace: ${workspace}`);
        }
        catch {
            logger.verbose?.(`[${label}] Failed to clean up workspace: ${workspace}`);
        }
    };
    try {
        // Setup runs inside the guarded region: if a setup action throws, the
        // catch below still marks the job failed, runs teardown for anything
        // partially created, and removes the workspace.
        if (scenario.setup?.length) {
            updateStatus(index, "setup");
            logger.verbose?.(`[${label}] Running setup...`);
            await executeLifecycleActions(scenario.setup, workspace, jobEnv);
        }
        updateStatus(index, "running");
        logger.verbose?.(`[${label}] Executing agent...`);
        // Merge top-level + per-agent + per-scenario skills, deduplicate by source
        const skillEntries = [...(axisConfig.skills ?? []), ...(agentConfig.skills ?? []), ...(scenario.skills ?? [])];
        const seenSkills = new Set();
        const agentSkills = [];
        for (const entry of skillEntries) {
            const key = skillSourceString(entry);
            if (seenSkills.has(key))
                continue;
            seenSkills.add(key);
            const resolved = resolvedSkillMap.get(key);
            if (resolved)
                agentSkills.push(resolved);
        }
        const output = await adapter.run({
            prompt: scenario.prompt,
            config: agentConfig,
            scenario,
            workingDirectory: workspace,
            env: jobEnv,
            registerCleanup,
            captureRawOutput: !!debug,
            mcpServers: axisConfig.mcp_servers,
            resolvedSkills: agentSkills.length > 0 ? agentSkills : undefined,
            onTokenProgress: (tokens) => updateTokens(index, tokens),
        });
        // Snap the live counter up to the real total (input + output + cache).
        // The UI animates up to this value — it won't exceed it because
        // `updateTokens` is monotonic. Passing `final: true` marks `tokensFinal`
        // so the UI can drop the `~` approximation prefix once the animation
        // catches up.
        const usage = output.metadata.tokenUsage;
        if (usage) {
            const realTotal = (usage.input ?? 0) + (usage.output ?? 0) + (usage.cacheReadInput ?? 0);
            updateTokens(index, realTotal, true);
        }
        // Prefer the adapter-reported duration; fall back to wall-clock time
        // when it is missing or zero.
        const durationMs = output.metadata.durationMs || Date.now() - jobStart;
        const failed = output.metadata.exitCode !== 0 || !!output.metadata.error;
        updateStatus(index, failed ? "failed" : "done", durationMs);
        return {
            result: {
                scenarioKey: scenario.key,
                scenarioName: scenario.name,
                agentName,
                prompt: scenario.prompt,
                rubric: scenario.rubric,
                agentConfig,
                output,
            },
            cleanup,
        };
    }
    catch (err) {
        updateStatus(index, "failed", Date.now() - jobStart);
        // On unexpected errors, clean up immediately (nothing to verify)
        await cleanup();
        throw err;
    }
}
|
|
269
|
+
/**
 * Run async tasks with a concurrency limit.
 * Results are returned in the same order as the input tasks.
 * When limit is Infinity (or not a finite number), all tasks run
 * simultaneously (same as Promise.all).
 *
 * A finite limit is floored and clamped to at least 1. Without the clamp a
 * limit of 0, a negative limit, or a fraction below 1 would start no workers
 * at all, and the function would resolve with an array of holes without
 * running a single task.
 *
 * @param tasks array of zero-argument functions that each return a promise.
 * @param limit maximum number of tasks in flight at once.
 * @returns the task results, indexed like `tasks`.
 */
async function runWithConcurrency(tasks, limit) {
    if (tasks.length === 0)
        return [];
    const results = new Array(tasks.length);
    let nextIndex = 0;
    // Each worker repeatedly claims the next unclaimed index until none are left.
    async function worker() {
        while (nextIndex < tasks.length) {
            const i = nextIndex++;
            results[i] = await tasks[i]();
        }
    }
    const effectiveLimit = Number.isFinite(limit)
        ? Math.max(1, Math.floor(limit))
        : tasks.length;
    const workerCount = Math.min(effectiveLimit, tasks.length);
    await Promise.all(Array.from({ length: workerCount }, () => worker()));
    return results;
}
|
|
289
|
+
/**
 * Create a new uniquely named directory under the OS temp directory.
 *
 * @returns the path of the created directory; its name starts with `axis-`.
 */
function createWorkspace() {
    const prefix = path.join(os.tmpdir(), "axis-");
    const workspaceDir = fs.mkdtempSync(prefix);
    return workspaceDir;
}
|
|
292
|
+
/**
 * Build the allow-listed environment handed to jobs.
 *
 * The allow-list is `SYSTEM_VARS` followed by `config.env`, or by
 * `DEFAULT_PASS_ENV` when `config.env` is null or undefined. Only names that
 * are actually set in `process.env` are copied.
 *
 * @param config config object; only its optional `env` list is read.
 * @returns a new plain object mapping each allowed, defined variable to its value.
 */
function buildJobEnv(config) {
    const extraNames = config.env ?? DEFAULT_PASS_ENV;
    const filtered = {};
    for (const name of [...SYSTEM_VARS, ...extraNames]) {
        // Skip names the host process does not define.
        if (process.env[name] === undefined) {
            continue;
        }
        filtered[name] = process.env[name];
    }
    return filtered;
}
|
|
303
|
+
/**
 * Turn the config's agent list into `{ name, config }` pairs with unique names.
 *
 * A string entry is shorthand for `{ adapter: entry }`; object entries are
 * passed through by reference. The first agent using an adapter is named after
 * the adapter, and later ones get a numeric suffix (`codex`, `codex-2`, ...).
 *
 * A generated name can clash with another adapter's literal name (for example
 * adapters `a`, `a`, `a-2`). The suffix is therefore bumped until the name is
 * unused, so every returned name is distinct.
 *
 * @param agents iterable of adapter names or agent config objects.
 * @returns one `{ name, config }` record per entry, in input order.
 */
function normalizeAgents(agents) {
    const nameCounts = new Map();
    const usedNames = new Set();
    const result = [];
    for (const entry of agents) {
        const config = typeof entry === "string" ? { adapter: entry } : entry;
        const baseName = config.adapter;
        let count = (nameCounts.get(baseName) ?? 0) + 1;
        let name = count === 1 ? baseName : `${baseName}-${count}`;
        // Skip over names already claimed by an earlier agent.
        while (usedNames.has(name)) {
            count += 1;
            name = `${baseName}-${count}`;
        }
        nameCounts.set(baseName, count);
        usedNames.add(name);
        result.push({ name, config });
    }
    return result;
}
|
|
316
|
+
/**
 * Assemble the final run report.
 *
 * A result counts as completed when its `output.metadata.exitCode` is 0 and
 * `output.metadata.error` is falsy; every other result counts as failed.
 *
 * @param runStart epoch milliseconds at which the run began.
 * @param results  job results, each exposing `output.metadata`.
 * @returns `{ version, timestamp, durationMs, results, summary }`.
 */
function buildOutput(runStart, results) {
    const total = results.length;
    const completed = results.reduce((tally, entry) => {
        const { exitCode, error } = entry.output.metadata;
        return exitCode === 0 && !error ? tally + 1 : tally;
    }, 0);
    const summary = { total, completed, failed: total - completed };
    return {
        version: "0.1.0",
        timestamp: new Date().toISOString(),
        durationMs: Date.now() - runStart,
        results,
        summary,
    };
}
|
|
330
|
+
//# sourceMappingURL=runner.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"runner.js","sourceRoot":"","sources":["../../src/runner/runner.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,SAAS,CAAC;AAC9B,OAAO,KAAK,EAAE,MAAM,SAAS,CAAC;AAC9B,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAClC,OAAO,EAAE,UAAU,EAAE,iBAAiB,EAAE,MAAM,qBAAqB,CAAC;AACpE,OAAO,EAAE,UAAU,EAAE,eAAe,EAAE,MAAM,yBAAyB,CAAC;AACtE,OAAO,EAAE,uBAAuB,EAAE,MAAM,gBAAgB,CAAC;AAEzD,OAAO,EAAE,YAAY,IAAI,aAAa,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AAGhF,OAAO,EAAE,aAAa,EAAE,iBAAiB,EAAE,MAAM,uBAAuB,CAAC;AA4CzE,kEAAkE;AAClE,MAAM,WAAW,GAAG,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC;AAExE,gDAAgD;AAChD,MAAM,gBAAgB,GAAG,CAAC,mBAAmB,EAAE,eAAe,EAAE,gBAAgB,CAAC,CAAC;AAElF,MAAM,CAAC,KAAK,UAAU,GAAG,CAAC,UAAsB,EAAE;IAChD,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,IAAI,aAAa,CAAC;IAC/C,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAC5B,MAAM,EAAE,MAAM,EAAE,SAAS,EAAE,GAAG,MAAM,UAAU,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;IAEnE,2CAA2C;IAC3C,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;QACpB,KAAK,MAAM,CAAC,IAAI,EAAE,UAAU,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC;YACjE,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,UAAU,CAAC,CAAC;YACpD,MAAM,GAAG,GAAG,MAAM,MAAM,CAAC,OAAO,CAAC,CAAC;YAClC,MAAM,OAAO,GAAG,GAAG,CAAC,OAAO,IAAI,GAAG,CAAC,OAAO,CAAC;YAC3C,IAAI,CAAC,OAAO,IAAI,OAAO,OAAO,CAAC,GAAG,KAAK,UAAU,EAAE,CAAC;gBAClD,MAAM,IAAI,KAAK,CACb,mBAAmB,IAAI,QAAQ,UAAU,oCAAoC;oBAC3E,gDAAgD,CACnD,CAAC;YACJ,CAAC;YACD,eAAe,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;QACjC,CAAC;IACH,CAAC;IAED,0BAA0B;IAC1B,MAAM,IAAI,GAAU,EAAE,CAAC;IACvB,MAAM,MAAM,GAAG,eAAe,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;IAE9C,KAAK,MAAM,EAAE,IAAI,EAAE,SAAS,EAAE,MAAM,EAAE,WAAW,EAAE,IAAI,MAAM,EAAE,CAAC;QAC9D,IAAI,OAAO,CAAC,WAAW,EAAE,MAAM,IAAI,CAAC,OAAO,CAAC,WAAW,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;YAC5E,SAAS;QACX,CAAC;QAED,MAAM,SAAS,GAAG,MAAM,iBAAiB,CAAC,SAAS,EAAE,MAAM,CAAC,SAAS,EAAE,WAAW,CAAC,SAAS,CAAC,CAAC;QAE9F,MAAM,iBAAiB,GAAG,OAAO,CAAC,cAAc,EAAE,MAAM;YACtD,CAAC,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,cAAe,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,K
AAK,CAAC,CAAC,GAAG,CAAC,CAAC;YAC3E,CAAC,CAAC,SAAS,CAAC;QAEd,KAAK,MAAM,QAAQ,IAAI,iBAAiB,EAAE,CAAC;YACzC,8EAA8E;YAC9E,IAAI,QAAQ,CAAC,MAAM,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;gBAC5D,SAAS;YACX,CAAC;YACD,IAAI,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,IAAI,CAAC,MAAM,EAAE,SAAS,EAAE,WAAW,EAAE,QAAQ,EAAE,SAAS,EAAE,UAAU,EAAE,MAAM,EAAE,CAAC,CAAC;QACrG,CAAC;IACH,CAAC;IAED,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtB,MAAM,CAAC,IAAI,CAAC,qBAAqB,CAAC,CAAC;QACnC,OAAO,WAAW,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;IACnC,CAAC;IAED,uCAAuC;IACvC,MAAM,SAAS,GAAe,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;QAC/C,WAAW,EAAE,GAAG,CAAC,QAAQ,CAAC,GAAG;QAC7B,SAAS,EAAE,GAAG,CAAC,SAAS;QACxB,MAAM,EAAE,SAAsB;KAC/B,CAAC,CAAC,CAAC;IAEJ,MAAM,YAAY,GAAG,CAAC,KAAa,EAAE,MAAiB,EAAE,UAAmB,EAAE,EAAE;QAC7E,MAAM,KAAK,GAAsB,EAAE,MAAM,EAAE,UAAU,EAAE,CAAC;QACxD,qEAAqE;QACrE,gDAAgD;QAChD,IAAI,MAAM,KAAK,SAAS,IAAI,SAAS,CAAC,KAAK,CAAC,CAAC,YAAY,KAAK,SAAS,EAAE,CAAC;YACxE,KAAK,CAAC,YAAY,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAClC,CAAC;QACD,SAAS,CAAC,KAAK,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC,KAAK,CAAC,EAAE,GAAG,KAAK,EAAE,CAAC;QACrD,MAAM,CAAC,WAAW,EAAE,CAAC,SAAS,CAAC,CAAC;IAClC,CAAC,CAAC;IAEF;;;;OAIG;IACH,MAAM,YAAY,GAAG,CAAC,KAAa,EAAE,MAAc,EAAE,KAAK,GAAG,KAAK,EAAE,EAAE;QACpE,MAAM,IAAI,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC,UAAU,IAAI,CAAC,CAAC;QAC9C,MAAM,IAAI,GAAG,MAAM,GAAG,IAAI,CAAC;QAC3B,MAAM,UAAU,GAAG,KAAK,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,WAAW,CAAC;QAC1D,IAAI,CAAC,IAAI,IAAI,CAAC,UAAU;YAAE,OAAO;QACjC,SAAS,CAAC,KAAK,CAAC,GAAG;YACjB,GAAG,SAAS,CAAC,KAAK,CAAC;YACnB,UAAU,EAAE,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI;YAChC,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;SAC7C,CAAC;QACF,MAAM,CAAC,WAAW,EAAE,CAAC,SAAS,CAAC,CAAC;IAClC,CAAC,CAAC;IAEF,+CAA+C;IAC/C,MAAM,MAAM,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC;IAEnC,+EAA+E;IAC/E,yEAAyE;IACzE,yEAAyE;IACzE,mEAAmE;IACnE,MAAM,eAAe,GAAG,IAAI,GAAG,EAAU,CAAC;IAC1C,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,IAAI,eAAe,CAAC,GAAG,CAAC,GAAG,CAAC,WAAW,CAAC,OAAO,CAAC;YAAE,SAAS;QAC3D,e
AAe,CAAC,GAAG,CAAC,GAAG,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC;QAE7C,MAAM,OAAO,GAAG,UAAU,CAAC,GAAG,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC;QACpD,MAAM,QAAQ,GAAG,OAAO,CAAC,WAAW,EAAE,EAAE,IAAI,EAAE,CAAC;QAC/C,MAAM,OAAO,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC;QACvD,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvB,MAAM,IAAI,KAAK,CACb,QAAQ,GAAG,CAAC,WAAW,CAAC,OAAO,0CAA0C,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,IAAI,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG;gBAC7H,OAAO,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,OAAO,YAAY;gBAC5D,OAAO,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,uEAAuE,CACnH,CAAC;QACJ,CAAC;QAED,8CAA8C;QAC9C,IAAI,OAAO,CAAC,eAAe,EAAE,CAAC;YAC5B,MAAM,OAAO,CAAC,eAAe,CAAC,MAAM,CAAC,CAAC;QACxC,CAAC;IACH,CAAC;IAED,uDAAuD;IACvD,MAAM,eAAe,GAAG,IAAI,GAAG,EAAuB,CAAC;IACvD,KAAK,MAAM,KAAK,IAAI,MAAM,CAAC,MAAM,IAAI,EAAE,EAAE,CAAC;QACxC,MAAM,GAAG,GAAG,iBAAiB,CAAC,KAAK,CAAC,CAAC;QACrC,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC,GAAG,CAAC;YAAE,eAAe,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;IACjE,CAAC;IACD,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,KAAK,MAAM,KAAK,IAAI,GAAG,CAAC,WAAW,CAAC,MAAM,IAAI,EAAE,EAAE,CAAC;YACjD,MAAM,GAAG,GAAG,iBAAiB,CAAC,KAAK,CAAC,CAAC;YACrC,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC,GAAG,CAAC;gBAAE,eAAe,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;QACjE,CAAC;QACD,KAAK,MAAM,KAAK,IAAI,GAAG,CAAC,QAAQ,CAAC,MAAM,IAAI,EAAE,EAAE,CAAC;YAC9C,MAAM,GAAG,GAAG,iBAAiB,CAAC,KAAK,CAAC,CAAC;YACrC,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC,GAAG,CAAC;gBAAE,eAAe,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;QACjE,CAAC;IACH,CAAC;IAED,MAAM,gBAAgB,GAAG,IAAI,GAAG,EAAyB,CAAC;IAC1D,IAAI,eAAe,CAAC,IAAI,GAAG,CAAC,EAAE,CAAC;QAC7B,MAAM,OAAO,GAAG,CAAC,GAAG,eAAe,CAAC,MAAM,EAAE,CAAC,CAAC;QAC9C,MAAM,QAAQ,GAAG,MAAM,aAAa,CAAC;YACnC,OAAO,EAAE,OAAO;YAChB,SAAS;YACT,QAAQ,EAAE,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,OAAO,EAAE,cAAc,CAAC;YACvD,MAAM;YACN,OAAO,EAAE,OAAO,CAAC,aAAa;SAC/B,CAAC,CAAC;QACH,MAAM,IAAI,GAAG,CAAC,GAAG,eAAe,CAAC,IAAI,EAAE,CAAC,CAAC;QACzC,KAAK,IAAI,CAAC,GAAG,CAA
C,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACrC,gBAAgB,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;QAC7C,CAAC;IACH,CAAC;IAED,qEAAqE;IACrE,MAAM,CAAC,WAAW,EAAE,CAAC,SAAS,CAAC,CAAC;IAEhC,gDAAgD;IAChD,MAAM,WAAW,GAAG,OAAO,CAAC,WAAW,IAAI,QAAQ,CAAC;IACpD,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,KAAK,IAAI,EAAE;QACzC,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,GAAG,MAAM,UAAU,CAC1C,GAAG,EACH,MAAM,EACN,MAAM,EACN,YAAY,EACZ,YAAY,EACZ,gBAAgB,EAChB,OAAO,CAAC,eAAe,EACvB,OAAO,CAAC,KAAK,CACd,CAAC;QAEF,IAAI,CAAC;YACH,yEAAyE;YACzE,qEAAqE;YACrE,qDAAqD;YACrD,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC;gBACrB,MAAM,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;YACjC,CAAC;QACH,CAAC;gBAAS,CAAC;YACT,MAAM,OAAO,EAAE,CAAC;QAClB,CAAC;QACD,OAAO,MAAM,CAAC;IAChB,CAAC,CAAC,CAAC;IACH,MAAM,OAAO,GAAG,MAAM,kBAAkB,CAAC,KAAK,EAAE,WAAW,CAAC,CAAC;IAE7D,OAAO,WAAW,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;AACxC,CAAC;AAQD,KAAK,UAAU,UAAU,CACvB,GAAQ,EACR,GAA2B,EAC3B,MAAc,EACd,YAA6E,EAC7E,YAAsE,EACtE,gBAA4C,EAC5C,eAA0C,EAC1C,KAAe;IAEf,MAAM,EAAE,KAAK,EAAE,SAAS,EAAE,WAAW,EAAE,QAAQ,EAAE,UAAU,EAAE,GAAG,GAAG,CAAC;IACpE,MAAM,KAAK,GAAG,GAAG,QAAQ,CAAC,GAAG,KAAK,SAAS,GAAG,CAAC;IAC/C,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAE5B,2DAA2D;IAC3D,8DAA8D;IAC9D,MAAM,SAAS,GAAG,eAAe,EAAE,CAAC;IACpC,MAAM,OAAO,GAAG,UAAU,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC;IAChD,MAAM,gBAAgB,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC;IACjE,MAAM,MAAM,GAAG,EAAE,GAAG,gBAAgB,EAAE,GAAG,GAAG,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;IAChE,MAAM,CAAC,OAAO,EAAE,CAAC,IAAI,KAAK,gBAAgB,SAAS,EAAE,CAAC,CAAC;IAEvD,4DAA4D;IAC5D,eAAe,EAAE,CAAC,GAAG,EAAE;QACrB,IAAI,CAAC;YACH,EAAE,CAAC,MAAM,CAAC,SAAS,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;QACzD,CAAC;QAAC,MAAM,CAAC;YACP,iBAAiB;QACnB,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,MAAM,OAAO,GAAG,KAAK,IAAI,EAAE;QACzB,IAAI,QAAQ,CAAC,QAAQ,EAAE,MAAM,EAAE,CAAC;YAC9B,MAAM,CAAC,OAAO,EAAE,CAAC,IAAI,KAAK,uBAAuB,CAAC,CAAC;YACnD,MAAM,uBAAuB,CAAC,QAAQ,CAAC,QAAQ,EAAE,SAAS,EAAE,MAAM,CAAC,CAAC,KAAK,CAAC,CAAC,WAAW,EAAE,EAAE;gBACxF,MA
AM,CAAC,KAAK,CAAC,IAAI,KAAK,sBAAsB,WAAW,CAAC,WAAW,CAAC,EAAE,CAAC,CAAC;YAC1E,CAAC,CAAC,CAAC;QACL,CAAC;QACD,IAAI,CAAC;YACH,EAAE,CAAC,MAAM,CAAC,SAAS,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;YACvD,MAAM,CAAC,OAAO,EAAE,CAAC,IAAI,KAAK,2BAA2B,SAAS,EAAE,CAAC,CAAC;QACpE,CAAC;QAAC,MAAM,CAAC;YACP,MAAM,CAAC,OAAO,EAAE,CAAC,IAAI,KAAK,mCAAmC,SAAS,EAAE,CAAC,CAAC;QAC5E,CAAC;IACH,CAAC,CAAC;IAEF,QAAQ;IACR,IAAI,QAAQ,CAAC,KAAK,EAAE,MAAM,EAAE,CAAC;QAC3B,YAAY,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;QAC7B,MAAM,CAAC,OAAO,EAAE,CAAC,IAAI,KAAK,oBAAoB,CAAC,CAAC;QAChD,MAAM,uBAAuB,CAAC,QAAQ,CAAC,KAAK,EAAE,SAAS,EAAE,MAAM,CAAC,CAAC;IACnE,CAAC;IAED,IAAI,CAAC;QACH,YAAY,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;QAC/B,MAAM,CAAC,OAAO,EAAE,CAAC,IAAI,KAAK,sBAAsB,CAAC,CAAC;QAElD,2EAA2E;QAC3E,MAAM,YAAY,GAAG,CAAC,GAAG,CAAC,UAAU,CAAC,MAAM,IAAI,EAAE,CAAC,EAAE,GAAG,CAAC,WAAW,CAAC,MAAM,IAAI,EAAE,CAAC,EAAE,GAAG,CAAC,QAAQ,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC,CAAC;QAC/G,MAAM,UAAU,GAAG,IAAI,GAAG,EAAU,CAAC;QACrC,MAAM,WAAW,GAAoB,EAAE,CAAC;QACxC,KAAK,MAAM,KAAK,IAAI,YAAY,EAAE,CAAC;YACjC,MAAM,GAAG,GAAG,iBAAiB,CAAC,KAAK,CAAC,CAAC;YACrC,IAAI,UAAU,CAAC,GAAG,CAAC,GAAG,CAAC;gBAAE,SAAS;YAClC,UAAU,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YACpB,MAAM,QAAQ,GAAG,gBAAgB,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YAC3C,IAAI,QAAQ;gBAAE,WAAW,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAC3C,CAAC;QAED,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;YAC/B,MAAM,EAAE,QAAQ,CAAC,MAAM;YACvB,MAAM,EAAE,WAAW;YACnB,QAAQ;YACR,gBAAgB,EAAE,SAAS;YAC3B,GAAG,EAAE,MAAM;YACX,eAAe;YACf,gBAAgB,EAAE,CAAC,CAAC,KAAK;YACzB,UAAU,EAAE,UAAU,CAAC,WAAW;YAClC,cAAc,EAAE,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,SAAS;YAChE,eAAe,EAAE,CAAC,MAAM,EAAE,EAAE,CAAC,YAAY,CAAC,KAAK,EAAE,MAAM,CAAC;SACzD,CAAC,CAAC;QAEH,uEAAuE;QACvE,gEAAgE;QAChE,yEAAyE;QACzE,qEAAqE;QACrE,cAAc;QACd,MAAM,KAAK,GAAG,MAAM,CAAC,QAAQ,CAAC,UAAU,CAAC;QACzC,IAAI,KAAK,EAAE,CAAC;YACV,MAAM,SAAS,GAAG,CAAC,KAAK,CAAC,KAAK,IAAI,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,IAAI,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,cAAc,IAAI,CAAC,CAAC,CAAC;YACzF,YAAY,CAAC,KAAK,EAAE,SAAS,
EAAE,IAAI,CAAC,CAAC;QACvC,CAAC;QAED,MAAM,UAAU,GAAG,MAAM,CAAC,QAAQ,CAAC,UAAU,IAAI,IAAI,CAAC,GAAG,EAAE,GAAG,QAAQ,CAAC;QACvE,MAAM,MAAM,GAAG,MAAM,CAAC,QAAQ,CAAC,QAAQ,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC;QACzE,YAAY,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,MAAM,EAAE,UAAU,CAAC,CAAC;QAE5D,OAAO;YACL,MAAM,EAAE;gBACN,WAAW,EAAE,QAAQ,CAAC,GAAG;gBACzB,YAAY,EAAE,QAAQ,CAAC,IAAI;gBAC3B,SAAS;gBACT,MAAM,EAAE,QAAQ,CAAC,MAAM;gBACvB,MAAM,EAAE,QAAQ,CAAC,MAAM;gBACvB,WAAW;gBACX,MAAM;aACP;YACD,OAAO;SACR,CAAC;IACJ,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,YAAY,CAAC,KAAK,EAAE,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,QAAQ,CAAC,CAAC;QACrD,iEAAiE;QACjE,MAAM,OAAO,EAAE,CAAC;QAChB,MAAM,GAAG,CAAC;IACZ,CAAC;AACH,CAAC;AAED;;;;GAIG;AACH,KAAK,UAAU,kBAAkB,CAAI,KAA8B,EAAE,KAAa;IAChF,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAElC,MAAM,OAAO,GAAQ,IAAI,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;IAC7C,IAAI,SAAS,GAAG,CAAC,CAAC;IAElB,KAAK,UAAU,MAAM;QACnB,OAAO,SAAS,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC;YAChC,MAAM,CAAC,GAAG,SAAS,EAAE,CAAC;YACtB,OAAO,CAAC,CAAC,CAAC,GAAG,MAAM,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC;QAChC,CAAC;IACH,CAAC;IAED,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IAC1F,MAAM,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,WAAW,EAAE,EAAE,GAAG,EAAE,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;IACvE,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,SAAS,eAAe;IACtB,OAAO,EAAE,CAAC,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,MAAM,EAAE,EAAE,OAAO,CAAC,CAAC,CAAC;AACzD,CAAC;AAED,SAAS,WAAW,CAAC,MAAkB;IACrC,MAAM,WAAW,GAAG,MAAM,CAAC,GAAG,IAAI,gBAAgB,CAAC;IACnD,MAAM,WAAW,GAAG,CAAC,GAAG,WAAW,EAAE,GAAG,WAAW,CAAC,CAAC;IAErD,MAAM,GAAG,GAA2B,EAAE,CAAC;IACvC,KAAK,MAAM,GAAG,IAAI,WAAW,EAAE,CAAC;QAC9B,IAAI,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,KAAK,SAAS,EAAE,CAAC;YACnC,GAAG,CAAC,GAAG,CAAC,GAAG,OAAO,CAAC,GAAG,CAAC,GAAG,CAAE,CAAC;QAC/B,CAAC;IACH,CAAC;IAED,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,eAAe,CAAC,MAAgC;IACvD,MAAM,UAAU,GAAG,IAAI,GAAG,EAAkB,CAAC;IAC7C,MAAM,M
AAM,GAAiD,EAAE,CAAC;IAEhE,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,MAAM,MAAM,GAAgB,OAAO,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC;QAEnF,MAAM,QAAQ,GAAG,MAAM,CAAC,OAAO,CAAC;QAChC,MAAM,KAAK,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;QAClD,UAAU,CAAC,GAAG,CAAC,QAAQ,EAAE,KAAK,CAAC,CAAC;QAEhC,MAAM,IAAI,GAAG,KAAK,KAAK,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,GAAG,QAAQ,IAAI,KAAK,EAAE,CAAC;QAC7D,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC,CAAC;IAChC,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,SAAS,WAAW,CAAC,QAAgB,EAAE,OAAoB;IACzD,MAAM,SAAS,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC,QAAQ,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC;IAE7G,OAAO;QACL,OAAO,EAAE,OAAO;QAChB,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACnC,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,QAAQ;QACjC,OAAO;QACP,OAAO,EAAE;YACP,KAAK,EAAE,OAAO,CAAC,MAAM;YACrB,SAAS;YACT,MAAM,EAAE,OAAO,CAAC,MAAM,GAAG,SAAS;SACnC;KACF,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import type { InteractionCategory, Interaction, InteractionAudit, NecessityJudgment, CategoryScore } from "../types/scoring.js";
|
|
2
|
+
/** How each audit dimension contributes to a category's raw score. */
|
|
3
|
+
export declare const CATEGORY_DIMENSION_WEIGHTS: Record<InteractionCategory, Record<string, number>>;
|
|
4
|
+
/** Calibration parameters for the log-normal CDF mapping. */
|
|
5
|
+
export interface CalibrationParams {
|
|
6
|
+
/** The raw score (0-1) that maps to 50/100. */
|
|
7
|
+
median: number;
|
|
8
|
+
/** Controls the spread — lower = steeper curve. */
|
|
9
|
+
sigma: number;
|
|
10
|
+
}
|
|
11
|
+
/** Initial calibration — to be tuned from real-world data. */
|
|
12
|
+
export declare const DEFAULT_CALIBRATION: Record<InteractionCategory, CalibrationParams>;
|
|
13
|
+
export declare const DEFAULT_AUDIT_SCORES: {
|
|
14
|
+
readonly success: 1;
|
|
15
|
+
readonly speed: 1;
|
|
16
|
+
readonly weight: 1;
|
|
17
|
+
readonly contextRelevance: 1;
|
|
18
|
+
};
|
|
19
|
+
/**
|
|
20
|
+
* Approximation of the standard normal CDF using Abramowitz & Stegun.
|
|
21
|
+
*/
|
|
22
|
+
export declare function normalCDF(x: number): number;
|
|
23
|
+
/**
|
|
24
|
+
* Map a raw 0-1 score through a log-normal CDF to produce 0-100.
|
|
25
|
+
*
|
|
26
|
+
* The log-normal mapping ensures:
|
|
27
|
+
* - Improving from bad (20) to mediocre (50) is "easier" (smaller raw improvement needed)
|
|
28
|
+
* - Improving from good (80) to great (95) requires significant raw improvement
|
|
29
|
+
* - The mapping is S-shaped, rewarding getting out of the "bad" zone
|
|
30
|
+
*/
|
|
31
|
+
export declare function logNormalScore(rawScore: number, median: number, sigma: number): number;
|
|
32
|
+
/**
|
|
33
|
+
* Severity-weighted average: bad scores pull harder than good scores push.
|
|
34
|
+
*
|
|
35
|
+
* Each value's effective weight is `(1 - value)² + 1`. Perfect scores (1.0)
|
|
36
|
+
* get weight 1, while worse scores get progressively heavier, making outlier
|
|
37
|
+
* problems hard to hide behind many good results.
|
|
38
|
+
*
|
|
39
|
+
* @param values - Scores in the 0-1 range
|
|
40
|
+
*/
|
|
41
|
+
export declare function severityWeightedAverage(values: number[]): number;
|
|
42
|
+
/**
|
|
43
|
+
* Aggregate a single audit dimension across interactions in a category,
|
|
44
|
+
* weighted by each interaction's contextBytes (the "speed" dimension instead uses a severity-weighted average).
|
|
45
|
+
*/
|
|
46
|
+
export declare function aggregateDimension(audits: InteractionAudit[], interactions: Interaction[], dimension: "success" | "speed" | "weight" | "contextRelevance"): number;
|
|
47
|
+
/**
|
|
48
|
+
* Compute the full category score from audits, necessity judgment, and interactions.
|
|
49
|
+
* Applies dimension weights and log-normal mapping.
|
|
50
|
+
*/
|
|
51
|
+
export declare function computeCategoryScore(category: InteractionCategory, audits: InteractionAudit[], necessity: NecessityJudgment, interactions: Interaction[], calibration?: CalibrationParams): CategoryScore;
|
|
52
|
+
//# sourceMappingURL=category-score.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"category-score.d.ts","sourceRoot":"","sources":["../../src/scoring/category-score.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,mBAAmB,EACnB,WAAW,EACX,gBAAgB,EAChB,iBAAiB,EACjB,aAAa,EACd,MAAM,qBAAqB,CAAC;AAI7B,sEAAsE;AACtE,eAAO,MAAM,0BAA0B,EAAE,MAAM,CAAC,mBAAmB,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAsB1F,CAAC;AAIF,6DAA6D;AAC7D,MAAM,WAAW,iBAAiB;IAChC,+CAA+C;IAC/C,MAAM,EAAE,MAAM,CAAC;IACf,mDAAmD;IACnD,KAAK,EAAE,MAAM,CAAC;CACf;AAED,8DAA8D;AAC9D,eAAO,MAAM,mBAAmB,EAAE,MAAM,CAAC,mBAAmB,EAAE,iBAAiB,CAI9E,CAAC;AAKF,eAAO,MAAM,oBAAoB;;;;;CAKvB,CAAC;AAIX;;GAEG;AACH,wBAAgB,SAAS,CAAC,CAAC,EAAE,MAAM,GAAG,MAAM,CAc3C;AAED;;;;;;;GAOG;AACH,wBAAgB,cAAc,CAAC,QAAQ,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,MAAM,CAQtF;AAID;;;;;;;;GAQG;AACH,wBAAgB,uBAAuB,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,CAahE;AAED;;;GAGG;AACH,wBAAgB,kBAAkB,CAChC,MAAM,EAAE,gBAAgB,EAAE,EAC1B,YAAY,EAAE,WAAW,EAAE,EAC3B,SAAS,EAAE,SAAS,GAAG,OAAO,GAAG,QAAQ,GAAG,kBAAkB,GAC7D,MAAM,CAqBR;AAED;;;GAGG;AACH,wBAAgB,oBAAoB,CAClC,QAAQ,EAAE,mBAAmB,EAC7B,MAAM,EAAE,gBAAgB,EAAE,EAC1B,SAAS,EAAE,iBAAiB,EAC5B,YAAY,EAAE,WAAW,EAAE,EAC3B,WAAW,CAAC,EAAE,iBAAiB,GAC9B,aAAa,CAwCf"}
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
// --- Dimension weights per category ---
/**
 * Per-category weights that blend the five dimension scores into one raw
 * category score. Within each category the five weights add up to 1.0.
 *
 * Keys: `success`, `speed`, `weight`, `relevance`, `necessity`.
 */
export const CATEGORY_DIMENSION_WEIGHTS = {
    environment: {
        // env tool failures are critical
        success: 0.35,
        // speed matters less for env
        speed: 0.15,
        // output size is somewhat relevant
        weight: 0.15,
        // was the output useful
        relevance: 0.15,
        // did we need to do this at all
        necessity: 0.2,
    },
    service: {
        // service failures matter
        success: 0.25,
        // API latency
        speed: 0.15,
        // did we fetch too much / too little
        weight: 0.2,
        // was the API data actionable
        relevance: 0.2,
        // were these calls needed
        necessity: 0.2,
    },
    agent: {
        // agent rarely "fails" explicitly
        success: 0.15,
        // thinking time
        speed: 0.15,
        // was the reasoning concise
        weight: 0.2,
        // was the reasoning productive
        relevance: 0.25,
        // was the reasoning needed
        necessity: 0.25,
    },
};
|
|
26
|
+
/**
 * Initial calibration — to be tuned from real-world data.
 *
 * For each category, `median` and `sigma` are the values passed to
 * `logNormalScore` when `computeCategoryScore` is called without an explicit
 * calibration.
 */
export const DEFAULT_CALIBRATION = {
    environment: {
        median: 0.75,
        sigma: 0.5,
    },
    service: {
        median: 0.65,
        sigma: 0.5,
    },
    agent: {
        median: 0.7,
        sigma: 0.5,
    },
};
|
|
32
|
+
// --- Default scores for interactions the LLM missed ---
/**
 * Fallback value per audit dimension. `aggregateDimension` returns these when
 * it has no audits to aggregate: if nothing was evaluated, assume perfect —
 * only real issues lower the score.
 */
export const DEFAULT_AUDIT_SCORES = {
    success: 1.0,
    speed: 1.0,
    weight: 1.0,
    contextRelevance: 1.0,
};
|
|
40
|
+
// --- Math utilities ---
|
|
41
|
+
/**
 * Approximate the standard normal cumulative distribution function.
 *
 * Uses the Abramowitz & Stegun polynomial approximation of the error function,
 * evaluated at `|x| / sqrt(2)`, and mirrors the result for negative inputs.
 *
 * @param x - Point at which to evaluate the CDF
 * @returns Approximate probability in the 0-1 range
 */
export function normalCDF(x) {
    // Polynomial coefficients, highest order first, for Horner evaluation.
    const coefficients = [1.061405429, -1.453152027, 1.421413741, -0.284496736, 0.254829592];
    const p = 0.3275911;
    const magnitude = Math.abs(x) / Math.SQRT2;
    const t = 1.0 / (1.0 + p * magnitude);
    let poly = coefficients[0];
    for (let k = 1; k < coefficients.length; k++) {
        poly = poly * t + coefficients[k];
    }
    const erfApprox = 1.0 - poly * t * Math.exp(-magnitude * magnitude);
    // Negative inputs reflect the result around 0.5.
    const direction = x < 0 ? -1 : 1;
    return 0.5 * (1.0 + direction * erfApprox);
}
|
|
57
|
+
/**
 * Convert a raw 0-1 score into a 0-100 score via a log-normal CDF.
 *
 * Raw scores at or below 0 return 0 and raw scores at or above 1 return 100
 * without consulting the curve. In between, the distance between
 * `log(rawScore)` and `log(median)` is scaled by `sigma`, passed through
 * `normalCDF`, and rounded to a whole number of points.
 *
 * The log-normal mapping ensures:
 * - Improving from bad (20) to mediocre (50) is "easier" (smaller raw improvement needed)
 * - Improving from good (80) to great (95) requires significant raw improvement
 * - The mapping is S-shaped, rewarding getting out of the "bad" zone
 *
 * @param rawScore - Composite raw score, expected in the 0-1 range
 * @param median - Raw score that maps to 50
 * @param sigma - Spread of the curve; lower values give a steeper curve
 * @returns Integer score from 0 to 100
 */
export function logNormalScore(rawScore, median, sigma) {
    if (rawScore <= 0) {
        return 0;
    }
    if (rawScore >= 1) {
        return 100;
    }
    const logDistance = Math.log(rawScore) - Math.log(median);
    const percentile = normalCDF(logDistance / sigma);
    return Math.round(percentile * 100);
}
|
|
74
|
+
// --- Aggregation ---
|
|
75
|
+
/**
 * Average in which low scores count for more than high scores.
 *
 * Every value is weighted by `(1 - value)² + 1`: a perfect 1.0 carries weight
 * 1 and a 0.0 carries weight 2, so a few poor results are hard to hide behind
 * many good ones. An empty list yields 1.0.
 *
 * @param values - Scores in the 0-1 range
 * @returns The weighted mean of `values`, or 1.0 when `values` is empty
 */
export function severityWeightedAverage(values) {
    const count = values.length;
    if (count === 0) {
        return 1.0;
    }
    let numerator = 0;
    let denominator = 0;
    for (let index = 0; index < count; index++) {
        const score = values[index];
        const emphasis = (1 - score) ** 2 + 1;
        numerator += score * emphasis;
        denominator += emphasis;
    }
    return numerator / denominator;
}
|
|
96
|
+
/**
 * Collapse one audit dimension across a set of audits into a single 0-1 value.
 *
 * - With no audits, the dimension's entry in `DEFAULT_AUDIT_SCORES` is returned.
 * - `"speed"` is aggregated with `severityWeightedAverage` and ignores
 *   `interactions`.
 * - Every other dimension is a weighted mean in which each audit is weighted
 *   by the `contextBytes` of the interaction sharing its `id` (minimum weight
 *   1, also used when no matching interaction or `contextBytes` exists).
 *
 * @param audits - Audits to aggregate
 * @param interactions - Interactions used to look up `contextBytes` by `id`
 * @param dimension - One of "success", "speed", "weight", "contextRelevance"
 * @returns Aggregated score for the dimension
 */
export function aggregateDimension(audits, interactions, dimension) {
    if (audits.length === 0) {
        return DEFAULT_AUDIT_SCORES[dimension];
    }
    // Speed uses severity-weighted average — bad latency should pull harder
    if (dimension === "speed") {
        const speeds = audits.map((audit) => audit.speed);
        return severityWeightedAverage(speeds);
    }
    const interactionsById = new Map();
    interactions.forEach((interaction) => {
        interactionsById.set(interaction.id, interaction);
    });
    let weightTotal = 0;
    let scoreTotal = 0;
    for (const audit of audits) {
        const bytes = interactionsById.get(audit.id)?.contextBytes ?? 1;
        const auditWeight = Math.max(1, bytes);
        scoreTotal += audit[dimension] * auditWeight;
        weightTotal += auditWeight;
    }
    if (weightTotal > 0) {
        return scoreTotal / weightTotal;
    }
    return DEFAULT_AUDIT_SCORES[dimension];
}
|
|
118
|
+
/**
 * Build the score record for one interaction category.
 *
 * Audits and interactions are first narrowed to those whose `categories`
 * include `category`. The four audit dimensions are aggregated with
 * `aggregateDimension`, combined with `necessity.score` using the category's
 * entry in `CATEGORY_DIMENSION_WEIGHTS`, and the composite is mapped to 0-100
 * with `logNormalScore`.
 *
 * @param category - Category to score
 * @param audits - All audits; filtered to this category internally
 * @param necessity - Necessity judgment whose `score` feeds the composite
 * @param interactions - All interactions; filtered to this category internally
 * @param calibration - Optional override for `DEFAULT_CALIBRATION[category]`
 * @returns Score, counts, per-dimension percentages, the category's audits and
 *   the necessity judgment
 */
export function computeCategoryScore(category, audits, necessity, interactions, calibration) {
    const belongsToCategory = (item) => item.categories.includes(category);
    const categoryInteractions = interactions.filter(belongsToCategory);
    const categoryAudits = audits.filter(belongsToCategory);
    // Audits whose rationale is the literal "default" are not counted as audited.
    let auditedCount = 0;
    for (const audit of categoryAudits) {
        if (audit.rationale !== "default") {
            auditedCount += 1;
        }
    }
    const weights = CATEGORY_DIMENSION_WEIGHTS[category];
    const cal = calibration ?? DEFAULT_CALIBRATION[category];
    // Aggregate each dimension
    const rawFor = (dimension) => aggregateDimension(categoryAudits, categoryInteractions, dimension);
    const successRaw = rawFor("success");
    const speedRaw = rawFor("speed");
    const weightRaw = rawFor("weight");
    // The audit field is "contextRelevance"; its weight key is "relevance".
    const relevanceRaw = rawFor("contextRelevance");
    const necessityRaw = necessity.score;
    // Weighted composite raw score (0-1)
    const rawScore = successRaw * weights.success +
        speedRaw * weights.speed +
        weightRaw * weights.weight +
        relevanceRaw * weights.relevance +
        necessityRaw * weights.necessity;
    // Map through log-normal CDF
    const score = logNormalScore(rawScore, cal.median, cal.sigma);
    const toPercent = (fraction) => Math.round(fraction * 100);
    return {
        score,
        interactionCount: categoryInteractions.length,
        auditedCount,
        dimensions: {
            success: toPercent(successRaw),
            speed: toPercent(speedRaw),
            weight: toPercent(weightRaw),
            relevance: toPercent(relevanceRaw),
            necessity: toPercent(necessityRaw),
        },
        audits: categoryAudits,
        necessity,
    };
}
|
|
157
|
+
//# sourceMappingURL=category-score.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"category-score.js","sourceRoot":"","sources":["../../src/scoring/category-score.ts"],"names":[],"mappings":"AAQA,yCAAyC;AAEzC,sEAAsE;AACtE,MAAM,CAAC,MAAM,0BAA0B,GAAwD;IAC7F,WAAW,EAAE;QACX,OAAO,EAAE,IAAI,EAAE,iCAAiC;QAChD,KAAK,EAAE,IAAI,EAAE,6BAA6B;QAC1C,MAAM,EAAE,IAAI,EAAE,mCAAmC;QACjD,SAAS,EAAE,IAAI,EAAE,wBAAwB;QACzC,SAAS,EAAE,GAAG,EAAE,gCAAgC;KACjD;IACD,OAAO,EAAE;QACP,OAAO,EAAE,IAAI,EAAE,0BAA0B;QACzC,KAAK,EAAE,IAAI,EAAE,cAAc;QAC3B,MAAM,EAAE,GAAG,EAAE,qCAAqC;QAClD,SAAS,EAAE,GAAG,EAAE,8BAA8B;QAC9C,SAAS,EAAE,GAAG,EAAE,0BAA0B;KAC3C;IACD,KAAK,EAAE;QACL,OAAO,EAAE,IAAI,EAAE,kCAAkC;QACjD,KAAK,EAAE,IAAI,EAAE,gBAAgB;QAC7B,MAAM,EAAE,GAAG,EAAE,4BAA4B;QACzC,SAAS,EAAE,IAAI,EAAE,+BAA+B;QAChD,SAAS,EAAE,IAAI,EAAE,2BAA2B;KAC7C;CACF,CAAC;AAYF,8DAA8D;AAC9D,MAAM,CAAC,MAAM,mBAAmB,GAAmD;IACjF,WAAW,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,EAAE;IACzC,OAAO,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,EAAE;IACrC,KAAK,EAAE,EAAE,MAAM,EAAE,GAAG,EAAE,KAAK,EAAE,GAAG,EAAE;CACnC,CAAC;AAEF,yDAAyD;AACzD,+EAA+E;AAE/E,MAAM,CAAC,MAAM,oBAAoB,GAAG;IAClC,OAAO,EAAE,GAAG;IACZ,KAAK,EAAE,GAAG;IACV,MAAM,EAAE,GAAG;IACX,gBAAgB,EAAE,GAAG;CACb,CAAC;AAEX,yBAAyB;AAEzB;;GAEG;AACH,MAAM,UAAU,SAAS,CAAC,CAAS;IACjC,MAAM,EAAE,GAAG,WAAW,CAAC;IACvB,MAAM,EAAE,GAAG,CAAC,WAAW,CAAC;IACxB,MAAM,EAAE,GAAG,WAAW,CAAC;IACvB,MAAM,EAAE,GAAG,CAAC,WAAW,CAAC;IACxB,MAAM,EAAE,GAAG,WAAW,CAAC;IACvB,MAAM,CAAC,GAAG,SAAS,CAAC;IAEpB,MAAM,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAC5B,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC;IACtC,MAAM,CAAC,GAAG,GAAG,GAAG,CAAC,GAAG,GAAG,CAAC,GAAG,IAAI,CAAC,CAAC;IACjC,MAAM,CAAC,GAAG,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,GAAG,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,GAAG,IAAI,CAAC,CAAC;IAE5F,OAAO,GAAG,GAAG,CAAC,GAAG,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC;AAChC,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,cAAc,CAAC,QAAgB,EAAE,MAAc,EAAE,KAAa;IAC5E,IAAI,QAA
Q,IAAI,CAAC;QAAE,OAAO,CAAC,CAAC;IAC5B,IAAI,QAAQ,IAAI,CAAC;QAAE,OAAO,GAAG,CAAC;IAE9B,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,GAAG,KAAK,CAAC;IAC1D,MAAM,GAAG,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;IAEzB,OAAO,IAAI,CAAC,KAAK,CAAC,GAAG,GAAG,GAAG,CAAC,CAAC;AAC/B,CAAC;AAED,sBAAsB;AAEtB;;;;;;;;GAQG;AACH,MAAM,UAAU,uBAAuB,CAAC,MAAgB;IACtD,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,GAAG,CAAC;IAEpC,IAAI,WAAW,GAAG,CAAC,CAAC;IACpB,IAAI,WAAW,GAAG,CAAC,CAAC;IAEpB,KAAK,MAAM,CAAC,IAAI,MAAM,EAAE,CAAC;QACvB,MAAM,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC3B,WAAW,IAAI,CAAC,GAAG,CAAC,CAAC;QACrB,WAAW,IAAI,CAAC,CAAC;IACnB,CAAC;IAED,OAAO,WAAW,GAAG,WAAW,CAAC;AACnC,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,kBAAkB,CAChC,MAA0B,EAC1B,YAA2B,EAC3B,SAA8D;IAE9D,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,oBAAoB,CAAC,SAAS,CAAC,CAAC;IAEhE,wEAAwE;IACxE,IAAI,SAAS,KAAK,OAAO,EAAE,CAAC;QAC1B,OAAO,uBAAuB,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC;IAC7D,CAAC;IAED,MAAM,cAAc,GAAG,IAAI,GAAG,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;IAEnE,IAAI,WAAW,GAAG,CAAC,CAAC;IACpB,IAAI,WAAW,GAAG,CAAC,CAAC;IAEpB,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,MAAM,WAAW,GAAG,cAAc,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;QACjD,MAAM,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,WAAW,EAAE,YAAY,IAAI,CAAC,CAAC,CAAC;QACtD,WAAW,IAAI,KAAK,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;QACpC,WAAW,IAAI,CAAC,CAAC;IACnB,CAAC;IAED,OAAO,WAAW,GAAG,CAAC,CAAC,CAAC,CAAC,WAAW,GAAG,WAAW,CAAC,CAAC,CAAC,oBAAoB,CAAC,SAAS,CAAC,CAAC;AACvF,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,oBAAoB,CAClC,QAA6B,EAC7B,MAA0B,EAC1B,SAA4B,EAC5B,YAA2B,EAC3B,WAA+B;IAE/B,MAAM,oBAAoB,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAC;IACzF,MAAM,cAAc,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAC;IAC7E,MAAM,YAAY,GAAG,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,
KAAK,SAAS,CAAC,CAAC,MAAM,CAAC;IAEpF,MAAM,OAAO,GAAG,0BAA0B,CAAC,QAAQ,CAAC,CAAC;IACrD,MAAM,GAAG,GAAG,WAAW,IAAI,mBAAmB,CAAC,QAAQ,CAAC,CAAC;IAEzD,2BAA2B;IAC3B,MAAM,UAAU,GAAG,kBAAkB,CAAC,cAAc,EAAE,oBAAoB,EAAE,SAAS,CAAC,CAAC;IACvF,MAAM,QAAQ,GAAG,kBAAkB,CAAC,cAAc,EAAE,oBAAoB,EAAE,OAAO,CAAC,CAAC;IACnF,MAAM,SAAS,GAAG,kBAAkB,CAAC,cAAc,EAAE,oBAAoB,EAAE,QAAQ,CAAC,CAAC;IACrF,MAAM,YAAY,GAAG,kBAAkB,CAAC,cAAc,EAAE,oBAAoB,EAAE,kBAAkB,CAAC,CAAC;IAClG,MAAM,YAAY,GAAG,SAAS,CAAC,KAAK,CAAC;IAErC,qCAAqC;IACrC,MAAM,QAAQ,GACZ,UAAU,GAAG,OAAO,CAAC,OAAO;QAC5B,QAAQ,GAAG,OAAO,CAAC,KAAK;QACxB,SAAS,GAAG,OAAO,CAAC,MAAM;QAC1B,YAAY,GAAG,OAAO,CAAC,SAAS;QAChC,YAAY,GAAG,OAAO,CAAC,SAAS,CAAC;IAEnC,6BAA6B;IAC7B,MAAM,KAAK,GAAG,cAAc,CAAC,QAAQ,EAAE,GAAG,CAAC,MAAM,EAAE,GAAG,CAAC,KAAK,CAAC,CAAC;IAE9D,OAAO;QACL,KAAK;QACL,gBAAgB,EAAE,oBAAoB,CAAC,MAAM;QAC7C,YAAY;QACZ,UAAU,EAAE;YACV,OAAO,EAAE,IAAI,CAAC,KAAK,CAAC,UAAU,GAAG,GAAG,CAAC;YACrC,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,QAAQ,GAAG,GAAG,CAAC;YACjC,MAAM,EAAE,IAAI,CAAC,KAAK,CAAC,SAAS,GAAG,GAAG,CAAC;YACnC,SAAS,EAAE,IAAI,CAAC,KAAK,CAAC,YAAY,GAAG,GAAG,CAAC;YACzC,SAAS,EAAE,IAAI,CAAC,KAAK,CAAC,YAAY,GAAG,GAAG,CAAC;SAC1C;QACD,MAAM,EAAE,cAAc;QACtB,SAAS;KACV,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
import type { ScoringWeights } from "../types/config.js";
|
|
2
|
+
/** Validate that scoring weights are non-negative, not all zero, and sum to approximately 1.0 (within 0.01). */
|
|
3
|
+
export declare function validateWeights(weights: ScoringWeights): void;
|
|
4
|
+
export declare function computeComposite(goalAchievementScore: number, environmentScore: number, serviceScore: number, agentScore: number, weights: ScoringWeights): number;
|
|
5
|
+
//# sourceMappingURL=composite.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"composite.d.ts","sourceRoot":"","sources":["../../src/scoring/composite.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AAEzD,+EAA+E;AAC/E,wBAAgB,eAAe,CAAC,OAAO,EAAE,cAAc,GAAG,IAAI,CAkB7D;AAED,wBAAgB,gBAAgB,CAC9B,oBAAoB,EAAE,MAAM,EAC5B,gBAAgB,EAAE,MAAM,EACxB,YAAY,EAAE,MAAM,EACpB,UAAU,EAAE,MAAM,EAClB,OAAO,EAAE,cAAc,GACtB,MAAM,CAUR"}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
 * Validate the four composite scoring weights.
 *
 * Checks, in order, that no weight is negative, that every weight is a finite
 * number, that the weights are not all zero, and that they sum to 1.0 within
 * a tolerance of 0.01.
 *
 * @param weights - Object with `goal_achievement`, `environment`, `service`
 *   and `agent` weights
 * @throws Error describing the first rule that is violated
 */
export function validateWeights(weights) {
    const { goal_achievement, environment, service, agent } = weights;
    if (goal_achievement < 0 || environment < 0 || service < 0 || agent < 0) {
        throw new Error("Scoring weights must be non-negative");
    }
    // NaN, undefined (a missing key) and non-numeric values make every
    // comparison in this function false, so without this check they would pass
    // validation and turn the composite score into NaN.
    const invalid = [
        ["goal_achievement", goal_achievement],
        ["environment", environment],
        ["service", service],
        ["agent", agent],
    ].filter(([, value]) => typeof value !== "number" || !Number.isFinite(value));
    if (invalid.length > 0) {
        const details = invalid.map(([key, value]) => `${key}=${String(value)}`).join(", ");
        throw new Error(`Scoring weights must be finite numbers (got ${details})`);
    }
    const sum = goal_achievement + environment + service + agent;
    if (sum === 0) {
        throw new Error("Scoring weights must not all be zero");
    }
    if (Math.abs(sum - 1.0) > 0.01) {
        throw new Error(`Scoring weights must sum to 1.0 (got ${sum.toFixed(3)}). ` +
            `Received: goal_achievement=${goal_achievement}, environment=${environment}, service=${service}, agent=${agent}`);
    }
}
|
|
16
|
+
/**
 * Combine the goal-achievement score and the three category scores into one
 * rounded composite.
 *
 * The weights are checked with `validateWeights` first, so invalid weights
 * cause an exception. Each score is then multiplied by its weight, the four
 * products are added, and the total is rounded to the nearest integer.
 *
 * @param goalAchievementScore - Score paired with `weights.goal_achievement`
 * @param environmentScore - Score paired with `weights.environment`
 * @param serviceScore - Score paired with `weights.service`
 * @param agentScore - Score paired with `weights.agent`
 * @param weights - Scoring weights
 * @returns Rounded weighted sum of the four scores
 */
export function computeComposite(goalAchievementScore, environmentScore, serviceScore, agentScore, weights) {
    validateWeights(weights);
    const goalPart = goalAchievementScore * weights.goal_achievement;
    const environmentPart = environmentScore * weights.environment;
    const servicePart = serviceScore * weights.service;
    const agentPart = agentScore * weights.agent;
    return Math.round(goalPart + environmentPart + servicePart + agentPart);
}
|
|
24
|
+
//# sourceMappingURL=composite.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"composite.js","sourceRoot":"","sources":["../../src/scoring/composite.ts"],"names":[],"mappings":"AAEA,+EAA+E;AAC/E,MAAM,UAAU,eAAe,CAAC,OAAuB;IACrD,MAAM,EAAE,gBAAgB,EAAE,WAAW,EAAE,OAAO,EAAE,KAAK,EAAE,GAAG,OAAO,CAAC;IAElE,IAAI,gBAAgB,GAAG,CAAC,IAAI,WAAW,GAAG,CAAC,IAAI,OAAO,GAAG,CAAC,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;QACxE,MAAM,IAAI,KAAK,CAAC,sCAAsC,CAAC,CAAC;IAC1D,CAAC;IAED,MAAM,GAAG,GAAG,gBAAgB,GAAG,WAAW,GAAG,OAAO,GAAG,KAAK,CAAC;IAC7D,IAAI,GAAG,KAAK,CAAC,EAAE,CAAC;QACd,MAAM,IAAI,KAAK,CAAC,sCAAsC,CAAC,CAAC;IAC1D,CAAC;IAED,IAAI,IAAI,CAAC,GAAG,CAAC,GAAG,GAAG,GAAG,CAAC,GAAG,IAAI,EAAE,CAAC;QAC/B,MAAM,IAAI,KAAK,CACb,wCAAwC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK;YACzD,8BAA8B,gBAAgB,iBAAiB,WAAW,aAAa,OAAO,WAAW,KAAK,EAAE,CACnH,CAAC;IACJ,CAAC;AACH,CAAC;AAED,MAAM,UAAU,gBAAgB,CAC9B,oBAA4B,EAC5B,gBAAwB,EACxB,YAAoB,EACpB,UAAkB,EAClB,OAAuB;IAEvB,eAAe,CAAC,OAAO,CAAC,CAAC;IAEzB,MAAM,QAAQ,GACZ,oBAAoB,GAAG,OAAO,CAAC,gBAAgB;QAC/C,gBAAgB,GAAG,OAAO,CAAC,WAAW;QACtC,YAAY,GAAG,OAAO,CAAC,OAAO;QAC9B,UAAU,GAAG,OAAO,CAAC,KAAK,CAAC;IAE7B,OAAO,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;AAC9B,CAAC"}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import type { NormalizedTranscript } from "../transcript/types.js";
|
|
2
|
+
import type { RunResult } from "../types/output.js";
|
|
3
|
+
import type { DeepEvalResult, Interaction, SparseIndex, TriageResult } from "../types/scoring.js";
|
|
4
|
+
/**
|
|
5
|
+
* Run the deep evaluation LLM pass.
|
|
6
|
+
*
|
|
7
|
+
* Speed is always computed heuristically from interaction timing data (no LLM needed).
|
|
8
|
+
* The LLM evaluates ALL interactions for success, weight, contextRelevance,
|
|
9
|
+
* and necessity per category.
|
|
10
|
+
*/
|
|
11
|
+
export declare function runDeepEval(result: RunResult, sparseIndex: SparseIndex, triage: TriageResult, normalized: NormalizedTranscript): Promise<DeepEvalResult>;
|
|
12
|
+
/**
|
|
13
|
+
* Compute a heuristic speed score (0-1) for an interaction based on
|
|
14
|
+
* duration and category. Deterministic — no LLM needed.
|
|
15
|
+
*
|
|
16
|
+
* Thresholds are generous to account for system overhead
|
|
17
|
+
* (SDK roundtrips, sandbox setup, process spawning).
|
|
18
|
+
*/
|
|
19
|
+
export declare function computeHeuristicSpeed(interaction: Interaction): number;
|
|
20
|
+
/**
|
|
21
|
+
* Parse the deep eval LLM response.
|
|
22
|
+
* Fills in default audits for interactions the LLM missed and default necessity for missing categories.
|
|
23
|
+
*/
|
|
24
|
+
export declare function parseDeepEvalResponse(responseText: string, sparseIndex: SparseIndex): DeepEvalResult;
|
|
25
|
+
//# sourceMappingURL=deep-eval.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"deep-eval.d.ts","sourceRoot":"","sources":["../../src/scoring/deep-eval.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAmB,oBAAoB,EAAE,MAAM,wBAAwB,CAAC;AACpF,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AACpD,OAAO,KAAK,EACV,cAAc,EACd,WAAW,EAIX,WAAW,EACX,YAAY,EACb,MAAM,qBAAqB,CAAC;AAa7B;;;;;;GAMG;AACH,wBAAsB,WAAW,CAC/B,MAAM,EAAE,SAAS,EACjB,WAAW,EAAE,WAAW,EACxB,MAAM,EAAE,YAAY,EACpB,UAAU,EAAE,oBAAoB,GAC/B,OAAO,CAAC,cAAc,CAAC,CAoBzB;AAED;;;;;;GAMG;AACH,wBAAgB,qBAAqB,CAAC,WAAW,EAAE,WAAW,GAAG,MAAM,CAgCtE;AAqMD;;;GAGG;AACH,wBAAgB,qBAAqB,CAAC,YAAY,EAAE,MAAM,EAAE,WAAW,EAAE,WAAW,GAAG,cAAc,CA+CpG"}
|