agentera 0.0.0 → 3.0.0-dev.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -45
- package/bundle/.agentera-npx-bundle.json +4 -0
- package/bundle/references/adapters/cursor.md +213 -0
- package/bundle/references/adapters/opencode.md +530 -0
- package/bundle/references/adapters/package-manifest-interface-model.yaml +337 -0
- package/bundle/references/adapters/package-registry.yaml +247 -0
- package/bundle/references/adapters/package-surface-characterization.md +48 -0
- package/bundle/references/adapters/runtime-adapter-characterization.md +79 -0
- package/bundle/references/adapters/runtime-adapter-interface-model.yaml +200 -0
- package/bundle/references/adapters/runtime-adapter-registry.yaml +548 -0
- package/bundle/references/adapters/runtime-feature-parity.md +189 -0
- package/bundle/references/analysis/benchmark.md +267 -0
- package/bundle/references/analysis/startup-measurement-contract.yaml +424 -0
- package/bundle/references/artifacts/artifact-registry-interface-model.yaml +288 -0
- package/bundle/references/cli/agent-ready-state-contract.yaml +950 -0
- package/bundle/references/cli/app-lifecycle-vocabulary.yaml +233 -0
- package/bundle/references/cli/audience-namespace-cli-migration.yaml +355 -0
- package/bundle/references/cli/bundle-skill-vocabulary.yaml +278 -0
- package/bundle/references/cli/capability-instruction-contract.yaml +123 -0
- package/bundle/references/cli/capability-tool-classification.yaml +53 -0
- package/bundle/references/cli/routing-execution-vocabulary.yaml +281 -0
- package/bundle/references/cli/update-channels.yaml +120 -0
- package/bundle/references/cli/vocabulary-index.yaml +160 -0
- package/bundle/references/cli/vocabulary.md +562 -0
- package/bundle/references/meta/documentation-inventory.md +43 -0
- package/bundle/references/v1-section-mapping.md +47 -0
- package/bundle/registry.json +39 -0
- package/bundle/skills/agentera/.claude-plugin/plugin.json +27 -0
- package/bundle/skills/agentera/SKILL.md +470 -0
- package/bundle/skills/agentera/agents/dokumentera.toml +6 -0
- package/bundle/skills/agentera/agents/hej.toml +6 -0
- package/bundle/skills/agentera/agents/inspektera.toml +6 -0
- package/bundle/skills/agentera/agents/inspirera.toml +6 -0
- package/bundle/skills/agentera/agents/optimera.toml +6 -0
- package/bundle/skills/agentera/agents/orkestrera.toml +6 -0
- package/bundle/skills/agentera/agents/planera.toml +6 -0
- package/bundle/skills/agentera/agents/profilera.toml +6 -0
- package/bundle/skills/agentera/agents/realisera.toml +6 -0
- package/bundle/skills/agentera/agents/resonera.toml +6 -0
- package/bundle/skills/agentera/agents/visionera.toml +6 -0
- package/bundle/skills/agentera/agents/visualisera.toml +6 -0
- package/bundle/skills/agentera/capabilities/dokumentera/instructions.md +428 -0
- package/bundle/skills/agentera/capabilities/dokumentera/schemas/artifacts.yaml +73 -0
- package/bundle/skills/agentera/capabilities/dokumentera/schemas/exit.yaml +35 -0
- package/bundle/skills/agentera/capabilities/dokumentera/schemas/triggers.yaml +35 -0
- package/bundle/skills/agentera/capabilities/dokumentera/schemas/validation.yaml +139 -0
- package/bundle/skills/agentera/capabilities/hej/instructions.md +331 -0
- package/bundle/skills/agentera/capabilities/hej/schemas/artifacts.yaml +69 -0
- package/bundle/skills/agentera/capabilities/hej/schemas/exit.yaml +32 -0
- package/bundle/skills/agentera/capabilities/hej/schemas/triggers.yaml +58 -0
- package/bundle/skills/agentera/capabilities/hej/schemas/validation.yaml +55 -0
- package/bundle/skills/agentera/capabilities/inspektera/instructions.md +514 -0
- package/bundle/skills/agentera/capabilities/inspektera/schemas/artifacts.yaml +76 -0
- package/bundle/skills/agentera/capabilities/inspektera/schemas/exit.yaml +36 -0
- package/bundle/skills/agentera/capabilities/inspektera/schemas/triggers.yaml +38 -0
- package/bundle/skills/agentera/capabilities/inspektera/schemas/validation.yaml +113 -0
- package/bundle/skills/agentera/capabilities/inspirera/instructions.md +280 -0
- package/bundle/skills/agentera/capabilities/inspirera/schemas/artifacts.yaml +24 -0
- package/bundle/skills/agentera/capabilities/inspirera/schemas/exit.yaml +33 -0
- package/bundle/skills/agentera/capabilities/inspirera/schemas/triggers.yaml +34 -0
- package/bundle/skills/agentera/capabilities/inspirera/schemas/validation.yaml +58 -0
- package/bundle/skills/agentera/capabilities/optimera/instructions.md +437 -0
- package/bundle/skills/agentera/capabilities/optimera/schemas/artifacts.yaml +69 -0
- package/bundle/skills/agentera/capabilities/optimera/schemas/exit.yaml +35 -0
- package/bundle/skills/agentera/capabilities/optimera/schemas/triggers.yaml +39 -0
- package/bundle/skills/agentera/capabilities/optimera/schemas/validation.yaml +91 -0
- package/bundle/skills/agentera/capabilities/orkestrera/instructions.md +433 -0
- package/bundle/skills/agentera/capabilities/orkestrera/schemas/artifacts.yaml +64 -0
- package/bundle/skills/agentera/capabilities/orkestrera/schemas/exit.yaml +34 -0
- package/bundle/skills/agentera/capabilities/orkestrera/schemas/triggers.yaml +42 -0
- package/bundle/skills/agentera/capabilities/orkestrera/schemas/validation.yaml +107 -0
- package/bundle/skills/agentera/capabilities/planera/instructions.md +368 -0
- package/bundle/skills/agentera/capabilities/planera/schemas/artifacts.yaml +62 -0
- package/bundle/skills/agentera/capabilities/planera/schemas/exit.yaml +33 -0
- package/bundle/skills/agentera/capabilities/planera/schemas/triggers.yaml +34 -0
- package/bundle/skills/agentera/capabilities/planera/schemas/validation.yaml +61 -0
- package/bundle/skills/agentera/capabilities/profilera/instructions.md +419 -0
- package/bundle/skills/agentera/capabilities/profilera/schemas/artifacts.yaml +18 -0
- package/bundle/skills/agentera/capabilities/profilera/schemas/exit.yaml +34 -0
- package/bundle/skills/agentera/capabilities/profilera/schemas/triggers.yaml +45 -0
- package/bundle/skills/agentera/capabilities/profilera/schemas/validation.yaml +57 -0
- package/bundle/skills/agentera/capabilities/realisera/instructions.md +403 -0
- package/bundle/skills/agentera/capabilities/realisera/schemas/artifacts.yaml +80 -0
- package/bundle/skills/agentera/capabilities/realisera/schemas/exit.yaml +35 -0
- package/bundle/skills/agentera/capabilities/realisera/schemas/triggers.yaml +39 -0
- package/bundle/skills/agentera/capabilities/realisera/schemas/validation.yaml +110 -0
- package/bundle/skills/agentera/capabilities/resonera/instructions.md +329 -0
- package/bundle/skills/agentera/capabilities/resonera/schemas/artifacts.yaml +47 -0
- package/bundle/skills/agentera/capabilities/resonera/schemas/exit.yaml +35 -0
- package/bundle/skills/agentera/capabilities/resonera/schemas/triggers.yaml +46 -0
- package/bundle/skills/agentera/capabilities/resonera/schemas/validation.yaml +77 -0
- package/bundle/skills/agentera/capabilities/visionera/instructions.md +309 -0
- package/bundle/skills/agentera/capabilities/visionera/schemas/artifacts.yaml +57 -0
- package/bundle/skills/agentera/capabilities/visionera/schemas/exit.yaml +35 -0
- package/bundle/skills/agentera/capabilities/visionera/schemas/triggers.yaml +41 -0
- package/bundle/skills/agentera/capabilities/visionera/schemas/validation.yaml +74 -0
- package/bundle/skills/agentera/capabilities/visualisera/instructions.md +400 -0
- package/bundle/skills/agentera/capabilities/visualisera/schemas/artifacts.yaml +44 -0
- package/bundle/skills/agentera/capabilities/visualisera/schemas/exit.yaml +34 -0
- package/bundle/skills/agentera/capabilities/visualisera/schemas/triggers.yaml +33 -0
- package/bundle/skills/agentera/capabilities/visualisera/schemas/validation.yaml +80 -0
- package/bundle/skills/agentera/capability_schema_contract.yaml +385 -0
- package/bundle/skills/agentera/protocol.yaml +463 -0
- package/bundle/skills/agentera/references/contract.md +1039 -0
- package/bundle/skills/agentera/schemas/artifacts/changelog.yaml +60 -0
- package/bundle/skills/agentera/schemas/artifacts/decisions.yaml +461 -0
- package/bundle/skills/agentera/schemas/artifacts/design.yaml +55 -0
- package/bundle/skills/agentera/schemas/artifacts/docs.yaml +402 -0
- package/bundle/skills/agentera/schemas/artifacts/experiments.yaml +373 -0
- package/bundle/skills/agentera/schemas/artifacts/health.yaml +484 -0
- package/bundle/skills/agentera/schemas/artifacts/objective.yaml +399 -0
- package/bundle/skills/agentera/schemas/artifacts/plan.yaml +342 -0
- package/bundle/skills/agentera/schemas/artifacts/progress.yaml +325 -0
- package/bundle/skills/agentera/schemas/artifacts/todo.yaml +110 -0
- package/bundle/skills/agentera/schemas/artifacts/vision.yaml +262 -0
- package/bundle/skills/hej/.claude-plugin/plugin.json +6 -0
- package/bundle/skills/hej/SKILL.md +69 -0
- package/bundle/skills/hej/agents/hej.toml +11 -0
- package/bundle/skills/hej/agents/openai.yaml +8 -0
- package/dist/analytics/extractCorpus.js +1791 -0
- package/dist/analytics/extractCorpus.js.map +1 -0
- package/dist/analytics/usageStats.js +487 -0
- package/dist/analytics/usageStats.js.map +1 -0
- package/dist/bin/agentera.js +4 -0
- package/dist/bin/agentera.js.map +1 -0
- package/dist/cli/appContext.js +226 -0
- package/dist/cli/appContext.js.map +1 -0
- package/dist/cli/argvalidate.js +41 -0
- package/dist/cli/argvalidate.js.map +1 -0
- package/dist/cli/capabilityContext.js +2421 -0
- package/dist/cli/capabilityContext.js.map +1 -0
- package/dist/cli/commands/backfill.js +84 -0
- package/dist/cli/commands/backfill.js.map +1 -0
- package/dist/cli/commands/capability.js +44 -0
- package/dist/cli/commands/capability.js.map +1 -0
- package/dist/cli/commands/compact.js +148 -0
- package/dist/cli/commands/compact.js.map +1 -0
- package/dist/cli/commands/doctor.js +180 -0
- package/dist/cli/commands/doctor.js.map +1 -0
- package/dist/cli/commands/lint.js +179 -0
- package/dist/cli/commands/lint.js.map +1 -0
- package/dist/cli/commands/prime.js +545 -0
- package/dist/cli/commands/prime.js.map +1 -0
- package/dist/cli/commands/query.js +346 -0
- package/dist/cli/commands/query.js.map +1 -0
- package/dist/cli/commands/report.js +210 -0
- package/dist/cli/commands/report.js.map +1 -0
- package/dist/cli/commands/schema.js +306 -0
- package/dist/cli/commands/schema.js.map +1 -0
- package/dist/cli/commands/state.js +1012 -0
- package/dist/cli/commands/state.js.map +1 -0
- package/dist/cli/commands/upgrade.js +49 -0
- package/dist/cli/commands/upgrade.js.map +1 -0
- package/dist/cli/commands/validate.js +519 -0
- package/dist/cli/commands/validate.js.map +1 -0
- package/dist/cli/commands/verify.js +204 -0
- package/dist/cli/commands/verify.js.map +1 -0
- package/dist/cli/dispatch.js +962 -0
- package/dist/cli/dispatch.js.map +1 -0
- package/dist/cli/orientation.js +595 -0
- package/dist/cli/orientation.js.map +1 -0
- package/dist/cli/prime-blob.js +3 -0
- package/dist/cli/prime-blob.js.map +1 -0
- package/dist/cli/stateQuery.js +292 -0
- package/dist/cli/stateQuery.js.map +1 -0
- package/dist/cli/structured.js +18 -0
- package/dist/cli/structured.js.map +1 -0
- package/dist/core/difflib.js +274 -0
- package/dist/core/difflib.js.map +1 -0
- package/dist/core/git.js +43 -0
- package/dist/core/git.js.map +1 -0
- package/dist/core/paths.js +50 -0
- package/dist/core/paths.js.map +1 -0
- package/dist/core/pyjson.js +101 -0
- package/dist/core/pyjson.js.map +1 -0
- package/dist/core/sourceRoot.js +72 -0
- package/dist/core/sourceRoot.js.map +1 -0
- package/dist/core/toml.js +11 -0
- package/dist/core/toml.js.map +1 -0
- package/dist/core/yaml.js +25 -0
- package/dist/core/yaml.js.map +1 -0
- package/dist/eval/evalSkills.js +258 -0
- package/dist/eval/evalSkills.js.map +1 -0
- package/dist/eval/semanticEval.js +148 -0
- package/dist/eval/semanticEval.js.map +1 -0
- package/dist/eval/semanticFixtures.js +227 -0
- package/dist/eval/semanticFixtures.js.map +1 -0
- package/dist/hooks/common.js +160 -0
- package/dist/hooks/common.js.map +1 -0
- package/dist/hooks/compaction.js +935 -0
- package/dist/hooks/compaction.js.map +1 -0
- package/dist/hooks/cursorPreToolUse.js +19 -0
- package/dist/hooks/cursorPreToolUse.js.map +1 -0
- package/dist/hooks/cursorSessionStart.js +71 -0
- package/dist/hooks/cursorSessionStart.js.map +1 -0
- package/dist/hooks/sessionStart.js +209 -0
- package/dist/hooks/sessionStart.js.map +1 -0
- package/dist/hooks/sessionStop.js +212 -0
- package/dist/hooks/sessionStop.js.map +1 -0
- package/dist/hooks/validateArtifact.js +933 -0
- package/dist/hooks/validateArtifact.js.map +1 -0
- package/dist/registries/artifactRegistry.js +206 -0
- package/dist/registries/artifactRegistry.js.map +1 -0
- package/dist/registries/capabilityContract.js +310 -0
- package/dist/registries/capabilityContract.js.map +1 -0
- package/dist/registries/packageRegistry.js +641 -0
- package/dist/registries/packageRegistry.js.map +1 -0
- package/dist/registries/runtimeAdapterRegistry.js +315 -0
- package/dist/registries/runtimeAdapterRegistry.js.map +1 -0
- package/dist/setup/codex.js +1052 -0
- package/dist/setup/codex.js.map +1 -0
- package/dist/setup/copilot.js +227 -0
- package/dist/setup/copilot.js.map +1 -0
- package/dist/setup/cursor.js +127 -0
- package/dist/setup/cursor.js.map +1 -0
- package/dist/setup/doctor.js +1269 -0
- package/dist/setup/doctor.js.map +1 -0
- package/dist/state/installRoot.js +279 -0
- package/dist/state/installRoot.js.map +1 -0
- package/dist/state/progressCommit.js +289 -0
- package/dist/state/progressCommit.js.map +1 -0
- package/dist/state/startupAnalysis.js +1953 -0
- package/dist/state/startupAnalysis.js.map +1 -0
- package/dist/upgrade/appModel.js +189 -0
- package/dist/upgrade/appModel.js.map +1 -0
- package/dist/upgrade/channels.js +197 -0
- package/dist/upgrade/channels.js.map +1 -0
- package/dist/upgrade/compatibility.js +197 -0
- package/dist/upgrade/compatibility.js.map +1 -0
- package/dist/upgrade/doctor.js +368 -0
- package/dist/upgrade/doctor.js.map +1 -0
- package/dist/upgrade/migrateArtifactsV2ToV3.js +412 -0
- package/dist/upgrade/migrateArtifactsV2ToV3.js.map +1 -0
- package/dist/upgrade/upgradeCommands.js +40 -0
- package/dist/upgrade/upgradeCommands.js.map +1 -0
- package/dist/upgrade/upgradeOrchestrator.js +280 -0
- package/dist/upgrade/upgradeOrchestrator.js.map +1 -0
- package/dist/validate/appHomeContract.js +150 -0
- package/dist/validate/appHomeContract.js.map +1 -0
- package/dist/validate/capability.js +412 -0
- package/dist/validate/capability.js.map +1 -0
- package/dist/validate/crossCapability.js +145 -0
- package/dist/validate/crossCapability.js.map +1 -0
- package/dist/validate/lifecycleAdapters.js +772 -0
- package/dist/validate/lifecycleAdapters.js.map +1 -0
- package/dist/validate/selfAudit.js +107 -0
- package/dist/validate/selfAudit.js.map +1 -0
- package/package.json +28 -8
- package/LICENSE +0 -201
- package/bin/agentera.mjs +0 -50
- package/lib/exec.mjs +0 -116
- package/lib/resolve.mjs +0 -129
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
import fs from "node:fs";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { spawnSync } from "node:child_process";
|
|
4
|
+
import { resolveSourceRoot } from "../core/sourceRoot.js";
|
|
5
|
+
/**
 * Canned smoke-test prompt for each known skill, keyed by skill name.
 *
 * discoverSkills() looks a skill's name up here and falls back to a generic
 * "Invoke the <name> skill." prompt when there is no entry.
 */
export const TRIGGER_PROMPTS = {
  dokumentera: "Audit the documentation for this project.",
  hej: "Start a new session and give me a status briefing on this project.",
  inspektera: "Run a codebase health audit.",
  inspirera: "Analyze https://example.com and map patterns to this project.",
  optimera: "Optimize test suite execution time.",
  orkestrera: "Execute the next cycle of the current plan.",
  planera: "Plan the next feature for this project.",
  profilera: "Generate a decision profile from session history.",
  realisera: "Run one autonomous development cycle.",
  resonera: "Deliberate on whether to add a new dependency.",
  visionera: "Create a vision document for this project.",
  visualisera: "Create a visual identity system for this project.",
};
|
|
19
|
+
/** Default per-skill timeout in seconds (the runner multiplies it by 1000 for spawnSync). */
export const DEFAULT_TIMEOUT = 120;

/** Default value of the --parallel option. */
export const DEFAULT_PARALLEL = 1;
|
|
21
|
+
/**
 * Error that carries the exit code the CLI should return.
 *
 * detectRuntime() throws it after printing its own diagnostic, and main()
 * converts it back into a numeric return value.
 */
export class ExitError extends Error {
  /** Exit code to report to the caller. */
  code;

  /**
   * @param {number} code - Exit code to carry.
   * @param {string} [message=""] - Optional error message.
   */
  constructor(code, message = "") {
    super(message);
    this.code = code;
    this.name = "ExitError";
  }
}
|
|
29
|
+
/**
 * Locate an executable by asking the platform's lookup tool
 * (`where` on win32, `which` elsewhere).
 *
 * @param {string} name - Executable name to look up.
 * @returns {string|null} First line printed by the lookup tool when it exits
 *   with status 0, otherwise null.
 */
const realWhich = (name) => {
  const locator = process.platform === "win32" ? "where" : "which";
  const { status, stdout } = spawnSync(locator, [name], { encoding: "utf8" });
  if (status !== 0) {
    return null;
  }
  // The lookup tool may print several matches; only the first one is used.
  const [firstLine] = stdout.trim().split(/\r?\n/);
  return firstLine;
};
|
|
35
|
+
/**
 * Decide which runtime host CLI to drive.
 *
 * An explicit runtime (anything truthy other than "auto") is returned as-is;
 * only "cursor-agent" is checked against PATH first. Otherwise the first
 * available of claude, opencode, cursor-agent is chosen.
 *
 * @param {string|null|undefined} explicit - Requested runtime, or a falsy
 *   value / "auto" for auto-detection.
 * @param {{ which?: (name: string) => (string|null), err?: (line: string) => void }} [opts]
 *   Injectable PATH lookup and error-line sink (defaults: realWhich, stderr).
 * @returns {string} The runtime name.
 * @throws {ExitError} With code 1, after writing a diagnostic through `err`,
 *   when the required binary cannot be found.
 */
export function detectRuntime(explicit, opts = {}) {
  const which = opts.which ?? realWhich;
  const err = opts.err ?? ((line) => process.stderr.write(line + "\n"));

  const onPath = (binary) => which(binary) !== null;
  // The Cursor CLI is accepted under either binary name.
  const cursorOnPath = () => onPath("cursor-agent") || onPath("agent");

  const isExplicit = Boolean(explicit) && explicit !== "auto";
  if (isExplicit) {
    if (explicit === "cursor-agent" && !cursorOnPath()) {
      err("ERROR: 'cursor-agent' not found on PATH. Install Cursor Agent CLI " +
        "and ensure the binary is accessible.");
      throw new ExitError(1);
    }
    return explicit;
  }

  // All lookups run up front, in this order, before any choice is made.
  const candidates = [
    ["claude", onPath("claude")],
    ["opencode", onPath("opencode")],
    ["cursor-agent", cursorOnPath()],
  ];
  const winner = candidates.find(([, available]) => available);
  if (winner !== undefined) {
    return winner[0];
  }

  err("ERROR: Neither 'claude', 'opencode', nor 'cursor-agent' found on PATH. " +
    "Install a supported runtime host and ensure the binary is accessible.");
  throw new ExitError(1);
}
|
|
61
|
+
/**
 * Extract the `name:` value from a document's leading `---` frontmatter.
 *
 * Changes from the previous implementation:
 * - The closing delimiter is searched for on a line of its own first, so a
 *   `---` inside a value (e.g. a description) no longer truncates the block.
 *   If no such line exists, the first `---` after the opener is used, as before.
 * - Only spaces/tabs are skipped after `name:`, so an empty `name:` no longer
 *   captures the following line.
 * - One pair of matching surrounding quotes is stripped from the value.
 *
 * @param {string} text - Full file contents.
 * @returns {string|null} The trimmed name, or null when the text has no
 *   frontmatter, the frontmatter is unterminated, or it has no non-empty name.
 */
export function parseFrontmatterName(text) {
  if (!text.startsWith("---")) {
    return null;
  }

  const afterOpener = text.slice(3);
  const closingLine = /\r?\n---[ \t]*(?:\r?\n|$)/.exec(afterOpener);
  const end = closingLine ? closingLine.index + 3 : text.indexOf("---", 3);
  if (end === -1) {
    return null;
  }

  const block = text.slice(3, end);
  const match = /^name:[ \t]*(.+)/m.exec(block);
  if (!match) {
    return null;
  }

  const raw = match[1].trim();
  const quoted = /^(["'])(.*)\1$/.exec(raw);
  const value = quoted ? quoted[2].trim() : raw;
  return value === "" ? null : value;
}
|
|
73
|
+
/**
 * List the skills found under `<repoRoot>/skills`.
 *
 * A skill is any directory entry that contains a `SKILL.md`. Its name comes
 * from the file's frontmatter (falling back to the directory name) and its
 * prompt from TRIGGER_PROMPTS (falling back to a generic invocation prompt).
 *
 * @param {string} [repoRoot] - Root to search; defaults to resolveSourceRoot().
 * @returns {{ name: string, prompt: string }[]} Entries ordered by sorted
 *   directory name; empty when the skills directory is missing or not a
 *   directory.
 */
export function discoverSkills(repoRoot = resolveSourceRoot()) {
  const skillsDir = path.join(repoRoot, "skills");
  const isDirectory = fs.existsSync(skillsDir) && fs.statSync(skillsDir).isDirectory();
  if (!isDirectory) {
    return [];
  }

  const manifestPath = (dir) => path.join(skillsDir, dir, "SKILL.md");

  return fs
    .readdirSync(skillsDir)
    .filter((dir) => fs.existsSync(manifestPath(dir)))
    .sort()
    .map((dir) => {
      const text = fs.readFileSync(manifestPath(dir), "utf8");
      const name = parseFrontmatterName(text) ?? dir;
      const prompt = TRIGGER_PROMPTS[name] ?? `Invoke the ${name} skill.`;
      return { name, prompt };
    });
}
|
|
92
|
+
/**
 * Run a command synchronously and normalise the outcome.
 *
 * Changes from the previous implementation:
 * - A timeout is recognised by spawnSync's `ETIMEDOUT` error code. The old
 *   check (killed by SIGTERM with any error set) also fired when the child was
 *   killed for exceeding `maxBuffer`, misreporting large output as a timeout.
 * - When the process could not be run for another reason (for example the
 *   binary does not exist) and produced no stderr, the spawn error message is
 *   returned as `stderr` so callers have something to report.
 *
 * @param {string[]} cmd - Executable followed by its arguments.
 * @param {{ input?: string|null, timeout: number, cwd?: string }} opts -
 *   `timeout` is in seconds; `input` is written to the child's stdin.
 * @returns {{ status: number|null, stdout: string, stderr: string, timedOut: boolean }}
 */
const realRun = (cmd, opts) => {
  const [file, ...args] = cmd;
  const result = spawnSync(file, args, {
    input: opts.input ?? undefined,
    encoding: "utf8",
    timeout: opts.timeout * 1000,
    cwd: opts.cwd,
  });

  const failure = result.error;
  const timedOut = failure?.code === "ETIMEDOUT";

  let stderr = result.stderr ?? "";
  if (failure !== undefined && !timedOut && stderr.trim() === "") {
    stderr = failure.message;
  }

  return {
    status: result.status,
    stdout: result.stdout ?? "",
    stderr,
    timedOut,
  };
};
|
|
106
|
+
/**
 * Send one skill's smoke prompt to a runtime host CLI and classify the result.
 *
 * The run fails when the command times out, exits non-zero, or exits zero but
 * its combined stdout+stderr contains an error indicator.
 *
 * Fix: the traceback indicator used to end in `\b` right after the closing
 * parenthesis. In real output that parenthesis is followed by `:`, so there is
 * no word boundary and the pattern could never match; Python tracebacks were
 * reported as passes. The trailing `\b` is removed.
 *
 * @param {string} name - Skill name, echoed back in the result.
 * @param {string} prompt - Prompt text to send.
 * @param {number} timeout - Timeout in seconds, forwarded to the runner.
 * @param {string} [runtime="claude"] - "opencode", "cursor-agent", or anything
 *   else for the claude command line.
 * @param {{ which?: Function, run?: Function, repoRoot?: string }} [opts] -
 *   Injectable PATH lookup, command runner and working directory.
 * @returns {{ skill: string, status: "pass"|"fail", duration_s: number, error: string|null }}
 */
export function invokeSkill(name, prompt, timeout, runtime = "claude", opts = {}) {
  const which = opts.which ?? realWhich;
  const run = opts.run ?? realRun;
  const repoRoot = opts.repoRoot ?? resolveSourceRoot();

  let cmd;
  let stdinPrompt;
  if (runtime === "opencode") {
    cmd = ["opencode", "run", "--prompt"];
    stdinPrompt = prompt;
  } else if (runtime === "cursor-agent") {
    // This runtime takes the prompt as an argument rather than on stdin.
    const binary = which("cursor-agent") ? "cursor-agent" : "agent";
    cmd = [binary, "-p", "--output-format", "json", "--force", prompt];
    stdinPrompt = null;
  } else {
    cmd = ["claude", "-p", "--output-format", "json"];
    stdinPrompt = prompt;
  }

  const start = Date.now();
  const result = run(cmd, { input: stdinPrompt, timeout, cwd: repoRoot });
  const duration = (Date.now() - start) / 1000;

  let error = null;
  if (result.timedOut) {
    error = `Timed out after ${timeout}s`;
  } else if (result.status !== 0) {
    const stderrSnippet = (result.stderr ?? "").trim().slice(0, 300);
    const stdoutSnippet = (result.stdout ?? "").trim().slice(0, 300);
    const detail = stderrSnippet || stdoutSnippet || "(no output)";
    error = `Exit code ${result.status}: ${detail}`;
  } else {
    // A zero exit code is not trusted on its own: scan the output as well.
    const combined = (result.stdout ?? "") + (result.stderr ?? "");
    const errorIndicators = [
      /\bTraceback \(most recent call last\)/i,
      /\bError:\s/i,
      /\bfatal error\b/i,
      /"is_error"\s*:\s*true/i,
    ];
    for (const pattern of errorIndicators) {
      const m = pattern.exec(combined);
      if (m) {
        // Keep a little context around the first hit for the report.
        const snippet = combined
          .slice(Math.max(0, m.index - 20), m.index + m[0].length + 80)
          .trim();
        error = `Error indicator in output: ${snippet.slice(0, 300)}`;
        break;
      }
    }
  }

  return {
    skill: name,
    status: error === null ? "pass" : "fail",
    duration_s: Math.round(duration * 100) / 100,
    error,
  };
}
|
|
166
|
+
/**
 * Invoke every skill in order and collect one result per skill.
 *
 * @param {{ name: string, prompt: string }[]} skills - Skills to run.
 * @param {number} timeout - Per-skill timeout in seconds.
 * @param {number} _parallel - Accepted for interface compatibility; unused.
 * @param {string} [runtime="claude"] - Runtime passed to invokeSkill.
 * @param {object} [opts] - Options passed through to invokeSkill unchanged.
 * @returns {object[]} Results in the same order as `skills`.
 */
export function runSkills(skills, timeout, _parallel, runtime = "claude", opts = {}) {
  // Parallelism in the Python runner was a maintainer performance detail.
  // Running sequentially keeps the smoke semantics: one result per skill,
  // in the original order.
  const runOne = ({ name, prompt }) => invokeSkill(name, prompt, timeout, runtime, opts);
  return skills.map(runOne);
}
|
|
171
|
+
/**
 * Summarise skill results into the JSON report structure.
 *
 * @param {{ status: string }[]} results - Per-skill results.
 * @returns {{ timestamp: string, skills_tested: number, passed: number, failed: number, results: object[] }}
 *   `timestamp` is the current time in ISO-8601 with the milliseconds
 *   removed; every result whose status is not "pass" counts as failed.
 */
export function buildReport(results) {
  const total = results.length;
  const passed = results.reduce(
    (count, result) => (result.status === "pass" ? count + 1 : count),
    0,
  );
  const timestamp = new Date().toISOString().replace(/\.\d{3}Z$/, "Z");
  return {
    timestamp,
    skills_tested: total,
    passed,
    failed: total - passed,
    results,
  };
}
|
|
182
|
+
/**
 * Describe what a run would do without invoking anything.
 *
 * @param {{ name: string, prompt: string }[]} skills - Skills that would run.
 * @param {string} [runtime="claude"] - Selected runtime.
 * @param {string} [runtimeSource="auto-detected"] - How the runtime was chosen.
 * @returns {{ mode: "dry-run", runtime: string, skills: { name: string, prompt: string }[] }}
 *   `skills` holds fresh objects carrying only `name` and `prompt`.
 */
export function buildDryRun(skills, runtime = "claude", runtimeSource = "auto-detected") {
  const runtimeLabel = `${runtime} (${runtimeSource})`;
  const listed = skills.map(({ name, prompt }) => ({ name, prompt }));
  return {
    mode: "dry-run",
    runtime: runtimeLabel,
    skills: listed,
  };
}
|
|
189
|
+
/**
 * CLI entry point for the skill smoke runner.
 *
 * Discovers skills, optionally narrows to `--skill`, resolves the runtime,
 * then either prints a dry-run description or runs the skills and prints a
 * JSON report.
 *
 * @param {string[]} [argv=[]] - Command-line arguments (without node/script).
 * @param {object} [opts] - Injection points: `out`, `err`, `discoverSkills`,
 *   `detectRuntime`, `which`, `run`, `repoRoot`.
 * @returns {number} 0 on success or dry-run; 1 when no skills are found, the
 *   requested skill is unknown, or any skill failed; the ExitError code when
 *   runtime detection fails.
 */
export function main(argv = [], opts = {}) {
  const out = opts.out ?? ((line) => process.stdout.write(line + "\n"));
  const err = opts.err ?? ((line) => process.stderr.write(line + "\n"));
  const discover = opts.discoverSkills ?? (() => discoverSkills(opts.repoRoot));
  const detect = opts.detectRuntime ?? detectRuntime;
  const args = parseArgs(argv);

  const allSkills = discover();
  if (allSkills.length === 0) {
    err("ERROR: No SKILL.md files found under skills/");
    return 1;
  }

  // allSkills is non-empty here, so an empty selection can only come from
  // a --skill filter that matched nothing.
  const skillsToRun = args.skill
    ? allSkills.filter((s) => s.name === args.skill)
    : allSkills;
  if (skillsToRun.length === 0) {
    const known = allSkills.map((s) => s.name).join(", ");
    err(`ERROR: Unknown skill '${args.skill}'. Known skills: ${known}`);
    return 1;
  }

  const explicit = args.runtime !== "auto" ? args.runtime : null;
  let runtime;
  try {
    runtime = detect(explicit, { which: opts.which, err });
  } catch (exc) {
    // Detection has already printed its own diagnostic for an ExitError.
    if (!(exc instanceof ExitError)) {
      throw exc;
    }
    return exc.code;
  }
  const runtimeSource = explicit ? "--runtime flag" : "auto-detected";

  if (args.dry_run) {
    const plan = buildDryRun(skillsToRun, runtime, runtimeSource);
    out(JSON.stringify(plan, null, 2));
    return 0;
  }

  const { which, run, repoRoot } = opts;
  const results = runSkills(skillsToRun, args.timeout, args.parallel, runtime, {
    which,
    run,
    repoRoot,
  });
  const report = buildReport(results);
  out(JSON.stringify(report, null, 2));
  return report.failed === 0 ? 0 : 1;
}
|
|
235
|
+
/**
 * Parse the smoke runner's command-line options.
 *
 * Recognised options: `--skill <name>`, `--dry-run`, `--parallel <n>`,
 * `--timeout <seconds>`, `--runtime <name>`. Unknown tokens are ignored.
 *
 * Changes from the previous implementation:
 * - A missing or unusable `--timeout` / `--parallel` value keeps the default
 *   instead of storing NaN (a NaN timeout later makes spawnSync throw).
 * - A flag no longer consumes the following `--flag` as its value, so
 *   `--skill --dry-run` still enables dry-run.
 * - `--flag=value` is accepted in addition to `--flag value`.
 *
 * @param {string[]} argv - Raw arguments.
 * @returns {{ skill: string|null, dry_run: boolean, parallel: number, timeout: number, runtime: string }}
 */
export function parseArgs(argv) {
  const args = {
    skill: null,
    dry_run: false,
    parallel: DEFAULT_PARALLEL,
    timeout: DEFAULT_TIMEOUT,
    runtime: "auto",
  };

  // Parse a base-10 integer, keeping `fallback` when the text is missing,
  // not a number, or below `min`.
  const toInteger = (raw, fallback, min) => {
    const parsed = Number.parseInt(raw, 10);
    return Number.isInteger(parsed) && parsed >= min ? parsed : fallback;
  };

  for (let i = 0; i < argv.length; i++) {
    const token = argv[i];
    if (typeof token !== "string") {
      continue;
    }

    const eq = token.startsWith("--") ? token.indexOf("=") : -1;
    const flag = eq === -1 ? token : token.slice(0, eq);

    // Value for the current flag: inline after "=", else the next token
    // unless that token is itself a flag. Advances `i` when it consumes one.
    const takeValue = () => {
      if (eq !== -1) {
        return token.slice(eq + 1);
      }
      const next = argv[i + 1];
      if (typeof next !== "string" || next.startsWith("--")) {
        return undefined;
      }
      i += 1;
      return next;
    };

    switch (flag) {
      case "--skill":
        args.skill = takeValue() ?? args.skill;
        break;
      case "--dry-run":
        args.dry_run = true;
        break;
      case "--parallel":
        args.parallel = toInteger(takeValue(), args.parallel, 1);
        break;
      case "--timeout":
        // 0 is allowed: spawnSync treats a zero timeout as "no timeout".
        args.timeout = toInteger(takeValue(), args.timeout, 0);
        break;
      case "--runtime":
        args.runtime = takeValue() ?? args.runtime;
        break;
      default:
        break;
    }
  }
  return args;
}
|
|
258
|
+
//# sourceMappingURL=evalSkills.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"evalSkills.js","sourceRoot":"","sources":["../../src/eval/evalSkills.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,SAAS,CAAC;AACzB,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AAE/C,OAAO,EAAE,iBAAiB,EAAE,MAAM,uBAAuB,CAAC;AAU1D,MAAM,CAAC,MAAM,eAAe,GAA2B;IACrD,WAAW,EAAE,2CAA2C;IACxD,GAAG,EAAE,oEAAoE;IACzE,UAAU,EAAE,8BAA8B;IAC1C,SAAS,EAAE,+DAA+D;IAC1E,QAAQ,EAAE,qCAAqC;IAC/C,UAAU,EAAE,6CAA6C;IACzD,OAAO,EAAE,yCAAyC;IAClD,SAAS,EAAE,mDAAmD;IAC9D,SAAS,EAAE,uCAAuC;IAClD,QAAQ,EAAE,gDAAgD;IAC1D,SAAS,EAAE,4CAA4C;IACvD,WAAW,EAAE,mDAAmD;CACjE,CAAC;AAEF,MAAM,CAAC,MAAM,eAAe,GAAG,GAAG,CAAC;AACnC,MAAM,CAAC,MAAM,gBAAgB,GAAG,CAAC,CAAC;AAElC,MAAM,OAAO,SAAU,SAAQ,KAAK;IAClC,IAAI,CAAS;IACb,YAAY,IAAY,EAAE,OAAO,GAAG,EAAE;QACpC,KAAK,CAAC,OAAO,CAAC,CAAC;QACf,IAAI,CAAC,IAAI,GAAG,WAAW,CAAC;QACxB,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;IACnB,CAAC;CACF;AAGD,MAAM,SAAS,GAAY,CAAC,IAAI,EAAE,EAAE;IAClC,MAAM,MAAM,GAAG,SAAS,CAAC,OAAO,CAAC,QAAQ,KAAK,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC,IAAI,CAAC,EAAE;QACjF,QAAQ,EAAE,MAAM;KACjB,CAAC,CAAC;IACH,OAAO,MAAM,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;AAC7E,CAAC,CAAC;AAEF,MAAM,UAAU,aAAa,CAC3B,QAAuB,EACvB,OAA0D,EAAE;IAE5D,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,IAAI,SAAS,CAAC;IACtC,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,IAAI,CAAC,CAAC,IAAY,EAAE,EAAE,CAAC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,GAAG,IAAI,CAAC,CAAC,CAAC;IAC9E,IAAI,QAAQ,IAAI,QAAQ,KAAK,MAAM,EAAE,CAAC;QACpC,IAAI,QAAQ,KAAK,cAAc,EAAE,CAAC;YAChC,IAAI,KAAK,CAAC,cAAc,CAAC,KAAK,IAAI,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,IAAI,EAAE,CAAC;gBAC9D,GAAG,CACD,oEAAoE;oBAClE,sCAAsC,CACzC,CAAC;gBACF,MAAM,IAAI,SAAS,CAAC,CAAC,CAAC,CAAC;YACzB,CAAC;QACH,CAAC;QACD,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED,MAAM,SAAS,GAAG,KAAK,CAAC,QAAQ,CAAC,KAAK,IAAI,CAAC;IAC3C,MAAM,WAAW,GAAG,KAAK,CAAC,UAAU,CAAC,KAAK,IAAI,CAAC;IAC/C,MAAM,cAAc,GAAG,KAAK,CAAC,cAAc,CAAC,KAAK,IAAI,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,IAAI,CAAC;IAEjF,IAAI,SAAS;QAAE,OAAO,QAA
Q,CAAC;IAC/B,IAAI,WAAW;QAAE,OAAO,UAAU,CAAC;IACnC,IAAI,cAAc;QAAE,OAAO,cAAc,CAAC;IAE1C,GAAG,CACD,yEAAyE;QACvE,uEAAuE,CAC1E,CAAC;IACF,MAAM,IAAI,SAAS,CAAC,CAAC,CAAC,CAAC;AACzB,CAAC;AAED,MAAM,UAAU,oBAAoB,CAAC,IAAY;IAC/C,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,EAAE,CAAC;QAC5B,OAAO,IAAI,CAAC;IACd,CAAC;IACD,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;IACnC,IAAI,GAAG,KAAK,CAAC,CAAC,EAAE,CAAC;QACf,OAAO,IAAI,CAAC;IACd,CAAC;IACD,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;IACjC,MAAM,CAAC,GAAG,gBAAgB,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACvC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;AAChC,CAAC;AAED,MAAM,UAAU,cAAc,CAAC,WAAmB,iBAAiB,EAAE;IACnE,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;IAChD,MAAM,OAAO,GAA4C,EAAE,CAAC;IAC5D,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,SAAS,CAAC,IAAI,CAAC,EAAE,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,WAAW,EAAE,EAAE,CAAC;QACvE,OAAO,OAAO,CAAC;IACjB,CAAC;IACD,MAAM,IAAI,GAAG,EAAE;SACZ,WAAW,CAAC,SAAS,CAAC;SACtB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC,EAAE,UAAU,CAAC,CAAC,CAAC;SACjE,IAAI,EAAE,CAAC;IACV,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,GAAG,EAAE,UAAU,CAAC,CAAC;QACtD,MAAM,IAAI,GAAG,EAAE,CAAC,YAAY,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;QAC9C,MAAM,IAAI,GAAG,oBAAoB,CAAC,IAAI,CAAC,IAAI,GAAG,CAAC;QAC/C,MAAM,MAAM,GAAG,eAAe,CAAC,IAAI,CAAC,IAAI,cAAc,IAAI,SAAS,CAAC;QACpE,OAAO,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC,CAAC;IACjC,CAAC;IACD,OAAO,OAAO,CAAC;AACjB,CAAC;AAOD,MAAM,OAAO,GAAU,CAAC,GAAG,EAAE,IAAI,EAAE,EAAE;IACnC,MAAM,MAAM,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE;QAC7C,KAAK,EAAE,IAAI,CAAC,KAAK,IAAI,SAAS;QAC9B,QAAQ,EAAE,MAAM;QAChB,OAAO,EAAE,IAAI,CAAC,OAAO,GAAG,IAAI;QAC5B,GAAG,EAAE,IAAI,CAAC,GAAG;KACd,CAAC,CAAC;IACH,OAAO;QACL,MAAM,EAAE,MAAM,CAAC,MAAM;QACrB,MAAM,EAAE,MAAM,CAAC,MAAM,IAAI,EAAE;QAC3B,MAAM,EAAE,MAAM,CAAC,MAAM,IAAI,EAAE;QAC3B,QAAQ,EAAE,MAAM,CAAC,MAAM,KAAK,SAAS,IAAI,
MAAM,CAAC,KAAK,KAAK,SAAS;KACpE,CAAC;AACJ,CAAC,CAAC;AAEF,MAAM,UAAU,WAAW,CACzB,IAAY,EACZ,MAAc,EACd,OAAe,EACf,OAAO,GAAG,QAAQ,EAClB,OAA4D,EAAE;IAE9D,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,IAAI,SAAS,CAAC;IACtC,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,IAAI,OAAO,CAAC;IAChC,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,IAAI,iBAAiB,EAAE,CAAC;IAEtD,IAAI,GAAa,CAAC;IAClB,IAAI,WAA0B,CAAC;IAC/B,IAAI,OAAO,KAAK,UAAU,EAAE,CAAC;QAC3B,GAAG,GAAG,CAAC,UAAU,EAAE,KAAK,EAAE,UAAU,CAAC,CAAC;QACtC,WAAW,GAAG,MAAM,CAAC;IACvB,CAAC;SAAM,IAAI,OAAO,KAAK,cAAc,EAAE,CAAC;QACtC,MAAM,MAAM,GAAG,KAAK,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,OAAO,CAAC;QAChE,GAAG,GAAG,CAAC,MAAM,EAAE,IAAI,EAAE,iBAAiB,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,CAAC,CAAC;QACnE,WAAW,GAAG,IAAI,CAAC;IACrB,CAAC;SAAM,CAAC;QACN,GAAG,GAAG,CAAC,QAAQ,EAAE,IAAI,EAAE,iBAAiB,EAAE,MAAM,CAAC,CAAC;QAClD,WAAW,GAAG,MAAM,CAAC;IACvB,CAAC;IAED,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IACzB,IAAI,KAAK,GAAkB,IAAI,CAAC;IAChC,IAAI,MAAM,GAAG,MAAM,CAAC;IAEpB,MAAM,MAAM,GAAG,GAAG,CAAC,GAAG,EAAE,EAAE,KAAK,EAAE,WAAW,EAAE,OAAO,EAAE,GAAG,EAAE,QAAQ,EAAE,CAAC,CAAC;IACxE,MAAM,QAAQ,GAAG,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC,GAAG,IAAI,CAAC;IAE7C,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;QACpB,KAAK,GAAG,mBAAmB,OAAO,GAAG,CAAC;QACtC,MAAM,GAAG,MAAM,CAAC;IAClB,CAAC;SAAM,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC/B,MAAM,aAAa,GAAG,CAAC,MAAM,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;QACjE,MAAM,aAAa,GAAG,CAAC,MAAM,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;QACjE,MAAM,MAAM,GAAG,aAAa,IAAI,aAAa,IAAI,aAAa,CAAC;QAC/D,KAAK,GAAG,aAAa,MAAM,CAAC,MAAM,KAAK,MAAM,EAAE,CAAC;QAChD,MAAM,GAAG,MAAM,CAAC;IAClB,CAAC;SAAM,CAAC;QACN,MAAM,QAAQ,GAAG,CAAC,MAAM,CAAC,MAAM,IAAI,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC;QAC/D,MAAM,eAAe,GAAG;YACtB,0CAA0C;YAC1C,aAAa;YACb,kBAAkB;YAClB,wBAAwB;SACzB,CAAC;QACF,KAAK,MAAM,OAAO,IAAI,eAAe,EAAE,CAAC;YACtC,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YACjC,IAAI,CAAC,EAAE,CAAC;gBACN,MAAM,OAAO,GAAG,QAAQ,CAAC,KAAK,CAAC,IAA
I,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,KAAK,GAAG,EAAE,CAAC,EAAE,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;gBAC7F,KAAK,GAAG,8BAA8B,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC;gBAC9D,MAAM,GAAG,MAAM,CAAC;gBAChB,MAAM;YACR,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO;QACL,KAAK,EAAE,IAAI;QACX,MAAM;QACN,UAAU,EAAE,IAAI,CAAC,KAAK,CAAC,QAAQ,GAAG,GAAG,CAAC,GAAG,GAAG;QAC5C,KAAK;KACN,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,SAAS,CACvB,MAA+C,EAC/C,OAAe,EACf,SAAiB,EACjB,OAAO,GAAG,QAAQ,EAClB,OAA4D,EAAE;IAE9D,0EAA0E;IAC1E,+EAA+E;IAC/E,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,WAAW,CAAC,KAAK,CAAC,IAAI,EAAE,KAAK,CAAC,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,CAAC,CAAC,CAAC;AAC9F,CAAC;AAED,MAAM,UAAU,WAAW,CAAC,OAAe;IACzC,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,MAAM,CAAC,CAAC,MAAM,CAAC;IACjE,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,MAAM,CAAC;IACvC,OAAO;QACL,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,WAAW,EAAE,GAAG,CAAC;QAC7D,aAAa,EAAE,OAAO,CAAC,MAAM;QAC7B,MAAM;QACN,MAAM;QACN,OAAO;KACR,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,WAAW,CACzB,MAA+C,EAC/C,OAAO,GAAG,QAAQ,EAClB,aAAa,GAAG,eAAe;IAE/B,OAAO;QACL,IAAI,EAAE,SAAS;QACf,OAAO,EAAE,GAAG,OAAO,KAAK,aAAa,GAAG;QACxC,MAAM,EAAE,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC;KAChE,CAAC;AACJ,CAAC;AAoBD,MAAM,UAAU,IAAI,CAAC,OAAiB,EAAE,EAAE,OAAwB,EAAE;IAClE,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,IAAI,CAAC,CAAC,IAAY,EAAE,EAAE,CAAC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,GAAG,IAAI,CAAC,CAAC,CAAC;IAC9E,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,IAAI,CAAC,CAAC,IAAY,EAAE,EAAE,CAAC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,GAAG,IAAI,CAAC,CAAC,CAAC;IAC9E,MAAM,QAAQ,GAAG,IAAI,CAAC,cAAc,IAAI,CAAC,GAAG,EAAE,CAAC,cAAc,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC;IAC9E,MAAM,MAAM,GAAG,IAAI,CAAC,aAAa,IAAI,aAAa,CAAC;IACnD,MAAM,IAAI,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAE7B,MAAM,SAAS,GAAG,QAAQ,EAAE,CAAC;IAC7B,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC3B,GAAG,CAAC,8CAA8C,CAAC,CAAC
;QACpD,OAAO,CAAC,CAAC;IACX,CAAC;IAED,IAAI,WAAW,GAAG,SAAS,CAAC;IAC5B,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;QACf,MAAM,OAAO,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,IAAI,CAAC,KAAK,CAAC,CAAC;QAC/D,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACzB,MAAM,KAAK,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACtD,GAAG,CAAC,yBAAyB,IAAI,CAAC,KAAK,oBAAoB,KAAK,EAAE,CAAC,CAAC;YACpE,OAAO,CAAC,CAAC;QACX,CAAC;QACD,WAAW,GAAG,OAAO,CAAC;IACxB,CAAC;IAED,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,KAAK,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC;IAC/D,IAAI,OAAe,CAAC;IACpB,IAAI,CAAC;QACH,OAAO,GAAG,MAAM,CAAC,QAAQ,EAAE,EAAE,KAAK,EAAE,IAAI,CAAC,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC;IACzD,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,IAAI,GAAG,YAAY,SAAS,EAAE,CAAC;YAC7B,OAAO,GAAG,CAAC,IAAI,CAAC;QAClB,CAAC;QACD,MAAM,GAAG,CAAC;IACZ,CAAC;IACD,MAAM,aAAa,GAAG,QAAQ,CAAC,CAAC,CAAC,gBAAgB,CAAC,CAAC,CAAC,eAAe,CAAC;IAEpE,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;QACjB,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,WAAW,CAAC,WAAW,EAAE,OAAO,EAAE,aAAa,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;QAC/E,OAAO,CAAC,CAAC;IACX,CAAC;IAED,MAAM,OAAO,GAAG,SAAS,CAAC,WAAW,EAAE,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,QAAQ,EAAE,OAAO,EAAE;QAC3E,KAAK,EAAE,IAAI,CAAC,KAAK;QACjB,GAAG,EAAE,IAAI,CAAC,GAAG;QACb,QAAQ,EAAE,IAAI,CAAC,QAAQ;KACxB,CAAC,CAAC;IACH,MAAM,MAAM,GAAG,WAAW,CAAC,OAAO,CAAC,CAAC;IACpC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IACrC,OAAO,MAAM,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;AACrC,CAAC;AAED,MAAM,UAAU,SAAS,CAAC,IAAc;IACtC,MAAM,IAAI,GAAa;QACrB,KAAK,EAAE,IAAI;QACX,OAAO,EAAE,KAAK;QACd,QAAQ,EAAE,gBAAgB;QAC1B,OAAO,EAAE,eAAe;QACxB,OAAO,EAAE,MAAM;KAChB,CAAC;IACF,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,MAAM,GAAG,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;QACpB,IAAI,GAAG,KAAK,SAAS;YAAE,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;aACzC,IAAI,GAAG,KAAK,WAAW;YAAE,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;aAC7C,IAAI,GAAG,KAAK,YAAY;Y
AAE,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;aAClE,IAAI,GAAG,KAAK,WAAW;YAAE,IAAI,CAAC,OAAO,GAAG,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;aAChE,IAAI,GAAG,KAAK,WAAW;YAAE,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;IACzD,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC"}
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
import { loadFixture, pyRepr } from "./semanticFixtures.js";
|
|
2
|
+
import { pyJsonIndent } from "../core/pyjson.js";
|
|
3
|
+
/**
 * Current time as an ISO-8601 UTC string with the millisecond part removed.
 *
 * @returns {string} Timestamp shaped like "2024-01-02T03:04:05Z".
 */
function utcTimestamp() {
    const isoWithMillis = new Date().toISOString();
    // toISOString() always ends in ".mmmZ"; keep only the trailing "Z".
    return isoWithMillis.replace(/\.\d{3}Z$/, "Z");
}
|
|
6
|
+
/**
 * Run every fact checker against an already-loaded fixture and summarise the outcome.
 *
 * @param {object} fixture - Parsed fixture handed to each checker.
 * @param {string} [source="<fixture>"] - Label reported in the `fixture` field.
 * @returns {{fixture: string, status: string, checked_facts: object[], failing_fact: (object|null)}}
 *   `status` is "fail" when any checked fact failed; `failing_fact` is the first such fact.
 */
export function evaluateFixture(fixture, source = "<fixture>") {
    // Copy only the reported fields so extra checker properties never leak out.
    const toRecord = ({ fact, status, detail }) => ({ fact, status, detail });

    const collected = checkOutputFacts(fixture).concat(
        checkToolTraceFacts(fixture),
        checkSeededArtifactFacts(fixture),
    );
    const firstFailure = collected.find(({ status }) => status === "fail");

    if (firstFailure === undefined) {
        return {
            fixture: source,
            status: "pass",
            checked_facts: collected.map(toRecord),
            failing_fact: null,
        };
    }
    return {
        fixture: source,
        status: "fail",
        checked_facts: collected.map(toRecord),
        failing_fact: toRecord(firstFailure),
    };
}
|
|
20
|
+
/**
 * Load a fixture from disk and evaluate it.
 *
 * When loading reports contract errors, the result is a failing report with a
 * single `fixture_contract` fact whose detail joins the errors with "; ".
 *
 * @param {string} path - Fixture file path; also used as the report label.
 * @returns {object} Evaluation result in the same shape `evaluateFixture` returns.
 */
export function evaluateFixtureFile(path) {
    const [fixture, errors] = loadFixture(path);
    if (errors.length === 0) {
        return evaluateFixture(fixture, path);
    }
    const contractFailure = {
        fact: "fixture_contract",
        status: "fail",
        detail: errors.join("; "),
    };
    return {
        fixture: path,
        status: "fail",
        checked_facts: [contractFailure],
        failing_fact: contractFailure,
    };
}
|
|
33
|
+
/**
 * Aggregate per-fixture results into a single report object.
 *
 * @param {Array<{status: string}>} results - One evaluation result per fixture.
 * @returns {{timestamp: string, status: string, fixtures_tested: number, passed: number, failed: number, results: Array}}
 *   `status` is "pass" only when no result failed.
 */
export function buildReport(results) {
    const passed = results.reduce(
        (count, result) => (result.status === "pass" ? count + 1 : count),
        0,
    );
    const failed = results.length - passed;
    const status = failed === 0 ? "pass" : "fail";
    return {
        timestamp: utcTimestamp(),
        status,
        fixtures_tested: results.length,
        passed,
        failed,
        results,
    };
}
|
|
45
|
+
/**
 * Check the captured output against the fixture's required/forbidden substrings.
 *
 * Facts are emitted in this order: every `required_output` entry, every
 * `forbidden_output` entry, then a passing `artifact_expectations.writes`
 * fact when the fixture declares `writes: "none"`.
 *
 * @param {object} fixture - Reads `fixture.expectedFacts` and `fixture.capturedOutput`.
 * @returns {Array<{fact: string, status: string, detail: string}>}
 */
function checkOutputFacts(fixture) {
    const results = [];
    const wanted = fixture.expectedFacts;

    // Each rule: list key, then [status, wording] for "found" and for "not found".
    const rules = [
        { key: "required_output", hit: ["pass", "contains"], miss: ["fail", "does not contain"] },
        { key: "forbidden_output", hit: ["fail", "contains forbidden"], miss: ["pass", "omits forbidden"] },
    ];
    for (const { key, hit, miss } of rules) {
        (wanted[key] ?? []).forEach((needle, position) => {
            const [status, wording] = fixture.capturedOutput.includes(needle) ? hit : miss;
            results.push({
                fact: `${key}[${position}]`,
                status,
                detail: `captured output ${wording} ${pyRepr(needle)}`,
            });
        });
    }

    if (wanted.artifact_expectations?.writes === "none") {
        results.push({
            fact: "artifact_expectations.writes",
            status: "pass",
            detail: "fixture expects no artifact writes; offline eval performed none",
        });
    }
    return results;
}
|
|
74
|
+
/**
 * Check the recorded tool calls against the fixture's tool expectations.
 *
 * Required/forbidden entries are matched as substrings of all calls joined by
 * newlines; `tool_call_counts` counts the individual calls containing each key.
 *
 * @param {object} fixture - Reads `fixture.expectedFacts` and `fixture.toolTrace.calls`.
 * @returns {Array<{fact: string, status: string, detail: string}>}
 */
function checkToolTraceFacts(fixture) {
    const results = [];
    const wanted = fixture.expectedFacts;
    const recorded = fixture.toolTrace.calls ?? [];
    const transcript = recorded.join("\n");

    // Each rule: list key, then [status, wording] for "found" and for "not found".
    const rules = [
        { key: "required_tool_calls", hit: ["pass", "contains"], miss: ["fail", "does not contain"] },
        { key: "forbidden_tool_calls", hit: ["fail", "contains forbidden"], miss: ["pass", "omits forbidden"] },
    ];
    for (const { key, hit, miss } of rules) {
        (wanted[key] ?? []).forEach((needle, position) => {
            const [status, wording] = transcript.includes(needle) ? hit : miss;
            results.push({
                fact: `${key}[${position}]`,
                status,
                detail: `tool trace ${wording} ${pyRepr(needle)}`,
            });
        });
    }

    Object.entries(wanted.tool_call_counts ?? {}).forEach(([needle, expectedCount]) => {
        const actual = recorded.filter((call) => call.includes(needle)).length;
        const status = actual === expectedCount ? "pass" : "fail";
        results.push({
            fact: `tool_call_counts[${needle}]`,
            status,
            detail: `tool trace contains ${actual} call(s) matching ${pyRepr(needle)}; expected ${expectedCount}`,
        });
    });
    return results;
}
|
|
105
|
+
/**
 * Verify each `required_artifacts` expectation against the files seeded in the fixture.
 *
 * One fact named `required_artifacts[i]` is produced per expectation. An
 * expectation fails when it is not an object, names no path, names a path that
 * was not seeded, has a malformed `contains` list, or lists text the seeded
 * content lacks (only the first missing text is reported).
 *
 * @param {object} fixture - Reads `fixture.seededState.files` and
 *   `fixture.expectedFacts.required_artifacts`.
 * @returns {Array<{fact: string, status: string, detail: string}>}
 */
function checkSeededArtifactFacts(fixture) {
    // A Map rather than a plain object: a required path such as "constructor"
    // or "__proto__" must not resolve to an inherited Object.prototype member.
    const seededContent = new Map();
    for (const item of fixture.seededState.files ?? []) {
        const usable = item
            && typeof item === "object"
            && typeof item.path === "string"
            && typeof item.content === "string";
        if (usable) {
            seededContent.set(item.path, item.content);
        }
    }

    const fail = (detail) => ({ status: "fail", detail });

    // Decide the outcome for a single expectation.
    const judge = (expected) => {
        if (!expected || typeof expected !== "object" || Array.isArray(expected)) {
            return fail("expected artifact fact must be an object");
        }
        const { path } = expected;
        if (typeof path !== "string" || !path.trim()) {
            return fail("expected artifact fact must name a path");
        }
        const content = seededContent.get(path);
        if (content === undefined) {
            return fail(`seeded artifact ${pyRepr(path)} is missing`);
        }
        // `required_artifacts` entries are checked here defensively, so a
        // malformed `contains` becomes a failing fact instead of a TypeError.
        const wanted = expected.contains ?? [];
        if (!Array.isArray(wanted) || !wanted.every((text) => typeof text === "string")) {
            return fail("expected artifact fact contains must be a list of strings");
        }
        const absent = wanted.find((text) => !content.includes(text));
        if (absent !== undefined) {
            return fail(`seeded artifact ${pyRepr(path)} lacks ${pyRepr(absent)}`);
        }
        return { status: "pass", detail: `seeded artifact ${pyRepr(path)} matched` };
    };

    const facts = [];
    (fixture.expectedFacts.required_artifacts ?? []).forEach((expected, index) => {
        const { status, detail } = judge(expected);
        facts.push({ fact: `required_artifacts[${index}]`, status, detail });
    });
    return facts;
}
|
|
139
|
+
/**
 * Command-line entry point: evaluate each fixture path and print one JSON report.
 *
 * @param {string[]} [argv=[]] - Fixture file paths.
 * @param {(chunk: string) => unknown} [out] - Sink for the report text; defaults to stdout.
 * @returns {number} 2 when no paths are given (usage goes to stderr), 0 when the
 *   report status is "pass", otherwise 1.
 */
export function main(argv = [], out = (l) => process.stdout.write(l)) {
    if (argv.length > 0) {
        const results = argv.map((path) => evaluateFixtureFile(path));
        const report = buildReport(results);
        out(`${pyJsonIndent(report)}\n`);
        return report.status === "pass" ? 0 : 1;
    }
    process.stderr.write("usage: semantic_eval <fixtures...>\n");
    return 2;
}
|
|
148
|
+
//# sourceMappingURL=semanticEval.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"semanticEval.js","sourceRoot":"","sources":["../../src/eval/semanticEval.ts"],"names":[],"mappings":"AAAA,OAAO,EAAmB,WAAW,EAAE,MAAM,EAAE,MAAM,uBAAuB,CAAC;AAC7E,OAAO,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC;AAejD,SAAS,YAAY;IACnB,OAAO,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,WAAW,EAAE,GAAG,CAAC,CAAC;AAC5D,CAAC;AAED,MAAM,UAAU,eAAe,CAAC,OAAwB,EAAE,MAAM,GAAG,WAAW;IAC5E,MAAM,KAAK,GAAG;QACZ,GAAG,gBAAgB,CAAC,OAAO,CAAC;QAC5B,GAAG,mBAAmB,CAAC,OAAO,CAAC;QAC/B,GAAG,wBAAwB,CAAC,OAAO,CAAC;KACrC,CAAC;IACF,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,MAAM,KAAK,MAAM,CAAC,IAAI,IAAI,CAAC;IACrE,OAAO;QACL,OAAO,EAAE,MAAM;QACf,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM;QACjC,aAAa,EAAE,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC;QACvF,YAAY,EAAE,OAAO,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,OAAO,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,IAAI;KACtG,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,mBAAmB,CAAC,IAAY;IAC9C,MAAM,CAAC,OAAO,EAAE,MAAM,CAAC,GAAG,WAAW,CAAC,IAAI,CAAC,CAAC;IAC5C,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACtB,MAAM,OAAO,GAAgB,EAAE,IAAI,EAAE,kBAAkB,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;QACrG,OAAO;YACL,OAAO,EAAE,IAAI;YACb,MAAM,EAAE,MAAM;YACd,aAAa,EAAE,CAAC,OAAO,CAAC;YACxB,YAAY,EAAE,OAAO;SACtB,CAAC;IACJ,CAAC;IACD,OAAO,eAAe,CAAC,OAAQ,EAAE,IAAI,CAAC,CAAC;AACzC,CAAC;AAED,MAAM,UAAU,WAAW,CAAC,OAAe;IACzC,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,CAAC,MAAM,KAAK,MAAM,CAAC,CAAC,MAAM,CAAC;IAC3E,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,MAAM,CAAC;IACvC,OAAO;QACL,SAAS,EAAE,YAAY,EAAE;QACzB,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM;QAChC,eAAe,EAAE,OAAO,CAAC,MAAM;QAC/B,MAAM;QACN,MAAM;QACN,OAAO;KACR,CAAC;AACJ,CAAC;AAED,SAAS,gBAAgB,CAAC,OAAwB;IAChD,MAAM,KAAK,GAAkB,EAAE,CAAC;IAChC,MAAM,QAAQ,GAAG,OAAO,CAAC,aAAa,CAAC;IACvC,CAAC,QAAQ,CAAC,eAAe,IAAI,EAAE
,CAAC,CAAC,OAAO,CAAC,CAAC,IAAY,EAAE,KAAa,EAAE,EAAE;QACvE,MAAM,KAAK,GAAG,OAAO,CAAC,cAAc,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;QACpD,KAAK,CAAC,IAAI,CAAC;YACT,IAAI,EAAE,mBAAmB,KAAK,GAAG;YACjC,MAAM,EAAE,KAAK,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM;YAC/B,MAAM,EAAE,mBAAmB,KAAK,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,kBAAkB,IAAI,MAAM,CAAC,IAAI,CAAC,EAAE;SACrF,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IACH,CAAC,QAAQ,CAAC,gBAAgB,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,IAAY,EAAE,KAAa,EAAE,EAAE;QACxE,MAAM,KAAK,GAAG,OAAO,CAAC,cAAc,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;QACpD,KAAK,CAAC,IAAI,CAAC;YACT,IAAI,EAAE,oBAAoB,KAAK,GAAG;YAClC,MAAM,EAAE,KAAK,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM;YAC/B,MAAM,EAAE,mBAAmB,KAAK,CAAC,CAAC,CAAC,oBAAoB,CAAC,CAAC,CAAC,iBAAiB,IAAI,MAAM,CAAC,IAAI,CAAC,EAAE;SAC9F,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,MAAM,MAAM,GAAG,QAAQ,CAAC,qBAAqB,EAAE,MAAM,CAAC;IACtD,IAAI,MAAM,KAAK,MAAM,EAAE,CAAC;QACtB,KAAK,CAAC,IAAI,CAAC;YACT,IAAI,EAAE,8BAA8B;YACpC,MAAM,EAAE,MAAM;YACd,MAAM,EAAE,iEAAiE;SAC1E,CAAC,CAAC;IACL,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,mBAAmB,CAAC,OAAwB;IACnD,MAAM,KAAK,GAAkB,EAAE,CAAC;IAChC,MAAM,QAAQ,GAAG,OAAO,CAAC,aAAa,CAAC;IACvC,MAAM,QAAQ,GAAa,OAAO,CAAC,SAAS,CAAC,KAAK,IAAI,EAAE,CAAC;IACzD,MAAM,KAAK,GAAG,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAClC,CAAC,QAAQ,CAAC,mBAAmB,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,IAAY,EAAE,KAAa,EAAE,EAAE;QAC3E,MAAM,KAAK,GAAG,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;QACnC,KAAK,CAAC,IAAI,CAAC;YACT,IAAI,EAAE,uBAAuB,KAAK,GAAG;YACrC,MAAM,EAAE,KAAK,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM;YAC/B,MAAM,EAAE,cAAc,KAAK,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,kBAAkB,IAAI,MAAM,CAAC,IAAI,CAAC,EAAE;SAChF,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IACH,CAAC,QAAQ,CAAC,oBAAoB,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,IAAY,EAAE,KAAa,EAAE,EAAE;QAC5E,MAAM,KAAK,GAAG,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;QACnC,KAAK,CAAC,IAAI,CAAC;YACT,IAAI,EAAE,wBAAwB,KAAK,GAAG;YACtC,MAAM,EAAE,KAAK,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM;YAC/B,MAAM,EAAE,cAAc,KAAK,CAAC,CAAC,CAAC,oBAAoB,CAAC,CAAC,CAAC,iBAAiB,IAAI,MAAM,CAAC,IAAI,CAAC,EAAE;SACzF,
CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IACH,KAAK,MAAM,CAAC,IAAI,EAAE,aAAa,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,QAAQ,CAAC,gBAAgB,IAAI,EAAE,CAAC,EAAE,CAAC;QACpF,MAAM,MAAM,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC;QACrE,KAAK,CAAC,IAAI,CAAC;YACT,IAAI,EAAE,oBAAoB,IAAI,GAAG;YACjC,MAAM,EAAE,MAAM,KAAK,aAAa,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM;YAClD,MAAM,EAAE,uBAAuB,MAAM,qBAAqB,MAAM,CAAC,IAAI,CAAC,cAAc,aAAa,EAAE;SACpG,CAAC,CAAC;IACL,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,wBAAwB,CAAC,OAAwB;IACxD,MAAM,KAAK,GAAkB,EAAE,CAAC;IAChC,MAAM,MAAM,GAA2B,EAAE,CAAC;IAC1C,KAAK,MAAM,IAAI,IAAI,OAAO,CAAC,WAAW,CAAC,KAAK,IAAI,EAAE,EAAE,CAAC;QACnD,IAAI,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,OAAO,IAAI,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;YACtE,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,OAAO,CAAC;QACnC,CAAC;IACH,CAAC;IAED,CAAC,OAAO,CAAC,aAAa,CAAC,kBAAkB,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,QAAa,EAAE,KAAa,EAAE,EAAE;QACxF,MAAM,QAAQ,GAAG,sBAAsB,KAAK,GAAG,CAAC;QAChD,IAAI,CAAC,QAAQ,IAAI,OAAO,QAAQ,KAAK,QAAQ,IAAI,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,EAAE,CAAC;YACzE,KAAK,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,QAAQ,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,0CAA0C,EAAE,CAAC,CAAC;YACnG,OAAO;QACT,CAAC;QACD,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC;QAC3B,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC;YAC7C,KAAK,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,QAAQ,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,yCAAyC,EAAE,CAAC,CAAC;YAClG,OAAO;QACT,CAAC;QACD,MAAM,OAAO,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC;QAC7B,IAAI,OAAO,KAAK,SAAS,EAAE,CAAC;YAC1B,KAAK,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,QAAQ,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,mBAAmB,MAAM,CAAC,IAAI,CAAC,aAAa,EAAE,CAAC,CAAC;YACrG,OAAO;QACT,CAAC;QACD,MAAM,OAAO,GAAG,CAAC,QAAQ,CAAC,QAAQ,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,IAAY,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC;QAC5F,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvB,KAAK,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,QAAQ,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,mBAAmB,MAAM,CAAC,IAAI,CAAC,UAAU,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,EA
AE,CAAC,CAAC;QACxH,CAAC;aAAM,CAAC;YACN,KAAK,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,QAAQ,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,mBAAmB,MAAM,CAAC,IAAI,CAAC,UAAU,EAAE,CAAC,CAAC;QACpG,CAAC;IACH,CAAC,CAAC,CAAC;IACH,OAAO,KAAK,CAAC;AACf,CAAC;AAED,MAAM,UAAU,IAAI,CAAC,OAAiB,EAAE,EAAE,MAA8B,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;IACpG,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtB,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,sCAAsC,CAAC,CAAC;QAC7D,OAAO,CAAC,CAAC;IACX,CAAC;IACD,MAAM,MAAM,GAAG,WAAW,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,mBAAmB,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAC1E,GAAG,CAAC,YAAY,CAAC,MAAM,CAAC,GAAG,IAAI,CAAC,CAAC;IACjC,OAAO,MAAM,CAAC,MAAM,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;AAC1C,CAAC"}
|
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
import fs from "node:fs";
|
|
2
|
+
/** Section headings that must be present in every fixture document. */
export const REQUIRED_SECTIONS = ["Prompt", "Seeded Project State", "Captured Output", "Expected Facts"];
|
|
8
|
+
/**
 * Tell whether a value is a string with at least one non-whitespace character.
 *
 * @param {unknown} value - Candidate value of any type.
 * @returns {boolean}
 */
function nonEmptyString(value) {
    if (typeof value !== "string") {
        return false;
    }
    return value.trim() !== "";
}
|
|
11
|
+
/**
 * Read a fixture file as UTF-8 text and validate it.
 *
 * @param {string} path - Location of the fixture file.
 * @returns {*} Whatever `validateFixtureText` returns for the file's text.
 */
export function loadFixture(path) {
    const text = fs.readFileSync(path, "utf8");
    return validateFixtureText(text);
}
|
|
14
|
+
/**
 * Parse a fixture document and validate each of its sections.
 *
 * Errors are collected in this order: missing required sections, empty
 * Prompt, Seeded Project State problems, empty Captured Output, Tool Trace
 * problems (the section is optional), Expected Facts problems.
 *
 * @param {string} text - Full fixture document text.
 * @returns {[object|null, string[]]} `[fixture, []]` on success, `[null, errors]` otherwise.
 *   The fixture carries `prompt`, `seededState`, `capturedOutput`, `toolTrace`
 *   and `expectedFacts`.
 */
export function validateFixtureText(text) {
    const sections = parseSections(text);
    const present = (name) => name in sections;
    const trimmed = (name) => (sections[name] ?? "").trim();

    const errors = REQUIRED_SECTIONS
        .filter((name) => !present(name))
        .map((name) => `missing section: ${name}`);

    // Run a section validator when the section exists, folding its errors in;
    // otherwise keep the supplied fallback value.
    const validated = (name, validate, fallback) => {
        if (!present(name)) {
            return fallback;
        }
        const [parsed, problems] = validate(sections[name]);
        errors.push(...problems);
        return parsed;
    };

    const prompt = trimmed("Prompt");
    if (present("Prompt") && prompt === "") {
        errors.push("malformed section: Prompt: must be non-empty");
    }

    const seededState = validated("Seeded Project State", validateSeededState, {});

    const capturedOutput = trimmed("Captured Output");
    if (present("Captured Output") && capturedOutput === "") {
        errors.push("malformed section: Captured Output: must be non-empty");
    }

    const toolTrace = validated("Tool Trace", validateToolTrace, { calls: [] });
    const expectedFacts = validated("Expected Facts", validateExpectedFacts, {});

    return errors.length > 0
        ? [null, errors]
        : [{ prompt, seededState, capturedOutput, toolTrace, expectedFacts }, []];
}
|
|
53
|
+
/**
 * Split a fixture document into its level-two ("## Heading") sections.
 *
 * Lines before the first heading are ignored. A repeated heading keeps
 * appending to the section collected under that name. Each section body is
 * the newline-joined text of its lines, trimmed.
 *
 * @param {string} text - Full fixture document; LF, CRLF and CR line endings are accepted.
 * @returns {Object<string, string>} Prototype-less mapping of heading name to
 *   trimmed body, so `name in result` is true only for headings in the document.
 */
function parseSections(text) {
    // Keyed by heading text, which comes from the document. A Map keeps names
    // such as "constructor", "toString" or "__proto__" from colliding with
    // inherited Object.prototype members (that collision used to throw).
    const collected = new Map();
    let currentLines = null;
    for (const line of text.split(/\r\n|\r|\n/)) {
        const heading = /^##\s+(.+?)\s*$/.exec(line);
        if (heading) {
            const name = heading[1];
            currentLines = collected.get(name) ?? null;
            if (currentLines === null) {
                currentLines = [];
                collected.set(name, currentLines);
            }
            continue;
        }
        if (currentLines !== null) {
            currentLines.push(line);
        }
    }
    // No prototype on the result either, so every heading becomes an ordinary
    // own property and `in` checks by callers see document headings only.
    const out = Object.create(null);
    for (const [name, lines] of collected) {
        out[name] = lines.join("\n").trim();
    }
    return out;
}
|
|
75
|
+
/**
 * Validate the JSON payload of the "Seeded Project State" section.
 *
 * The payload must be an object whose `files` is a list of objects, each with
 * a non-empty `path` and a string `content`. Checking stops at the first problem.
 *
 * @param {string} sectionText - Raw text of the section.
 * @returns {[object, string[]]} `[data, []]` when valid, otherwise `[{}, [message]]`.
 */
function validateSeededState(sectionText) {
    const reject = (reason) => [{}, [`malformed section: Seeded Project State: ${reason}`]];
    const isRecord = (candidate) =>
        candidate !== null && typeof candidate === "object" && !Array.isArray(candidate);

    const [data, errors] = loadJsonSection("Seeded Project State", sectionText);
    if (errors.length > 0) {
        return [{}, errors];
    }
    if (!isRecord(data)) {
        return reject("JSON must be an object");
    }
    const { files } = data;
    if (!Array.isArray(files)) {
        return reject("files must be a list");
    }
    for (const [index, item] of files.entries()) {
        if (!isRecord(item)) {
            return reject(`files[${index}] must be an object`);
        }
        if (!nonEmptyString(item.path)) {
            return reject(`files[${index}].path must be non-empty`);
        }
        if (typeof item.content !== "string") {
            return reject(`files[${index}].content must be a string`);
        }
    }
    return [data, []];
}
|
|
101
|
+
/**
 * Validate the JSON payload of the "Expected Facts" section.
 *
 * Checks the four optional string lists and `tool_call_counts`, then either
 * validates `artifact_expectations` (when that key is present) or requires
 * that at least one output or tool fact is declared.
 *
 * @param {string} sectionText - Raw text of the section.
 * @returns {[object, string[]]} `[{}, errors]` when the JSON cannot be loaded or is
 *   not an object; otherwise `[data, errors]`, where `errors` may be non-empty.
 */
function validateExpectedFacts(sectionText) {
    const [data, jsonErrors] = loadJsonSection("Expected Facts", sectionText);
    if (jsonErrors.length > 0) {
        return [{}, jsonErrors];
    }
    const isRecord = data !== null && typeof data === "object" && !Array.isArray(data);
    if (!isRecord) {
        return [{}, ["malformed section: Expected Facts: JSON must be an object"]];
    }

    const listKeys = ["required_output", "forbidden_output", "required_tool_calls", "forbidden_tool_calls"];
    const errors = listKeys.flatMap((key) => validateStringList(data, key, false));
    errors.push(...validateToolCallCounts(data.tool_call_counts));

    const populated = (key) => Boolean(data[key]?.length);
    const counts = data.tool_call_counts;
    const declaresOutput = populated("required_output") || populated("forbidden_output");
    const declaresTool = populated("required_tool_calls")
        || populated("forbidden_tool_calls")
        || Boolean(counts && Object.keys(counts).length);

    if ("artifact_expectations" in data) {
        // Presence of the key alone counts as a declared fact.
        errors.push(...validateArtifactExpectations(data.artifact_expectations));
    } else if (!declaresOutput && !declaresTool) {
        errors.push("malformed section: Expected Facts: must declare at least one expected fact");
    }
    return [data, errors];
}
|
|
129
|
+
/**
 * Validate the JSON payload of the "Tool Trace" section.
 *
 * The payload must be an object whose `calls` is a list of non-empty strings.
 *
 * @param {string} sectionText - Raw text of the section.
 * @returns {[object, string[]]} `[data, []]` when valid, otherwise `[{}, errors]`.
 */
function validateToolTrace(sectionText) {
    const [data, errors] = loadJsonSection("Tool Trace", sectionText);
    if (errors.length > 0) {
        return [{}, errors];
    }
    const isRecord = data !== null && typeof data === "object" && !Array.isArray(data);
    if (!isRecord) {
        return [{}, ["malformed section: Tool Trace: JSON must be an object"]];
    }
    const { calls } = data;
    const wellFormed = Array.isArray(calls) && !calls.some((item) => !nonEmptyString(item));
    if (!wellFormed) {
        return [{}, ["malformed section: Tool Trace: calls must be non-empty strings"]];
    }
    return [data, []];
}
|
|
143
|
+
/**
 * Validate the `artifact_expectations` value of the Expected Facts section.
 *
 * Accepts an object whose `writes` is either the string "none" or a list of
 * objects with a non-empty `path` and, optionally, a `contains` list of
 * non-empty strings. Checking stops at the first problem.
 *
 * @param {unknown} value - The `artifact_expectations` value.
 * @returns {string[]} Empty when valid, otherwise a single error message.
 */
function validateArtifactExpectations(value) {
    const complain = (suffix) => [`malformed section: Expected Facts: artifact_expectations${suffix}`];
    const isRecord = (candidate) =>
        Boolean(candidate) && typeof candidate === "object" && !Array.isArray(candidate);

    if (!isRecord(value)) {
        return complain(" must be an object");
    }
    const { writes } = value;
    if (writes === "none") {
        return [];
    }
    if (!Array.isArray(writes)) {
        return complain(".writes must be 'none' or a list");
    }
    for (const [index, item] of writes.entries()) {
        const where = `.writes[${index}]`;
        if (!isRecord(item)) {
            return complain(`${where} must be an object`);
        }
        if (!nonEmptyString(item.path)) {
            return complain(`${where}.path must be non-empty`);
        }
        if (!("contains" in item)) {
            continue;
        }
        const { contains } = item;
        const allText = Array.isArray(contains) && contains.every((entry) => nonEmptyString(entry));
        if (!allText) {
            return complain(`${where}.contains must be non-empty strings`);
        }
    }
    return [];
}
|
|
173
|
+
/**
 * Validate the optional `tool_call_counts` mapping of the Expected Facts section.
 *
 * A missing (undefined or null) value is accepted. Otherwise it must be an
 * object whose keys are non-empty strings and whose values are non-negative
 * integers. Checking stops at the first problem.
 *
 * @param {unknown} value - The `tool_call_counts` value.
 * @returns {string[]} Empty when valid, otherwise a single error message.
 */
function validateToolCallCounts(value) {
    const prefix = "malformed section: Expected Facts: tool_call_counts";
    if (value == null) {
        return [];
    }
    if (typeof value !== "object" || Array.isArray(value)) {
        return [`${prefix} must be an object`];
    }
    for (const key of Object.keys(value)) {
        if (!nonEmptyString(key)) {
            return [`${prefix} keys must be non-empty strings`];
        }
        const count = value[key];
        // Number.isInteger is false for every non-number, so it also covers the type check.
        const isCount = Number.isInteger(count) && count >= 0;
        if (!isCount) {
            return [`${prefix}[${pyRepr(key)}] must be a non-negative integer`];
        }
    }
    return [];
}
|
|
190
|
+
/**
 * Validate that `data[key]`, when present, is a list of non-empty strings.
 *
 * @param {object} data - Parsed Expected Facts object.
 * @param {string} key - Property to check.
 * @param {boolean} required - Whether a missing key is an error.
 * @returns {string[]} Empty when valid, otherwise a single error message.
 */
function validateStringList(data, key, required) {
    const prefix = `malformed section: Expected Facts: ${key}`;
    if (key in data) {
        const candidate = data[key];
        const valid = Array.isArray(candidate) && candidate.every((item) => nonEmptyString(item));
        return valid ? [] : [`${prefix} must be non-empty strings`];
    }
    if (required) {
        return [`${prefix} is required`];
    }
    return [];
}
|
|
200
|
+
/**
 * Extract and parse the fenced JSON block of a section.
 *
 * @param {string} sectionName - Section heading, used only in error messages.
 * @param {string} text - Raw text of the section.
 * @returns {[*, string[]]} `[parsedValue, []]` on success, otherwise `[null, [message]]`
 *   for a missing fenced block or for JSON that fails to parse.
 */
function loadJsonSection(sectionName, text) {
    const malformed = (reason) => [null, [`malformed section: ${sectionName}: ${reason}`]];

    const block = extractJsonBlock(text);
    if (block === null) {
        return malformed("expected fenced json block");
    }
    let parsed;
    try {
        parsed = JSON.parse(block);
    }
    catch {
        // The parser's own message is deliberately not surfaced; the error
        // reported to callers is the fixed "invalid JSON" text.
        return malformed("invalid JSON");
    }
    return [parsed, []];
}
|
|
213
|
+
/**
 * Return the contents of the first ```json fenced block in the text.
 *
 * @param {string} text - Section text to search.
 * @returns {string|null} The text between the opening fence line and the
 *   closing fence, or null when no such block is found.
 */
function extractJsonBlock(text) {
    const fencePattern = /```json\s*\n([\s\S]*?)\n```/;
    const [, payload] = fencePattern.exec(text) ?? [];
    return payload ?? null;
}
|
|
220
|
+
/**
 * Mirror Python repr() for strings used in diagnostics.
 *
 * Follows Python's quoting rule: single quotes by default, double quotes when
 * the text contains a single quote but no double quote. Backslashes are
 * doubled with either quote style; newline, carriage return and tab use their
 * short escapes; other C0/C1 control characters and DEL are written as `\xNN`.
 * Any other character is copied through unchanged (Python additionally
 * escapes some non-printable Unicode characters, which is not reproduced here).
 *
 * @param {string} value - Text to quote.
 * @returns {string} Quoted, escaped representation.
 */
export function pyRepr(value) {
    const quote = value.includes("'") && !value.includes('"') ? '"' : "'";
    const shortEscapes = { "\\": "\\\\", "\n": "\\n", "\r": "\\r", "\t": "\\t" };
    const escaped = value.replace(/[\\'\u0000-\u001f\u007f-\u009f]/g, (ch) => {
        if (ch === "'") {
            // Only the active delimiter needs escaping.
            return quote === "'" ? "\\'" : ch;
        }
        return shortEscapes[ch] ?? `\\x${ch.charCodeAt(0).toString(16).padStart(2, "0")}`;
    });
    return `${quote}${escaped}${quote}`;
}
|
|
227
|
+
//# sourceMappingURL=semanticFixtures.js.map
|