trace-to-skill 0.1.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +190 -0
- package/README.md +456 -0
- package/dist/src/agentsLint.d.ts +15 -0
- package/dist/src/agentsLint.js +156 -0
- package/dist/src/agentsLint.js.map +1 -0
- package/dist/src/analyze.d.ts +3 -0
- package/dist/src/analyze.js +53 -0
- package/dist/src/analyze.js.map +1 -0
- package/dist/src/benchmark.d.ts +27 -0
- package/dist/src/benchmark.js +109 -0
- package/dist/src/benchmark.js.map +1 -0
- package/dist/src/cli.d.ts +2 -0
- package/dist/src/cli.js +281 -0
- package/dist/src/cli.js.map +1 -0
- package/dist/src/doctor.d.ts +18 -0
- package/dist/src/doctor.js +300 -0
- package/dist/src/doctor.js.map +1 -0
- package/dist/src/eval.d.ts +19 -0
- package/dist/src/eval.js +48 -0
- package/dist/src/eval.js.map +1 -0
- package/dist/src/github.d.ts +11 -0
- package/dist/src/github.js +66 -0
- package/dist/src/github.js.map +1 -0
- package/dist/src/githubContext.d.ts +6 -0
- package/dist/src/githubContext.js +60 -0
- package/dist/src/githubContext.js.map +1 -0
- package/dist/src/index.d.ts +11 -0
- package/dist/src/index.js +11 -0
- package/dist/src/index.js.map +1 -0
- package/dist/src/init.d.ts +16 -0
- package/dist/src/init.js +186 -0
- package/dist/src/init.js.map +1 -0
- package/dist/src/parsers.d.ts +2 -0
- package/dist/src/parsers.js +138 -0
- package/dist/src/parsers.js.map +1 -0
- package/dist/src/report.d.ts +11 -0
- package/dist/src/report.js +273 -0
- package/dist/src/report.js.map +1 -0
- package/dist/src/rules.d.ts +2 -0
- package/dist/src/rules.js +400 -0
- package/dist/src/rules.js.map +1 -0
- package/dist/src/scorecard.d.ts +25 -0
- package/dist/src/scorecard.js +75 -0
- package/dist/src/scorecard.js.map +1 -0
- package/dist/src/types.d.ts +31 -0
- package/dist/src/types.js +2 -0
- package/dist/src/types.js.map +1 -0
- package/docs/ADOPTION_GUIDE.md +97 -0
- package/docs/AGENTS_LINT.md +30 -0
- package/docs/BENCHMARK.md +21 -0
- package/docs/FAILURE_TAXONOMY.md +57 -0
- package/docs/SCORECARD.md +51 -0
- package/examples/codex-failed-run.md +17 -0
- package/fixtures/codex-session.jsonl +4 -0
- package/fixtures/failed-run.md +28 -0
- package/fixtures/github-pr-event.json +6 -0
- package/fixtures/github-prompt-injection-event.json +9 -0
- package/fixtures/instruction-drift/AGENTS.md +5 -0
- package/fixtures/instruction-drift/CLAUDE.md +6 -0
- package/fixtures/mcp-risk.json +22 -0
- package/fixtures/prompt-injection.md +7 -0
- package/fixtures/safe-run.md +12 -0
- package/package.json +55 -0
- package/schemas/agents-lint-result.schema.json +67 -0
- package/schemas/analysis-result.schema.json +134 -0
- package/schemas/doctor-result.schema.json +81 -0
- package/schemas/scorecard-result.schema.json +102 -0
- package/skills/codex-readiness-auditor/SKILL.md +61 -0
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
import { promises as fs } from "node:fs";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { doctorRepo } from "./doctor.js";
|
|
4
|
+
// Directory names that listFiles() never descends into while walking the repository.
const SKIP_DIRS = new Set([".git", "node_modules", "dist", "coverage", ".next", "build"]);
|
|
5
|
+
/**
 * Lint the agent instruction files and MCP configs of a repository.
 *
 * Lists the files under `target` (minus evidence archives), classifies them as
 * instruction files or MCP config candidates, runs `doctorRepo` on the same
 * root, and keeps only the doctor checks and findings relevant to agent
 * instructions before scoring them.
 *
 * @param {string} [target=process.cwd()] - Repository directory to lint.
 * @returns {Promise<object>} Result with `generatedAt`, `root`, `status`,
 *   `score`, `instructionFiles`, `mcpConfigs`, `checks`, `findings`, `summary`.
 */
export async function lintAgents(target = process.cwd()) {
    const root = path.resolve(target);
    const everyFile = await listFiles(root);
    const candidates = everyFile.filter((file) => !isEvidenceArchive(file));
    const instructionFiles = candidates.filter((file) => isInstructionFile(file));
    const mcpConfigs = candidates.filter((file) => isMcpConfigCandidate(file));

    const doctor = await doctorRepo(root);

    // Only these doctor checks/findings are part of the instruction lint.
    const relevantCheckIds = ["agent-instructions", "validation"];
    const relevantFindingKinds = [
        "ignored_instruction",
        "mcp_risk",
        "secret_exposure",
        "hidden_unicode",
        "prompt_injection"
    ];
    const checks = doctor.checks.filter((check) => relevantCheckIds.includes(check.id));
    const findings = doctor.findings.filter((finding) => relevantFindingKinds.includes(finding.kind));

    const score = calculateAgentsLintScore(checks, findings);
    const status = statusFrom(score, checks, findings);

    return {
        generatedAt: new Date().toISOString(),
        root,
        status,
        score,
        instructionFiles,
        mcpConfigs,
        checks,
        findings,
        summary: summarizeAgentsLint(status, instructionFiles, mcpConfigs, findings)
    };
}
|
|
32
|
+
/**
 * Render a lint result as a Markdown report.
 *
 * Sections, in order: header (status, score, summary, repository, timestamp),
 * instruction files, MCP configs, checks, findings, and a suggested next step.
 *
 * @param {object} result - Lint result as returned by `lintAgents`.
 * @returns {string} Markdown text terminated by a single newline.
 */
export function renderAgentsLintMarkdown(result) {
    const out = [];
    const emit = (...parts) => {
        out.push(...parts);
    };
    // Emits one code-formatted bullet per path, or a fallback sentence when there are none.
    const emitFileList = (files, fallback) => {
        if (files.length > 0) {
            files.forEach((file) => emit(`- \`${file}\``));
            return;
        }
        emit(fallback);
    };

    emit("# AGENTS.md Lint Report", "");
    emit(`Status: **${result.status}**`, `Score: **${result.score}/100**`, "");
    emit(result.summary, "");
    emit(`Repository: \`${result.root}\``, `Generated: ${result.generatedAt}`, "");

    emit("## Instruction Files", "");
    emitFileList(result.instructionFiles, "No AGENTS.md, CLAUDE.md, GEMINI.md, Cursor rules, or Copilot instruction files found.");

    emit("", "## MCP Configs", "");
    emitFileList(result.mcpConfigs, "No MCP config files detected.");

    emit("", "## Checks", "");
    result.checks.forEach((check) => {
        emit(`- **${check.status.toUpperCase()}** ${check.title}: ${check.detail}`);
        if (check.recommendation) {
            emit(` Recommendation: ${check.recommendation}`);
        }
    });

    emit("", "## Findings", "");
    if (result.findings.length === 0) {
        emit("No instruction or MCP findings detected.");
    }
    else {
        result.findings.forEach((finding) => {
            // Only the first piece of evidence is cited in the report.
            const first = finding.evidence[0];
            let citation = "";
            if (first) {
                citation = ` Evidence: \`${first.file}:${first.line}\`.`;
            }
            emit(`- **${finding.severity}** ${finding.title}.${citation}`);
        });
    }

    const nextStep = result.status === "pass"
        ? "Keep AGENTS.md as the canonical maintainer-controlled instruction file, and make tool-specific files reference it."
        : "Fix failed checks or high-risk findings before letting Codex act broadly on this repository.";
    emit("", "## Suggested Next Step", "", nextStep);

    return `${out.join("\n")}\n`;
}
|
|
83
|
+
/**
 * Recursively collect every regular file under `root`.
 *
 * Directories whose name is in SKIP_DIRS are not entered. Entries that are
 * neither directories nor regular files are ignored.
 *
 * @param {string} root - Directory to walk.
 * @returns {Promise<string[]>} Paths relative to `root`, using "/" as the
 *   separator, sorted with `localeCompare`.
 */
async function listFiles(root) {
    const collected = [];

    const walk = async (directory) => {
        const children = await fs.readdir(directory, { withFileTypes: true });
        for (const child of children) {
            const absolute = path.join(directory, child.name);
            if (child.isDirectory()) {
                if (SKIP_DIRS.has(child.name)) {
                    continue;
                }
                await walk(absolute);
            }
            else if (child.isFile()) {
                // Normalise platform separators so results look the same on every OS.
                const segments = path.relative(root, absolute).split(path.sep);
                collected.push(segments.join("/"));
            }
        }
    };

    await walk(root);
    collected.sort((left, right) => left.localeCompare(right));
    return collected;
}
|
|
104
|
+
/**
 * Compute the 0-100 lint score from doctor checks and findings.
 *
 * Starts at 100 and subtracts a penalty per check (fail: 30, warn: 10) and per
 * finding (low: 5, medium: 10, high: 25, critical: 35). The result is clamped
 * to the range [0, 100].
 *
 * Unrecognised check statuses or finding severities contribute no penalty.
 * Previously an unknown severity produced `undefined`, which turned the whole
 * score into NaN.
 *
 * @param {Array<{status: string}>} checks - Doctor checks to score.
 * @param {Array<{severity: string}>} findings - Findings to score.
 * @returns {number} Score between 0 and 100 inclusive.
 */
function calculateAgentsLintScore(checks, findings) {
    // Maps (not object literals) so keys like "constructor" cannot hit the prototype chain.
    const checkPenalties = new Map([
        ["fail", 30],
        ["warn", 10]
    ]);
    const findingPenalties = new Map([
        ["low", 5],
        ["medium", 10],
        ["high", 25],
        ["critical", 35]
    ]);
    const checkPenalty = checks.reduce((total, check) => total + (checkPenalties.get(check.status) ?? 0), 0);
    const findingPenalty = findings.reduce((total, finding) => total + (findingPenalties.get(finding.severity) ?? 0), 0);
    return Math.max(0, Math.min(100, 100 - checkPenalty - findingPenalty));
}
|
|
125
|
+
/**
 * Derive the overall lint status.
 *
 * - "fail" when any check failed or any finding is high/critical.
 * - "warn" when the score is below 90, any check warned, or any finding exists.
 * - "pass" otherwise.
 *
 * @param {number} checksScore - Score computed for the lint run.
 * @param {Array<{status: string}>} checks - Doctor checks.
 * @param {Array<{severity: string}>} findings - Findings.
 * @returns {"pass" | "warn" | "fail"} Overall status.
 */
function statusFrom(checksScore, checks, findings) {
    const hasStatus = (wanted) => checks.some((check) => check.status === wanted);
    const isSevere = (finding) => finding.severity === "critical" || finding.severity === "high";

    if (hasStatus("fail") || findings.some(isSevere)) {
        return "fail";
    }
    const needsAttention = checksScore < 90 || hasStatus("warn") || findings.length > 0;
    return needsAttention ? "warn" : "pass";
}
|
|
134
|
+
/**
 * Build the one-line summary shown at the top of the lint report.
 *
 * "pass" and "fail" map to fixed sentences. Any other status yields the
 * "usable but need tightening" sentence, optionally followed by a hint to add
 * AGENTS.md (when that exact path is not among the instruction files) and a
 * note about MCP configs (when some exist and there are no findings).
 *
 * @param {string} status - Overall lint status.
 * @param {string[]} instructionFiles - Relative paths of instruction files.
 * @param {string[]} mcpConfigs - Relative paths of MCP config candidates.
 * @param {Array<object>} findings - Findings for the run.
 * @returns {string} Summary sentence(s).
 */
function summarizeAgentsLint(status, instructionFiles, mcpConfigs, findings) {
    switch (status) {
        case "pass":
            return "Agent instructions look consistent and ready for Codex use.";
        case "fail":
            return "Agent instructions or MCP configuration need fixes before broad Codex automation.";
        default: {
            const sentences = ["Agent instructions are usable but need tightening."];
            if (!instructionFiles.includes("AGENTS.md")) {
                sentences.push(" Add AGENTS.md as the shared source of truth.");
            }
            if (mcpConfigs.length > 0 && findings.length === 0) {
                sentences.push(" MCP configs were detected; keep their trust boundaries documented.");
            }
            return sentences.join("").trim();
        }
    }
}
|
|
145
|
+
/**
 * Decide whether a relative path is an agent instruction file.
 *
 * Matches, case-insensitively, AGENTS/CLAUDE/GEMINI/COPILOT/copilot-instructions
 * `.md` files in any directory, and anything below a root-level `.cursor/rules/`.
 *
 * @param {string} file - "/"-separated path relative to the repository root.
 * @returns {boolean} True when the path looks like an instruction file.
 */
function isInstructionFile(file) {
    const namedInstructionDoc = /(^|\/)(AGENTS|CLAUDE|GEMINI|COPILOT|copilot-instructions)\.md$/i;
    if (namedInstructionDoc.test(file)) {
        return true;
    }
    const cursorRule = /^\.cursor\/rules\/.+/i;
    return cursorRule.test(file);
}
|
|
149
|
+
/**
 * Decide whether a relative path looks like an MCP configuration file.
 *
 * Matches, case-insensitively, `mcp`, `.mcp`, `mcp-config` or `model-context`
 * with a `.json`/`.jsonc` extension in any directory, as well as
 * `.cursor/mcp.json` in any directory.
 *
 * @param {string} file - "/"-separated path relative to the repository root.
 * @returns {boolean} True when the path is an MCP config candidate.
 */
function isMcpConfigCandidate(file) {
    const wellKnownName = /(^|\/)(mcp|\.mcp|mcp-config|model-context)\.(json|jsonc)$/i;
    if (wellKnownName.test(file)) {
        return true;
    }
    const cursorMcp = /(^|\/)\.cursor\/mcp\.json$/i;
    return cursorMcp.test(file);
}
|
|
153
|
+
/**
 * Decide whether a relative path lives under a root-level `fixtures/`,
 * `examples/` or `runs/` directory (matched case-insensitively).
 *
 * @param {string} file - "/"-separated path relative to the repository root.
 * @returns {boolean} True when the path starts with one of those directories.
 */
function isEvidenceArchive(file) {
    const archivePrefix = /^(fixtures|examples|runs)\//i;
    return archivePrefix.test(file);
}
|
|
156
|
+
//# sourceMappingURL=agentsLint.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"agentsLint.js","sourceRoot":"","sources":["../../src/agentsLint.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,IAAI,EAAE,EAAE,MAAM,SAAS,CAAC;AACzC,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,UAAU,EAAoB,MAAM,aAAa,CAAC;AAe3D,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC,CAAC,MAAM,EAAE,cAAc,EAAE,MAAM,EAAE,UAAU,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC,CAAC;AAE1F,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,MAAM,GAAG,OAAO,CAAC,GAAG,EAAE;IACrD,MAAM,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;IAClC,MAAM,KAAK,GAAG,MAAM,SAAS,CAAC,IAAI,CAAC,CAAC;IACpC,MAAM,aAAa,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,iBAAiB,CAAC,IAAI,CAAC,CAAC,CAAC;IACvE,MAAM,gBAAgB,GAAG,aAAa,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,iBAAiB,CAAC,IAAI,CAAC,CAAC,CAAC;IACjF,MAAM,UAAU,GAAG,aAAa,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,oBAAoB,CAAC,IAAI,CAAC,CAAC,CAAC;IAC9E,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,IAAI,CAAC,CAAC;IACtC,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,EAAE,KAAK,oBAAoB,IAAI,KAAK,CAAC,EAAE,KAAK,YAAY,CAAC,CAAC;IAC/G,MAAM,QAAQ,GAAG,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,OAAO,EAAE,EAAE,CAClD,OAAO,CAAC,IAAI,KAAK,qBAAqB;QACtC,OAAO,CAAC,IAAI,KAAK,UAAU;QAC3B,OAAO,CAAC,IAAI,KAAK,iBAAiB;QAClC,OAAO,CAAC,IAAI,KAAK,gBAAgB;QACjC,OAAO,CAAC,IAAI,KAAK,kBAAkB,CACpC,CAAC;IACF,MAAM,KAAK,GAAG,wBAAwB,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC;IACzD,MAAM,MAAM,GAAG,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC;IAEnD,OAAO;QACL,WAAW,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACrC,IAAI;QACJ,MAAM;QACN,KAAK;QACL,gBAAgB;QAChB,UAAU;QACV,MAAM;QACN,QAAQ;QACR,OAAO,EAAE,mBAAmB,CAAC,MAAM,EAAE,gBAAgB,EAAE,UAAU,EAAE,QAAQ,CAAC;KAC7E,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,wBAAwB,CAAC,MAAwB;IAC/D,MAAM,KAAK,GAAG;QACZ,yBAAyB;QACzB,EAAE;QACF,aAAa,MAAM,CAAC,MAAM,IAAI;QAC9B,YAAY,MAAM,CAAC,KAAK,QAAQ;QAChC,EAAE;QACF,MAAM,CAAC,OAAO;QACd,EAAE;QACF,iBAAiB,MAAM,CAAC,IAAI,IAAI;QAChC,cAAc,MAAM,CAAC,WAAW,EAAE;QAClC,EAAE;QACF,sBAAsB;QACtB,EAAE;KACH,CAAC;IAEF,IAAI,MAAM,CAAC,gBAAgB,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACvC,MAAM,CAAC,gBAAgB,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,E
AAE,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,CAAC,CAAC;IACzE,CAAC;SAAM,CAAC;QACN,KAAK,CAAC,IAAI,CAAC,uFAAuF,CAAC,CAAC;IACtG,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,EAAE,EAAE,gBAAgB,EAAE,EAAE,CAAC,CAAC;IACrC,IAAI,MAAM,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACjC,MAAM,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,CAAC,CAAC;IACnE,CAAC;SAAM,CAAC;QACN,KAAK,CAAC,IAAI,CAAC,+BAA+B,CAAC,CAAC;IAC9C,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,EAAE,EAAE,WAAW,EAAE,EAAE,CAAC,CAAC;IAChC,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,EAAE;QAC9B,KAAK,CAAC,IAAI,CAAC,OAAO,KAAK,CAAC,MAAM,CAAC,WAAW,EAAE,MAAM,KAAK,CAAC,KAAK,KAAK,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC;QAClF,IAAI,KAAK,CAAC,cAAc,EAAE,CAAC;YACzB,KAAK,CAAC,IAAI,CAAC,qBAAqB,KAAK,CAAC,cAAc,EAAE,CAAC,CAAC;QAC1D,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,KAAK,CAAC,IAAI,CAAC,EAAE,EAAE,aAAa,EAAE,EAAE,CAAC,CAAC;IAClC,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACjC,KAAK,CAAC,IAAI,CAAC,0CAA0C,CAAC,CAAC;IACzD,CAAC;SAAM,CAAC;QACN,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE;YAClC,MAAM,aAAa,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;YAC1C,MAAM,QAAQ,GAAG,aAAa,CAAC,CAAC,CAAC,gBAAgB,aAAa,CAAC,IAAI,IAAI,aAAa,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC;YACpG,KAAK,CAAC,IAAI,CAAC,OAAO,OAAO,CAAC,QAAQ,MAAM,OAAO,CAAC,KAAK,IAAI,QAAQ,EAAE,CAAC,CAAC;QACvE,CAAC,CAAC,CAAC;IACL,CAAC;IAED,KAAK,CAAC,IAAI,CACR,EAAE,EACF,wBAAwB,EACxB,EAAE,EACF,MAAM,CAAC,MAAM,KAAK,MAAM;QACtB,CAAC,CAAC,oHAAoH;QACtH,CAAC,CAAC,8FAA8F,CACnG,CAAC;IAEF,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC;AACjC,CAAC;AAED,KAAK,UAAU,SAAS,CAAC,IAAY;IACnC,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,KAAK,UAAU,KAAK,CAAC,GAAW;QAC9B,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,CAAC;QAC/D,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;YAC5B,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC;YAC5C,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACzE,IAAI,KAAK,CAAC,WAAW,EAAE,EAAE,CAAC;gBA
CxB,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;oBAC/B,MAAM,KAAK,CAAC,QAAQ,CAAC,CAAC;gBACxB,CAAC;gBACD,SAAS;YACX,CAAC;YAED,IAAI,KAAK,CAAC,MAAM,EAAE,EAAE,CAAC;gBACnB,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YACvB,CAAC;QACH,CAAC;IACH,CAAC;IAED,MAAM,KAAK,CAAC,IAAI,CAAC,CAAC;IAClB,OAAO,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,CAAC;AAClD,CAAC;AAED,SAAS,wBAAwB,CAAC,MAAqB,EAAE,QAAmB;IAC1E,MAAM,YAAY,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,KAAK,EAAE,EAAE;QAClD,IAAI,KAAK,CAAC,MAAM,KAAK,MAAM,EAAE,CAAC;YAC5B,OAAO,KAAK,GAAG,EAAE,CAAC;QACpB,CAAC;QACD,IAAI,KAAK,CAAC,MAAM,KAAK,MAAM,EAAE,CAAC;YAC5B,OAAO,KAAK,GAAG,EAAE,CAAC;QACpB,CAAC;QACD,OAAO,KAAK,CAAC;IACf,CAAC,EAAE,CAAC,CAAC,CAAC;IACN,MAAM,cAAc,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,OAAO,EAAE,EAAE;QACxD,MAAM,KAAK,GAAG;YACZ,GAAG,EAAE,CAAC;YACN,MAAM,EAAE,EAAE;YACV,IAAI,EAAE,EAAE;YACR,QAAQ,EAAE,EAAE;SACb,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;QACpB,OAAO,KAAK,GAAG,KAAK,CAAC;IACvB,CAAC,EAAE,CAAC,CAAC,CAAC;IAEN,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,GAAG,GAAG,YAAY,GAAG,cAAc,CAAC,CAAC,CAAC;AACzE,CAAC;AAED,SAAS,UAAU,CAAC,WAAmB,EAAE,MAAqB,EAAE,QAAmB;IACjF,IAAI,MAAM,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,MAAM,KAAK,MAAM,CAAC,IAAI,QAAQ,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,OAAO,CAAC,QAAQ,KAAK,UAAU,IAAI,OAAO,CAAC,QAAQ,KAAK,MAAM,CAAC,EAAE,CAAC;QAClJ,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,IAAI,WAAW,GAAG,EAAE,IAAI,MAAM,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,MAAM,KAAK,MAAM,CAAC,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC/F,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,SAAS,mBAAmB,CAAC,MAAkC,EAAE,gBAA0B,EAAE,UAAoB,EAAE,QAAmB;IACpI,IAAI,MAAM,KAAK,MAAM,EAAE,CAAC;QACtB,OAAO,6DAA6D,CAAC;IACvE,CAAC;IAED,IAAI,MAAM,KAAK,MAAM,EAAE,CAAC;QACtB,OAAO,mFAAmF,CAAC;IAC7F,CAAC;IAED,MAAM,aAAa,GAAG,gBAAgB,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,+CAA+C,CAAC;IACpH,MAAM,OAAO,GAAG,UAAU,CAAC,MAAM,GAAG,CAAC,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,CAAC,CA
AC,CAAC,qEAAqE,CAAC,CAAC,CAAC,EAAE,CAAC;IAC5I,OAAO,qDAAqD,aAAa,GAAG,OAAO,EAAE,CAAC,IAAI,EAAE,CAAC;AAC/F,CAAC;AAED,SAAS,iBAAiB,CAAC,IAAY;IACrC,OAAO,iEAAiE,CAAC,IAAI,CAAC,IAAI,CAAC;QACjF,uBAAuB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACvC,CAAC;AAED,SAAS,oBAAoB,CAAC,IAAY;IACxC,OAAO,4DAA4D,CAAC,IAAI,CAAC,IAAI,CAAC;QAC5E,6BAA6B,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC7C,CAAC;AAED,SAAS,iBAAiB,CAAC,IAAY;IACrC,OAAO,8BAA8B,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACnD,CAAC"}
|
|
@@ -0,0 +1,3 @@
|
|
|
1
|
+
import type { AnalysisResult, AnalyzeOptions, TraceInput } from "./types.js";
|
|
2
|
+
/**
 * Load trace inputs from the given targets and analyze them.
 *
 * @param targets - Targets handed to the trace loader.
 * @param options - Optional analysis options.
 * @returns A promise resolving to the analysis result.
 */
export declare function analyzeTargets(targets: string[], options?: AnalyzeOptions): Promise<AnalysisResult>;
|
|
3
|
+
/**
 * Analyze already-loaded trace inputs synchronously.
 *
 * @param inputs - Trace inputs to analyze.
 * @param options - Optional analysis options.
 * @returns The analysis result.
 */
export declare function analyzeInputs(inputs: TraceInput[], options?: AnalyzeOptions): AnalysisResult;
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import { loadTraceInputs } from "./parsers.js";
|
|
2
|
+
import { collectFindings } from "./rules.js";
|
|
3
|
+
/**
 * Load trace inputs for the given targets and analyze them.
 *
 * @param {string[]} targets - Targets passed to `loadTraceInputs`.
 * @param {object} [options={}] - Options forwarded to `analyzeInputs`.
 * @returns {Promise<object>} The analysis result produced by `analyzeInputs`.
 */
export async function analyzeTargets(targets, options = {}) {
    const loadedInputs = await loadTraceInputs(targets);
    const analysis = analyzeInputs(loadedInputs, options);
    return analysis;
}
|
|
7
|
+
/**
 * Analyze loaded trace inputs: collect findings, score them, and assemble the
 * result object.
 *
 * @param {Array<{path: string}>} inputs - Trace inputs to analyze.
 * @param {object} [options={}] - Options; `maxFilesChanged` is forwarded to
 *   `collectFindings`.
 * @returns {object} Result with `generatedAt`, `inputs` (the input paths),
 *   `score`, `summary`, `findings` and `recommendations`.
 */
export function analyzeInputs(inputs, options = {}) {
    const { maxFilesChanged } = options;
    const findings = collectFindings(inputs, maxFilesChanged);
    const score = calculateScore(findings);

    const analysis = {
        generatedAt: new Date().toISOString(),
        inputs: inputs.map((traceInput) => traceInput.path),
        score,
        summary: summarize(score, findings),
        findings,
        recommendations: buildRecommendations(findings)
    };
    return analysis;
}
|
|
19
|
+
/**
 * Compute the 0-100 workflow score from a list of findings.
 *
 * Starts at 100 and subtracts a penalty per finding (low: 4, medium: 9,
 * high: 16, critical: 25). The result is clamped to the range [0, 100].
 *
 * Findings with an unrecognised severity contribute no penalty. Previously
 * such a finding produced `undefined`, which turned the whole score into NaN.
 *
 * @param {Array<{severity: string}>} findings - Findings to score.
 * @returns {number} Score between 0 and 100 inclusive.
 */
function calculateScore(findings) {
    // A Map (not an object literal) so keys like "constructor" cannot hit the prototype chain.
    const penalties = new Map([
        ["low", 4],
        ["medium", 9],
        ["high", 16],
        ["critical", 25]
    ]);
    const penalty = findings.reduce((total, finding) => total + (penalties.get(finding.severity) ?? 0), 0);
    return Math.max(0, Math.min(100, 100 - penalty));
}
|
|
31
|
+
/**
 * Pick the summary sentence for an analysis.
 *
 * Precedence: no findings, then any critical finding, then any high finding,
 * then score thresholds (>= 80, >= 60, below 60).
 *
 * @param {number} score - Score computed for the findings.
 * @param {Array<{severity: string}>} findings - Findings from the analysis.
 * @returns {string} Summary sentence.
 */
function summarize(score, findings) {
    if (findings.length === 0) {
        return "No agent workflow risks detected in the provided traces.";
    }
    const anyWithSeverity = (level) => findings.some((finding) => finding.severity === level);
    if (anyWithSeverity("critical")) {
        return "Agent workflow is risky. Critical findings must be resolved before this workflow is reused.";
    }
    if (anyWithSeverity("high")) {
        return "Agent workflow needs clearer verification, instruction, or security hardening before broad reuse.";
    }
    if (score >= 80) {
        return "Agent workflow is mostly healthy, with a few hardening opportunities.";
    }
    return score >= 60
        ? "Agent workflow needs clearer verification and instruction hardening before broad reuse."
        : "Agent workflow is risky. Convert the findings into explicit rules or skills before repeating this workflow.";
}
|
|
49
|
+
/**
 * Collect the distinct `suggestedRule` values of the findings, in order of
 * first appearance.
 *
 * @param {Array<{suggestedRule: string}>} findings - Findings from the analysis.
 * @returns {string[]} De-duplicated suggested rules.
 */
function buildRecommendations(findings) {
    const seenRules = new Set();
    findings.forEach((finding) => {
        seenRules.add(finding.suggestedRule);
    });
    return [...seenRules];
}
|
|
53
|
+
//# sourceMappingURL=analyze.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"analyze.js","sourceRoot":"","sources":["../../src/analyze.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,cAAc,CAAC;AAC/C,OAAO,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAG7C,MAAM,CAAC,KAAK,UAAU,cAAc,CAAC,OAAiB,EAAE,UAA0B,EAAE;IAClF,MAAM,MAAM,GAAG,MAAM,eAAe,CAAC,OAAO,CAAC,CAAC;IAC9C,OAAO,aAAa,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;AACxC,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,MAAoB,EAAE,UAA0B,EAAE;IAC9E,MAAM,QAAQ,GAAG,eAAe,CAAC,MAAM,EAAE,OAAO,CAAC,eAAe,CAAC,CAAC;IAClE,MAAM,KAAK,GAAG,cAAc,CAAC,QAAQ,CAAC,CAAC;IAEvC,OAAO;QACL,WAAW,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACrC,MAAM,EAAE,MAAM,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC;QACzC,KAAK;QACL,OAAO,EAAE,SAAS,CAAC,KAAK,EAAE,QAAQ,CAAC;QACnC,QAAQ;QACR,eAAe,EAAE,oBAAoB,CAAC,QAAQ,CAAC;KAChD,CAAC;AACJ,CAAC;AAED,SAAS,cAAc,CAAC,QAAoC;IAC1D,MAAM,OAAO,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,OAAO,EAAE,EAAE;QACjD,MAAM,KAAK,GAAG;YACZ,GAAG,EAAE,CAAC;YACN,MAAM,EAAE,CAAC;YACT,IAAI,EAAE,EAAE;YACR,QAAQ,EAAE,EAAE;SACb,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;QACpB,OAAO,KAAK,GAAG,KAAK,CAAC;IACvB,CAAC,EAAE,CAAC,CAAC,CAAC;IAEN,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,GAAG,GAAG,OAAO,CAAC,CAAC,CAAC;AACnD,CAAC;AAED,SAAS,SAAS,CAAC,KAAa,EAAE,QAAoC;IACpE,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC1B,OAAO,0DAA0D,CAAC;IACpE,CAAC;IAED,IAAI,QAAQ,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,OAAO,CAAC,QAAQ,KAAK,UAAU,CAAC,EAAE,CAAC;QAChE,OAAO,6FAA6F,CAAC;IACvG,CAAC;IAED,IAAI,QAAQ,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,OAAO,CAAC,QAAQ,KAAK,MAAM,CAAC,EAAE,CAAC;QAC5D,OAAO,mGAAmG,CAAC;IAC7G,CAAC;IAED,IAAI,KAAK,IAAI,EAAE,EAAE,CAAC;QAChB,OAAO,uEAAuE,CAAC;IACjF,CAAC;IAED,IAAI,KAAK,IAAI,EAAE,EAAE,CAAC;QAChB,OAAO,yFAAyF,CAAC;IACnG,CAAC;IAED,OAAO,6GAA6G,CAAC;AACvH,CAAC;AAED,SAAS,oBAAoB,CAAC,QAAoC;IAChE,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC,CAAC;IACzE,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;AAC5B,CAAC"}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import type { FindingKind } from "./types.js";
|
|
2
|
+
/** Static description of one benchmark case run against a bundled fixture. */
export interface BenchmarkCaseDefinition {
    /** Identifier of the case. */
    id: string;
    /** Human-readable title, shown in the Markdown table. */
    title: string;
    /** Fixture path, joined onto the package root before analysis. */
    fixture: string;
    /** Finding kinds that must all be detected for the case to pass. */
    expectedKinds: FindingKind[];
    /** When true, the case fails if no critical finding is reported. */
    requireCritical?: boolean;
    /** When true, the case fails if any finding is reported. */
    requireClean?: boolean;
}
|
|
10
|
+
/** Outcome of running one benchmark case. */
export interface BenchmarkCaseResult {
    /** Identifier copied from the case definition. */
    id: string;
    /** Title copied from the case definition. */
    title: string;
    /** Fixture path copied from the case definition. */
    fixture: string;
    /** Score reported by the analysis of the fixture. */
    score: number;
    /** Number of findings reported. */
    findings: number;
    /** Number of findings whose severity is "critical". */
    criticalFindings: number;
    /** Distinct finding kinds that were reported, sorted. */
    detectedKinds: FindingKind[];
    /** Expected kinds copied from the case definition. */
    expectedKinds: FindingKind[];
    /** Whether the case met all of its expectations. */
    passed: boolean;
}
|
|
21
|
+
/** Aggregate result of a benchmark run. */
export interface BenchmarkResult {
    /** ISO 8601 timestamp taken when the result was assembled. */
    generatedAt: string;
    /** True only when every case passed. */
    passed: boolean;
    /** Per-case results, in definition order. */
    cases: BenchmarkCaseResult[];
}
|
|
26
|
+
/**
 * Run every built-in benchmark case against its bundled fixture.
 *
 * @returns A promise resolving to the aggregate benchmark result.
 */
export declare function runBenchmark(): Promise<BenchmarkResult>;
|
|
27
|
+
/**
 * Render a benchmark result as a Markdown report with one table row per case.
 *
 * @param result - Benchmark result to render.
 * @returns The Markdown text.
 */
export declare function renderBenchmarkMarkdown(result: BenchmarkResult): string;
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
import path from "node:path";
|
|
2
|
+
import { fileURLToPath } from "node:url";
|
|
3
|
+
import { analyzeTargets } from "./analyze.js";
|
|
4
|
+
// Built-in benchmark cases. Each entry names a fixture (relative to the package
// root) and the expectations casePassed() checks against its analysis.
const BENCHMARK_CASES = [
    // A healthy run: must produce no findings at all.
    {
        id: "clean-validated-run",
        title: "Clean validated agent run",
        fixture: "fixtures/safe-run.md",
        expectedKinds: [],
        requireClean: true
    },
    // Markdown trace of a failed workflow.
    {
        id: "failed-workflow",
        title: "Failed workflow with missing validation",
        fixture: "fixtures/failed-run.md",
        expectedKinds: [
            "test_failure",
            "premature_completion",
            "tests_not_run",
            "mcp_risk",
            "hallucinated_file"
        ],
        requireCritical: true
    },
    // JSONL session log.
    {
        id: "codex-jsonl",
        title: "Codex JSONL failed session",
        fixture: "fixtures/codex-session.jsonl",
        expectedKinds: [
            "test_failure",
            "premature_completion",
            "weak_evidence"
        ],
        requireCritical: true
    },
    // MCP configuration file.
    {
        id: "mcp-risk",
        title: "MCP config with secret exposure",
        fixture: "fixtures/mcp-risk.json",
        expectedKinds: [
            "secret_exposure",
            "mcp_risk"
        ],
        requireCritical: true
    },
    // Untrusted comment text.
    {
        id: "prompt-injection",
        title: "Untrusted PR comment prompt injection",
        fixture: "fixtures/prompt-injection.md",
        expectedKinds: [
            "prompt_injection"
        ],
        requireCritical: true
    },
    // Directory fixture; no clean/critical requirement is set for this case.
    {
        id: "instruction-drift",
        title: "Conflicting agent instruction files",
        fixture: "fixtures/instruction-drift",
        expectedKinds: [
            "ignored_instruction"
        ]
    }
];
|
|
47
|
+
/**
 * Run every entry of BENCHMARK_CASES, one after another, against its fixture.
 *
 * Each fixture path is resolved against the package root and analyzed with
 * `analyzeTargets`; the detected kinds and critical count are then checked by
 * `casePassed`.
 *
 * @returns {Promise<object>} Result with `generatedAt`, `passed` (true only
 *   when every case passed) and `cases` (one entry per definition).
 */
export async function runBenchmark() {
    const root = packageRoot();
    const cases = [];

    for (const definition of BENCHMARK_CASES) {
        const fixturePath = path.join(root, definition.fixture);
        const analysis = await analyzeTargets([fixturePath]);

        const kindSet = new Set(analysis.findings.map((finding) => finding.kind));
        const detectedKinds = [...kindSet].sort();
        const findingCount = analysis.findings.length;
        const criticalFindings = analysis.findings.filter((finding) => finding.severity === "critical").length;
        const passed = casePassed(definition, detectedKinds, findingCount, criticalFindings);

        cases.push({
            id: definition.id,
            title: definition.title,
            fixture: definition.fixture,
            score: analysis.score,
            findings: findingCount,
            criticalFindings,
            detectedKinds,
            expectedKinds: definition.expectedKinds,
            passed
        });
    }

    const generatedAt = new Date().toISOString();
    const allPassed = cases.every((item) => item.passed);
    return { generatedAt, passed: allPassed, cases };
}
|
|
72
|
+
/**
 * Render a benchmark result as Markdown: a status line, an explanatory
 * paragraph, a table with one row per case, and usage instructions.
 *
 * @param {object} result - Benchmark result as returned by `runBenchmark`.
 * @returns {string} Markdown text (the final line is empty, so it ends in a newline).
 */
export function renderBenchmarkMarkdown(result) {
    const overall = result.passed ? "pass" : "fail";
    const preamble = [
        "# trace-to-skill Benchmark",
        "",
        `Status: **${overall}**`,
        "",
        "This benchmark runs the public fixture pack that ships with the repository and package. It is not a model leaderboard; it checks whether deterministic detectors still catch the agent-workflow failure classes the project claims to cover.",
        "",
        "| Case | Fixture | Score | Findings | Critical | Detected kinds | Result |",
        "| --- | --- | ---: | ---: | ---: | --- | --- |"
    ];

    const rows = [];
    for (const item of result.cases) {
        let kinds = "none";
        if (item.detectedKinds.length > 0) {
            kinds = item.detectedKinds.map((kind) => `\`${kind}\``).join(", ");
        }
        const verdict = item.passed ? "pass" : "fail";
        rows.push(`| ${item.title} | \`${item.fixture}\` | ${item.score} | ${item.findings} | ${item.criticalFindings} | ${kinds} | ${verdict} |`);
    }

    const usage = [
        "",
        "Run it locally:",
        "",
        "```bash",
        "trace-to-skill benchmark",
        "trace-to-skill benchmark --format json",
        "```",
        ""
    ];

    return [...preamble, ...rows, ...usage].join("\n");
}
|
|
97
|
+
/**
 * Check one benchmark case against its expectations.
 *
 * @param {object} definition - Case definition (`expectedKinds`, optional
 *   `requireClean` / `requireCritical`).
 * @param {string[]} detectedKinds - Finding kinds reported for the fixture.
 * @param {number} findings - Total number of findings reported.
 * @param {number} criticalFindings - Number of critical findings reported.
 * @returns {boolean} False when a clean case has findings, when a case that
 *   requires a critical finding has none, or when an expected kind is missing.
 */
function casePassed(definition, detectedKinds, findings, criticalFindings) {
    const shouldBeCleanButIsNot = definition.requireClean && findings !== 0;
    if (shouldBeCleanButIsNot) {
        return false;
    }
    const lacksRequiredCritical = definition.requireCritical && criticalFindings === 0;
    if (lacksRequiredCritical) {
        return false;
    }
    const someKindMissing = definition.expectedKinds.some((kind) => !detectedKinds.includes(kind));
    return !someKindMissing;
}
|
|
106
|
+
/**
 * Resolve the directory two levels above the directory containing this module.
 *
 * @returns {string} Absolute path used as the base for fixture paths.
 */
function packageRoot() {
    const thisFile = fileURLToPath(import.meta.url);
    const moduleDir = path.dirname(thisFile);
    return path.resolve(moduleDir, "..", "..");
}
|
|
109
|
+
//# sourceMappingURL=benchmark.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"benchmark.js","sourceRoot":"","sources":["../../src/benchmark.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AACzC,OAAO,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AA8B9C,MAAM,eAAe,GAA8B;IACjD;QACE,EAAE,EAAE,qBAAqB;QACzB,KAAK,EAAE,2BAA2B;QAClC,OAAO,EAAE,sBAAsB;QAC/B,aAAa,EAAE,EAAE;QACjB,YAAY,EAAE,IAAI;KACnB;IACD;QACE,EAAE,EAAE,iBAAiB;QACrB,KAAK,EAAE,yCAAyC;QAChD,OAAO,EAAE,wBAAwB;QACjC,aAAa,EAAE,CAAC,cAAc,EAAE,sBAAsB,EAAE,eAAe,EAAE,UAAU,EAAE,mBAAmB,CAAC;QACzG,eAAe,EAAE,IAAI;KACtB;IACD;QACE,EAAE,EAAE,aAAa;QACjB,KAAK,EAAE,4BAA4B;QACnC,OAAO,EAAE,8BAA8B;QACvC,aAAa,EAAE,CAAC,cAAc,EAAE,sBAAsB,EAAE,eAAe,CAAC;QACxE,eAAe,EAAE,IAAI;KACtB;IACD;QACE,EAAE,EAAE,UAAU;QACd,KAAK,EAAE,iCAAiC;QACxC,OAAO,EAAE,wBAAwB;QACjC,aAAa,EAAE,CAAC,iBAAiB,EAAE,UAAU,CAAC;QAC9C,eAAe,EAAE,IAAI;KACtB;IACD;QACE,EAAE,EAAE,kBAAkB;QACtB,KAAK,EAAE,uCAAuC;QAC9C,OAAO,EAAE,8BAA8B;QACvC,aAAa,EAAE,CAAC,kBAAkB,CAAC;QACnC,eAAe,EAAE,IAAI;KACtB;IACD;QACE,EAAE,EAAE,mBAAmB;QACvB,KAAK,EAAE,qCAAqC;QAC5C,OAAO,EAAE,4BAA4B;QACrC,aAAa,EAAE,CAAC,qBAAqB,CAAC;KACvC;CACF,CAAC;AAEF,MAAM,CAAC,KAAK,UAAU,YAAY;IAChC,MAAM,KAAK,GAA0B,EAAE,CAAC;IACxC,MAAM,IAAI,GAAG,WAAW,EAAE,CAAC;IAE3B,KAAK,MAAM,UAAU,IAAI,eAAe,EAAE,CAAC;QACzC,MAAM,MAAM,GAAG,MAAM,cAAc,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;QAC3E,MAAM,aAAa,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,GAAG,CAAC,MAAM,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QACjG,MAAM,gBAAgB,GAAG,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,OAAO,CAAC,QAAQ,KAAK,UAAU,CAAC,CAAC,MAAM,CAAC;QAErG,KAAK,CAAC,IAAI,CAAC;YACT,EAAE,EAAE,UAAU,CAAC,EAAE;YACjB,KAAK,EAAE,UAAU,CAAC,KAAK;YACvB,OAAO,EAAE,UAAU,CAAC,OAAO;YAC3B,KAAK,EAAE,MAAM,CAAC,KAAK;YACnB,QAAQ,EAAE,MAAM,CAAC,QAAQ,CAAC,MAAM;YAChC,gBAAgB;YAChB,aAAa;YACb,aAAa,EAAE,UAAU,CAAC,aAAa;YACvC,MAAM,EAAE,UAAU,CAAC,UAAU,EAAE,aAAa,EAAE,MAAM,CAAC,QAAQ,CAAC,MAAM,EAAE,gBAAgB,CAAC;SACxF,CAAC,CAAC;IACL,CAAC;IAED,OAAO;QACL,WAAW,EAAE
,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACrC,MAAM,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC;QAC1C,KAAK;KACN,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,uBAAuB,CAAC,MAAuB;IAC7D,MAAM,KAAK,GAAG;QACZ,4BAA4B;QAC5B,EAAE;QACF,aAAa,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,IAAI;QAChD,EAAE;QACF,8OAA8O;QAC9O,EAAE;QACF,4EAA4E;QAC5E,gDAAgD;KACjD,CAAC;IAEF,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;QAChC,KAAK,CAAC,IAAI,CAAC;YACT,KAAK,IAAI,CAAC,KAAK,EAAE;YACjB,KAAK,IAAI,CAAC,OAAO,IAAI;YACrB,GAAG,IAAI,CAAC,KAAK,EAAE;YACf,GAAG,IAAI,CAAC,QAAQ,EAAE;YAClB,GAAG,IAAI,CAAC,gBAAgB,EAAE;YAC1B,IAAI,CAAC,aAAa,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,KAAK,IAAI,IAAI,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM;YACnG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ;SAClC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;IACjB,CAAC;IAED,KAAK,CAAC,IAAI,CACR,EAAE,EACF,iBAAiB,EACjB,EAAE,EACF,SAAS,EACT,0BAA0B,EAC1B,wCAAwC,EACxC,KAAK,EACL,EAAE,CACH,CAAC;IAEF,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED,SAAS,UAAU,CACjB,UAAmC,EACnC,aAA4B,EAC5B,QAAgB,EAChB,gBAAwB;IAExB,IAAI,UAAU,CAAC,YAAY,IAAI,QAAQ,KAAK,CAAC,EAAE,CAAC;QAC9C,OAAO,KAAK,CAAC;IACf,CAAC;IAED,IAAI,UAAU,CAAC,eAAe,IAAI,gBAAgB,KAAK,CAAC,EAAE,CAAC;QACzD,OAAO,KAAK,CAAC;IACf,CAAC;IAED,OAAO,UAAU,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,aAAa,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC;AAChF,CAAC;AAED,SAAS,WAAW;IAClB,OAAO,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;AAC7E,CAAC"}
|
package/dist/src/cli.js
ADDED
|
@@ -0,0 +1,281 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { writeFile } from "node:fs/promises";
|
|
3
|
+
import { lintAgents, renderAgentsLintMarkdown } from "./agentsLint.js";
|
|
4
|
+
import { analyzeTargets } from "./analyze.js";
|
|
5
|
+
import { renderBenchmarkMarkdown, runBenchmark } from "./benchmark.js";
|
|
6
|
+
import { doctorRepo } from "./doctor.js";
|
|
7
|
+
import { compareAnalyses, evaluate } from "./eval.js";
|
|
8
|
+
import { analyzeGithubEventContext } from "./githubContext.js";
|
|
9
|
+
import { postPullRequestComment } from "./github.js";
|
|
10
|
+
import { initProject } from "./init.js";
|
|
11
|
+
import { renderAgentsRules, renderComparison, renderDoctorMarkdown, renderDoctorPrComment, renderMarkdown, renderPrComment, renderSarif, renderSkill } from "./report.js";
|
|
12
|
+
import { renderScorecardMarkdown, renderScorecardPrComment, runScorecard } from "./scorecard.js";
|
|
13
|
+
/**
 * CLI entry point: parses `process.argv`, dispatches on the sub-command and
 * writes the rendered result to stdout or to the `--output` file.
 *
 * Gate-style commands (`lint-agents`, `eval`, `benchmark`, `scorecard`,
 * `scorecard-comment`, `guard-github-event`, `compare`, `doctor`,
 * `doctor-comment`) report their verdict through `process.exitCode`
 * (0 = pass, 1 = fail). An unknown command prints the help text and sets
 * exit code 1.
 *
 * @returns {Promise<void>}
 * @throws {Error} When `guard-github-event` has no event path, when `compare`
 *   is missing one of its two traces, or when a `--threshold` value is
 *   rejected by `numberFlag`.
 */
async function main() {
  const parsed = parseArgs(process.argv.slice(2));

  if (parsed.flags.help || parsed.command === "help") {
    printHelp();
    return;
  }

  if (parsed.command === "analyze") {
    const result = await analyzeTargets(parsed.targets);
    const format = String(parsed.flags.format ?? "markdown");
    const output = renderAnalysis(result, format);
    await writeOutput(output, parsed.flags.output);
    return;
  }

  if (parsed.command === "suggest") {
    const result = await analyzeTargets(parsed.targets);
    const target = String(parsed.flags.target ?? "agents-md");
    // Any target other than "skill" falls back to AGENTS.md rules.
    const output = target === "skill" ? renderSkill(result) : renderAgentsRules(result);
    await writeOutput(output, parsed.flags.output);
    return;
  }

  if (parsed.command === "lint-agents") {
    const result = await lintAgents(parsed.targets[0] ?? process.cwd());
    const format = String(parsed.flags.format ?? "markdown");
    const output = format === "json" ? `${JSON.stringify(result, null, 2)}\n` : renderAgentsLintMarkdown(result);
    await writeOutput(output, parsed.flags.output);
    process.exitCode = result.status === "fail" ? 1 : 0;
    return;
  }

  if (parsed.command === "eval") {
    // Validate through numberFlag like every other command. The previous
    // `Number(flag ?? 75)` turned `--threshold abc` into NaN and a bare
    // `--threshold` (parsed as `true`) into 1, silently distorting the gate.
    const threshold = numberFlag(parsed.flags.threshold) ?? 75;
    const result = await analyzeTargets(parsed.targets);
    const evalResult = evaluate(result, threshold);
    const format = String(parsed.flags.format ?? "text");
    const output = format === "json" ? `${JSON.stringify(evalResult, null, 2)}\n` : `${evalResult.message}\n`;
    await writeOutput(output, parsed.flags.output);
    process.exitCode = evalResult.passed ? 0 : 1;
    return;
  }

  if (parsed.command === "benchmark") {
    const result = await runBenchmark();
    const format = String(parsed.flags.format ?? "markdown");
    const output = format === "json" ? `${JSON.stringify(result, null, 2)}\n` : renderBenchmarkMarkdown(result);
    await writeOutput(output, parsed.flags.output);
    process.exitCode = result.passed ? 0 : 1;
    return;
  }

  if (parsed.command === "scorecard") {
    const threshold = numberFlag(parsed.flags.threshold) ?? 85;
    const result = await runScorecard(parsed.targets[0] ?? process.cwd(), threshold);
    const format = String(parsed.flags.format ?? "markdown");
    const output = format === "json" ? `${JSON.stringify(result, null, 2)}\n` : renderScorecardMarkdown(result);
    await writeOutput(output, parsed.flags.output);
    process.exitCode = result.passed ? 0 : 1;
    return;
  }

  if (parsed.command === "scorecard-comment") {
    const threshold = numberFlag(parsed.flags.threshold) ?? 85;
    const result = await runScorecard(parsed.targets[0] ?? process.cwd(), threshold);
    const body = renderScorecardPrComment(result);
    const message = await postPullRequestComment({
      body,
      token: stringFlag(parsed.flags.token),
      repository: stringFlag(parsed.flags.repository),
      eventPath: stringFlag(parsed.flags.event),
      dryRun: Boolean(parsed.flags["dry-run"]),
      marker: "<!-- trace-to-skill-scorecard-report -->",
      reportName: "trace-to-skill scorecard report"
    });
    process.stdout.write(`${message}\n`);
    process.exitCode = result.passed ? 0 : 1;
    return;
  }

  if (parsed.command === "guard-github-event") {
    // Event path precedence: positional argument, then --event, then the environment.
    const eventPath = parsed.targets[0] ?? stringFlag(parsed.flags.event) ?? process.env.GITHUB_EVENT_PATH;
    if (!eventPath) {
      throw new Error("guard-github-event requires an event JSON path or GITHUB_EVENT_PATH.");
    }
    const threshold = numberFlag(parsed.flags.threshold) ?? 80;
    const result = await analyzeGithubEventContext(eventPath);
    const evalResult = evaluate(result, threshold);
    const format = String(parsed.flags.format ?? "markdown");
    // Only "json" is special-cased here; every other format renders markdown.
    const output = format === "json" ? `${JSON.stringify(result, null, 2)}\n` : renderAnalysis(result, "markdown");
    await writeOutput(output, parsed.flags.output);
    process.exitCode = evalResult.passed ? 0 : 1;
    return;
  }

  if (parsed.command === "comment") {
    const result = await analyzeTargets(parsed.targets);
    const body = renderPrComment(result);
    const message = await postPullRequestComment({
      body,
      token: stringFlag(parsed.flags.token),
      repository: stringFlag(parsed.flags.repository),
      eventPath: stringFlag(parsed.flags.event),
      dryRun: Boolean(parsed.flags["dry-run"])
    });
    process.stdout.write(`${message}\n`);
    return;
  }

  if (parsed.command === "compare") {
    // Named flags win over positional paths.
    const before = stringFlag(parsed.flags.before) ?? parsed.targets[0];
    const after = stringFlag(parsed.flags.after) ?? parsed.targets[1];
    if (!before || !after) {
      throw new Error("compare requires --before <trace> and --after <trace>, or two positional paths.");
    }
    const beforeResult = await analyzeTargets([before]);
    const afterResult = await analyzeTargets([after]);
    const comparison = compareAnalyses(beforeResult, afterResult);
    const format = String(parsed.flags.format ?? "markdown");
    const output = format === "json" ? `${JSON.stringify(comparison, null, 2)}\n` : renderComparison(comparison);
    await writeOutput(output, parsed.flags.output);
    process.exitCode = comparison.decision === "reject" ? 1 : 0;
    return;
  }

  if (parsed.command === "doctor") {
    const result = await doctorRepo(parsed.targets[0] ?? process.cwd());
    // No default here: an absent threshold means "do not gate on score".
    const threshold = numberFlag(parsed.flags.threshold);
    const format = String(parsed.flags.format ?? "markdown");
    const output = format === "json" ? `${JSON.stringify(result, null, 2)}\n` :
      format === "comment" ? renderDoctorPrComment(result, threshold) :
        renderDoctorMarkdown(result);
    await writeOutput(output, parsed.flags.output);
    process.exitCode = doctorPassed(result, threshold) ? 0 : 1;
    return;
  }

  if (parsed.command === "doctor-comment") {
    const result = await doctorRepo(parsed.targets[0] ?? process.cwd());
    const threshold = numberFlag(parsed.flags.threshold);
    const body = renderDoctorPrComment(result, threshold);
    const message = await postPullRequestComment({
      body,
      token: stringFlag(parsed.flags.token),
      repository: stringFlag(parsed.flags.repository),
      eventPath: stringFlag(parsed.flags.event),
      dryRun: Boolean(parsed.flags["dry-run"]),
      marker: "<!-- trace-to-skill-doctor-report -->",
      reportName: "trace-to-skill doctor report"
    });
    process.stdout.write(`${message}\n`);
    process.exitCode = doctorPassed(result, threshold) ? 0 : 1;
    return;
  }

  if (parsed.command === "init") {
    const result = await initProject({
      traces: stringFlag(parsed.flags.traces),
      threshold: stringFlag(parsed.flags.threshold),
      doctorThreshold: stringFlag(parsed.flags["doctor-threshold"]),
      comment: Boolean(parsed.flags.comment),
      sarif: Boolean(parsed.flags.sarif),
      force: Boolean(parsed.flags.force),
      dryRun: Boolean(parsed.flags["dry-run"])
    });
    process.stdout.write(`${result.message}\n`);
    result.written.forEach((file) => process.stdout.write(`write ${file}\n`));
    result.skipped.forEach((file) => process.stdout.write(`skip ${file}\n`));
    return;
  }

  // Unknown command: show usage and signal failure.
  printHelp();
  process.exitCode = 1;
}
|
|
173
|
+
/**
 * Narrows a parsed flag to a string.
 *
 * @param {unknown} value - Raw flag value (a string, `true`, or `undefined`).
 * @returns {string | undefined} The value when it is a string, otherwise `undefined`.
 */
function stringFlag(value) {
  if (typeof value !== "string") {
    return undefined;
  }
  return value;
}
|
|
176
|
+
/**
 * Parses a threshold flag into an integer between 1 and 100.
 *
 * @param {unknown} value - Raw flag value from the argument parser.
 * @returns {number | undefined} The parsed integer, or `undefined` when the
 *   flag was not supplied.
 * @throws {Error} When the value is not a string of one to three digits, or
 *   the number falls outside 1–100.
 */
function numberFlag(value) {
  if (value === undefined) {
    return undefined;
  }
  // Anything that is not a short run of digits becomes NaN, so a single
  // range check below rejects both malformed and out-of-range input.
  const digitsOnly = typeof value === "string" && /^[0-9]{1,3}$/.test(value);
  const threshold = digitsOnly ? Number(value) : Number.NaN;
  const inRange = Number.isInteger(threshold) && threshold >= 1 && threshold <= 100;
  if (!inRange) {
    throw new Error("--threshold must be an integer between 1 and 100");
  }
  return threshold;
}
|
|
189
|
+
/**
 * Decides whether a doctor result counts as a pass.
 *
 * A failed check or a critical finding always fails. Otherwise the result
 * passes when no threshold was given or when `result.score` reaches it.
 *
 * @param {{ checks: Array<{ status: string }>, findings: Array<{ severity: string }>, score: number }} result
 * @param {number | undefined} threshold - Minimum score, or `undefined` to skip the score gate.
 * @returns {boolean}
 */
function doctorPassed(result, threshold) {
  const blocked =
    result.checks.some(({ status }) => status === "fail") ||
    result.findings.some(({ severity }) => severity === "critical");
  if (blocked) {
    return false;
  }
  if (threshold === undefined) {
    return true;
  }
  return result.score >= threshold;
}
|
|
198
|
+
/**
 * Renders an analysis result in the requested output format.
 *
 * @param {unknown} result - Analysis result to render.
 * @param {string} format - `"json"`, `"sarif"`, or anything else for markdown.
 * @returns {string} Pretty-printed JSON with a trailing newline, or the
 *   output of `renderSarif` / `renderMarkdown`.
 */
function renderAnalysis(result, format) {
  switch (format) {
    case "json":
      return `${JSON.stringify(result, null, 2)}\n`;
    case "sarif":
      return renderSarif(result);
    default:
      // Unrecognized formats fall back to markdown.
      return renderMarkdown(result);
  }
}
|
|
207
|
+
/**
 * Splits raw CLI arguments into a command, positional targets and flags.
 *
 * The first argument is the command (default `"help"`). Of the rest:
 * - anything not starting with `--` is a positional target;
 * - `--key=value` stores `value`, split on the FIRST `=` only so values that
 *   themselves contain `=` (tokens, paths) are kept intact;
 * - `--key value` stores `value` when the next argument is non-empty and is
 *   not itself a flag;
 * - a bare `--key` stores `true`.
 *
 * @param {string[]} args - Arguments after the node binary and script path.
 * @returns {{ command: string, targets: string[], flags: Record<string, string | true> }}
 */
function parseArgs(args) {
  const [command = "help", ...rest] = args;
  const targets = [];
  const flags = {};
  for (let index = 0; index < rest.length; index += 1) {
    const value = rest[index];
    if (!value.startsWith("--")) {
      targets.push(value);
      continue;
    }
    const body = value.slice(2);
    // `split("=", 2)` would discard everything after a second "=", because
    // the limit truncates instead of keeping the remainder; cut by index.
    const separator = body.indexOf("=");
    if (separator !== -1) {
      flags[body.slice(0, separator)] = body.slice(separator + 1);
      continue;
    }
    const next = rest[index + 1];
    if (next && !next.startsWith("--")) {
      flags[body] = next;
      index += 1; // The following argument was consumed as this flag's value.
    } else {
      flags[body] = true;
    }
  }
  return { command, targets, flags };
}
|
|
233
|
+
/**
 * Sends rendered output to its destination.
 *
 * @param {string} output - Text to emit.
 * @param {unknown} outputPath - When a string, the file to write (UTF-8);
 *   for any other value the text goes to stdout.
 * @returns {Promise<void>}
 */
async function writeOutput(output, outputPath) {
  if (typeof outputPath !== "string") {
    process.stdout.write(output);
    return;
  }
  await writeFile(outputPath, output, "utf8");
}
|
|
240
|
+
/**
 * Writes the CLI usage summary and examples to stdout in a single write.
 *
 * @returns {void}
 */
function printHelp() {
  const intro = [
    "trace-to-skill",
    "",
    "Turn failed AI coding-agent runs into reusable rules, skills, and eval evidence.",
    ""
  ];
  const usage = [
    "Usage:",
    "trace-to-skill analyze <trace-file-or-dir> [--format markdown|json|sarif] [--output report.md]",
    "trace-to-skill suggest <trace-file-or-dir> [--target agents-md|skill] [--output AGENTS.generated.md]",
    "trace-to-skill lint-agents [repo-dir] [--format markdown|json] [--output report.md]",
    "trace-to-skill eval <trace-file-or-dir> [--threshold 75] [--format text|json]",
    "trace-to-skill benchmark [--format markdown|json] [--output docs/BENCHMARK.md]",
    "trace-to-skill scorecard [repo-dir] [--threshold 85] [--format markdown|json] [--output docs/SCORECARD.md]",
    "trace-to-skill scorecard-comment [repo-dir] [--threshold 85] [--dry-run] [--token $GITHUB_TOKEN]",
    "trace-to-skill guard-github-event [event.json] [--threshold 80] [--format markdown|json] [--output report.md]",
    "trace-to-skill comment <trace-file-or-dir> [--dry-run] [--token $GITHUB_TOKEN]",
    "trace-to-skill compare --before <old-run> --after <new-run> [--format markdown|json]",
    "trace-to-skill doctor [repo-dir] [--threshold 85] [--format markdown|json|comment] [--output report.md]",
    "trace-to-skill doctor-comment [repo-dir] [--threshold 85] [--dry-run] [--token $GITHUB_TOKEN]",
    "trace-to-skill init [--traces runs] [--threshold 80] [--doctor-threshold 85] [--comment] [--sarif] [--dry-run]",
    ""
  ];
  const examples = [
    "Examples:",
    "trace-to-skill analyze ./runs",
    "trace-to-skill suggest ./runs --target skill --output skills/verification-before-completion/SKILL.md",
    "trace-to-skill lint-agents .",
    "trace-to-skill eval ./runs --threshold 80",
    "trace-to-skill benchmark",
    "trace-to-skill scorecard .",
    "trace-to-skill scorecard-comment . --threshold 85",
    'trace-to-skill guard-github-event "$GITHUB_EVENT_PATH"',
    "trace-to-skill comment ./runs",
    "trace-to-skill compare --before ./runs/before --after ./runs/after",
    "trace-to-skill doctor . --threshold 85",
    "trace-to-skill doctor-comment . --threshold 85",
    "trace-to-skill init --comment --sarif"
  ];
  // Every line, including the last, is newline-terminated.
  const text = [...intro, ...usage, ...examples].map((line) => `${line}\n`).join("");
  process.stdout.write(text);
}
|
|
276
|
+
// Run the CLI. Any rejection from main() is reported on stderr with the
// program-name prefix, and the process exit code is set to 1.
main().catch((failure) => {
  let detail;
  if (failure instanceof Error) {
    detail = failure.message;
  } else {
    detail = String(failure);
  }
  process.stderr.write(`trace-to-skill: ${detail}\n`);
  process.exitCode = 1;
});
|
|
281
|
+
//# sourceMappingURL=cli.js.map
|