@kevinrabun/judges 3.113.0 → 3.115.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. package/README.md +9 -0
  2. package/agents/accessibility.judge.md +37 -0
  3. package/agents/agent-instructions.judge.md +37 -0
  4. package/agents/ai-code-safety.judge.md +48 -0
  5. package/agents/api-contract.judge.md +30 -0
  6. package/agents/api-design.judge.md +39 -0
  7. package/agents/authentication.judge.md +37 -0
  8. package/agents/backwards-compatibility.judge.md +37 -0
  9. package/agents/caching.judge.md +37 -0
  10. package/agents/ci-cd.judge.md +37 -0
  11. package/agents/cloud-readiness.judge.md +37 -0
  12. package/agents/code-structure.judge.md +48 -0
  13. package/agents/compliance.judge.md +40 -0
  14. package/agents/concurrency.judge.md +39 -0
  15. package/agents/configuration-management.judge.md +37 -0
  16. package/agents/cost-effectiveness.judge.md +40 -0
  17. package/agents/cybersecurity.judge.md +36 -0
  18. package/agents/data-security.judge.md +34 -0
  19. package/agents/data-sovereignty.judge.md +58 -0
  20. package/agents/database.judge.md +41 -0
  21. package/agents/dependency-health.judge.md +39 -0
  22. package/agents/documentation.judge.md +39 -0
  23. package/agents/error-handling.judge.md +37 -0
  24. package/agents/ethics-bias.judge.md +39 -0
  25. package/agents/false-positive-review.judge.md +73 -0
  26. package/agents/framework-safety.judge.md +40 -0
  27. package/agents/hallucination-detection.judge.md +33 -0
  28. package/agents/iac-security.judge.md +38 -0
  29. package/agents/intent-alignment.judge.md +31 -0
  30. package/agents/internationalization.judge.md +42 -0
  31. package/agents/logging-privacy.judge.md +37 -0
  32. package/agents/logic-review.judge.md +34 -0
  33. package/agents/maintainability.judge.md +37 -0
  34. package/agents/model-fingerprint.judge.md +31 -0
  35. package/agents/multi-turn-coherence.judge.md +29 -0
  36. package/agents/observability.judge.md +37 -0
  37. package/agents/over-engineering.judge.md +48 -0
  38. package/agents/performance.judge.md +44 -0
  39. package/agents/portability.judge.md +37 -0
  40. package/agents/rate-limiting.judge.md +37 -0
  41. package/agents/reliability.judge.md +39 -0
  42. package/agents/scalability.judge.md +41 -0
  43. package/agents/security.judge.md +31 -0
  44. package/agents/software-practices.judge.md +44 -0
  45. package/agents/testing.judge.md +39 -0
  46. package/agents/ux.judge.md +37 -0
  47. package/dist/api.d.ts +9 -1
  48. package/dist/api.js +9 -1
  49. package/dist/commands/fix.d.ts +10 -0
  50. package/dist/commands/fix.js +52 -0
  51. package/dist/commands/llm-benchmark.d.ts +13 -4
  52. package/dist/commands/llm-benchmark.js +39 -8
  53. package/dist/commands/review.d.ts +51 -1
  54. package/dist/commands/review.js +213 -7
  55. package/dist/evaluators/index.js +61 -35
  56. package/dist/github-app.d.ts +35 -0
  57. package/dist/github-app.js +125 -4
  58. package/dist/judges/index.d.ts +23 -61
  59. package/dist/judges/index.js +49 -63
  60. package/dist/patches/apply.d.ts +15 -0
  61. package/dist/patches/apply.js +37 -0
  62. package/dist/tools/prompts.d.ts +2 -2
  63. package/dist/tools/prompts.js +21 -10
  64. package/docs/skills.md +7 -0
  65. package/package.json +18 -3
  66. package/packages/judges-cli/README.md +24 -0
  67. package/packages/judges-cli/bin/judges.js +8 -0
  68. package/scripts/generate-agents-from-judges.ts +111 -0
  69. package/scripts/generate-skills-docs.ts +26 -0
  70. package/scripts/validate-agents.ts +104 -0
  71. package/server.json +2 -2
  72. package/skills/ai-code-review.skill.md +57 -0
  73. package/skills/release-gate.skill.md +27 -0
  74. package/skills/security-review.skill.md +32 -0
  75. package/src/agent-loader.ts +324 -0
  76. package/src/skill-loader.ts +199 -0
@@ -0,0 +1,324 @@
1
+ /**
2
+ * Agent Markdown Loader — reads `.judge.md` files (legacy `.agent.md` also
3
+ * accepted) and converts them into JudgeDefinition objects that register with
4
+ * the unified JudgeRegistry.
5
+ *
6
+ * This is the bridge between the file-based agent paradigm and the existing
7
+ * TypeScript judge system. Agent files use YAML frontmatter for metadata
8
+ * and markdown body for the system prompt (persona + evaluation criteria).
9
+ *
10
+ * ## File Format
11
+ *
12
+ * ```markdown
13
+ * ---
14
+ * id: cybersecurity
15
+ * name: Judge Cybersecurity
16
+ * domain: Cybersecurity & Threat Defense
17
+ * rulePrefix: CYBER
18
+ * description: Evaluates code for vulnerability...
19
+ * tableDescription: "Injection attacks, XSS, CSRF, auth flaws"
20
+ * promptDescription: Deep cybersecurity review
21
+ * script: ../src/evaluators/cybersecurity.ts # optional
22
+ * priority: 10 # optional, default 10
23
+ * ---
24
+ *
25
+ * You are Judge Cybersecurity — a principal application security engineer...
26
+ *
27
+ * ## Evaluation Criteria
28
+ * ...
29
+ * ```
30
+ *
31
+ * - `script` is a relative path to the evaluator module (must export a
32
+ * function matching `(code: string, language: string, context?) => Finding[]`).
33
+ * If omitted, the judge is LLM-only (no deterministic layer).
34
+ * - `priority` controls ordering. Higher = later. 999 is reserved for
35
+ * false-positive-review (always last). Default is 10.
36
+ */
37
+
38
+ import { readFileSync, readdirSync, existsSync } from "node:fs";
39
+ import { join, resolve, dirname } from "node:path";
40
+ import { createRequire } from "node:module";
41
+ import type { JudgeDefinition, Finding, AnalyzeContext } from "./types.js";
42
+
43
+ // ─── Frontmatter Types ──────────────────────────────────────────────────────
44
+
45
/**
 * Typed view of the YAML frontmatter at the top of a `.judge.md` file
 * (the legacy `.agent.md` extension is accepted as well).
 */
export interface AgentFrontmatter {
  /** Judge identifier, e.g. `cybersecurity`. */
  id: string;
  /** Display name, e.g. `Judge Cybersecurity`. */
  name: string;
  /** Domain the judge covers, e.g. `Cybersecurity & Threat Defense`. */
  domain: string;
  /** Prefix used for this judge's rules, e.g. `CYBER`. */
  rulePrefix: string;
  /** Description of what the judge evaluates. */
  description: string;
  /** Short description (presumably for tabular listings — confirm with callers). */
  tableDescription: string;
  /** Short description (presumably for prompt listings — confirm with callers). */
  promptDescription: string;
  /** Optional path to the evaluator module, relative to the agent file. */
  script?: string;
  /** Optional ordering key; lower values sort earlier. Validation defaults it to 10. */
  priority?: number;
}
57
+
58
/**
 * One agent file after parsing: its validated metadata plus the markdown
 * text that follows the frontmatter.
 */
export interface ParsedAgent {
  /** Validated frontmatter fields. */
  frontmatter: AgentFrontmatter;
  /** Markdown below the frontmatter; used as the judge's systemPrompt. */
  body: string;
  /** Absolute path of the `.judge.md` (or legacy `.agent.md`) file this came from. */
  sourcePath: string;
}
66
+
67
+ // ─── Frontmatter Parser ─────────────────────────────────────────────────────
68
+
69
/** Drop a trailing YAML comment (`value # note`) from an unquoted scalar. */
function stripInlineComment(text: string): string {
  // A plain scalar that begins with `#` is entirely a comment, i.e. an empty value.
  if (text.startsWith("#")) return "";
  // In YAML a `#` only starts a comment when preceded by whitespace, so
  // values such as `C#` are left untouched.
  return text.replace(/\s+#.*$/, "");
}

/** Unwrap a quoted scalar, tolerating a trailing `# comment` after the closing quote. */
function unquoteScalar(text: string): string {
  const quote = text.charAt(0);
  if (text.endsWith(quote)) {
    return text.slice(1, -1);
  }
  const close = text.indexOf(quote, 1);
  if (close !== -1 && /^\s+#/.test(text.slice(close + 1))) {
    return text.slice(1, close);
  }
  // Unbalanced quoting: keep the text verbatim rather than guessing.
  return text;
}

/** Fold the lines of a `>` scalar: adjacent lines join with a space, blank lines become newlines. */
function foldScalarLines(parts: string[]): string {
  const chunks: string[] = [];
  for (const part of parts) {
    const last = chunks.length - 1;
    if (part === "") {
      chunks.push("\n");
    } else if (last >= 0 && chunks[last] !== "\n") {
      chunks[last] += " " + part;
    } else {
      chunks.push(part);
    }
  }
  return chunks.join("").trim();
}

/**
 * Parse the YAML frontmatter block at the top of an agent file.
 *
 * Only the YAML subset used by agent files is understood: simple
 * `key: value` pairs, single- or double-quoted strings, trailing
 * `# comments`, and folded scalars (`>`, `>-`, `>+`). Arrays, nested
 * objects and anchors are not supported; lines that are not a top-level
 * `key: value` pair are ignored.
 *
 * @param raw - Full text of the file.
 * @returns `meta` with every parsed key as a string, and `body` with the
 *   trimmed text after the closing `---`. When no frontmatter block is
 *   found, `meta` is empty and `body` is `raw` unchanged.
 */
export function parseFrontmatter(raw: string): { meta: Record<string, string>; body: string } {
  // A leading byte-order mark would otherwise defeat the `^---` anchor.
  const text = raw.charCodeAt(0) === 0xfeff ? raw.slice(1) : raw;
  // The closing delimiter may be the last line of the file (no trailing newline).
  const match = text.match(/^---\r?\n([\s\S]*?)\r?\n---[ \t]*(?:\r?\n([\s\S]*))?$/);
  if (!match) {
    return { meta: {}, body: raw };
  }

  const body = (match[2] ?? "").trim();
  const meta: Record<string, string> = {};
  const lines = match[1].split(/\r?\n/);
  let i = 0;

  while (i < lines.length) {
    const line = lines[i];
    i++;

    // Skip empty lines and full-line comments.
    const trimmed = line.trim();
    if (trimmed === "" || trimmed.startsWith("#")) {
      continue;
    }

    const kvMatch = line.match(/^([a-zA-Z_][a-zA-Z0-9_]*)\s*:\s*(.*)/);
    if (!kvMatch) {
      continue;
    }

    const key = kvMatch[1];
    const rawValue = kvMatch[2].trim();
    const isQuoted = rawValue.startsWith('"') || rawValue.startsWith("'");

    if (isQuoted) {
      meta[key] = unquoteScalar(rawValue);
      continue;
    }

    const plain = stripInlineComment(rawValue);
    if (/^>[+-]?$/.test(plain)) {
      // Folded scalar: gather the indented (or blank) continuation lines.
      const parts: string[] = [];
      while (i < lines.length && (lines[i].startsWith(" ") || lines[i].trim() === "")) {
        parts.push(lines[i].trim() === "" ? "" : lines[i].trimStart());
        i++;
      }
      meta[key] = foldScalarLines(parts);
    } else {
      meta[key] = plain;
    }
  }

  return { meta, body };
}
144
+
145
+ // ─── Validation ──────────────────────────────────────────────────────────────
146
+
147
/** Frontmatter keys that every agent file must provide with a non-empty value. */
const REQUIRED_FIELDS: (keyof AgentFrontmatter)[] = [
  "id",
  "name",
  "domain",
  "rulePrefix",
  "description",
  "tableDescription",
  "promptDescription",
];

/**
 * Validate and coerce parsed frontmatter into a typed AgentFrontmatter.
 *
 * @param meta - Raw string key/value pairs from the frontmatter block.
 * @param sourcePath - Path of the agent file; used only in error messages.
 * @returns The typed frontmatter. `script` is `undefined` when absent or
 *   empty; `priority` defaults to 10 when absent or empty.
 * @throws Error when a required field is missing or empty, or when
 *   `priority` is present but does not start with an integer.
 */
export function validateFrontmatter(meta: Record<string, string>, sourcePath: string): AgentFrontmatter {
  for (const field of REQUIRED_FIELDS) {
    if (!meta[field]) {
      throw new Error(`Agent file ${sourcePath} is missing required field: "${field}"`);
    }
  }

  let priority = 10;
  if (meta.priority) {
    priority = Number.parseInt(meta.priority, 10);
    // A NaN priority would make the priority sort comparator return NaN and
    // leave the load order unspecified, so reject it here with a clear message.
    if (Number.isNaN(priority)) {
      throw new Error(`Agent file ${sourcePath} has a non-numeric "priority": "${meta.priority}"`);
    }
  }

  return {
    id: meta.id,
    name: meta.name,
    domain: meta.domain,
    rulePrefix: meta.rulePrefix,
    description: meta.description,
    tableDescription: meta.tableDescription,
    promptDescription: meta.promptDescription,
    script: meta.script || undefined,
    priority,
  };
}
180
+
181
+ // ─── Agent File Parsing ──────────────────────────────────────────────────────
182
+
183
/**
 * Read one agent markdown file (`.judge.md`, or legacy `.agent.md`) from
 * disk and split it into validated frontmatter and markdown body.
 *
 * @param filePath - Path to the file; resolved to an absolute path first.
 * @returns The parsed agent, with `sourcePath` set to the absolute path.
 * @throws Whatever `readFileSync` throws for an unreadable file, or the
 *   validation error raised by `validateFrontmatter`.
 */
export function parseAgentFile(filePath: string): ParsedAgent {
  const sourcePath = resolve(filePath);
  const parsed = parseFrontmatter(readFileSync(sourcePath, "utf-8"));

  return {
    frontmatter: validateFrontmatter(parsed.meta, sourcePath),
    body: parsed.body,
    sourcePath,
  };
}
198
+
199
+ // ─── Evaluator Resolution ────────────────────────────────────────────────────
200
+
201
/** Signature an evaluator module's analyze function is expected to have. */
type AnalyzeFn = (code: string, language: string, context?: AnalyzeContext) => Finding[];

/**
 * Swap the LAST `src` directory segment of a path for `dist`.
 *
 * Only the last segment is rewritten so that an unrelated ancestor
 * directory that happens to be called `src` (e.g. `/home/me/src/project/...`)
 * is left alone. Both `/` and `\` separators are recognised.
 */
function toDistPath(filePath: string): string {
  const idx = Math.max(filePath.lastIndexOf("/src/"), filePath.lastIndexOf("\\src\\"));
  if (idx < 0) return filePath;
  const separator = filePath.charAt(idx);
  return filePath.slice(0, idx) + separator + "dist" + separator + filePath.slice(idx + "/src/".length);
}

/**
 * Resolve an agent's `script` frontmatter entry to an analyze function.
 *
 * Loading is synchronous (via `createRequire`) and tries, in order: the
 * script path as written, the same path with `.ts` replaced by `.js`, the
 * `dist` counterpart of that path, and `dist/evaluators/<id>.js` under the
 * current working directory. In each module that loads, the export named
 * `analyze<PascalCaseId>` is preferred; otherwise the first exported
 * function whose name starts with `analyze` is used.
 *
 * @param agent - Parsed agent whose `frontmatter.script` is resolved
 *   relative to the directory of `agent.sourcePath`.
 * @returns The analyze function, or `undefined` when the agent declares no
 *   script or no candidate yields a suitable export. Load failures are
 *   deliberately swallowed so the next candidate can be tried.
 */
export function resolveEvaluator(agent: ParsedAgent): AnalyzeFn | undefined {
  const { script, id } = agent.frontmatter;
  if (!script) return undefined;

  const scriptPath = resolve(dirname(agent.sourcePath), script);
  // A Set drops duplicates (e.g. when the script is already a `.js` file)
  // so the same module is not loaded twice.
  const candidatePaths = [
    ...new Set<string>([
      scriptPath,
      scriptPath.replace(/\.ts$/, ".js"),
      toDistPath(scriptPath).replace(/\.ts$/, ".js"),
      resolve(process.cwd(), "dist", "evaluators", `${id}.js`),
    ]),
  ];

  // The expected export name depends only on the id, so compute it once.
  const pascalId = id
    .split("-")
    .map((s) => s.charAt(0).toUpperCase() + s.slice(1))
    .join("");
  const fnName = `analyze${pascalId}`;

  const req = createRequire(import.meta.url);
  for (const candidate of candidatePaths) {
    try {
      const mod = req(candidate) as Record<string, unknown>;
      const maybeFn = mod?.[fnName];
      if (typeof maybeFn === "function") return maybeFn as AnalyzeFn;
      for (const key of Object.keys(mod || {})) {
        const candidateFn = mod[key];
        if (typeof candidateFn === "function" && key.startsWith("analyze")) return candidateFn as AnalyzeFn;
      }
    } catch {
      // Best effort: this candidate could not be loaded; try the next one.
    }
  }
  return undefined;
}
245
+
246
+ // ─── Conversion to JudgeDefinition ───────────────────────────────────────────
247
+
248
/**
 * Build a JudgeDefinition from a parsed agent file.
 *
 * The metadata fields are copied from the frontmatter and the markdown
 * body is used verbatim as `systemPrompt` — nothing is prepended or
 * appended here. The frontmatter's `script` and `priority` are not copied
 * onto the definition.
 *
 * @param agent - The parsed agent file.
 * @param analyze - Optional evaluator function; the `analyze` property is
 *   only present on the result when this is provided.
 * @returns The judge definition.
 */
export function agentToJudgeDefinition(
  agent: ParsedAgent,
  analyze?: (code: string, language: string, context?: AnalyzeContext) => Finding[],
): JudgeDefinition {
  const { id, name, domain, description, rulePrefix, tableDescription, promptDescription } = agent.frontmatter;

  const definition = {
    id,
    name,
    domain,
    description,
    rulePrefix,
    tableDescription,
    promptDescription,
    systemPrompt: agent.body,
  };

  if (!analyze) {
    return definition;
  }
  return { ...definition, analyze };
}
276
+
277
+ // ─── Directory Loading ───────────────────────────────────────────────────────
278
+
279
/**
 * Parse every `.judge.md` / `.agent.md` file (extension matched
 * case-insensitively) found directly inside a directory.
 *
 * @param dirPath - Directory to scan; resolved to an absolute path.
 * @returns The parsed agents ordered by ascending priority (a missing
 *   priority counts as 10), or an empty array when the directory does not
 *   exist.
 */
export function loadAgentDirectory(dirPath: string): ParsedAgent[] {
  const directory = resolve(dirPath);
  if (!existsSync(directory)) {
    return [];
  }

  const agents: ParsedAgent[] = [];
  for (const entry of readdirSync(directory)) {
    if (/\.(agent|judge)\.md$/i.test(entry)) {
      agents.push(parseAgentFile(join(directory, entry)));
    }
  }

  agents.sort((left, right) => (left.frontmatter.priority ?? 10) - (right.frontmatter.priority ?? 10));
  return agents;
}
294
+
295
/**
 * Load every agent file in a directory and register the resulting judges.
 *
 * Agents are handled in priority order. An agent whose id is already known
 * to the registry — a built-in, or an agent registered earlier in this
 * same call — is skipped and not counted.
 *
 * @param dirPath - Directory containing the agent files.
 * @param registry - Target registry; only `register` and `getJudge` are used.
 * @returns How many judges were newly registered.
 */
export function loadAndRegisterAgents(
  dirPath: string,
  registry: {
    register: (judge: JudgeDefinition) => void;
    getJudge: (id: string) => JudgeDefinition | undefined;
  },
): number {
  let registered = 0;

  for (const agent of loadAgentDirectory(dirPath)) {
    // The lookup must happen per agent, after earlier registrations, so that
    // duplicate ids inside the directory are skipped too.
    const alreadyKnown = Boolean(registry.getJudge(agent.frontmatter.id));
    if (!alreadyKnown) {
      registry.register(agentToJudgeDefinition(agent, resolveEvaluator(agent)));
      registered += 1;
    }
  }

  return registered;
}
@@ -0,0 +1,199 @@
1
+ /**
2
+ * Skill Loader — reads `.skill.md` files and converts them into skill
3
+ * definitions that orchestrate sets of judges/agents. A skill represents a
4
+ * reusable review workflow (e.g., AI code review, security gate, release gate).
5
+ */
6
+ import { readFileSync, readdirSync, existsSync } from "node:fs";
7
+ import { join, resolve, dirname } from "node:path";
8
+ import { fileURLToPath } from "node:url";
9
+ import type { JudgeDefinition, TribunalVerdict } from "./types.js";
10
+ import { evaluateWithTribunal } from "./evaluators/index.js";
11
+ import { defaultRegistry } from "./judge-registry.js";
12
+ import { loadAgentJudges } from "./judges/index.js";
13
+
14
/** Typed view of the YAML frontmatter at the top of a `.skill.md` file. */
export interface SkillFrontmatter {
  /** Skill identifier; `runSkill` looks skills up by this value. */
  id: string;
  /** Display name of the skill. */
  name: string;
  /** Description of the skill. */
  description: string;
  /** Ids of the judges this skill runs. */
  agents: string[];
  /** Optional tags. */
  tags?: string[];
  /** Optional ordering key; lower values sort earlier. Validation defaults it to 10. */
  priority?: number;
}
22
+
23
/** One skill file after parsing: validated metadata plus its markdown body. */
export interface ParsedSkill {
  /** Validated frontmatter fields. */
  frontmatter: SkillFrontmatter;
  /** Markdown below the frontmatter (the orchestrator instructions). */
  body: string;
  /** Absolute path of the `.skill.md` file this came from. */
  sourcePath: string;
}
28
+
29
/**
 * Untyped frontmatter as produced by the parser: each value is either a
 * string or an array of strings, and is narrowed during validation.
 */
type SkillMeta = Record<string, unknown>;
30
+
31
/** Remove one pair of matching surrounding quotes (single or double), if present. */
function stripSkillQuotes(text: string): string {
  const doubleQuoted = text.startsWith('"') && text.endsWith('"');
  const singleQuoted = text.startsWith("'") && text.endsWith("'");
  return doubleQuoted || singleQuoted ? text.slice(1, -1) : text;
}

/** Keys whose plain `a, b, c` value is a list rather than free text. */
const SKILL_LIST_KEYS: ReadonlySet<string> = new Set(["agents", "tags"]);

/**
 * Parse the YAML frontmatter block at the top of a skill file.
 *
 * Supported subset: `key: value` pairs, quoted strings, inline arrays
 * (`[a, b]`), multi-line lists of `- item` or indented lines following a
 * key whose value is empty or `|`, and — for `agents` and `tags` only —
 * plain comma-separated lists. Other values are kept as strings even when
 * they contain commas, so free text such as a description is not split.
 *
 * @param raw - Full text of the file.
 * @returns `meta` with string or string-array values, and `body` with the
 *   trimmed text after the closing `---`. When no frontmatter block is
 *   found, `meta` is empty and `body` is `raw` unchanged.
 */
export function parseSkillFrontmatter(raw: string): { meta: SkillMeta; body: string } {
  // The closing delimiter may be the last line of the file (no trailing newline).
  const match = raw.match(/^---\r?\n([\s\S]*?)\r?\n---[ \t]*(?:\r?\n([\s\S]*))?$/);
  if (!match) {
    return { meta: {}, body: raw };
  }
  const body = (match[2] ?? "").trim();
  const meta: SkillMeta = {};
  const lines = match[1].split(/\r?\n/);
  let i = 0;

  while (i < lines.length) {
    const line = lines[i];
    // Consume the current line exactly once, up front. The list branch below
    // then only advances past lines it actually used, so the line following
    // an empty-valued key is never skipped.
    i++;

    if (!line.trim() || line.trim().startsWith("#")) {
      continue;
    }
    const kv = line.match(/^([a-zA-Z_][a-zA-Z0-9_-]*)\s*:\s*(.*)$/);
    if (!kv) {
      continue;
    }
    const key = kv[1];
    const value = kv[2].trim();

    // Multi-line list: the key has no inline value (or a `|` marker).
    if (!value || value === "|") {
      const items: string[] = [];
      while (i < lines.length) {
        const next = lines[i];
        if (!next.trim()) {
          i++;
          continue;
        }
        if (/^\s*-\s+/.test(next)) {
          items.push(stripSkillQuotes(next.replace(/^\s*-\s+/, "").trim()));
          i++;
          continue;
        }
        if (/^\s{2,}\S/.test(next)) {
          items.push(next.trim());
          i++;
          continue;
        }
        break; // end of list; `next` belongs to the outer loop
      }
      meta[key] = items.length > 0 ? items : value;
      continue;
    }

    // Inline array: [a, "b", c]
    if (value.startsWith("[") && value.endsWith("]")) {
      meta[key] = value
        .slice(1, -1)
        .split(",")
        .map((item) => stripSkillQuotes(item.trim()))
        .filter(Boolean);
      continue;
    }

    const scalar = stripSkillQuotes(value);
    if (SKILL_LIST_KEYS.has(key) && scalar.includes(",")) {
      meta[key] = scalar
        .split(",")
        .map((item) => item.trim())
        .filter(Boolean);
    } else {
      meta[key] = scalar;
    }
  }
  return { meta, body };
}
104
+
105
/** Frontmatter keys that every skill file must provide with a non-empty value. */
const REQUIRED_FIELDS: (keyof SkillFrontmatter)[] = ["id", "name", "description", "agents"];

/** Render a frontmatter value as text; array values are re-joined with ", ". */
function skillMetaToText(value: unknown): string {
  return Array.isArray(value) ? value.map((item) => String(item)).join(", ") : String(value);
}

/** Render a frontmatter value as a list of trimmed, non-empty strings. */
function skillMetaToList(value: unknown): string[] {
  const items = Array.isArray(value) ? value.map((item) => String(item)) : String(value ?? "").split(",");
  return items.map((item) => item.trim()).filter(Boolean);
}

/**
 * Validate and coerce parsed frontmatter into a typed SkillFrontmatter.
 *
 * @param meta - Raw frontmatter values (strings or string arrays).
 * @param sourcePath - Path of the skill file; used only in error messages.
 * @returns The typed frontmatter. `agents` and `tags` accept either an
 *   array or a comma-separated string; `tags` is `undefined` when absent;
 *   `priority` defaults to 10 when absent or empty.
 * @throws Error when a required field is missing or empty, or when
 *   `priority` is present but does not start with a number.
 */
export function validateSkillFrontmatter(meta: SkillMeta, sourcePath: string): SkillFrontmatter {
  for (const field of REQUIRED_FIELDS) {
    const value = meta[field];
    if (!value || (Array.isArray(value) && value.length === 0)) {
      throw new Error(`Skill file ${sourcePath} is missing required field: "${field}"`);
    }
  }

  let priority = 10;
  if (meta.priority) {
    // parseFloat tolerates trailing text such as an inline `# comment`.
    priority = Number.parseFloat(String(meta.priority));
    // A NaN priority would make the priority sort comparator return NaN.
    if (Number.isNaN(priority)) {
      throw new Error(`Skill file ${sourcePath} has a non-numeric "priority": "${String(meta.priority)}"`);
    }
  }

  return {
    id: skillMetaToText(meta.id),
    name: skillMetaToText(meta.name),
    // Array values are re-joined with ", " so comma-split text stays readable
    // instead of collapsing to "a,b,c".
    description: skillMetaToText(meta.description),
    agents: skillMetaToList(meta.agents),
    tags: meta.tags ? skillMetaToList(meta.tags) : undefined,
    priority,
  };
}
132
+
133
/**
 * Read one `.skill.md` file from disk and split it into validated
 * frontmatter and markdown body.
 *
 * @param filePath - Path to the file; resolved to an absolute path first.
 * @returns The parsed skill, with `sourcePath` set to the absolute path.
 * @throws Whatever `readFileSync` throws for an unreadable file, or the
 *   validation error raised by `validateSkillFrontmatter`.
 */
export function parseSkillFile(filePath: string): ParsedSkill {
  const sourcePath = resolve(filePath);
  const parsed = parseSkillFrontmatter(readFileSync(sourcePath, "utf-8"));
  return {
    frontmatter: validateSkillFrontmatter(parsed.meta, sourcePath),
    body: parsed.body,
    sourcePath,
  };
}
140
+
141
/**
 * Parse every file whose name ends in `.skill.md` directly inside a directory.
 *
 * @param dirPath - Directory to scan; resolved to an absolute path.
 * @returns The parsed skills ordered by ascending priority (a missing
 *   priority counts as 10), or an empty array when the directory does not
 *   exist.
 */
export function loadSkillDirectory(dirPath: string): ParsedSkill[] {
  const directory = resolve(dirPath);
  if (!existsSync(directory)) {
    return [];
  }

  const skills: ParsedSkill[] = [];
  for (const entry of readdirSync(directory)) {
    if (entry.endsWith(".skill.md")) {
      skills.push(parseSkillFile(join(directory, entry)));
    }
  }

  skills.sort((left, right) => (left.frontmatter.priority ?? 10) - (right.frontmatter.priority ?? 10));
  return skills;
}
149
+
150
/**
 * Summarise the skills in a directory for display.
 *
 * @param dirPath - Directory containing `.skill.md` files.
 * @returns One entry per skill, in the order produced by
 *   `loadSkillDirectory`, carrying `id`, `name`, `description`, `tags`
 *   and `agents`.
 */
export function listSkills(
  dirPath: string,
): Array<Pick<SkillFrontmatter, "id" | "name" | "description" | "tags" | "agents">> {
  return loadSkillDirectory(dirPath).map(({ frontmatter }) => {
    const { id, name, description, tags, agents } = frontmatter;
    return { id, name, description, tags, agents };
  });
}
162
+
163
/**
 * Run a skill by id and return the tribunal verdict.
 *
 * Steps: locate the skill file, call `loadAgentJudges()`, check that every
 * judge the skill names exists in the default registry, then evaluate with
 * every other registered judge disabled.
 *
 * @param skillId - `id` from the skill's frontmatter.
 * @param code - Source code to evaluate.
 * @param language - Language of `code`.
 * @param opts - `skillsDir` overrides the default `../skills` directory
 *   (relative to this module). NOTE(review): `context` is accepted but not
 *   read anywhere in this function — confirm whether it should be forwarded.
 * @returns The verdict from `evaluateWithTribunal`.
 * @throws Error when the skill id is unknown or a referenced judge is not
 *   registered.
 */
export async function runSkill(
  skillId: string,
  code: string,
  language: string,
  opts?: { skillsDir?: string; context?: unknown },
): Promise<TribunalVerdict> {
  const skillsDir = opts?.skillsDir ?? resolve(dirname(fileURLToPath(import.meta.url)), "..", "skills");
  const skill = loadSkillDirectory(skillsDir).find((candidate) => candidate.frontmatter.id === skillId);
  if (!skill) {
    throw new Error(`Skill not found: ${skillId}`);
  }

  // Make sure agent-defined judges are in the registry before looking them up.
  loadAgentJudges();

  const { id, agents } = skill.frontmatter;

  // The collected list is not consumed below; the loop's purpose is to fail
  // fast on a skill that references an unknown judge.
  const judges: JudgeDefinition[] = [];
  for (const agentId of agents) {
    const judge = defaultRegistry.getJudge(agentId);
    if (!judge) {
      throw new Error(`Judge referenced by skill not found in registry: ${agentId}`);
    }
    judges.push(judge);
  }

  // Everything the skill does not name is disabled for this run.
  const wanted = new Set(agents);
  const disabledJudges = defaultRegistry
    .getJudges()
    .map((registered) => registered.id)
    .filter((judgeId) => !wanted.has(judgeId));

  return evaluateWithTribunal(code, language, `skill:${id}`, {
    config: {
      disabledJudges,
    },
  });
}