@kevinrabun/judges 3.113.0 → 3.115.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. package/README.md +9 -0
  2. package/agents/accessibility.judge.md +37 -0
  3. package/agents/agent-instructions.judge.md +37 -0
  4. package/agents/ai-code-safety.judge.md +48 -0
  5. package/agents/api-contract.judge.md +30 -0
  6. package/agents/api-design.judge.md +39 -0
  7. package/agents/authentication.judge.md +37 -0
  8. package/agents/backwards-compatibility.judge.md +37 -0
  9. package/agents/caching.judge.md +37 -0
  10. package/agents/ci-cd.judge.md +37 -0
  11. package/agents/cloud-readiness.judge.md +37 -0
  12. package/agents/code-structure.judge.md +48 -0
  13. package/agents/compliance.judge.md +40 -0
  14. package/agents/concurrency.judge.md +39 -0
  15. package/agents/configuration-management.judge.md +37 -0
  16. package/agents/cost-effectiveness.judge.md +40 -0
  17. package/agents/cybersecurity.judge.md +36 -0
  18. package/agents/data-security.judge.md +34 -0
  19. package/agents/data-sovereignty.judge.md +58 -0
  20. package/agents/database.judge.md +41 -0
  21. package/agents/dependency-health.judge.md +39 -0
  22. package/agents/documentation.judge.md +39 -0
  23. package/agents/error-handling.judge.md +37 -0
  24. package/agents/ethics-bias.judge.md +39 -0
  25. package/agents/false-positive-review.judge.md +73 -0
  26. package/agents/framework-safety.judge.md +40 -0
  27. package/agents/hallucination-detection.judge.md +33 -0
  28. package/agents/iac-security.judge.md +38 -0
  29. package/agents/intent-alignment.judge.md +31 -0
  30. package/agents/internationalization.judge.md +42 -0
  31. package/agents/logging-privacy.judge.md +37 -0
  32. package/agents/logic-review.judge.md +34 -0
  33. package/agents/maintainability.judge.md +37 -0
  34. package/agents/model-fingerprint.judge.md +31 -0
  35. package/agents/multi-turn-coherence.judge.md +29 -0
  36. package/agents/observability.judge.md +37 -0
  37. package/agents/over-engineering.judge.md +48 -0
  38. package/agents/performance.judge.md +44 -0
  39. package/agents/portability.judge.md +37 -0
  40. package/agents/rate-limiting.judge.md +37 -0
  41. package/agents/reliability.judge.md +39 -0
  42. package/agents/scalability.judge.md +41 -0
  43. package/agents/security.judge.md +31 -0
  44. package/agents/software-practices.judge.md +44 -0
  45. package/agents/testing.judge.md +39 -0
  46. package/agents/ux.judge.md +37 -0
  47. package/dist/api.d.ts +9 -1
  48. package/dist/api.js +9 -1
  49. package/dist/commands/fix.d.ts +10 -0
  50. package/dist/commands/fix.js +52 -0
  51. package/dist/commands/llm-benchmark.d.ts +13 -4
  52. package/dist/commands/llm-benchmark.js +39 -8
  53. package/dist/commands/review.d.ts +51 -1
  54. package/dist/commands/review.js +213 -7
  55. package/dist/evaluators/index.js +61 -35
  56. package/dist/github-app.d.ts +35 -0
  57. package/dist/github-app.js +125 -4
  58. package/dist/judges/index.d.ts +23 -61
  59. package/dist/judges/index.js +49 -63
  60. package/dist/patches/apply.d.ts +15 -0
  61. package/dist/patches/apply.js +37 -0
  62. package/dist/tools/prompts.d.ts +2 -2
  63. package/dist/tools/prompts.js +21 -10
  64. package/docs/skills.md +7 -0
  65. package/package.json +18 -3
  66. package/packages/judges-cli/README.md +24 -0
  67. package/packages/judges-cli/bin/judges.js +8 -0
  68. package/scripts/generate-agents-from-judges.ts +111 -0
  69. package/scripts/generate-skills-docs.ts +26 -0
  70. package/scripts/validate-agents.ts +104 -0
  71. package/server.json +2 -2
  72. package/skills/ai-code-review.skill.md +57 -0
  73. package/skills/release-gate.skill.md +27 -0
  74. package/skills/security-review.skill.md +32 -0
  75. package/src/agent-loader.ts +324 -0
  76. package/src/skill-loader.ts +199 -0
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env node
2
+
3
+ import { runCli } from "../dist/cli.js";
4
+
5
// Start the CLI with the raw process arguments. A rejected promise is reported
// on stderr and converted into a non-zero exit status.
const reportCliFailure = (error) => {
  console.error("CLI error:", error);
  process.exit(1);
};

runCli(process.argv).catch(reportCliFailure);
@@ -0,0 +1,111 @@
1
+ #!/usr/bin/env tsx
2
+ /**
3
+ * Generate `.judge.md` files for all existing judges registered in the default registry.
4
+ * (Legacy `.agent.md` is still supported for reading.)
5
+ *
6
+ * Usage:
7
+ * npx tsx scripts/generate-agents-from-judges.ts [--force]
8
+ */
9
+ import { writeFileSync, mkdirSync, existsSync } from "node:fs";
10
+ import { join, dirname, relative } from "node:path";
11
+ import { fileURLToPath } from "node:url";
12
+
13
+ import { defaultRegistry } from "../src/judge-registry.js";
14
+ import { loadJudges } from "../src/judges/index.js";
15
+ import type { JudgeDefinition } from "../src/types.js";
16
+
17
+ const __filename = fileURLToPath(import.meta.url);
18
+ const __dirname = dirname(__filename);
19
+
20
+ const FORCE = process.argv.includes("--force") || process.argv.includes("-f");
21
+ const AGENTS_DIR = join(__dirname, "..", "agents");
22
+
23
/**
 * Create directory `p`, including any missing parent directories.
 *
 * `mkdirSync` with `recursive: true` succeeds silently when the directory
 * already exists, so no separate existence check is made; that also removes
 * the gap between checking and creating. If `p` exists but is not a
 * directory, the underlying error is thrown instead of being ignored, so the
 * problem surfaces here rather than at a later write.
 *
 * @param p Path of the directory to create
 */
function ensureDir(p: string) {
  mkdirSync(p, { recursive: true });
}
26
+
27
/**
 * Naive heuristic mapping a judge to its evaluator source file.
 *
 * First looks for `src/evaluators/<judge.id>.ts`. If that file is absent and
 * the judge's `analyze` function is named `analyze<Something>`, the suffix is
 * converted from camelCase to kebab-case and tried as the file name instead.
 *
 * @returns The evaluator path relative to AGENTS_DIR using forward slashes,
 *          or `undefined` when neither candidate exists.
 */
const evaluatorPathFor = (judge: JudgeDefinition): string | undefined => {
  const evaluatorsDir = join(__dirname, "..", "src", "evaluators");

  // Resolve `<stem>.ts` inside the evaluators directory, if it exists on disk.
  const locate = (stem: string): string | undefined => {
    const absolute = join(evaluatorsDir, `${stem}.ts`);
    return existsSync(absolute) ? relative(AGENTS_DIR, absolute).replace(/\\/g, "/") : undefined;
  };

  // Judge ids (which may contain dashes) normally match the evaluator file name.
  const direct = locate(judge.id);
  if (direct !== undefined) return direct;

  // Fallback: derive the file name from the analyze function's name.
  const fnName = judge.analyze?.name;
  if (fnName === undefined || !fnName.startsWith("analyze")) return undefined;

  const kebab = fnName
    .replace(/^analyze/, "")
    .replace(/([a-z])([A-Z])/g, "$1-$2")
    .toLowerCase();
  return locate(kebab);
};
47
+
48
/**
 * Serialize a judge's metadata as a frontmatter block: a `---` line, one
 * `key: value` line per populated field, a closing `---` line and a trailing
 * blank line.
 *
 * Fields whose value is `undefined`, `null` or the empty string are omitted.
 * String values are emitted as a JSON (double-quoted, escaped) string when
 * written bare they would be ambiguous or would break the one-line-per-field
 * layout:
 *   - they contain `:` or `#`,
 *   - they contain a line break, which would otherwise spill onto extra lines,
 *   - they start or end with whitespace, which would otherwise be lost.
 *
 * @param judge Judge definition to describe
 * @returns The frontmatter text, ending with a newline
 */
function toYamlFrontmatter(judge: JudgeDefinition): string {
  // Any match means the bare value is not safe to write on a single line.
  const needsQuoting = /[:#\r\n]|^\s|\s$/;

  const fields: Record<string, string | number | undefined> = {
    id: judge.id,
    name: judge.name,
    domain: judge.domain,
    rulePrefix: judge.rulePrefix,
    description: judge.description,
    tableDescription: judge.tableDescription,
    promptDescription: judge.promptDescription,
    script: evaluatorPathFor(judge),
    priority: judge.id === "false-positive-review" ? 999 : judge.id === "tribunal" ? 1000 : 10,
  };

  const lines: string[] = ["---"];
  for (const [key, value] of Object.entries(fields)) {
    if (value === undefined || value === null || value === "") continue;
    const rendered = typeof value === "string" && needsQuoting.test(value) ? JSON.stringify(value) : value;
    lines.push(`${key}: ${rendered}`);
  }
  lines.push("---", "");
  return lines.join("\n");
}
71
+
72
/**
 * Remove leading and trailing whitespace from a prompt while leaving its
 * internal formatting (including newlines and backticks) untouched.
 *
 * Uses `String.prototype.trim`, which strips the same characters as the
 * `\s` class but in a single linear pass; a `\s+$` regex can backtrack
 * heavily on prompts containing long interior runs of whitespace.
 *
 * @param prompt Raw prompt text
 * @returns The prompt without surrounding whitespace
 */
function normalizePrompt(prompt: string): string {
  return prompt.trim();
}
77
+
78
/**
 * Write a `<id>.judge.md` file into AGENTS_DIR for every judge in the default
 * registry. A file that already exists is left alone unless FORCE is set.
 * Prints one line per judge saying where the file lives and whether it was
 * skipped.
 */
async function main() {
  await loadJudges(); // ensure all TS judges registered
  const judges = defaultRegistry.getJudges();
  ensureDir(AGENTS_DIR);

  const results: { id: string; path: string; skipped: boolean; reason?: string }[] = [];

  for (const judge of judges) {
    const targetPath = join(AGENTS_DIR, `${judge.id}.judge.md`);
    const keepExisting = !FORCE && existsSync(targetPath);

    if (keepExisting) {
      results.push({ id: judge.id, path: targetPath, skipped: true, reason: "exists" });
    } else {
      // File layout: frontmatter block, trimmed system prompt, final newline.
      const frontmatter = toYamlFrontmatter(judge);
      const prompt = normalizePrompt(judge.systemPrompt ?? "");

      ensureDir(dirname(targetPath));
      writeFileSync(targetPath, `${frontmatter}${prompt}\n`, "utf-8");
      results.push({ id: judge.id, path: targetPath, skipped: false });
    }
  }

  console.log("Generated agent files:");
  for (const { id, path, skipped, reason } of results) {
    const suffix = skipped ? ` (skipped: ${reason})` : "";
    console.log(`- ${id}: ${path}${suffix}`);
  }
}

// Script entry point: any failure is printed and turned into exit code 1.
main().catch((failure) => {
  console.error(failure);
  process.exit(1);
});
@@ -0,0 +1,26 @@
1
+ #!/usr/bin/env tsx
2
+ import { join } from "node:path";
3
+ import { writeFileSync } from "node:fs";
4
+ import { listSkills } from "../src/skill-loader.js";
5
+
6
/**
 * Render a value as the content of one Markdown table cell.
 *
 * A literal `|` is escaped as `\|` so it is not read as a column separator,
 * and line breaks are replaced with a space because a table row must stay on
 * a single line. A missing value becomes an empty cell.
 */
function toTableCell(value: string | undefined): string {
  return (value ?? "").replace(/\|/g, "\\|").replace(/\r\n|\r|\n/g, " ");
}

/**
 * Build the skills catalog: read every skill from `<cwd>/skills` and write a
 * Markdown table (ID, name, description, tags, agents) to
 * `<cwd>/docs/skills.md`, one row per skill.
 */
async function main() {
  const skillsDir = join(process.cwd(), "skills");
  const skills = listSkills(skillsDir);

  const lines: string[] = [
    `# Skills Catalog`,
    "",
    "| ID | Name | Description | Tags | Agents |",
    "| --- | --- | --- | --- | --- |",
  ];
  for (const s of skills) {
    const cells = [s.id, s.name, s.description, (s.tags || []).join(", "), s.agents.join(", ")].map((cell) =>
      toTableCell(cell),
    );
    lines.push(`| ${cells.join(" | ")} |`);
  }
  lines.push(""); // final empty entry so the file ends with a newline

  const outPath = join(process.cwd(), "docs", "skills.md");
  writeFileSync(outPath, lines.join("\n"), "utf-8");
  console.log(`Wrote skills catalog to ${outPath}`);
}

// Script entry point: any failure is printed and turned into exit code 1.
main().catch((err) => {
  console.error(err);
  process.exit(1);
});
@@ -0,0 +1,104 @@
1
+ #!/usr/bin/env tsx
2
+ /**
3
+ * Validate that generated `.judge.md` files round-trip with the current registry.
4
+ * (Legacy `.agent.md` is still accepted for backward compatibility.)
5
+ * Fails with exit code 1 if mismatches are found.
6
+ */
7
+ import { join } from "node:path";
8
+ import { pathToFileURL } from "node:url";
9
+ import { parseAgentFile, agentToJudgeDefinition } from "../src/agent-loader.js";
10
+ import { loadJudges } from "../src/judges/index.js";
11
+ import { existsSync, readdirSync } from "node:fs";
12
+
13
/**
 * Normalize text for comparison: drop surrounding whitespace and collapse
 * every run of whitespace to a single space. A missing value yields "".
 */
function stripWhitespace(s?: string) {
  const text = s ?? "";
  return text
    .split(/\s+/)
    .filter((piece) => piece.length > 0)
    .join(" ");
}
16
+
17
/**
 * Collect the agent definition files found directly inside `agentsDir`.
 *
 * Canonical `.judge.md` files are listed first, followed by legacy
 * `.agent.md` files (still accepted for backward compatibility). Within each
 * group the names are ordered with `localeCompare`.
 *
 * @param agentsDir Directory to scan
 * @returns File names (not full paths) in the order described above
 * @throws Error when the directory does not exist or contains no agent files
 */
export function listAgentFiles(agentsDir: string): string[] {
  if (!existsSync(agentsDir)) {
    throw new Error(`agents directory not found: ${agentsDir}`);
  }

  const canonical: string[] = [];
  const legacy: string[] = [];
  for (const entry of readdirSync(agentsDir)) {
    if (entry.endsWith(".agent.md")) {
      legacy.push(entry);
    } else if (entry.endsWith(".judge.md")) {
      canonical.push(entry);
    }
  }

  if (canonical.length + legacy.length === 0) {
    throw new Error(`No agent files found in ${agentsDir}. Expected .judge.md files (legacy .agent.md tolerated).`);
  }

  const byName = (left: string, right: string): number => left.localeCompare(right);
  return [...canonical.sort(byName), ...legacy.sort(byName)];
}
40
+
41
/**
 * Check every agent file in `agentsDir` against the judges returned by
 * `loadJudges()`. Useful in CI.
 *
 * For each file, the judge with the same frontmatter id is looked up. The
 * file is converted with `agentToJudgeDefinition` and its metadata fields must
 * be strictly equal to the judge's; the system prompts must match after
 * whitespace normalization.
 *
 * @param agentsDir Directory containing `.judge.md` agent files
 * @returns The number of agent files that were checked
 * @throws Error listing agent ids with no matching judge and every field or
 *         prompt mismatch, when any were found
 */
export async function validateAgents(
  agentsDir: string = join(process.cwd(), "agents"),
): Promise<{ filesChecked: number }> {
  const judges = await loadJudges();
  const judgesById = new Map(judges.map((j) => [j.id, j]));

  // Metadata fields that must round-trip unchanged.
  const comparedFields: (keyof (typeof judges)[number])[] = [
    "id",
    "name",
    "domain",
    "rulePrefix",
    "description",
    "tableDescription",
    "promptDescription",
  ];

  const unmatchedIds: string[] = [];
  const problems: string[] = [];
  const files = listAgentFiles(agentsDir);

  for (const file of files) {
    const agent = parseAgentFile(join(agentsDir, file));
    const judge = judgesById.get(agent.frontmatter.id);
    if (judge === undefined) {
      // The agent file names an id that no loaded judge has.
      unmatchedIds.push(agent.frontmatter.id);
      continue;
    }

    const converted = agentToJudgeDefinition(agent);
    const fromAgent = converted as unknown as Record<string, unknown>;
    const fromRegistry = judge as unknown as Record<string, unknown>;
    for (const field of comparedFields) {
      const key = field as string;
      if (fromAgent[key] !== fromRegistry[key]) {
        problems.push(`${agent.frontmatter.id}: field ${String(field)} mismatch`);
      }
    }

    // Prompts are compared loosely: only differences beyond whitespace count.
    const agentPrompt = stripWhitespace(converted.systemPrompt);
    const judgePrompt = stripWhitespace(judge.systemPrompt);
    if (agentPrompt !== judgePrompt) {
      problems.push(`${agent.frontmatter.id}: systemPrompt mismatch`);
    }
  }

  const errors: string[] = [];
  if (unmatchedIds.length > 0) errors.push(`Missing judges in agent files: ${unmatchedIds.join(", ")}`);
  if (problems.length > 0) errors.push(`Mismatches: \n- ${problems.join("\n- ")}`);
  if (errors.length > 0) {
    throw new Error(errors.join("\n"));
  }

  return { filesChecked: files.length };
}
93
+
94
/** Validate the default agents directory and print how many files were checked. */
async function main() {
  const summary = await validateAgents();
  console.log(`Agent files validated against registry (${summary.filesChecked} agents).`);
}

// Run only when this module is the script Node was launched with, so that
// importing it for `validateAgents` / `listAgentFiles` has no side effects.
const launchedDirectly = pathToFileURL(process.argv[1] ?? "").href === import.meta.url;
if (launchedDirectly) {
  main().catch((failure) => {
    console.error(failure);
    process.exit(1);
  });
}
package/server.json CHANGED
@@ -7,12 +7,12 @@
7
7
  "url": "https://github.com/kevinrabun/judges",
8
8
  "source": "github"
9
9
  },
10
- "version": "3.113.0",
10
+ "version": "3.115.0",
11
11
  "packages": [
12
12
  {
13
13
  "registryType": "npm",
14
14
  "identifier": "@kevinrabun/judges",
15
- "version": "3.113.0",
15
+ "version": "3.115.0",
16
16
  "transport": {
17
17
  "type": "stdio"
18
18
  }
@@ -0,0 +1,57 @@
1
+ ---
2
+ id: ai-code-review
3
+ name: AI Code Review Skill
4
+ description: "Full-spectrum AI-generated code review using the Judges Panel, tuned for minimizing false positives and focusing on AI-specific failure modes."
5
+ tags: [ai-code, code-review, tribunal]
6
+ agents:
7
+ - ai-code-safety
8
+ - hallucination-detection
9
+ - logic-review
10
+ - over-engineering
11
+ - code-structure
12
+ - maintainability
13
+ - performance
14
+ - reliability
15
+ - cybersecurity
16
+ - data-security
17
+ - authentication
18
+ - api-design
19
+ - api-contract
20
+ - database
21
+ - caching
22
+ - observability
23
+ - logging-privacy
24
+ - configuration-management
25
+ - dependency-health
26
+ - framework-safety
27
+ - testing
28
+ - ci-cd
29
+ - intent-alignment
30
+ - multi-turn-coherence
31
+ - model-fingerprint
32
+ - agent-instructions
33
+ - cloud-readiness
34
+ - cost-effectiveness
35
+ - ethics-bias
36
+ - accessibility
37
+ - internationalization
38
+ - data-sovereignty
39
+ - iac-security
40
+ - rate-limiting
41
+ - portability
42
+ - ux
43
+ - backwards-compatibility
44
+ - security
45
+ - false-positive-review
46
+ priority: 10
47
+ ---
48
+
49
+ You are the AI Code Review Skill. Your job is to orchestrate the Judges Panel to review AI-generated code safely and reliably for production deployment.
50
+
51
+ ## Orchestration Guidance
52
+ - Run the listed judges in parallel; aggregate findings.
53
+ - Apply the **Precision Mandate** and **False Positive Cost** guidance; default to "no finding" unless evidence is clear.
54
+ - Highlight AI-specific risks: hallucinated APIs, insecure defaults, missing validation, under-specified logic, misaligned intent.
55
+ - For conflicting guidance, prefer security, data, and safety judges over style-only advice.
56
+ - Summarize top 5 actionable findings with rule IDs and remediation steps.
57
+ - If code passes with zero findings, explicitly state coverage across security, data, auth, and error paths.
@@ -0,0 +1,27 @@
1
+ ---
2
+ id: release-gate
3
+ name: Release Gate Skill
4
+ description: "Pre-deploy release gate combining reliability, observability, CI/CD, and security checks."
5
+ tags: [release, sre, reliability, deployment]
6
+ agents:
7
+ - reliability
8
+ - observability
9
+ - performance
10
+ - ci-cd
11
+ - testing
12
+ - cloud-readiness
13
+ - cost-effectiveness
14
+ - security
15
+ - data-security
16
+ - cybersecurity
17
+ - false-positive-review
18
+ priority: 7
19
+ ---
20
+
21
+ You are the Release Gate Skill. Act as the final reviewer before production deployment.
22
+
23
+ ## Orchestration Guidance
24
+ - Verify health checks, structured logging, metrics/tracing coverage, circuit breakers.
25
+ - Confirm CI/CD checks exist and are enforced (tests, lint, vulnerability scans, IaC policies).
26
+ - Ensure rollback strategies and deployment safety (blue/green or canary where applicable).
27
+ - Provide a go/no-go recommendation with rationale and list of blocking findings.
@@ -0,0 +1,32 @@
1
+ ---
2
+ id: security-review
3
+ name: Security Review Skill
4
+ description: "Security-focused review for production readiness, covering AppSec, DataSec, AuthZ, and IaC."
5
+ tags: [security, appsec, datasec]
6
+ agents:
7
+ - cybersecurity
8
+ - data-security
9
+ - authentication
10
+ - logging-privacy
11
+ - api-contract
12
+ - database
13
+ - iac-security
14
+ - framework-safety
15
+ - dependency-health
16
+ - configuration-management
17
+ - rate-limiting
18
+ - compliance
19
+ - data-sovereignty
20
+ - security
21
+ - ai-code-safety
22
+ - false-positive-review
23
+ priority: 5
24
+ ---
25
+
26
+ You are the Security Review Skill. Ensure safe-by-default deployment readiness.
27
+
28
+ ## Orchestration Guidance
29
+ - Enforce OWASP Top 10, SLSA-style supply chain checks, and least privilege.
30
+ - Flag hardcoded secrets, missing auth, insecure transport, injection risks, and misconfigurations.
31
+ - Cross-check IaC templates for public exposure, missing encryption, and permissive IAM.
32
+ - Deduplicate findings across judges; prefer the most specific rule ID.