npm - @kevinrabun/judges - Versions diffs - 3.113.0 → 3.115.0 - Mend

@kevinrabun/judges 3.113.0 → 3.115.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (76) hide show

package/README.md +9 -0
package/agents/accessibility.judge.md +37 -0
package/agents/agent-instructions.judge.md +37 -0
package/agents/ai-code-safety.judge.md +48 -0
package/agents/api-contract.judge.md +30 -0
package/agents/api-design.judge.md +39 -0
package/agents/authentication.judge.md +37 -0
package/agents/backwards-compatibility.judge.md +37 -0
package/agents/caching.judge.md +37 -0
package/agents/ci-cd.judge.md +37 -0
package/agents/cloud-readiness.judge.md +37 -0
package/agents/code-structure.judge.md +48 -0
package/agents/compliance.judge.md +40 -0
package/agents/concurrency.judge.md +39 -0
package/agents/configuration-management.judge.md +37 -0
package/agents/cost-effectiveness.judge.md +40 -0
package/agents/cybersecurity.judge.md +36 -0
package/agents/data-security.judge.md +34 -0
package/agents/data-sovereignty.judge.md +58 -0
package/agents/database.judge.md +41 -0
package/agents/dependency-health.judge.md +39 -0
package/agents/documentation.judge.md +39 -0
package/agents/error-handling.judge.md +37 -0
package/agents/ethics-bias.judge.md +39 -0
package/agents/false-positive-review.judge.md +73 -0
package/agents/framework-safety.judge.md +40 -0
package/agents/hallucination-detection.judge.md +33 -0
package/agents/iac-security.judge.md +38 -0
package/agents/intent-alignment.judge.md +31 -0
package/agents/internationalization.judge.md +42 -0
package/agents/logging-privacy.judge.md +37 -0
package/agents/logic-review.judge.md +34 -0
package/agents/maintainability.judge.md +37 -0
package/agents/model-fingerprint.judge.md +31 -0
package/agents/multi-turn-coherence.judge.md +29 -0
package/agents/observability.judge.md +37 -0
package/agents/over-engineering.judge.md +48 -0
package/agents/performance.judge.md +44 -0
package/agents/portability.judge.md +37 -0
package/agents/rate-limiting.judge.md +37 -0
package/agents/reliability.judge.md +39 -0
package/agents/scalability.judge.md +41 -0
package/agents/security.judge.md +31 -0
package/agents/software-practices.judge.md +44 -0
package/agents/testing.judge.md +39 -0
package/agents/ux.judge.md +37 -0
package/dist/api.d.ts +9 -1
package/dist/api.js +9 -1
package/dist/commands/fix.d.ts +10 -0
package/dist/commands/fix.js +52 -0
package/dist/commands/llm-benchmark.d.ts +13 -4
package/dist/commands/llm-benchmark.js +39 -8
package/dist/commands/review.d.ts +51 -1
package/dist/commands/review.js +213 -7
package/dist/evaluators/index.js +61 -35
package/dist/github-app.d.ts +35 -0
package/dist/github-app.js +125 -4
package/dist/judges/index.d.ts +23 -61
package/dist/judges/index.js +49 -63
package/dist/patches/apply.d.ts +15 -0
package/dist/patches/apply.js +37 -0
package/dist/tools/prompts.d.ts +2 -2
package/dist/tools/prompts.js +21 -10
package/docs/skills.md +7 -0
package/package.json +18 -3
package/packages/judges-cli/README.md +24 -0
package/packages/judges-cli/bin/judges.js +8 -0
package/scripts/generate-agents-from-judges.ts +111 -0
package/scripts/generate-skills-docs.ts +26 -0
package/scripts/validate-agents.ts +104 -0
package/server.json +2 -2
package/skills/ai-code-review.skill.md +57 -0
package/skills/release-gate.skill.md +27 -0
package/skills/security-review.skill.md +32 -0
package/src/agent-loader.ts +324 -0
package/src/skill-loader.ts +199 -0

package/packages/judges-cli/bin/judges.js ADDED Viewed

@@ -0,0 +1,8 @@
+#!/usr/bin/env node
+import { runCli } from "../dist/cli.js";
+runCli(process.argv).catch((error) => {
+  console.error("CLI error:", error);
+  process.exit(1);
+});

package/scripts/generate-agents-from-judges.ts ADDED Viewed

@@ -0,0 +1,111 @@
+#!/usr/bin/env tsx
+/**
+ * Generate `.judge.md` files for all existing judges registered in the default registry.
+ * (Legacy `.agent.md` is still supported for reading.)
+ *
+ * Usage:
+ *   npx tsx scripts/generate-agents-from-judges.ts [--force]
+ */
+import { writeFileSync, mkdirSync, existsSync } from "node:fs";
+import { join, dirname, relative } from "node:path";
+import { fileURLToPath } from "node:url";
+import { defaultRegistry } from "../src/judge-registry.js";
+import { loadJudges } from "../src/judges/index.js";
+import type { JudgeDefinition } from "../src/types.js";
+// Resolve this script's own location so paths do not depend on the caller's working directory.
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = dirname(__filename);
+// True when either the long or the short force flag appears anywhere in argv.
+const FORCE = ["--force", "-f"].some((flag) => process.argv.includes(flag));
+// The `agents` folder one level above this script's directory.
+const AGENTS_DIR = join(__dirname, "..", "agents");
+/** Create directory `p` (including missing parents) unless something already exists at that path. */
+function ensureDir(p: string) {
+  if (existsSync(p)) return;
+  mkdirSync(p, { recursive: true });
+}
+/**
+ * Best-effort (naive) lookup of the evaluator source file backing a judge.
+ *
+ * First tries `src/evaluators/<judge.id>.ts`. If that file is absent and the judge's `analyze`
+ * function is named `analyzeXxxYyy`, tries `src/evaluators/xxx-yyy.ts`. Judges whose evaluator
+ * file name diverges from both conventions would need a special case added here.
+ *
+ * @returns The evaluator path relative to the agents directory, using forward slashes,
+ *   or `undefined` when neither candidate exists on disk.
+ */
+const evaluatorPathFor = (judge: JudgeDefinition): string | undefined => {
+  const evaluatorsDir = join(__dirname, "..", "src", "evaluators");
+  // Path of `<stem>.ts` relative to the agents directory (forward slashes), if that file exists.
+  const locate = (stem: string): string | undefined => {
+    const absolute = join(evaluatorsDir, `${stem}.ts`);
+    return existsSync(absolute) ? relative(AGENTS_DIR, absolute).replace(/\\/g, "/") : undefined;
+  };
+  const byId = locate(judge.id);
+  if (byId !== undefined) return byId;
+  const analyzeFnName = judge.analyze?.name;
+  if (analyzeFnName?.startsWith("analyze")) {
+    // Drop the "analyze" prefix, then kebab-case at each lower→Upper letter boundary.
+    const kebabStem = analyzeFnName
+      .replace(/^analyze/, "")
+      .replace(/([a-z])([A-Z])/g, "$1-$2")
+      .toLowerCase();
+    return locate(kebabStem);
+  }
+  return undefined;
+};
+/**
+ * Build the YAML frontmatter block (delimited by `---` lines) for a judge's agent file.
+ *
+ * Emits one `key: value` line per populated field; undefined, null and empty-string values
+ * are omitted. `priority` is 999 for "false-positive-review", 1000 for "tribunal", else 10.
+ *
+ * String values are written as plain scalars only when that is unambiguous. Anything a YAML
+ * reader could misinterpret is emitted as a JSON string literal (also a valid YAML
+ * double-quoted scalar):
+ *   - text containing `:` or `#`,
+ *   - text containing a line break or tab (would otherwise split or corrupt the `key: value` line),
+ *   - text with leading/trailing whitespace or a leading YAML indicator character,
+ *   - text that would be read back as a boolean, null or number (e.g. `true`, `~`, `42`).
+ *
+ * @param judge Judge definition whose metadata is serialised.
+ * @returns The frontmatter text, ending with a newline after the closing `---`.
+ */
+function toYamlFrontmatter(judge: JudgeDefinition): string {
+  // True when `text` cannot safely be written as an unquoted YAML scalar.
+  const needsQuotes = (text: string): boolean =>
+    /[:#\r\n\t]/.test(text) ||
+    /^[\s\-?[\]{},&*!|>'"%@`]/.test(text) ||
+    /\s$/.test(text) ||
+    /^(?:~|null|true|false|yes|no|on|off)$/i.test(text) ||
+    /^[+-]?(?:\d+\.?\d*|\.\d+)(?:e[+-]?\d+)?$/i.test(text) ||
+    /^0[xo][0-9a-f]+$/i.test(text);
+  const fields: Record<string, string | number | undefined> = {
+    id: judge.id,
+    name: judge.name,
+    domain: judge.domain,
+    rulePrefix: judge.rulePrefix,
+    description: judge.description,
+    tableDescription: judge.tableDescription,
+    promptDescription: judge.promptDescription,
+    script: evaluatorPathFor(judge),
+    priority: judge.id === "false-positive-review" ? 999 : judge.id === "tribunal" ? 1000 : 10,
+  };
+  const lines: string[] = ["---"];
+  for (const [key, value] of Object.entries(fields)) {
+    if (value === undefined || value === null || value === "") continue;
+    // Numbers are always emitted bare; strings are quoted only when required.
+    const rendered = typeof value === "string" && needsQuotes(value) ? JSON.stringify(value) : value;
+    lines.push(`${key}: ${rendered}`);
+  }
+  lines.push("---", "");
+  return lines.join("\n");
+}
+/**
+ * Strip leading and trailing whitespace from a judge prompt.
+ * Everything between the first and last non-whitespace character, including newlines
+ * and backticks, is returned unchanged.
+ */
+function normalizePrompt(prompt: string): string {
+  return prompt.trim();
+}
+/**
+ * Write one `<id>.judge.md` file into the agents directory for every judge in the default registry.
+ *
+ * A file that already exists is left untouched unless FORCE is set. Each file consists of the
+ * YAML frontmatter followed by the trimmed system prompt and a final newline. A per-judge
+ * summary (written or skipped) is printed at the end.
+ */
+async function main() {
+  await loadJudges(); // ensure all TS judges registered
+  const judges = defaultRegistry.getJudges();
+  ensureDir(AGENTS_DIR);
+  const results: { id: string; path: string; skipped: boolean; reason?: string }[] = [];
+  for (const judge of judges) {
+    const targetPath = join(AGENTS_DIR, `${judge.id}.judge.md`);
+    const keepExisting = !FORCE && existsSync(targetPath);
+    if (keepExisting) {
+      results.push({ id: judge.id, path: targetPath, skipped: true, reason: "exists" });
+      continue;
+    }
+    const frontmatter = toYamlFrontmatter(judge);
+    const prompt = normalizePrompt(judge.systemPrompt ?? "");
+    ensureDir(dirname(targetPath));
+    writeFileSync(targetPath, `${frontmatter}${prompt}\n`, "utf-8");
+    results.push({ id: judge.id, path: targetPath, skipped: false });
+  }
+  console.log("Generated agent files:");
+  results.forEach(({ id, path, skipped, reason }) => {
+    const note = skipped ? ` (skipped: ${reason})` : "";
+    console.log(`- ${id}: ${path}${note}`);
+  });
+}
+// Script entry point: a rejected run is printed and reported through exit status 1.
+const exitWithFailure = (failure: unknown) => {
+  console.error(failure);
+  process.exit(1);
+};
+main().catch(exitWithFailure);

package/scripts/generate-skills-docs.ts ADDED Viewed

@@ -0,0 +1,26 @@
+#!/usr/bin/env tsx
+import { join } from "node:path";
+import { writeFileSync } from "node:fs";
+import { listSkills } from "../src/skill-loader.js";
+/**
+ * Render the skills found in `<cwd>/skills` as a Markdown table and write it to
+ * `<cwd>/docs/skills.md`, then log the output path.
+ *
+ * Cell text is sanitised so each skill always occupies exactly one table row: a literal `|`
+ * is escaped as `\|` (otherwise it would start a new column) and line breaks are collapsed
+ * to a single space (otherwise they would end the row).
+ */
+async function main() {
+  const skillsDir = join(process.cwd(), "skills");
+  const skills = listSkills(skillsDir);
+  // Make arbitrary text safe to place inside one Markdown table cell.
+  const cell = (text: unknown): string =>
+    String(text)
+      .replace(/\|/g, "\\|")
+      .replace(/\s*\r?\n\s*/g, " ");
+  const lines: string[] = [];
+  lines.push(`# Skills Catalog`);
+  lines.push("");
+  lines.push("| ID | Name | Description | Tags | Agents |");
+  lines.push("| --- | --- | --- | --- | --- |");
+  for (const s of skills) {
+    const tags = (s.tags || []).join(", ");
+    const agents = s.agents.join(", ");
+    lines.push(`| ${cell(s.id)} | ${cell(s.name)} | ${cell(s.description)} | ${cell(tags)} | ${cell(agents)} |`);
+  }
+  lines.push("");
+  const outPath = join(process.cwd(), "docs", "skills.md");
+  writeFileSync(outPath, lines.join("\n"), "utf-8");
+  console.log(`Wrote skills catalog to ${outPath}`);
+}
+// Script entry point: a rejected run is printed and reported through exit status 1.
+const exitWithFailure = (failure: unknown) => {
+  console.error(failure);
+  process.exit(1);
+};
+main().catch(exitWithFailure);

package/scripts/validate-agents.ts ADDED Viewed

@@ -0,0 +1,104 @@
+#!/usr/bin/env tsx
+/**
+ * Validate that generated `.judge.md` files round-trip with the current registry.
+ * (Legacy `.agent.md` is still accepted for backward compatibility.)
+ * Fails with exit code 1 if mismatches are found.
+ */
+import { join } from "node:path";
+import { pathToFileURL } from "node:url";
+import { parseAgentFile, agentToJudgeDefinition } from "../src/agent-loader.js";
+import { loadJudges } from "../src/judges/index.js";
+import { existsSync, readdirSync } from "node:fs";
+/** Trim `s` and collapse every internal whitespace run to one space; a missing value yields "". */
+function stripWhitespace(s?: string) {
+  const text = s ?? "";
+  const trimmed = text.trim();
+  return trimmed.replace(/\s+/g, " ");
+}
+/**
+ * Return the names of the agent files found directly inside `agentsDir`.
+ *
+ * Both canonical `.judge.md` files and legacy `.agent.md` files are accepted. The result lists
+ * every `.judge.md` name first, then every `.agent.md` name; within each group names are
+ * ordered with `localeCompare`.
+ *
+ * @throws Error when the directory does not exist or contains no matching file.
+ */
+export function listAgentFiles(agentsDir: string): string[] {
+  if (!existsSync(agentsDir)) {
+    throw new Error(`agents directory not found: ${agentsDir}`);
+  }
+  const isLegacy = (name: string): boolean => name.endsWith(".agent.md");
+  const isCanonical = (name: string): boolean => name.endsWith(".judge.md");
+  const files = readdirSync(agentsDir).filter((name) => isCanonical(name) || isLegacy(name));
+  if (files.length === 0) {
+    throw new Error(`No agent files found in ${agentsDir}. Expected .judge.md files (legacy .agent.md tolerated).`);
+  }
+  // Rank canonical names (0) ahead of legacy ones (1); equal ranks fall back to locale order.
+  files.sort((a, b) => Number(isLegacy(a)) - Number(isLegacy(b)) || a.localeCompare(b));
+  return files;
+}
+/**
+ * Check every agent file in `agentsDir` against the judges returned by `loadJudges()`. Useful in CI.
+ *
+ * Each file is parsed and converted to a judge definition. Its `id`, `name`, `domain`,
+ * `rulePrefix`, `description`, `tableDescription` and `promptDescription` must be strictly
+ * equal to those of the loaded judge with the same id; `systemPrompt` is compared after
+ * whitespace normalisation.
+ *
+ * @param agentsDir Directory containing `.judge.md` agent files (defaults to `<cwd>/agents`).
+ * @returns The number of agent files that were checked.
+ * @throws Error listing agent ids that have no loaded judge, and every mismatch found.
+ */
+export async function validateAgents(
+  agentsDir: string = join(process.cwd(), "agents"),
+): Promise<{ filesChecked: number }> {
+  const judges = await loadJudges();
+  const judgesById = new Map(judges.map((j) => [j.id, j]));
+  const missing: string[] = [];
+  const mismatches: string[] = [];
+  const files = listAgentFiles(agentsDir);
+  for (const file of files) {
+    const agent = parseAgentFile(join(agentsDir, file));
+    const agentId = agent.frontmatter.id;
+    const judge = judgesById.get(agentId);
+    if (!judge) {
+      missing.push(agentId);
+      continue;
+    }
+    const converted = agentToJudgeDefinition(agent);
+    const fields: (keyof typeof judge)[] = [
+      "id",
+      "name",
+      "domain",
+      "rulePrefix",
+      "description",
+      "tableDescription",
+      "promptDescription",
+    ];
+    // Compare through plain records so both sides can be indexed by the same key.
+    const fromFile = converted as unknown as Record<string, unknown>;
+    const fromRegistry = judge as unknown as Record<string, unknown>;
+    fields
+      .filter((field) => fromFile[field as string] !== fromRegistry[field as string])
+      .forEach((field) => mismatches.push(`${agent.frontmatter.id}: field ${String(field)} mismatch`));
+    const promptsAgree = stripWhitespace(converted.systemPrompt) === stripWhitespace(judge.systemPrompt);
+    if (!promptsAgree) {
+      mismatches.push(`${agent.frontmatter.id}: systemPrompt mismatch`);
+    }
+  }
+  const errors: string[] = [];
+  if (missing.length) errors.push(`Missing judges in agent files: ${missing.join(", ")}`);
+  if (mismatches.length) errors.push(`Mismatches: \n- ${mismatches.join("\n- ")}`);
+  if (errors.length) {
+    throw new Error(errors.join("\n"));
+  }
+  return { filesChecked: files.length };
+}
+/** Validate the default agents directory and print how many agent files were checked. */
+async function main() {
+  const result = await validateAgents();
+  console.log(`Agent files validated against registry (${result.filesChecked} agents).`);
+}
+// Run only when this module's URL matches the script path in argv[1], i.e. not when imported.
+const invokedDirectly = import.meta.url === pathToFileURL(process.argv[1] ?? "").href;
+if (invokedDirectly) {
+  main().catch((failure) => {
+    console.error(failure);
+    process.exit(1);
+  });
+}

package/server.json CHANGED Viewed

@@ -7,12 +7,12 @@
     "url": "https://github.com/kevinrabun/judges",
     "source": "github"
   },
-  "version": "3.113.0",
+  "version": "3.115.0",
   "packages": [
     {
       "registryType": "npm",
       "identifier": "@kevinrabun/judges",
-      "version": "3.113.0",
+      "version": "3.115.0",
       "transport": {
         "type": "stdio"
       }

package/skills/ai-code-review.skill.md ADDED Viewed

@@ -0,0 +1,57 @@
+---
+id: ai-code-review
+name: AI Code Review Skill
+description: "Full-spectrum AI-generated code review using the Judges Panel, tuned for minimizing false positives and focusing on AI-specific failure modes."
+tags: [ai-code, code-review, tribunal]
+agents:
+  - ai-code-safety
+  - hallucination-detection
+  - logic-review
+  - over-engineering
+  - code-structure
+  - maintainability
+  - performance
+  - reliability
+  - cybersecurity
+  - data-security
+  - authentication
+  - api-design
+  - api-contract
+  - database
+  - caching
+  - observability
+  - logging-privacy
+  - configuration-management
+  - dependency-health
+  - framework-safety
+  - testing
+  - ci-cd
+  - intent-alignment
+  - multi-turn-coherence
+  - model-fingerprint
+  - agent-instructions
+  - cloud-readiness
+  - cost-effectiveness
+  - ethics-bias
+  - accessibility
+  - internationalization
+  - data-sovereignty
+  - iac-security
+  - rate-limiting
+  - portability
+  - ux
+  - backwards-compatibility
+  - security
+  - false-positive-review
+priority: 10
+---
+You are the AI Code Review Skill. Your job is to orchestrate the Judges Panel to review AI-generated code safely and reliably for production deployment.
+## Orchestration Guidance
+- Run the listed judges in parallel; aggregate findings.
+- Apply the **Precision Mandate** and **False Positive Cost** guidance; default to "no finding" unless evidence is clear.
+- Highlight AI-specific risks: hallucinated APIs, insecure defaults, missing validation, under-specified logic, misaligned intent.
+- For conflicting guidance, prefer security, data, and safety judges over style-only advice.
+- Summarize top 5 actionable findings with rule IDs and remediation steps.
+- If code passes with zero findings, explicitly state coverage across security, data, auth, and error paths.

package/skills/release-gate.skill.md ADDED Viewed

@@ -0,0 +1,27 @@
+---
+id: release-gate
+name: Release Gate Skill
+description: "Pre-deploy release gate combining reliability, observability, CI/CD, and security checks."
+tags: [release, sre, reliability, deployment]
+agents:
+  - reliability
+  - observability
+  - performance
+  - ci-cd
+  - testing
+  - cloud-readiness
+  - cost-effectiveness
+  - security
+  - data-security
+  - cybersecurity
+  - false-positive-review
+priority: 7
+---
+You are the Release Gate Skill. Act as the final reviewer before production deployment.
+## Orchestration Guidance
+- Verify health checks, structured logging, metrics/tracing coverage, circuit breakers.
+- Confirm CI/CD checks exist and are enforced (tests, lint, vulnerability scans, IaC policies).
+- Ensure rollback strategies and deployment safety (blue/green or canary where applicable).
+- Provide a go/no-go recommendation with rationale and list of blocking findings.

package/skills/security-review.skill.md ADDED Viewed

@@ -0,0 +1,32 @@
+---
+id: security-review
+name: Security Review Skill
+description: "Security-focused review for production readiness, covering AppSec, DataSec, AuthZ, and IaC."
+tags: [security, appsec, datasec]
+agents:
+  - cybersecurity
+  - data-security
+  - authentication
+  - logging-privacy
+  - api-contract
+  - database
+  - iac-security
+  - framework-safety
+  - dependency-health
+  - configuration-management
+  - rate-limiting
+  - compliance
+  - data-sovereignty
+  - security
+  - ai-code-safety
+  - false-positive-review
+priority: 5
+---
+You are the Security Review Skill. Ensure safe-by-default deployment readiness.
+## Orchestration Guidance
+- Enforce OWASP Top 10, SLSA-style supply chain checks, and least privilege.
+- Flag hardcoded secrets, missing auth, insecure transport, injection risks, and misconfigurations.
+- Cross-check IaC templates for public exposure, missing encryption, and permissive IAM.
+- Deduplicate findings across judges; prefer the most specific rule ID.