npm - @kevinrabun/judges - Versions diffs - 3.113.0 → 3.115.0 - Mend

@kevinrabun/judges 3.113.0 → 3.115.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (76) hide show

package/README.md +9 -0
package/agents/accessibility.judge.md +37 -0
package/agents/agent-instructions.judge.md +37 -0
package/agents/ai-code-safety.judge.md +48 -0
package/agents/api-contract.judge.md +30 -0
package/agents/api-design.judge.md +39 -0
package/agents/authentication.judge.md +37 -0
package/agents/backwards-compatibility.judge.md +37 -0
package/agents/caching.judge.md +37 -0
package/agents/ci-cd.judge.md +37 -0
package/agents/cloud-readiness.judge.md +37 -0
package/agents/code-structure.judge.md +48 -0
package/agents/compliance.judge.md +40 -0
package/agents/concurrency.judge.md +39 -0
package/agents/configuration-management.judge.md +37 -0
package/agents/cost-effectiveness.judge.md +40 -0
package/agents/cybersecurity.judge.md +36 -0
package/agents/data-security.judge.md +34 -0
package/agents/data-sovereignty.judge.md +58 -0
package/agents/database.judge.md +41 -0
package/agents/dependency-health.judge.md +39 -0
package/agents/documentation.judge.md +39 -0
package/agents/error-handling.judge.md +37 -0
package/agents/ethics-bias.judge.md +39 -0
package/agents/false-positive-review.judge.md +73 -0
package/agents/framework-safety.judge.md +40 -0
package/agents/hallucination-detection.judge.md +33 -0
package/agents/iac-security.judge.md +38 -0
package/agents/intent-alignment.judge.md +31 -0
package/agents/internationalization.judge.md +42 -0
package/agents/logging-privacy.judge.md +37 -0
package/agents/logic-review.judge.md +34 -0
package/agents/maintainability.judge.md +37 -0
package/agents/model-fingerprint.judge.md +31 -0
package/agents/multi-turn-coherence.judge.md +29 -0
package/agents/observability.judge.md +37 -0
package/agents/over-engineering.judge.md +48 -0
package/agents/performance.judge.md +44 -0
package/agents/portability.judge.md +37 -0
package/agents/rate-limiting.judge.md +37 -0
package/agents/reliability.judge.md +39 -0
package/agents/scalability.judge.md +41 -0
package/agents/security.judge.md +31 -0
package/agents/software-practices.judge.md +44 -0
package/agents/testing.judge.md +39 -0
package/agents/ux.judge.md +37 -0
package/dist/api.d.ts +9 -1
package/dist/api.js +9 -1
package/dist/commands/fix.d.ts +10 -0
package/dist/commands/fix.js +52 -0
package/dist/commands/llm-benchmark.d.ts +13 -4
package/dist/commands/llm-benchmark.js +39 -8
package/dist/commands/review.d.ts +51 -1
package/dist/commands/review.js +213 -7
package/dist/evaluators/index.js +61 -35
package/dist/github-app.d.ts +35 -0
package/dist/github-app.js +125 -4
package/dist/judges/index.d.ts +23 -61
package/dist/judges/index.js +49 -63
package/dist/patches/apply.d.ts +15 -0
package/dist/patches/apply.js +37 -0
package/dist/tools/prompts.d.ts +2 -2
package/dist/tools/prompts.js +21 -10
package/docs/skills.md +7 -0
package/package.json +18 -3
package/packages/judges-cli/README.md +24 -0
package/packages/judges-cli/bin/judges.js +8 -0
package/scripts/generate-agents-from-judges.ts +111 -0
package/scripts/generate-skills-docs.ts +26 -0
package/scripts/validate-agents.ts +104 -0
package/server.json +2 -2
package/skills/ai-code-review.skill.md +57 -0
package/skills/release-gate.skill.md +27 -0
package/skills/security-review.skill.md +32 -0
package/src/agent-loader.ts +324 -0
package/src/skill-loader.ts +199 -0

package/src/agent-loader.ts ADDED Viewed

@@ -0,0 +1,324 @@
+/**
+ * Agent Markdown Loader — reads `.judge.md` files (legacy `.agent.md` also
+ * accepted) and converts them into JudgeDefinition objects that register with
+ * the unified JudgeRegistry.
+ *
+ * This is the bridge between the file-based agent paradigm and the existing
+ * TypeScript judge system. Agent files use YAML frontmatter for metadata
+ * and markdown body for the system prompt (persona + evaluation criteria).
+ *
+ * ## File Format
+ *
+ * ```markdown
+ * ---
+ * id: cybersecurity
+ * name: Judge Cybersecurity
+ * domain: Cybersecurity & Threat Defense
+ * rulePrefix: CYBER
+ * description: Evaluates code for vulnerability...
+ * tableDescription: "Injection attacks, XSS, CSRF, auth flaws"
+ * promptDescription: Deep cybersecurity review
+ * script: ../src/evaluators/cybersecurity.ts   # optional
+ * priority: 10                                  # optional, default 10
+ * ---
+ *
+ * You are Judge Cybersecurity — a principal application security engineer...
+ *
+ * ## Evaluation Criteria
+ * ...
+ * ```
+ *
+ * - `script` is a relative path to the evaluator module (must export a
+ *   function matching `(code: string, language: string, context?) => Finding[]`).
+ *   If omitted, the judge is LLM-only (no deterministic layer).
+ * - `priority` controls ordering. Higher = later. 999 is reserved for
+ *   false-positive-review (always last). Default is 10.
+ */
+import { readFileSync, readdirSync, existsSync } from "node:fs";
+import { join, resolve, dirname } from "node:path";
+import { createRequire } from "node:module";
+import type { JudgeDefinition, Finding, AnalyzeContext } from "./types.js";
+// ─── Frontmatter Types ──────────────────────────────────────────────────────
/**
 * Typed view of the YAML frontmatter found at the top of a `.judge.md` file
 * (legacy `.agent.md` files share the same shape).
 */
export interface AgentFrontmatter {
  /** Judge identifier; used as the lookup key when registering with the registry. */
  id: string;
  /** Display name of the judge. */
  name: string;
  /** Domain the judge covers. */
  domain: string;
  /** Rule prefix copied verbatim onto the resulting judge definition. */
  rulePrefix: string;
  /** Description copied verbatim onto the resulting judge definition. */
  description: string;
  /** Description variant copied onto the judge definition as `tableDescription`. */
  tableDescription: string;
  /** Description variant copied onto the judge definition as `promptDescription`. */
  promptDescription: string;
  /**
   * Optional path to an evaluator module, resolved relative to the directory
   * of the agent file. When absent, no evaluator is attached.
   */
  script?: string;
  /** Sort key used when loading a directory: lower values come first. Defaults to 10. */
  priority?: number;
}
/**
 * Result of parsing one agent file: validated metadata, the markdown body,
 * and where the file came from.
 */
export interface ParsedAgent {
  /** Validated frontmatter fields. */
  frontmatter: AgentFrontmatter;
  /** Trimmed markdown below the frontmatter; used as the judge's `systemPrompt`. */
  body: string;
  /** Absolute path of the source `.judge.md` file (legacy `.agent.md`). */
  sourcePath: string;
}
+// ─── Frontmatter Parser ─────────────────────────────────────────────────────
/**
 * Decode a single-line frontmatter value.
 *
 * - A value that is one quoted string (optionally followed by a `# comment`)
 *   yields the text between the quotes, so `#` inside quotes is preserved.
 * - Otherwise a trailing ` # comment` is removed (a `#` only starts a comment
 *   at the start of the value or after whitespace, so `C#` stays intact) and
 *   one pair of surrounding quotes, if present, is stripped.
 */
function parseFrontmatterScalar(rawValue: string): string {
  const value = rawValue.trim();
  const quote = value.charAt(0);
  if (quote === '"' || quote === "'") {
    const closing = value.indexOf(quote, 1);
    if (closing > 0) {
      const trailer = value.slice(closing + 1).trim();
      if (trailer === "" || trailer.startsWith("#")) {
        return value.slice(1, closing);
      }
    }
  }
  const uncommented = value.replace(/(^|\s+)#.*$/, "").trim();
  const doubleQuoted = uncommented.startsWith('"') && uncommented.endsWith('"');
  const singleQuoted = uncommented.startsWith("'") && uncommented.endsWith("'");
  return doubleQuoted || singleQuoted ? uncommented.slice(1, -1) : uncommented;
}

/**
 * Parse YAML frontmatter from a string. Handles the subset of YAML used
 * by agent files: simple key-value pairs, quoted strings, trailing
 * `# comments`, and multi-line `>` folded scalars (`>-` / `>+` are accepted
 * and treated like `>`). No arrays, nested objects, or anchors.
 *
 * The frontmatter must start on the first line with `---` and end with a
 * line containing only `---`. The closing fence may be the last line of the
 * file (no trailing newline required) and a leading UTF-8 BOM is ignored.
 *
 * @param raw Full text of the agent file.
 * @returns `meta` — the key/value pairs found (all values are strings);
 *          `body` — the trimmed text after the closing fence. When no
 *          frontmatter is found, `meta` is empty and `body` is `raw` unchanged.
 */
export function parseFrontmatter(raw: string): { meta: Record<string, string>; body: string } {
  // Editors on some platforms prepend a BOM, which would otherwise hide the opening fence.
  const text = raw.charCodeAt(0) === 0xfeff ? raw.slice(1) : raw;
  const match = text.match(/^---[ \t]*\r?\n([\s\S]*?)\r?\n---[ \t]*(?:\r?\n([\s\S]*))?$/);
  if (!match) {
    return { meta: {}, body: raw };
  }

  const lines = (match[1] ?? "").split(/\r?\n/);
  const body = (match[2] ?? "").trim();
  const meta: Record<string, string> = {};

  let i = 0;
  while (i < lines.length) {
    const line = lines[i] ?? "";
    i++;

    // Skip empty lines and full-line comments.
    const trimmed = line.trim();
    if (trimmed === "" || trimmed.startsWith("#")) continue;

    const kvMatch = line.match(/^([a-zA-Z_][a-zA-Z0-9_]*)\s*:\s*(.*)/);
    if (!kvMatch) continue;

    const key = kvMatch[1] ?? "";
    const rawValue = kvMatch[2] ?? "";

    // The fold indicator is checked before unquoting so that a quoted ">" stays a literal.
    const indicator = rawValue.replace(/(^|\s+)#.*$/, "").trim();
    if (!/^>[+-]?$/.test(indicator)) {
      meta[key] = parseFrontmatterScalar(rawValue);
      continue;
    }

    // Folded scalar: consecutive text lines are joined with a space, each
    // blank line contributes a newline. Continuation lines must be indented
    // by at least two spaces; blank lines are consumed as part of the scalar.
    let folded = "";
    let startsSegment = true;
    while (i < lines.length) {
      const next = lines[i] ?? "";
      const blank = next.trim() === "";
      if (!blank && !next.startsWith("  ")) break;
      if (blank) {
        folded += "\n";
        startsSegment = true;
      } else {
        folded += (startsSegment ? "" : " ") + next.trimStart();
        startsSegment = false;
      }
      i++;
    }
    meta[key] = folded.trim();
  }

  return { meta, body };
}
+// ─── Validation ──────────────────────────────────────────────────────────────
/** Frontmatter keys that must be present and non-empty in every agent file. */
const REQUIRED_FIELDS: (keyof AgentFrontmatter)[] = [
  "id",
  "name",
  "domain",
  "rulePrefix",
  "description",
  "tableDescription",
  "promptDescription",
];

/**
 * Validate and coerce parsed frontmatter into a typed AgentFrontmatter.
 *
 * @param meta Raw key/value pairs as produced by the frontmatter parser.
 * @param sourcePath Path of the agent file; only used in error messages.
 * @returns The typed frontmatter. `script` is `undefined` when absent or
 *          empty. `priority` is parsed as a base-10 integer and falls back
 *          to 10 when absent, empty, or not a number.
 * @throws Error when any required field is missing or empty.
 */
export function validateFrontmatter(meta: Record<string, string>, sourcePath: string): AgentFrontmatter {
  for (const field of REQUIRED_FIELDS) {
    if (!meta[field]) {
      throw new Error(`Agent file ${sourcePath} is missing required field: "${field}"`);
    }
  }

  const defaultPriority = 10;
  const parsedPriority = meta.priority ? parseInt(meta.priority, 10) : defaultPriority;

  return {
    id: meta.id,
    name: meta.name,
    domain: meta.domain,
    rulePrefix: meta.rulePrefix,
    description: meta.description,
    tableDescription: meta.tableDescription,
    promptDescription: meta.promptDescription,
    script: meta.script || undefined,
    // A NaN priority would make the directory sort comparator return NaN and
    // leave the load order undefined, so unparseable values use the default.
    priority: Number.isNaN(parsedPriority) ? defaultPriority : parsedPriority,
  };
}
+// ─── Agent File Parsing ──────────────────────────────────────────────────────
/**
 * Read one `.judge.md` file (legacy `.agent.md`) from disk and split it into
 * validated frontmatter and markdown body.
 *
 * @param filePath Path to the agent file; resolved to an absolute path first.
 * @returns The parsed agent, with `sourcePath` set to the absolute path.
 * @throws If the file cannot be read or a required frontmatter field is missing.
 */
export function parseAgentFile(filePath: string): ParsedAgent {
  const sourcePath = resolve(filePath);
  const parsed = parseFrontmatter(readFileSync(sourcePath, "utf-8"));
  return {
    frontmatter: validateFrontmatter(parsed.meta, sourcePath),
    body: parsed.body,
    sourcePath,
  };
}
+// ─── Evaluator Resolution ────────────────────────────────────────────────────
/** Signature every evaluator function is expected to have. */
type AnalyzeFn = (code: string, language: string, context?: AnalyzeContext) => Finding[];

/**
 * Resolve the frontmatter `script` path to an analyze function.
 *
 * Loading is synchronous (via a `require` created for this module) so it can
 * be used from synchronous evaluation paths. Several locations are tried in
 * order, so the same agent file works when running from source or from a
 * compiled `dist` tree:
 *
 * 1. the script path as written, resolved against the agent file's directory;
 * 2. the same path with `.ts` replaced by `.js`;
 * 3. the same path with `/src/` mapped to `/dist/` and `.ts` replaced by `.js`;
 * 4. `<cwd>/dist/evaluators/<id>.js`.
 *
 * Within a loaded module the export named `analyze<PascalCaseId>` is
 * preferred; failing that, the first exported function whose name starts
 * with `analyze` is used.
 *
 * @returns The evaluator function, or `undefined` when the agent declares no
 *          script or no candidate yields a suitable export. Load errors are
 *          deliberately ignored so that one missing build artefact does not
 *          prevent the remaining candidates from being tried.
 */
export function resolveEvaluator(agent: ParsedAgent): AnalyzeFn | undefined {
  const { id, script } = agent.frontmatter;
  if (!script) return undefined;

  const scriptPath = resolve(dirname(agent.sourcePath), script);
  // De-duplicated: for a `.js` script the first two candidates are identical.
  const candidatePaths = Array.from(
    new Set<string>([
      scriptPath,
      scriptPath.replace(/\.ts$/, ".js"),
      scriptPath
        .replace(/\\src\\/g, "\\dist\\")
        .replace(/\/src\//g, "/dist/")
        .replace(/\.ts$/, ".js"),
      resolve(process.cwd(), "dist", "evaluators", `${id}.js`),
    ]),
  );

  // The expected export name depends only on the judge id, so compute it once.
  const pascalId = id
    .split("-")
    .map((s) => s.charAt(0).toUpperCase() + s.slice(1))
    .join("");
  const fnName = `analyze${pascalId}`;

  const req = createRequire(import.meta.url);
  for (const candidate of candidatePaths) {
    try {
      const mod = req(candidate) as Record<string, unknown>;
      const maybeFn = mod?.[fnName];
      if (typeof maybeFn === "function") return maybeFn as AnalyzeFn;
      for (const key of Object.keys(mod || {})) {
        const candidateFn = mod[key];
        if (typeof candidateFn === "function" && key.startsWith("analyze")) return candidateFn as AnalyzeFn;
      }
    } catch {
      // Best-effort: this candidate could not be loaded, try the next one.
    }
  }
  return undefined;
}
+// ─── Conversion to JudgeDefinition ───────────────────────────────────────────
/**
 * Build a JudgeDefinition from a parsed agent file.
 *
 * The descriptive frontmatter fields are copied across unchanged and the
 * markdown body is used verbatim as `systemPrompt`. `script` and `priority`
 * are not part of the result.
 *
 * @param agent Parsed agent file.
 * @param analyze Optional evaluator function; the `analyze` property is only
 *                present on the result when one is supplied.
 */
export function agentToJudgeDefinition(
  agent: ParsedAgent,
  analyze?: (code: string, language: string, context?: AnalyzeContext) => Finding[],
): JudgeDefinition {
  const { id, name, domain, description, rulePrefix, tableDescription, promptDescription } = agent.frontmatter;
  const definition = {
    id,
    name,
    domain,
    description,
    rulePrefix,
    tableDescription,
    promptDescription,
    systemPrompt: agent.body,
  };
  return analyze ? { ...definition, analyze } : definition;
}
+// ─── Directory Loading ───────────────────────────────────────────────────────
/**
 * Parse every `*.judge.md` / `*.agent.md` file (suffix matched
 * case-insensitively) directly inside `dirPath`.
 *
 * @param dirPath Directory to scan; a directory that does not exist yields `[]`.
 * @returns Parsed agents ordered by ascending `priority` (missing priority
 *          counts as 10), i.e. lower numbers come first.
 * @throws If any matching file cannot be read or fails frontmatter validation.
 */
export function loadAgentDirectory(dirPath: string): ParsedAgent[] {
  const absDir = resolve(dirPath);
  if (!existsSync(absDir)) {
    return [];
  }

  const agentFilePattern = /\.(agent|judge)\.md$/i;
  const agents: ParsedAgent[] = [];
  for (const entry of readdirSync(absDir)) {
    if (agentFilePattern.test(entry)) {
      agents.push(parseAgentFile(join(absDir, entry)));
    }
  }

  const priorityOf = (agent: ParsedAgent): number => agent.frontmatter.priority ?? 10;
  agents.sort((left, right) => priorityOf(left) - priorityOf(right));
  return agents;
}
/**
 * Load every agent file in `dirPath` and register the resulting judges.
 *
 * Agents are processed in the order returned by the directory loader. An
 * agent whose id is already known to the registry (built-in or loaded
 * earlier) is left untouched and not counted.
 *
 * @param dirPath Directory containing the agent files.
 * @param registry Target registry; only `register` and `getJudge` are used.
 * @returns How many judges were newly registered by this call.
 */
export function loadAndRegisterAgents(
  dirPath: string,
  registry: {
    register: (judge: JudgeDefinition) => void;
    getJudge: (id: string) => JudgeDefinition | undefined;
  },
): number {
  let registered = 0;
  for (const agent of loadAgentDirectory(dirPath)) {
    if (!registry.getJudge(agent.frontmatter.id)) {
      registry.register(agentToJudgeDefinition(agent, resolveEvaluator(agent)));
      registered += 1;
    }
  }
  return registered;
}

package/src/skill-loader.ts ADDED Viewed

@@ -0,0 +1,199 @@
+/**
+ * Skill Loader — reads `.skill.md` files and converts them into skill
+ * definitions that orchestrate sets of judges/agents. A skill represents a
+ * reusable review workflow (e.g., AI code review, security gate, release gate).
+ */
+import { readFileSync, readdirSync, existsSync } from "node:fs";
+import { join, resolve, dirname } from "node:path";
+import { fileURLToPath } from "node:url";
+import type { JudgeDefinition, TribunalVerdict } from "./types.js";
+import { evaluateWithTribunal } from "./evaluators/index.js";
+import { defaultRegistry } from "./judge-registry.js";
+import { loadAgentJudges } from "./judges/index.js";
/** Typed view of the YAML frontmatter at the top of a `.skill.md` file. */
export interface SkillFrontmatter {
  /** Skill identifier; `runSkill` looks skills up by this value. */
  id: string;
  /** Display name of the skill. */
  name: string;
  /** Description of the skill. */
  description: string;
  /** Ids of the judges the skill runs; every other registered judge is disabled for the run. */
  agents: string[];
  /** Optional free-form tags. */
  tags?: string[];
  /** Sort key used when loading a directory: lower values come first. Defaults to 10. */
  priority?: number;
}
/** Result of parsing one `.skill.md` file. */
export interface ParsedSkill {
  /** Validated frontmatter fields. */
  frontmatter: SkillFrontmatter;
  /** Trimmed markdown below the frontmatter (orchestrator instructions). */
  body: string;
  /** Absolute path of the source file. */
  sourcePath: string;
}
/** Loosely typed frontmatter: each value is either a string or a list of strings. */
type SkillMeta = Record<string, unknown>;

/** Keys whose unbracketed `a, b, c` value is read as a list rather than as text. */
const SKILL_LIST_KEYS: ReadonlySet<string> = new Set(["agents", "tags"]);

/** Remove one pair of matching surrounding quotes, if present. */
function stripSkillQuotes(text: string): string {
  const doubleQuoted = text.startsWith('"') && text.endsWith('"');
  const singleQuoted = text.startsWith("'") && text.endsWith("'");
  return doubleQuoted || singleQuoted ? text.slice(1, -1) : text;
}

/** Split comma-separated text into trimmed, unquoted, non-empty items. */
function splitSkillList(text: string): string[] {
  return text
    .split(",")
    .map((item) => stripSkillQuotes(item.trim()))
    .filter(Boolean);
}

/**
 * Parse the YAML frontmatter of a skill file. Supports the subset used by
 * `.skill.md` files:
 *
 * - `key: value` scalars, optionally wrapped in single or double quotes;
 * - inline lists: `key: [a, b]` for any key, and unbracketed `key: a, b`
 *   for `agents` and `tags`;
 * - block lists: `key:` (or `key: |`) followed by `- item` lines or lines
 *   indented by two or more spaces.
 *
 * Commas inside any other scalar (for example a description) are kept as
 * text. Quotes around list items are removed.
 *
 * The closing `---` may be the last line of the file.
 *
 * @param raw Full text of the skill file.
 * @returns `meta` with string or string-array values, and the trimmed `body`.
 *          When no frontmatter is found, `meta` is empty and `body` is `raw`.
 */
export function parseSkillFrontmatter(raw: string): { meta: SkillMeta; body: string } {
  const match = raw.match(/^---[ \t]*\r?\n([\s\S]*?)\r?\n---[ \t]*(?:\r?\n([\s\S]*))?$/);
  if (!match) {
    return { meta: {}, body: raw };
  }

  const lines = (match[1] ?? "").split(/\r?\n/);
  const body = (match[2] ?? "").trim();
  const meta: SkillMeta = {};

  let i = 0;
  while (i < lines.length) {
    const line = lines[i] ?? "";
    // The key line is consumed exactly once, here; no branch below advances past it again.
    i++;

    const trimmed = line.trim();
    if (trimmed === "" || trimmed.startsWith("#")) continue;

    const kv = line.match(/^([a-zA-Z_][a-zA-Z0-9_-]*)\s*:\s*(.*)$/);
    if (!kv) continue;

    const key = kv[1] ?? "";
    const value = (kv[2] ?? "").trim();

    if (value === "" || value === "|") {
      // Look ahead for a block list without committing to it.
      const items: string[] = [];
      let j = i;
      while (j < lines.length) {
        const next = lines[j] ?? "";
        if (next.trim() === "") {
          j++;
        } else if (/^\s*-\s+/.test(next)) {
          items.push(stripSkillQuotes(next.replace(/^\s*-\s+/, "").trim()));
          j++;
        } else if (/^\s{2,}\S/.test(next)) {
          items.push(stripSkillQuotes(next.trim()));
          j++;
        } else {
          break; // end of list
        }
      }
      if (items.length > 0) {
        meta[key] = items;
        i = j;
      } else {
        // No list followed: keep the bare value and leave the following
        // lines for the main loop so the next key is not lost.
        meta[key] = value;
      }
      continue;
    }

    if (value.startsWith("[") && value.endsWith("]")) {
      meta[key] = splitSkillList(value.slice(1, -1));
    } else if (stripSkillQuotes(value) !== value) {
      meta[key] = stripSkillQuotes(value);
    } else if (SKILL_LIST_KEYS.has(key) && value.includes(",")) {
      meta[key] = splitSkillList(value);
    } else {
      meta[key] = value;
    }
  }

  return { meta, body };
}
/** Frontmatter keys that must be present and non-empty in every skill file. */
const REQUIRED_FIELDS: (keyof SkillFrontmatter)[] = ["id", "name", "description", "agents"];

/** Render a text field; a value that was parsed as a list is re-joined with ", ". */
function skillFieldToText(value: unknown): string {
  return Array.isArray(value) ? value.map(String).join(", ") : String(value);
}

/** Normalise a list field given either as an array or as comma-separated text. */
function skillFieldToList(value: unknown): string[] {
  const parts = Array.isArray(value) ? value.map(String) : String(value ?? "").split(",");
  return parts.map((part) => part.trim()).filter(Boolean);
}

/**
 * Validate and coerce parsed skill frontmatter into a typed SkillFrontmatter.
 *
 * @param meta Raw frontmatter values (strings or string arrays).
 * @param sourcePath Path of the skill file; only used in error messages.
 * @returns The typed frontmatter. `agents` and `tags` accept either an array
 *          or comma-separated text; `tags` is `undefined` when absent.
 *          `priority` falls back to 10 when absent, empty, or not numeric.
 * @throws Error when a required field is missing or empty, including an
 *         `agents` value that contains no ids after normalisation.
 */
export function validateSkillFrontmatter(meta: SkillMeta, sourcePath: string): SkillFrontmatter {
  const missing = (field: string): Error =>
    new Error(`Skill file ${sourcePath} is missing required field: "${field}"`);

  for (const field of REQUIRED_FIELDS) {
    const value = meta[field];
    if (!value || (Array.isArray(value) && value.length === 0)) {
      throw missing(field);
    }
  }

  // A string such as " , " passes the presence check above but names no judge.
  const agents = skillFieldToList(meta.agents);
  if (agents.length === 0) {
    throw missing("agents");
  }

  const defaultPriority = 10;
  const parsedPriority = meta.priority ? Number(meta.priority) : defaultPriority;

  return {
    id: skillFieldToText(meta.id),
    name: skillFieldToText(meta.name),
    description: skillFieldToText(meta.description),
    agents,
    tags: meta.tags ? skillFieldToList(meta.tags) : undefined,
    // NaN would make the directory sort comparator meaningless, so fall back to the default.
    priority: Number.isFinite(parsedPriority) ? parsedPriority : defaultPriority,
  };
}
/**
 * Read one `.skill.md` file from disk and split it into validated
 * frontmatter and markdown body.
 *
 * @param filePath Path to the skill file; resolved to an absolute path first.
 * @throws If the file cannot be read or a required frontmatter field is missing.
 */
export function parseSkillFile(filePath: string): ParsedSkill {
  const sourcePath = resolve(filePath);
  const parsed = parseSkillFrontmatter(readFileSync(sourcePath, "utf-8"));
  return {
    frontmatter: validateSkillFrontmatter(parsed.meta, sourcePath),
    body: parsed.body,
    sourcePath,
  };
}
/**
 * Parse every file ending in `.skill.md` directly inside `dirPath`.
 *
 * @param dirPath Directory to scan; a directory that does not exist yields `[]`.
 * @returns Parsed skills ordered by ascending `priority` (missing priority counts as 10).
 * @throws If any matching file cannot be read or fails frontmatter validation.
 */
export function loadSkillDirectory(dirPath: string): ParsedSkill[] {
  const absDir = resolve(dirPath);
  if (!existsSync(absDir)) {
    return [];
  }

  const skills: ParsedSkill[] = [];
  for (const entry of readdirSync(absDir)) {
    if (entry.endsWith(".skill.md")) {
      skills.push(parseSkillFile(join(absDir, entry)));
    }
  }

  const priorityOf = (skill: ParsedSkill): number => skill.frontmatter.priority ?? 10;
  skills.sort((left, right) => priorityOf(left) - priorityOf(right));
  return skills;
}
/**
 * Summarise the skills found in a directory for display purposes.
 *
 * @param dirPath Directory containing `.skill.md` files.
 * @returns One entry per skill, in load order, carrying `id`, `name`,
 *          `description`, `tags` and `agents` (priority is omitted).
 */
export function listSkills(
  dirPath: string,
): Array<Pick<SkillFrontmatter, "id" | "name" | "description" | "tags" | "agents">> {
  return loadSkillDirectory(dirPath).map(({ frontmatter }) => {
    const { id, name, description, tags, agents } = frontmatter;
    return { id, name, description, tags, agents };
  });
}
/**
 * Run a skill by id.
 *
 * Loads the skill definitions, makes sure the agent judges are loaded, checks
 * that every judge the skill references exists in the default registry, and
 * then evaluates the code with every other registered judge disabled.
 *
 * @param skillId Id of the skill to run.
 * @param code Source code to evaluate.
 * @param language Language of `code`.
 * @param opts `skillsDir` overrides the directory searched for `.skill.md`
 *             files (default: the `skills` directory next to this module's
 *             parent directory). `context` is accepted but not currently
 *             forwarded to the evaluation.
 * @returns The tribunal verdict for the skill's judges.
 * @throws Error when the skill id is unknown, or when the skill references a
 *         judge id that is not in the registry (the first such id is reported).
 */
export async function runSkill(
  skillId: string,
  code: string,
  language: string,
  opts?: { skillsDir?: string; context?: unknown },
): Promise<TribunalVerdict> {
  const skillsDir = opts?.skillsDir ?? resolve(dirname(fileURLToPath(import.meta.url)), "..", "skills");
  const skill = loadSkillDirectory(skillsDir).find((s) => s.frontmatter.id === skillId);
  if (!skill) throw new Error(`Skill not found: ${skillId}`);

  // Make sure file-based judges are registered before the references are checked.
  loadAgentJudges();

  const requested = skill.frontmatter.agents;
  const unknownId = requested.find((id) => !defaultRegistry.getJudge(id));
  if (unknownId !== undefined) {
    throw new Error(`Judge referenced by skill not found in registry: ${unknownId}`);
  }

  // The evaluator takes a deny-list, so the skill's judges are selected by
  // disabling every registered judge the skill does not mention.
  const enabled = new Set(requested);
  const disabledJudges = defaultRegistry
    .getJudges()
    .map((j) => j.id)
    .filter((id) => !enabled.has(id));

  return evaluateWithTribunal(code, language, `skill:${skill.frontmatter.id}`, {
    config: {
      disabledJudges,
    },
  });
}