@kevinrabun/judges 3.115.0 → 3.115.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent-loader.d.ts +107 -0
- package/dist/agent-loader.js +254 -0
- package/dist/context/context-snippets.d.ts +15 -0
- package/dist/context/context-snippets.js +36 -0
- package/dist/context/embedding-cache.d.ts +30 -0
- package/dist/context/embedding-cache.js +48 -0
- package/dist/skill-loader.d.ts +33 -0
- package/dist/skill-loader.js +162 -0
- package/package.json +7 -1
- package/server.json +2 -2
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Agent Markdown Loader — reads `.judge.md` files (legacy `.agent.md` also
|
|
3
|
+
* accepted) and converts them into JudgeDefinition objects that register with
|
|
4
|
+
* the unified JudgeRegistry.
|
|
5
|
+
*
|
|
6
|
+
* This is the bridge between the file-based agent paradigm and the existing
|
|
7
|
+
* TypeScript judge system. Agent files use YAML frontmatter for metadata
|
|
8
|
+
* and markdown body for the system prompt (persona + evaluation criteria).
|
|
9
|
+
*
|
|
10
|
+
* ## File Format
|
|
11
|
+
*
|
|
12
|
+
* ```markdown
|
|
13
|
+
* ---
|
|
14
|
+
* id: cybersecurity
|
|
15
|
+
* name: Judge Cybersecurity
|
|
16
|
+
* domain: Cybersecurity & Threat Defense
|
|
17
|
+
* rulePrefix: CYBER
|
|
18
|
+
* description: Evaluates code for vulnerability...
|
|
19
|
+
* tableDescription: "Injection attacks, XSS, CSRF, auth flaws"
|
|
20
|
+
* promptDescription: Deep cybersecurity review
|
|
21
|
+
* script: ../src/evaluators/cybersecurity.ts # optional
|
|
22
|
+
* priority: 10 # optional, default 10
|
|
23
|
+
* ---
|
|
24
|
+
*
|
|
25
|
+
* You are Judge Cybersecurity — a principal application security engineer...
|
|
26
|
+
*
|
|
27
|
+
* ## Evaluation Criteria
|
|
28
|
+
* ...
|
|
29
|
+
* ```
|
|
30
|
+
*
|
|
31
|
+
* - `script` is a relative path to the evaluator module (must export a
|
|
32
|
+
* function matching `(code: string, language: string, context?) => Finding[]`).
|
|
33
|
+
* If omitted, the judge is LLM-only (no deterministic layer).
|
|
34
|
+
* - `priority` controls ordering. Higher = later. 999 is reserved for
|
|
35
|
+
* false-positive-review (always last). Default is 10.
|
|
36
|
+
*/
|
|
37
|
+
import type { JudgeDefinition, Finding, AnalyzeContext } from "./types.js";
|
|
38
|
+
/**
 * Typed view of the YAML frontmatter found at the top of a `.judge.md`
 * file (legacy `.agent.md`). Produced by `validateFrontmatter`.
 */
export interface AgentFrontmatter {
    /** Judge identifier, e.g. `cybersecurity`; used for registry lookups. */
    id: string;
    /** Human-readable judge name, e.g. `Judge Cybersecurity`. */
    name: string;
    /** Area the judge covers, e.g. `Cybersecurity & Threat Defense`. */
    domain: string;
    /** Short upper-case prefix such as `CYBER` (presumably used for rule ids — confirm). */
    rulePrefix: string;
    /** Description of what the judge evaluates. */
    description: string;
    /** Compact description (the file-format example lists the covered topics). */
    tableDescription: string;
    /** Description copied to the judge definition's `promptDescription`. */
    promptDescription: string;
    /** Optional path to an evaluator module, relative to the agent file. */
    script?: string;
    /** Optional ordering weight; lower values sort earlier. */
    priority?: number;
}
|
|
50
|
+
/** Result of parsing one agent file: validated metadata plus prompt text. */
export interface ParsedAgent {
    /** Validated frontmatter fields. */
    frontmatter: AgentFrontmatter;
    /** Markdown below the frontmatter fence; this text becomes the systemPrompt. */
    body: string;
    /** Absolute path of the `.judge.md` (or legacy `.agent.md`) file that was read. */
    sourcePath: string;
}
|
|
58
|
+
/**
|
|
59
|
+
* Parse YAML frontmatter from a string. Handles the subset of YAML used
|
|
60
|
+
* by agent files: simple key-value pairs, quoted strings, and multi-line
|
|
61
|
+
* `>` folded scalars. No arrays, nested objects, or anchors.
|
|
62
|
+
*/
|
|
63
|
+
export declare function parseFrontmatter(raw: string): {
|
|
64
|
+
meta: Record<string, string>;
|
|
65
|
+
body: string;
|
|
66
|
+
};
|
|
67
|
+
/**
|
|
68
|
+
* Validate and coerce parsed frontmatter into a typed AgentFrontmatter.
|
|
69
|
+
* Throws on missing required fields.
|
|
70
|
+
*/
|
|
71
|
+
export declare function validateFrontmatter(meta: Record<string, string>, sourcePath: string): AgentFrontmatter;
|
|
72
|
+
/**
|
|
73
|
+
* Parse a single `.judge.md` file into its frontmatter and body (legacy `.agent.md`).
|
|
74
|
+
*/
|
|
75
|
+
export declare function parseAgentFile(filePath: string): ParsedAgent;
|
|
76
|
+
/** Signature of a deterministic evaluator exported by a `script` module. */
type AnalyzeFn = (code: string, language: string, context?: AnalyzeContext) => Finding[];
/**
 * Resolve the `script` path to an analyze function.
 *
 * Requirements:
 * - Synchronous (to support existing synchronous evaluation paths)
 * - Works both from source (`tsx`/ts-node) and compiled `dist`
 *
 * @returns The evaluator function, or `undefined` when the agent declares no
 * `script` or none of the candidate module paths yields one.
 */
export declare function resolveEvaluator(agent: ParsedAgent): AnalyzeFn | undefined;
|
|
85
|
+
/**
|
|
86
|
+
* Convert a parsed agent file to a JudgeDefinition. The markdown body is used
* as the systemPrompt as-is, and `analyze` is attached only when an evaluator
* function is supplied.
|
|
89
|
+
*/
|
|
90
|
+
export declare function agentToJudgeDefinition(agent: ParsedAgent, analyze?: (code: string, language: string, context?: AnalyzeContext) => Finding[]): JudgeDefinition;
|
|
91
|
+
/**
|
|
92
|
+
* Load all `.judge.md` files from a directory (legacy `.agent.md` supported)
|
|
93
|
+
* and return parsed agents sorted by priority (ascending — lower number =
|
|
94
|
+
* earlier in pipeline).
|
|
95
|
+
*/
|
|
96
|
+
export declare function loadAgentDirectory(dirPath: string): ParsedAgent[];
|
|
97
|
+
/**
|
|
98
|
+
* Load all agent files from a directory and register them with the
|
|
99
|
+
* JudgeRegistry. This is the main entry point for the hybrid phase.
|
|
100
|
+
*
|
|
101
|
+
* Returns the number of agents loaded.
|
|
102
|
+
*/
|
|
103
|
+
export declare function loadAndRegisterAgents(dirPath: string, registry: {
|
|
104
|
+
register: (judge: JudgeDefinition) => void;
|
|
105
|
+
getJudge: (id: string) => JudgeDefinition | undefined;
|
|
106
|
+
}): number;
|
|
107
|
+
export {};
|
|
@@ -0,0 +1,254 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Agent Markdown Loader — reads `.judge.md` files (legacy `.agent.md` also
|
|
3
|
+
* accepted) and converts them into JudgeDefinition objects that register with
|
|
4
|
+
* the unified JudgeRegistry.
|
|
5
|
+
*
|
|
6
|
+
* This is the bridge between the file-based agent paradigm and the existing
|
|
7
|
+
* TypeScript judge system. Agent files use YAML frontmatter for metadata
|
|
8
|
+
* and markdown body for the system prompt (persona + evaluation criteria).
|
|
9
|
+
*
|
|
10
|
+
* ## File Format
|
|
11
|
+
*
|
|
12
|
+
* ```markdown
|
|
13
|
+
* ---
|
|
14
|
+
* id: cybersecurity
|
|
15
|
+
* name: Judge Cybersecurity
|
|
16
|
+
* domain: Cybersecurity & Threat Defense
|
|
17
|
+
* rulePrefix: CYBER
|
|
18
|
+
* description: Evaluates code for vulnerability...
|
|
19
|
+
* tableDescription: "Injection attacks, XSS, CSRF, auth flaws"
|
|
20
|
+
* promptDescription: Deep cybersecurity review
|
|
21
|
+
* script: ../src/evaluators/cybersecurity.ts # optional
|
|
22
|
+
* priority: 10 # optional, default 10
|
|
23
|
+
* ---
|
|
24
|
+
*
|
|
25
|
+
* You are Judge Cybersecurity — a principal application security engineer...
|
|
26
|
+
*
|
|
27
|
+
* ## Evaluation Criteria
|
|
28
|
+
* ...
|
|
29
|
+
* ```
|
|
30
|
+
*
|
|
31
|
+
* - `script` is a relative path to the evaluator module (must export a
|
|
32
|
+
* function matching `(code: string, language: string, context?) => Finding[]`).
|
|
33
|
+
* If omitted, the judge is LLM-only (no deterministic layer).
|
|
34
|
+
* - `priority` controls ordering. Higher = later. 999 is reserved for
|
|
35
|
+
* false-positive-review (always last). Default is 10.
|
|
36
|
+
*/
|
|
37
|
+
import { readFileSync, readdirSync, existsSync } from "node:fs";
|
|
38
|
+
import { join, resolve, dirname } from "node:path";
|
|
39
|
+
import { createRequire } from "node:module";
|
|
40
|
+
// ─── Frontmatter Parser ─────────────────────────────────────────────────────
|
|
41
|
+
/**
|
|
42
|
+
* Parse YAML frontmatter from a string. Handles the subset of YAML used
|
|
43
|
+
* by agent files: simple key-value pairs, quoted strings, and multi-line
|
|
44
|
+
* `>` folded scalars. No arrays, nested objects, or anchors.
|
|
45
|
+
*/
|
|
46
|
+
export function parseFrontmatter(raw) {
|
|
47
|
+
const match = raw.match(/^---\r?\n([\s\S]*?)\r?\n---\r?\n([\s\S]*)$/);
|
|
48
|
+
if (!match) {
|
|
49
|
+
return { meta: {}, body: raw };
|
|
50
|
+
}
|
|
51
|
+
const yamlBlock = match[1];
|
|
52
|
+
const body = match[2].trim();
|
|
53
|
+
const meta = {};
|
|
54
|
+
const lines = yamlBlock.split(/\r?\n/);
|
|
55
|
+
let i = 0;
|
|
56
|
+
while (i < lines.length) {
|
|
57
|
+
const line = lines[i];
|
|
58
|
+
// Skip empty lines and comments
|
|
59
|
+
if (!line.trim() || line.trim().startsWith("#")) {
|
|
60
|
+
i++;
|
|
61
|
+
continue;
|
|
62
|
+
}
|
|
63
|
+
const kvMatch = line.match(/^([a-zA-Z_][a-zA-Z0-9_]*)\s*:\s*(.*)/);
|
|
64
|
+
if (!kvMatch) {
|
|
65
|
+
i++;
|
|
66
|
+
continue;
|
|
67
|
+
}
|
|
68
|
+
const key = kvMatch[1];
|
|
69
|
+
let value = kvMatch[2].trim();
|
|
70
|
+
// Handle folded scalar (>)
|
|
71
|
+
if (value === ">") {
|
|
72
|
+
const parts = [];
|
|
73
|
+
i++;
|
|
74
|
+
while (i < lines.length && (lines[i].startsWith(" ") || lines[i].trim() === "")) {
|
|
75
|
+
if (lines[i].trim() === "") {
|
|
76
|
+
parts.push("");
|
|
77
|
+
}
|
|
78
|
+
else {
|
|
79
|
+
parts.push(lines[i].trimStart());
|
|
80
|
+
}
|
|
81
|
+
i++;
|
|
82
|
+
}
|
|
83
|
+
// Folded scalar: join non-empty lines with spaces, blank lines become newlines
|
|
84
|
+
value = parts
|
|
85
|
+
.reduce((acc, part) => {
|
|
86
|
+
if (part === "") {
|
|
87
|
+
acc.push("\n");
|
|
88
|
+
}
|
|
89
|
+
else if (acc.length > 0 && acc[acc.length - 1] !== "\n") {
|
|
90
|
+
acc[acc.length - 1] += " " + part;
|
|
91
|
+
}
|
|
92
|
+
else {
|
|
93
|
+
acc.push(part);
|
|
94
|
+
}
|
|
95
|
+
return acc;
|
|
96
|
+
}, [])
|
|
97
|
+
.join("")
|
|
98
|
+
.trim();
|
|
99
|
+
}
|
|
100
|
+
else {
|
|
101
|
+
// Strip surrounding quotes
|
|
102
|
+
if ((value.startsWith('"') && value.endsWith('"')) || (value.startsWith("'") && value.endsWith("'"))) {
|
|
103
|
+
value = value.slice(1, -1);
|
|
104
|
+
}
|
|
105
|
+
i++;
|
|
106
|
+
}
|
|
107
|
+
meta[key] = value;
|
|
108
|
+
}
|
|
109
|
+
return { meta, body };
|
|
110
|
+
}
|
|
111
|
+
// ─── Validation ──────────────────────────────────────────────────────────────
|
|
112
|
+
/** Frontmatter keys every agent file must define with a non-empty value. */
const REQUIRED_FIELDS = [
    "id",
    "name",
    "domain",
    "rulePrefix",
    "description",
    "tableDescription",
    "promptDescription",
];
/**
 * Validate and coerce parsed frontmatter into a typed AgentFrontmatter.
 *
 * @param meta Raw string values produced by the frontmatter parser.
 * @param sourcePath Path of the agent file; only used in error messages.
 * @returns The typed frontmatter. `script` is `undefined` when empty or
 * absent; `priority` is 10 when absent or not parseable as an integer.
 * @throws Error when a required field is missing or empty.
 */
export function validateFrontmatter(meta, sourcePath) {
    for (const field of REQUIRED_FIELDS) {
        if (!meta[field]) {
            throw new Error(`Agent file ${sourcePath} is missing required field: "${field}"`);
        }
    }
    // A non-numeric priority would become NaN and break priority sorting,
    // so fall back to the default in that case as well.
    const parsedPriority = meta.priority ? parseInt(meta.priority, 10) : NaN;
    return {
        id: meta.id,
        name: meta.name,
        domain: meta.domain,
        rulePrefix: meta.rulePrefix,
        description: meta.description,
        tableDescription: meta.tableDescription,
        promptDescription: meta.promptDescription,
        script: meta.script || undefined,
        priority: Number.isNaN(parsedPriority) ? 10 : parsedPriority,
    };
}
|
|
143
|
+
// ─── Agent File Parsing ──────────────────────────────────────────────────────
|
|
144
|
+
/**
 * Read one `.judge.md` file (legacy `.agent.md`) from disk and split it into
 * validated frontmatter and markdown body.
 *
 * @param filePath Path to the agent file; resolved to an absolute path.
 * @returns The validated frontmatter, the body text and the absolute path.
 * Errors from reading the file or from validation propagate to the caller.
 */
export function parseAgentFile(filePath) {
    const sourcePath = resolve(filePath);
    const parsed = parseFrontmatter(readFileSync(sourcePath, "utf-8"));
    return {
        frontmatter: validateFrontmatter(parsed.meta, sourcePath),
        body: parsed.body,
        sourcePath,
    };
}
|
|
158
|
+
/**
 * Resolve the agent's `script` frontmatter entry to an analyze function.
 *
 * Candidate module paths are tried in order: the script path as written
 * (relative to the agent file), the same path with `.ts` replaced by `.js`,
 * the path with `src` directories mapped to `dist` and `.ts` to `.js`, and
 * finally `<cwd>/dist/evaluators/<id>.js`. From the first module that loads,
 * the export named `analyze<PascalCaseId>` is preferred; otherwise the first
 * exported function whose name starts with `analyze` is used.
 *
 * @returns The evaluator function, or `undefined` when no `script` is set or
 * no candidate yields a matching export. Load errors are ignored on purpose
 * so that the next candidate can be tried.
 */
export function resolveEvaluator(agent) {
    const { script, id } = agent.frontmatter;
    if (!script) {
        return undefined;
    }
    const primary = resolve(dirname(agent.sourcePath), script);
    const toJs = (p) => p.replace(/\.ts$/, ".js");
    const candidates = [
        primary,
        toJs(primary),
        toJs(primary.replace(/\\src\\/g, "\\dist\\").replace(/\/src\//g, "/dist/")),
        resolve(process.cwd(), "dist", "evaluators", `${id}.js`),
    ];
    const load = createRequire(import.meta.url);
    for (const candidate of candidates) {
        try {
            const mod = load(candidate);
            const pascal = id
                .split("-")
                .map((segment) => segment.charAt(0).toUpperCase() + segment.slice(1))
                .join("");
            const preferred = mod?.[`analyze${pascal}`];
            if (typeof preferred === "function") {
                return preferred;
            }
            // Fall back to any exported function following the analyze* naming.
            for (const exportName of Object.keys(mod || {})) {
                const exported = mod[exportName];
                if (typeof exported === "function" && exportName.startsWith("analyze")) {
                    return exported;
                }
            }
        }
        catch {
            // swallow and try next
        }
    }
    return undefined;
}
|
|
195
|
+
// ─── Conversion to JudgeDefinition ───────────────────────────────────────────
|
|
196
|
+
/**
|
|
197
|
+
* Convert a parsed agent file to a JudgeDefinition, reconstructing the
|
|
198
|
+
* systemPrompt from the markdown body with the standard adversarial
|
|
199
|
+
* mandate appended.
|
|
200
|
+
*/
|
|
201
|
+
export function agentToJudgeDefinition(agent, analyze) {
|
|
202
|
+
const fm = agent.frontmatter;
|
|
203
|
+
// The markdown body IS the system prompt content. We prepend the persona
|
|
204
|
+
// line (which is typically the first line of the body) and leave the
|
|
205
|
+
// rest as structured evaluation criteria.
|
|
206
|
+
const systemPrompt = agent.body;
|
|
207
|
+
return {
|
|
208
|
+
id: fm.id,
|
|
209
|
+
name: fm.name,
|
|
210
|
+
domain: fm.domain,
|
|
211
|
+
description: fm.description,
|
|
212
|
+
rulePrefix: fm.rulePrefix,
|
|
213
|
+
tableDescription: fm.tableDescription,
|
|
214
|
+
promptDescription: fm.promptDescription,
|
|
215
|
+
systemPrompt,
|
|
216
|
+
...(analyze ? { analyze } : {}),
|
|
217
|
+
};
|
|
218
|
+
}
|
|
219
|
+
// ─── Directory Loading ───────────────────────────────────────────────────────
|
|
220
|
+
/**
 * Parse every `.judge.md` / `.agent.md` file (case-insensitive suffix) that
 * sits directly in a directory.
 *
 * @param dirPath Directory to scan; resolved to an absolute path.
 * @returns Parsed agents ordered by ascending priority (missing priority
 * counts as 10), or an empty array when the directory does not exist.
 */
export function loadAgentDirectory(dirPath) {
    const directory = resolve(dirPath);
    if (!existsSync(directory)) {
        return [];
    }
    const agents = [];
    for (const entry of readdirSync(directory)) {
        if (/\.(agent|judge)\.md$/i.test(entry)) {
            agents.push(parseAgentFile(join(directory, entry)));
        }
    }
    agents.sort((left, right) => (left.frontmatter.priority ?? 10) - (right.frontmatter.priority ?? 10));
    return agents;
}
|
|
234
|
+
/**
 * Load all agent files from a directory and register them with the given
 * registry. Agents whose id is already known to the registry are skipped.
 *
 * @param dirPath Directory containing the agent files.
 * @param registry Object exposing `getJudge(id)` and `register(judge)`.
 * @returns The number of agents that were newly registered by this call.
 */
export function loadAndRegisterAgents(dirPath, registry) {
    let registered = 0;
    for (const agent of loadAgentDirectory(dirPath)) {
        const alreadyKnown = registry.getJudge(agent.frontmatter.id);
        if (!alreadyKnown) {
            const evaluator = resolveEvaluator(agent);
            registry.register(agentToJudgeDefinition(agent, evaluator));
            registered += 1;
        }
    }
    return registered;
}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import { EmbeddingCache } from "./embedding-cache.js";
|
|
2
|
+
/** Tuning knobs accepted by `buildContextSnippets`; every field is optional. */
export interface ContextOptions {
    /** Length of each chunk in characters. */
    chunkSize?: number;
    /** Number of characters shared between consecutive chunks. */
    overlap?: number;
    /** Upper bound on the number of snippets returned. */
    maxSnippets?: number;
    /** Cache to reuse across calls; a fresh one is created when omitted. */
    embeddingCache?: EmbeddingCache;
    /** Extra string mixed into the embedding cache key. */
    embeddingSalt?: string;
}
|
|
9
|
+
/**
|
|
10
|
+
* Naive chunker for code/docs. Returns plain text chunks; embeddings computed for ranking.
|
|
11
|
+
*/
|
|
12
|
+
export declare function buildContextSnippets(text: string, opts?: ContextOptions): Promise<Array<{
|
|
13
|
+
snippet: string;
|
|
14
|
+
score: number;
|
|
15
|
+
}>>;
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import { getOrCreateEmbedding, EmbeddingCache, FallbackEmbeddingProvider } from "./embedding-cache.js";
|
|
2
|
+
const DEFAULT_CHUNK_SIZE = 1200; // chars (~300 tokens)
const DEFAULT_OVERLAP = 200;
const DEFAULT_MAX_SNIPPETS = 5;
/**
 * Naive chunker for code/docs. Splits `text` into overlapping fixed-size
 * character windows, scores each window by the dot product of its embedding
 * with the embedding of the whole text, and returns the best-scoring ones.
 *
 * Embeddings always come from `FallbackEmbeddingProvider` and are cached in
 * `opts.embeddingCache` (or a throwaway cache when none is supplied).
 *
 * @param text Source text to chunk.
 * @param opts Optional chunk size, overlap, snippet limit, cache and salt.
 * @returns At most `maxSnippets` `{ snippet, score }` entries, highest score first.
 */
export async function buildContextSnippets(text, opts = {}) {
    const chunkSize = opts.chunkSize ?? DEFAULT_CHUNK_SIZE;
    const overlap = opts.overlap ?? DEFAULT_OVERLAP;
    const maxSnippets = opts.maxSnippets ?? DEFAULT_MAX_SNIPPETS;
    const cache = opts.embeddingCache ?? new EmbeddingCache();
    const provider = new FallbackEmbeddingProvider();
    // An overlap >= chunkSize would give a zero or negative stride and the
    // loop below would never terminate; always advance by at least one char.
    const stride = Math.max(1, chunkSize - overlap);
    const chunks = [];
    for (let start = 0; start < text.length; start += stride) {
        chunks.push(text.slice(start, start + chunkSize));
    }
    // Compute simple scores using fallback embeddings (dot product with a centroid)
    const centroid = await getOrCreateEmbedding(cache, provider, text, opts.embeddingSalt);
    const centroidVec = centroid.embedding;
    const scored = [];
    // Sequential on purpose: identical chunks then hit the cache instead of
    // being embedded twice.
    for (const c of chunks) {
        const chunkEmbedding = await getOrCreateEmbedding(cache, provider, c, opts.embeddingSalt);
        const score = dot(centroidVec, chunkEmbedding.embedding);
        scored.push({ snippet: c, score });
    }
    scored.sort((a, b) => b.score - a.score);
    return scored.slice(0, maxSnippets);
}
|
|
30
|
+
/**
 * Dot product of two numeric vectors. When the lengths differ, only the
 * leading components both vectors have are used.
 */
function dot(a, b) {
    const limit = Math.min(a.length, b.length);
    let total = 0;
    let idx = 0;
    while (idx < limit) {
        total += a[idx] * b[idx];
        idx += 1;
    }
    return total;
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/** Anything that can turn a piece of text into a numeric embedding vector. */
export interface EmbeddingProvider {
    /** Asynchronously compute the embedding for `text`. */
    embed(text: string): Promise<number[]>;
}
|
|
4
|
+
/** One cached embedding together with the text it was computed from. */
export interface EmbeddingChunk {
    /** Cache key for this entry. */
    hash: string;
    /** Embedding vector returned by the provider. */
    embedding: number[];
    /** The text that was embedded. */
    text: string;
    /** Optional free-form data attached by the caller. */
    metadata?: Record<string, unknown>;
}
|
|
10
|
+
/** Simple SHA1 hash for cache keys (content + salt/context). */
|
|
11
|
+
export declare function hashKey(text: string, salt?: string): string;
|
|
12
|
+
/**
|
|
13
|
+
* In-memory embedding cache (can be backed by disk later). Lightweight and
|
|
14
|
+
* dependency-free; callers can persist via JSON if desired.
|
|
15
|
+
*/
|
|
16
|
+
export declare class EmbeddingCache {
|
|
17
|
+
private cache;
|
|
18
|
+
get(key: string): EmbeddingChunk | undefined;
|
|
19
|
+
set(key: string, value: EmbeddingChunk): void;
|
|
20
|
+
clear(): void;
|
|
21
|
+
}
|
|
22
|
+
/**
|
|
23
|
+
* Trivial embedding provider (fallback) — returns normalized character code vector.
|
|
24
|
+
* Not semantically meaningful but keeps the pipeline working when no provider is configured.
|
|
25
|
+
*/
|
|
26
|
+
export declare class FallbackEmbeddingProvider implements EmbeddingProvider {
|
|
27
|
+
embed(text: string): Promise<number[]>;
|
|
28
|
+
}
|
|
29
|
+
/** Retrieve or compute an embedding, with caching. */
|
|
30
|
+
export declare function getOrCreateEmbedding(cache: EmbeddingCache, provider: EmbeddingProvider, text: string, salt?: string): Promise<EmbeddingChunk>;
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import { createHash } from "node:crypto";
|
|
2
|
+
/**
 * Build a cache key: the hex SHA-1 digest of `text` immediately followed by
 * `salt` (a missing salt counts as the empty string).
 *
 * NOTE(review): text and salt are concatenated without a separator, so
 * ("ab", "c") and ("abc") produce the same key.
 */
export function hashKey(text, salt) {
    const hasher = createHash("sha1");
    hasher.update(text + (salt ?? ""));
    return hasher.digest("hex");
}
|
|
8
|
+
/**
 * In-memory embedding cache backed by a `Map`. Entries live for as long as
 * the instance does or until `clear()` is called; there are no other
 * dependencies.
 */
export class EmbeddingCache {
    /** Backing store: cache key -> cached entry. */
    cache = new Map();
    /** Return the entry stored under `key`, or `undefined` when absent. */
    get(key) {
        const entry = this.cache.get(key);
        return entry;
    }
    /** Store `value` under `key`, replacing any existing entry. */
    set(key, value) {
        this.cache.set(key, value);
    }
    /** Remove every entry. */
    clear() {
        this.cache.clear();
    }
}
|
|
24
|
+
/**
 * Trivial fallback embedding provider. Character codes are summed into 32
 * buckets (character position modulo 32) and the resulting vector is scaled
 * to unit length. Not semantically meaningful, but it keeps the pipeline
 * working when no real provider is configured.
 */
export class FallbackEmbeddingProvider {
    /**
     * @param text Text to embed.
     * @returns A 32-component vector; all zeros for empty text.
     */
    async embed(text) {
        const buckets = Array.from({ length: 32 }, () => 0);
        for (let pos = 0; pos < text.length; pos++) {
            buckets[pos % buckets.length] += text.charCodeAt(pos);
        }
        let sumOfSquares = 0;
        for (const component of buckets) {
            sumOfSquares += component * component;
        }
        // Avoid dividing by zero for empty input.
        const norm = Math.sqrt(sumOfSquares) || 1;
        return buckets.map((component) => component / norm);
    }
}
|
|
38
|
+
/**
 * Return the cached embedding for `text` (keyed by `hashKey(text, salt)`),
 * computing it through `provider` and storing it on a cache miss.
 *
 * @returns The cached or newly created `{ hash, embedding, text }` entry.
 */
export async function getOrCreateEmbedding(cache, provider, text, salt) {
    const hash = hashKey(text, salt);
    const hit = cache.get(hash);
    if (hit) {
        return hit;
    }
    const entry = { hash, embedding: await provider.embed(text), text };
    cache.set(hash, entry);
    return entry;
}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import type { TribunalVerdict } from "./types.js";
|
|
2
|
+
/** Typed frontmatter of a `.skill.md` file. */
export interface SkillFrontmatter {
    /** Skill identifier; `runSkill` looks skills up by this value. */
    id: string;
    /** Display name of the skill. */
    name: string;
    /** Description of the review workflow. */
    description: string;
    /** Ids of the judges that take part in this skill. */
    agents: string[];
    /** Optional tags. */
    tags?: string[];
    /** Optional ordering weight; lower values sort earlier. */
    priority?: number;
}
|
|
10
|
+
/** Result of parsing one `.skill.md` file. */
export interface ParsedSkill {
    /** Validated frontmatter fields. */
    frontmatter: SkillFrontmatter;
    /** Markdown below the frontmatter fence. */
    body: string;
    /** Absolute path of the file that was read. */
    sourcePath: string;
}
|
|
15
|
+
type SkillMeta = Record<string, unknown>;
|
|
16
|
+
export declare function parseSkillFrontmatter(raw: string): {
|
|
17
|
+
meta: SkillMeta;
|
|
18
|
+
body: string;
|
|
19
|
+
};
|
|
20
|
+
/**
 * Validate raw skill metadata and coerce it into a typed SkillFrontmatter.
 * Throws when `id`, `name`, `description` or `agents` is missing or empty;
 * `sourcePath` is only used in the error message.
 */
export declare function validateSkillFrontmatter(meta: SkillMeta, sourcePath: string): SkillFrontmatter;
|
|
21
|
+
/**
 * Read a `.skill.md` file as UTF-8, parse and validate its frontmatter, and
 * return it together with the body and the absolute source path.
 */
export declare function parseSkillFile(filePath: string): ParsedSkill;
|
|
22
|
+
/**
 * Parse every file ending in `.skill.md` directly inside `dirPath`, sorted by
 * ascending priority. Returns an empty array when the directory does not exist.
 */
export declare function loadSkillDirectory(dirPath: string): ParsedSkill[];
|
|
23
|
+
/** List skills with metadata for display (id, name, description, tags, agents). */
|
|
24
|
+
export declare function listSkills(dirPath: string): Array<Pick<SkillFrontmatter, "id" | "name" | "description" | "tags" | "agents">>;
|
|
25
|
+
/**
|
|
26
|
+
* Run a skill by ID. Loads any missing agent judges, then evaluates code using
|
|
27
|
+
* only the judges referenced by the skill. Returns a tribunal verdict.
|
|
28
|
+
*/
|
|
29
|
+
export declare function runSkill(skillId: string, code: string, language: string, opts?: {
|
|
30
|
+
skillsDir?: string;
|
|
31
|
+
context?: unknown;
|
|
32
|
+
}): Promise<TribunalVerdict>;
|
|
33
|
+
export {};
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Skill Loader — reads `.skill.md` files and converts them into skill
|
|
3
|
+
* definitions that orchestrate sets of judges/agents. A skill represents a
|
|
4
|
+
* reusable review workflow (e.g., AI code review, security gate, release gate).
|
|
5
|
+
*/
|
|
6
|
+
import { readFileSync, readdirSync, existsSync } from "node:fs";
|
|
7
|
+
import { join, resolve, dirname } from "node:path";
|
|
8
|
+
import { fileURLToPath } from "node:url";
|
|
9
|
+
import { evaluateWithTribunal } from "./evaluators/index.js";
|
|
10
|
+
import { defaultRegistry } from "./judge-registry.js";
|
|
11
|
+
import { loadAgentJudges } from "./judges/index.js";
|
|
12
|
+
export function parseSkillFrontmatter(raw) {
|
|
13
|
+
const match = raw.match(/^---\r?\n([\s\S]*?)\r?\n---\r?\n([\s\S]*)$/);
|
|
14
|
+
if (!match) {
|
|
15
|
+
return { meta: {}, body: raw };
|
|
16
|
+
}
|
|
17
|
+
const yamlBlock = match[1];
|
|
18
|
+
const body = match[2].trim();
|
|
19
|
+
const meta = {};
|
|
20
|
+
const lines = yamlBlock.split(/\r?\n/);
|
|
21
|
+
let i = 0;
|
|
22
|
+
while (i < lines.length) {
|
|
23
|
+
const line = lines[i];
|
|
24
|
+
if (!line.trim() || line.trim().startsWith("#")) {
|
|
25
|
+
i++;
|
|
26
|
+
continue;
|
|
27
|
+
}
|
|
28
|
+
const kv = line.match(/^([a-zA-Z_][a-zA-Z0-9_-]*)\s*:\s*(.*)$/);
|
|
29
|
+
if (!kv) {
|
|
30
|
+
i++;
|
|
31
|
+
continue;
|
|
32
|
+
}
|
|
33
|
+
const key = kv[1];
|
|
34
|
+
let value = kv[2].trim();
|
|
35
|
+
// Multi-line array (YAML list)
|
|
36
|
+
if (!value || value === "|") {
|
|
37
|
+
// Peek ahead for indented or dash-prefixed lines
|
|
38
|
+
const items = [];
|
|
39
|
+
i++;
|
|
40
|
+
while (i < lines.length) {
|
|
41
|
+
const next = lines[i];
|
|
42
|
+
if (!next.trim()) {
|
|
43
|
+
i++;
|
|
44
|
+
continue;
|
|
45
|
+
}
|
|
46
|
+
if (/^\s*-\s+/.test(next)) {
|
|
47
|
+
items.push(next.replace(/^\s*-\s+/, "").trim());
|
|
48
|
+
i++;
|
|
49
|
+
continue;
|
|
50
|
+
}
|
|
51
|
+
if (/^\s{2,}\S/.test(next)) {
|
|
52
|
+
items.push(next.trim());
|
|
53
|
+
i++;
|
|
54
|
+
continue;
|
|
55
|
+
}
|
|
56
|
+
break; // end of list
|
|
57
|
+
}
|
|
58
|
+
if (items.length > 0) {
|
|
59
|
+
meta[key] = items;
|
|
60
|
+
continue;
|
|
61
|
+
}
|
|
62
|
+
// fall through if no items captured
|
|
63
|
+
}
|
|
64
|
+
if (typeof value === "string" && ((value.startsWith("[") && value.endsWith("]")) || value.includes(","))) {
|
|
65
|
+
// simple array parsing: split on comma
|
|
66
|
+
const normalized = value
|
|
67
|
+
.replace(/^\s*\[/, "")
|
|
68
|
+
.replace(/\]\s*$/, "")
|
|
69
|
+
.split(/\s*,\s*/)
|
|
70
|
+
.filter(Boolean);
|
|
71
|
+
value = normalized;
|
|
72
|
+
}
|
|
73
|
+
else if (typeof value === "string" &&
|
|
74
|
+
((value.startsWith('"') && value.endsWith('"')) || (value.startsWith("'") && value.endsWith("'")))) {
|
|
75
|
+
value = value.slice(1, -1);
|
|
76
|
+
}
|
|
77
|
+
meta[key] = value;
|
|
78
|
+
i++;
|
|
79
|
+
}
|
|
80
|
+
return { meta, body };
|
|
81
|
+
}
|
|
82
|
+
/** Frontmatter keys every skill file must define with a non-empty value. */
const REQUIRED_FIELDS = ["id", "name", "description", "agents"];
/**
 * Validate raw skill metadata and coerce it into a typed SkillFrontmatter.
 *
 * @param meta Values produced by the skill frontmatter parser (strings or string arrays).
 * @param sourcePath Path of the skill file; only used in error messages.
 * @returns The typed frontmatter. `tags` is `undefined` when absent;
 * `priority` is 10 when absent or not a finite number.
 * @throws Error when a required field is missing, empty, or an empty list.
 */
export function validateSkillFrontmatter(meta, sourcePath) {
    for (const field of REQUIRED_FIELDS) {
        if (!meta[field] || (Array.isArray(meta[field]) && meta[field].length === 0)) {
            throw new Error(`Skill file ${sourcePath} is missing required field: "${field}"`);
        }
    }
    // The parser turns any unquoted value containing a comma into a list;
    // re-join such prose with ", " so the text stays readable.
    const asText = (v) => (Array.isArray(v) ? v.join(", ") : String(v));
    const asList = (v) => Array.isArray(v)
        ? v
        : String(v ?? "")
            .split(/\s*,\s*/)
            .filter(Boolean);
    const numericPriority = Number(meta.priority);
    return {
        id: String(meta.id),
        name: asText(meta.name),
        description: asText(meta.description),
        agents: asList(meta.agents),
        tags: meta.tags ? asList(meta.tags) : undefined,
        // Fall back to the default rather than returning NaN for bad input.
        priority: meta.priority && Number.isFinite(numericPriority) ? numericPriority : 10,
    };
}
|
|
108
|
+
/**
 * Read one `.skill.md` file from disk and split it into validated
 * frontmatter and markdown body.
 *
 * @param filePath Path to the skill file; resolved to an absolute path.
 * @returns The validated frontmatter, the body text and the absolute path.
 */
export function parseSkillFile(filePath) {
    const sourcePath = resolve(filePath);
    const parsed = parseSkillFrontmatter(readFileSync(sourcePath, "utf-8"));
    return {
        frontmatter: validateSkillFrontmatter(parsed.meta, sourcePath),
        body: parsed.body,
        sourcePath,
    };
}
|
|
115
|
+
/**
 * Parse every file ending in `.skill.md` that sits directly in a directory.
 *
 * @param dirPath Directory to scan; resolved to an absolute path.
 * @returns Parsed skills ordered by ascending priority (missing priority
 * counts as 10), or an empty array when the directory does not exist.
 */
export function loadSkillDirectory(dirPath) {
    const directory = resolve(dirPath);
    if (!existsSync(directory)) {
        return [];
    }
    const skills = [];
    for (const entry of readdirSync(directory)) {
        if (!entry.endsWith(".skill.md")) {
            continue;
        }
        skills.push(parseSkillFile(join(directory, entry)));
    }
    return skills.sort((left, right) => (left.frontmatter.priority ?? 10) - (right.frontmatter.priority ?? 10));
}
|
|
124
|
+
/**
 * Summarize the skills found in a directory for display purposes.
 *
 * @returns One `{ id, name, description, tags, agents }` object per skill,
 * in the order the skills are loaded (ascending priority).
 */
export function listSkills(dirPath) {
    const summaries = [];
    for (const { frontmatter } of loadSkillDirectory(dirPath)) {
        const { id, name, description, tags, agents } = frontmatter;
        summaries.push({ id, name, description, tags, agents });
    }
    return summaries;
}
|
|
134
|
+
/**
 * Run a skill by ID. Loads the agent judges, checks that every judge the
 * skill references is registered, then evaluates the code with all other
 * registered judges disabled.
 *
 * @param skillId Id of the skill to run.
 * @param code Source code to evaluate.
 * @param language Language of `code`.
 * @param opts Optional `skillsDir`; defaults to the `skills` directory one
 * level above this module.
 * @returns The tribunal verdict for the skill's judges.
 * @throws Error when the skill, or a judge it references, cannot be found.
 *
 * NOTE(review): `opts.context` is part of the declared options but is not
 * read anywhere in this function — confirm whether it should be forwarded.
 */
export async function runSkill(skillId, code, language, opts) {
    const skillsDir = opts?.skillsDir ?? resolve(dirname(fileURLToPath(import.meta.url)), "..", "skills");
    const skill = loadSkillDirectory(skillsDir).find((candidate) => candidate.frontmatter.id === skillId);
    if (!skill) {
        throw new Error(`Skill not found: ${skillId}`);
    }
    // Load agent judges referenced by the skill
    loadAgentJudges();
    const requested = skill.frontmatter.agents;
    for (const judgeId of requested) {
        if (!defaultRegistry.getJudge(judgeId)) {
            throw new Error(`Judge referenced by skill not found in registry: ${judgeId}`);
        }
    }
    // Every registered judge that the skill does not list is switched off.
    const registeredIds = defaultRegistry.getJudges().map((j) => j.id);
    const enabled = new Set(requested);
    const disabledJudges = registeredIds.filter((id) => !enabled.has(id));
    return evaluateWithTribunal(code, language, `skill:${skill.frontmatter.id}`, {
        config: {
            disabledJudges,
        },
    });
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@kevinrabun/judges",
|
|
3
|
-
"version": "3.115.
|
|
3
|
+
"version": "3.115.1",
|
|
4
4
|
"description": "45 specialized judges that evaluate AI-generated code for security, cost, and quality.",
|
|
5
5
|
"mcpName": "io.github.KevinRabun/judges",
|
|
6
6
|
"type": "module",
|
|
@@ -89,6 +89,12 @@
|
|
|
89
89
|
"scripts/generate-skills-docs.ts",
|
|
90
90
|
"src/agent-loader.ts",
|
|
91
91
|
"src/skill-loader.ts",
|
|
92
|
+
"dist/agent-loader.js",
|
|
93
|
+
"dist/agent-loader.d.ts",
|
|
94
|
+
"dist/skill-loader.js",
|
|
95
|
+
"dist/skill-loader.d.ts",
|
|
96
|
+
"dist/context/**/*.js",
|
|
97
|
+
"dist/context/**/*.d.ts",
|
|
92
98
|
"dist/github-app.js",
|
|
93
99
|
"dist/github-app.d.ts",
|
|
94
100
|
"dist/index.js",
|
package/server.json
CHANGED
|
@@ -7,12 +7,12 @@
|
|
|
7
7
|
"url": "https://github.com/kevinrabun/judges",
|
|
8
8
|
"source": "github"
|
|
9
9
|
},
|
|
10
|
-
"version": "3.115.
|
|
10
|
+
"version": "3.115.1",
|
|
11
11
|
"packages": [
|
|
12
12
|
{
|
|
13
13
|
"registryType": "npm",
|
|
14
14
|
"identifier": "@kevinrabun/judges",
|
|
15
|
-
"version": "3.115.
|
|
15
|
+
"version": "3.115.1",
|
|
16
16
|
"transport": {
|
|
17
17
|
"type": "stdio"
|
|
18
18
|
}
|