@kevinrabun/judges 3.119.0 → 3.122.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/api.d.ts +2 -1
- package/dist/api.js +3 -1
- package/dist/cli-dispatch.d.ts +7 -0
- package/dist/cli-dispatch.js +654 -0
- package/dist/cli-formatters.d.ts +6 -0
- package/dist/cli-formatters.js +186 -0
- package/dist/cli.js +69 -4159
- package/dist/commands/baseline.js +2 -42
- package/dist/commands/coverage.js +3 -39
- package/dist/commands/diff.js +2 -38
- package/dist/commands/fix-pr.js +2 -23
- package/dist/commands/fix.js +3 -27
- package/dist/commands/llm-benchmark.d.ts +7 -0
- package/dist/commands/llm-benchmark.js +27 -1
- package/dist/commands/quality-gate.js +1 -12
- package/dist/commands/review-parallel.js +1 -19
- package/dist/commands/review.js +2 -33
- package/dist/commands/rule-test.js +1 -15
- package/dist/commands/tune.js +2 -29
- package/dist/commands/watch.js +3 -42
- package/dist/config.js +1 -1
- package/dist/evaluators/hallucination-detection.js +343 -0
- package/dist/evaluators/index.d.ts +2 -11
- package/dist/evaluators/index.js +3 -181
- package/dist/evaluators/security.js +226 -2
- package/dist/evaluators/suppressions.d.ts +49 -0
- package/dist/evaluators/suppressions.js +185 -0
- package/dist/ext-to-lang.d.ts +16 -0
- package/dist/ext-to-lang.js +60 -0
- package/dist/github-app.d.ts +1 -3
- package/dist/github-app.js +2 -34
- package/dist/parallel.js +2 -14
- package/dist/probabilistic/llm-response-validator.js +1 -1
- package/dist/reports/public-repo-report.js +9 -1
- package/dist/skill-loader.js +9 -6
- package/dist/tools/register-evaluation.js +2 -29
- package/package.json +1 -1
- package/server.json +2 -2
- package/src/skill-loader.ts +9 -6
|
@@ -5,7 +5,7 @@ const SEVERITY_SET = new Set(["critical", "high", "medium", "low", "info"]);
|
|
|
5
5
|
* Attempt to parse a JSON payload embedded in LLM output. Supports fenced code blocks and raw JSON.
|
|
6
6
|
*/
|
|
7
7
|
function parseJsonBlock(text) {
|
|
8
|
-
const fenceMatch = text.match(/```(?:json)
|
|
8
|
+
const fenceMatch = text.match(/```(?:json)?[ \t]*\n([\s\S]*?)\n[ \t]*```/i) ?? text.match(/```(?:json)?[ \t]*([\s\S]*?)```/i);
|
|
9
9
|
if (fenceMatch) {
|
|
10
10
|
try {
|
|
11
11
|
return JSON.parse(fenceMatch[1]);
|
|
@@ -215,7 +215,15 @@ function countBySeverity(findings) {
|
|
|
215
215
|
function compileExcludeRegexes(patterns) {
|
|
216
216
|
if (!patterns || patterns.length === 0)
|
|
217
217
|
return [];
|
|
218
|
-
return patterns.map((pattern) =>
|
|
218
|
+
return patterns.map((pattern) => {
|
|
219
|
+
try {
|
|
220
|
+
return new RegExp(pattern, "i");
|
|
221
|
+
}
|
|
222
|
+
catch {
|
|
223
|
+
// Invalid regex from user input — treat as literal string match
|
|
224
|
+
return new RegExp(pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), "i");
|
|
225
|
+
}
|
|
226
|
+
});
|
|
219
227
|
}
|
|
220
228
|
function isLikelyNonProductionPath(path) {
|
|
221
229
|
return /(^|\/)(test|tests|__tests__|spec|specs|e2e)(\/|\.|$)|\.(?:test|tests|spec|specs|e2e)\.[^/]+$|mock|fixture|fixtures|(^|\/)docs(-|\/)i18n(\/|$)|(^|\/)docs(\/|$)/i.test(path);
|
package/dist/skill-loader.js
CHANGED
|
@@ -25,7 +25,7 @@ export function parseSkillFrontmatter(raw) {
|
|
|
25
25
|
i++;
|
|
26
26
|
continue;
|
|
27
27
|
}
|
|
28
|
-
const kv = line.match(/^([a-zA-Z_][a-zA-Z0-9_-]*)\
|
|
28
|
+
const kv = line.match(/^([a-zA-Z_][a-zA-Z0-9_-]*)[ \t]*:[ \t]*(.*)$/);
|
|
29
29
|
if (!kv) {
|
|
30
30
|
i++;
|
|
31
31
|
continue;
|
|
@@ -64,9 +64,10 @@ export function parseSkillFrontmatter(raw) {
|
|
|
64
64
|
if (typeof value === "string" && ((value.startsWith("[") && value.endsWith("]")) || value.includes(","))) {
|
|
65
65
|
// simple array parsing: split on comma
|
|
66
66
|
const normalized = value
|
|
67
|
-
.replace(
|
|
68
|
-
.replace(/\]\
|
|
69
|
-
.split(
|
|
67
|
+
.replace(/^[ \t]*\[/, "")
|
|
68
|
+
.replace(/\][ \t]*$/, "")
|
|
69
|
+
.split(",")
|
|
70
|
+
.map((s) => s.trim())
|
|
70
71
|
.filter(Boolean);
|
|
71
72
|
value = normalized;
|
|
72
73
|
}
|
|
@@ -93,13 +94,15 @@ export function validateSkillFrontmatter(meta, sourcePath) {
|
|
|
93
94
|
agents: Array.isArray(meta.agents)
|
|
94
95
|
? meta.agents
|
|
95
96
|
: String(meta.agents ?? "")
|
|
96
|
-
.split(
|
|
97
|
+
.split(",")
|
|
98
|
+
.map((s) => s.trim())
|
|
97
99
|
.filter(Boolean),
|
|
98
100
|
tags: Array.isArray(meta.tags)
|
|
99
101
|
? meta.tags
|
|
100
102
|
: meta.tags
|
|
101
103
|
? String(meta.tags)
|
|
102
|
-
.split(
|
|
104
|
+
.split(",")
|
|
105
|
+
.map((s) => s.trim())
|
|
103
106
|
.filter(Boolean)
|
|
104
107
|
: undefined,
|
|
105
108
|
priority: meta.priority ? Number(meta.priority) : 10,
|
|
@@ -3,7 +3,6 @@
|
|
|
3
3
|
// ──────────────────────────────────────────────────────────────────────────────
|
|
4
4
|
import { z } from "zod";
|
|
5
5
|
import { readFileSync, existsSync } from "fs";
|
|
6
|
-
import { extname } from "path";
|
|
7
6
|
import { JUDGES, getJudge, getJudgeSummaries } from "../judges/index.js";
|
|
8
7
|
import { evaluateWithJudge, evaluateWithTribunal, evaluateWithTribunalStreaming, formatVerdictAsMarkdown, formatEvaluationAsMarkdown, } from "../evaluators/index.js";
|
|
9
8
|
import { evaluateCodeV2, evaluateProjectV2, getSupportedPolicyProfiles } from "../evaluators/v2.js";
|
|
@@ -408,35 +407,9 @@ function registerEvaluateV2(server) {
|
|
|
408
407
|
});
|
|
409
408
|
}
|
|
410
409
|
// ─── evaluate_file ───────────────────────────────────────────────────────────
|
|
411
|
-
|
|
412
|
-
".ts": "typescript",
|
|
413
|
-
".tsx": "typescript",
|
|
414
|
-
".js": "javascript",
|
|
415
|
-
".jsx": "javascript",
|
|
416
|
-
".mjs": "javascript",
|
|
417
|
-
".cjs": "javascript",
|
|
418
|
-
".py": "python",
|
|
419
|
-
".rs": "rust",
|
|
420
|
-
".go": "go",
|
|
421
|
-
".java": "java",
|
|
422
|
-
".cs": "csharp",
|
|
423
|
-
".cpp": "cpp",
|
|
424
|
-
".cc": "cpp",
|
|
425
|
-
".h": "c",
|
|
426
|
-
".hpp": "cpp",
|
|
427
|
-
".ps1": "powershell",
|
|
428
|
-
".psm1": "powershell",
|
|
429
|
-
".bicep": "bicep",
|
|
430
|
-
".tf": "terraform",
|
|
431
|
-
".yaml": "yaml",
|
|
432
|
-
".yml": "yaml",
|
|
433
|
-
};
|
|
410
|
+
import { detectLanguageFromPath as _detectLangShared } from "../ext-to-lang.js";
|
|
434
411
|
function detectLanguageFromPath(filePath) {
|
|
435
|
-
|
|
436
|
-
if (lower.endsWith("dockerfile") || lower.includes("dockerfile."))
|
|
437
|
-
return "dockerfile";
|
|
438
|
-
const ext = extname(lower);
|
|
439
|
-
return EXT_TO_LANG[ext] || "typescript";
|
|
412
|
+
return _detectLangShared(filePath) ?? "typescript";
|
|
440
413
|
}
|
|
441
414
|
function registerEvaluateFile(server) {
|
|
442
415
|
server.tool("evaluate_file", `Read a file from disk and submit it to the full Judges Panel for evaluation. Automatically detects the programming language from the file extension. All ${JUDGES.length} judges review the code with pattern detection and deep contextual analysis.`, {
|
package/package.json
CHANGED
package/server.json
CHANGED
|
@@ -7,12 +7,12 @@
|
|
|
7
7
|
"url": "https://github.com/kevinrabun/judges",
|
|
8
8
|
"source": "github"
|
|
9
9
|
},
|
|
10
|
-
"version": "3.119.0",
|
|
10
|
+
"version": "3.122.0",
|
|
11
11
|
"packages": [
|
|
12
12
|
{
|
|
13
13
|
"registryType": "npm",
|
|
14
14
|
"identifier": "@kevinrabun/judges",
|
|
15
|
-
"version": "3.119.0",
|
|
15
|
+
"version": "3.122.0",
|
|
16
16
|
"transport": {
|
|
17
17
|
"type": "stdio"
|
|
18
18
|
}
|
package/src/skill-loader.ts
CHANGED
|
@@ -44,7 +44,7 @@ export function parseSkillFrontmatter(raw: string): { meta: SkillMeta; body: str
|
|
|
44
44
|
i++;
|
|
45
45
|
continue;
|
|
46
46
|
}
|
|
47
|
-
const kv = line.match(/^([a-zA-Z_][a-zA-Z0-9_-]*)\
|
|
47
|
+
const kv = line.match(/^([a-zA-Z_][a-zA-Z0-9_-]*)[ \t]*:[ \t]*(.*)$/);
|
|
48
48
|
if (!kv) {
|
|
49
49
|
i++;
|
|
50
50
|
continue;
|
|
@@ -85,9 +85,10 @@ export function parseSkillFrontmatter(raw: string): { meta: SkillMeta; body: str
|
|
|
85
85
|
if (typeof value === "string" && ((value.startsWith("[") && value.endsWith("]")) || value.includes(","))) {
|
|
86
86
|
// simple array parsing: split on comma
|
|
87
87
|
const normalized = (value as string)
|
|
88
|
-
.replace(
|
|
89
|
-
.replace(/\]\
|
|
90
|
-
.split(
|
|
88
|
+
.replace(/^[ \t]*\[/, "")
|
|
89
|
+
.replace(/\][ \t]*$/, "")
|
|
90
|
+
.split(",")
|
|
91
|
+
.map((s) => s.trim())
|
|
91
92
|
.filter(Boolean);
|
|
92
93
|
value = normalized;
|
|
93
94
|
} else if (
|
|
@@ -117,13 +118,15 @@ export function validateSkillFrontmatter(meta: SkillMeta, sourcePath: string): S
|
|
|
117
118
|
agents: Array.isArray(meta.agents)
|
|
118
119
|
? (meta.agents as string[])
|
|
119
120
|
: String(meta.agents ?? "")
|
|
120
|
-
.split(
|
|
121
|
+
.split(",")
|
|
122
|
+
.map((s) => s.trim())
|
|
121
123
|
.filter(Boolean),
|
|
122
124
|
tags: Array.isArray(meta.tags)
|
|
123
125
|
? (meta.tags as string[])
|
|
124
126
|
: meta.tags
|
|
125
127
|
? String(meta.tags)
|
|
126
|
-
.split(
|
|
128
|
+
.split(",")
|
|
129
|
+
.map((s) => s.trim())
|
|
127
130
|
.filter(Boolean)
|
|
128
131
|
: undefined,
|
|
129
132
|
priority: meta.priority ? Number(meta.priority) : 10,
|