@kevinrabun/judges 3.123.3 → 3.123.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +27 -1
- package/dist/ast/cross-file-taint.js +2 -40
- package/dist/ast/taint-tracker.d.ts +12 -0
- package/dist/ast/taint-tracker.js +4 -4
- package/dist/evaluators/cybersecurity.js +4 -1
- package/dist/formatters/html.js +1 -25
- package/dist/formatters/pdf.js +1 -24
- package/dist/formatters/shared.d.ts +4 -0
- package/dist/formatters/shared.js +29 -0
- package/package.json +1 -1
- package/server.json +2 -2
package/README.md
CHANGED
|
@@ -573,7 +573,7 @@ Evaluate a file with all 45 judges or a single judge.
|
|
|
573
573
|
| `--baseline <path>` / `-b <path>` | JSON baseline file — suppress known findings |
|
|
574
574
|
| `--summary` | Print a single summary line (ideal for scripts) |
|
|
575
575
|
| `--config <path>` | Load a `.judgesrc` / `.judgesrc.json` config file |
|
|
576
|
-
| `--preset <name>` | Use a named preset (see [Named Presets](#named-presets) for all
|
|
576
|
+
| `--preset <name>` | Use a named preset (see [Named Presets](#named-presets) for all 22 options) |
|
|
577
577
|
| `--min-score <n>` | Exit with code 1 if overall score is below this threshold |
|
|
578
578
|
| `--verbose` | Print timing and debug information |
|
|
579
579
|
| `--quiet` | Suppress non-essential output |
|
|
@@ -1130,6 +1130,32 @@ Re-run the tribunal with **prior findings as context** for iterative refinement.
|
|
|
1130
1130
|
| `relatedFiles` | array | no | Cross-file context `{ path, snippet, relationship? }[]` |
|
|
1131
1131
|
| `maxPromptChars` | number | no | Max character budget for LLM prompts (default: 100000, 0 = unlimited) |
|
|
1132
1132
|
|
|
1133
|
+
### Additional MCP Tools
|
|
1134
|
+
|
|
1135
|
+
| Tool | Description |
|
|
1136
|
+
|------|-------------|
|
|
1137
|
+
| `evaluate_file` | Read a file from disk and submit it to the full panel. Auto-detects language from extension. |
|
|
1138
|
+
| `evaluate_code_streaming` | Streaming evaluation — returns per-judge results as each judge completes with running aggregates. |
|
|
1139
|
+
| `evaluate_focused` | Run only specified judges. Use after an initial full evaluation to re-check specific areas. |
|
|
1140
|
+
| `evaluate_batch` | Evaluate multiple code files in a single call. Returns per-file verdicts plus aggregate statistics. |
|
|
1141
|
+
| `evaluate_then_fix` | Evaluate code and automatically generate fix patches for all findings with auto-fix support. |
|
|
1142
|
+
| `evaluate_with_progress` | Evaluate with progress callbacks for long-running evaluations. |
|
|
1143
|
+
| `evaluate_policy_aware` | Policy-aware evaluation with named profiles (startup, regulated, healthcare, fintech, public-sector). |
|
|
1144
|
+
| `fix_code` | Evaluate code and apply all available auto-fix patches. Returns fixed code with applied/remaining summary. |
|
|
1145
|
+
| `explain_finding` | Explain a finding in plain language with OWASP/CWE references, risk context, and remediation guidance. |
|
|
1146
|
+
| `triage_finding` | Set triage status of a finding (accepted-risk, deferred, wont-fix, false-positive) with attribution. |
|
|
1147
|
+
| `record_feedback` | Record user feedback (true-positive, false-positive, wont-fix) to calibrate confidence scores. |
|
|
1148
|
+
| `get_finding_stats` | Finding lifecycle statistics: open, fixed, recurring, and triaged counts plus trends. |
|
|
1149
|
+
| `get_suppression_analytics` | Analyze suppression patterns: FP rates by rule, suppression rates, auto-suppress candidates. |
|
|
1150
|
+
| `list_triaged_findings` | List triaged findings, optionally filtered by triage status. |
|
|
1151
|
+
| `benchmark_gate` | Run benchmarks against quality thresholds. Returns pass/fail with F1, precision, recall metrics. |
|
|
1152
|
+
| `run_benchmark` | Run the full benchmark suite with per-judge, per-category, per-difficulty breakdowns. |
|
|
1153
|
+
| `scaffold_judge` | Generate boilerplate files to add a new judge: definition, evaluator skeleton, and registration. |
|
|
1154
|
+
| `scaffold_plugin` | Generate a starter plugin template with custom rules, judges, and lifecycle hooks. |
|
|
1155
|
+
| `session_status` | Current evaluation session state: evaluation count, frameworks, verdict history, stability. |
|
|
1156
|
+
| `list_files` | List files and directories in the workspace for project exploration. |
|
|
1157
|
+
| `read_file` | Read file contents from the workspace. |
|
|
1158
|
+
|
|
1133
1159
|
#### Judge IDs
|
|
1134
1160
|
|
|
1135
1161
|
`data-security` · `cybersecurity` · `security` · `cost-effectiveness` · `scalability` · `cloud-readiness` · `software-practices` · `accessibility` · `api-design` · `api-contract` · `reliability` · `observability` · `performance` · `compliance` · `data-sovereignty` · `testing` · `documentation` · `internationalization` · `dependency-health` · `concurrency` · `ethics-bias` · `maintainability` · `error-handling` · `authentication` · `database` · `caching` · `configuration-management` · `backwards-compatibility` · `portability` · `ux` · `logging-privacy` · `rate-limiting` · `ci-cd` · `code-structure` · `agent-instructions` · `ai-code-safety` · `framework-safety` · `iac-security` · `hallucination-detection` · `intent-alignment` · `multi-turn-coherence` · `model-fingerprint` · `over-engineering` · `logic-review` · `false-positive-review`
|
|
@@ -12,47 +12,9 @@
|
|
|
12
12
|
// 4. Run a second pass on importing files with injected cross-file taint seeds
|
|
13
13
|
// 5. Emit CrossFileTaintFlow findings with full file-to-file provenance
|
|
14
14
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
15
|
+
import { SOURCE_PATTERNS, SINK_PATTERNS, isSanitized, } from "./taint-tracker.js";
|
|
15
16
|
import { normalizeLanguage } from "../language-patterns.js";
|
|
16
|
-
// ─── Source / Sink
|
|
17
|
-
const SOURCE_PATTERNS = [
|
|
18
|
-
{ pattern: /\breq(?:uest)?\.(?:body|query|params|headers|cookies)\b/i, kind: "http-param" },
|
|
19
|
-
{ pattern: /\brequest\.(?:form|args|json|data|values|files|get)\b/i, kind: "http-param" },
|
|
20
|
-
{ pattern: /\b(?:ctx|context)\.(?:query|params|request)\b/i, kind: "http-param" },
|
|
21
|
-
{ pattern: /\bgetParameter\s*\(/i, kind: "http-param" },
|
|
22
|
-
{ pattern: /\bRequest\.(?:Form|QueryString|Params)\b/i, kind: "http-param" },
|
|
23
|
-
{ pattern: /\b(?:process\.argv|sys\.argv|os\.Args|args)\b/i, kind: "user-input" },
|
|
24
|
-
{ pattern: /\b(?:prompt|readline|input)\s*\(/i, kind: "user-input" },
|
|
25
|
-
{ pattern: /\bsearchParams\.get\s*\(/i, kind: "url-param" },
|
|
26
|
-
{ pattern: /\.(?:useSearchParams|useParams)\b/i, kind: "url-param" },
|
|
27
|
-
];
|
|
28
|
-
const SINK_PATTERNS = [
|
|
29
|
-
{ pattern: /\beval\s*\(/i, kind: "code-execution" },
|
|
30
|
-
{ pattern: /\bnew\s+Function\s*\(/i, kind: "code-execution" },
|
|
31
|
-
{ pattern: /\b(?:exec|execSync|system|popen|subprocess\.(?:Popen|run|call)|os\.system)\s*\(/i, kind: "command-exec" },
|
|
32
|
-
{ pattern: /\b(?:spawn|spawnSync)\s*\(/i, kind: "command-exec" },
|
|
33
|
-
{ pattern: /\.(?:query|execute|exec)\s*\(/i, kind: "sql-query" },
|
|
34
|
-
{ pattern: /\.innerHTML\s*=/i, kind: "xss" },
|
|
35
|
-
{ pattern: /\bdocument\.write\s*\(/i, kind: "xss" },
|
|
36
|
-
{ pattern: /\bdangerouslySetInnerHTML/i, kind: "xss" },
|
|
37
|
-
{ pattern: /\b(?:readFile|readFileSync|open)\s*\(/i, kind: "path-traversal" },
|
|
38
|
-
{ pattern: /\.redirect\s*\(/i, kind: "redirect" },
|
|
39
|
-
{ pattern: /\b(?:render_template_string|nunjucks\.renderString|Handlebars\.compile)\s*\(/i, kind: "template" },
|
|
40
|
-
];
|
|
41
|
-
const SANITIZER_PATTERNS = [
|
|
42
|
-
/\bDOMPurify\.sanitize\s*\(/i,
|
|
43
|
-
/\bsanitizeHtml\s*\(/i,
|
|
44
|
-
/\bescapeHtml\s*\(/i,
|
|
45
|
-
/\bencodeURIComponent\s*\(/i,
|
|
46
|
-
/\bvalidator\.\w+\s*\(/i,
|
|
47
|
-
/\b(?:joi|yup|zod|ajv)\b.*\.(?:validate|parse|safeParse)\s*\(/i,
|
|
48
|
-
/\$\d+/,
|
|
49
|
-
/\?\s*(?:,|\))/,
|
|
50
|
-
/\bpath\.(?:normalize|resolve|basename)\s*\(/i,
|
|
51
|
-
/\bPreparedStatement\b/i,
|
|
52
|
-
];
|
|
53
|
-
function isSanitized(expression) {
|
|
54
|
-
return SANITIZER_PATTERNS.some((p) => p.test(expression));
|
|
55
|
-
}
|
|
17
|
+
// ─── Source / Sink patterns imported from taint-tracker.ts ───────────────────
|
|
56
18
|
// ─── Export Analysis ─────────────────────────────────────────────────────────
|
|
57
19
|
/**
|
|
58
20
|
* Analyze a file's exports to find which exported bindings carry taint.
|
|
@@ -24,6 +24,18 @@ export interface TaintFlow {
|
|
|
24
24
|
}
|
|
25
25
|
export type TaintSourceKind = "http-param" | "user-input" | "environment" | "url-param" | "external-data";
|
|
26
26
|
export type TaintSinkKind = "code-execution" | "command-exec" | "sql-query" | "xss" | "path-traversal" | "redirect" | "template" | "deserialization";
|
|
27
|
+
export declare const SOURCE_PATTERNS: Array<{
|
|
28
|
+
pattern: RegExp;
|
|
29
|
+
kind: TaintSourceKind;
|
|
30
|
+
}>;
|
|
31
|
+
export declare const SINK_PATTERNS: Array<{
|
|
32
|
+
pattern: RegExp;
|
|
33
|
+
kind: TaintSinkKind;
|
|
34
|
+
}>;
|
|
35
|
+
/** Known sanitizer/escaping functions that neutralize taint */
|
|
36
|
+
export declare const SANITIZER_PATTERNS: RegExp[];
|
|
37
|
+
/** Check if a code expression passes through a known sanitizer */
|
|
38
|
+
export declare function isSanitized(expression: string): boolean;
|
|
27
39
|
/**
|
|
28
40
|
* Analyze a source file for taint flows: paths from untrusted input to
|
|
29
41
|
* dangerous sinks through variable assignments and string concatenation.
|
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
import ts from "typescript";
|
|
15
15
|
import { normalizeLanguage } from "../language-patterns.js";
|
|
16
16
|
// ─── Source / Sink Definitions ───────────────────────────────────────────────
|
|
17
|
-
const SOURCE_PATTERNS = [
|
|
17
|
+
export const SOURCE_PATTERNS = [
|
|
18
18
|
{ pattern: /\breq(?:uest)?\.(?:body|query|params|headers|cookies)\b/i, kind: "http-param" },
|
|
19
19
|
{ pattern: /\brequest\.(?:form|args|json|data|values|files|get)\b/i, kind: "http-param" },
|
|
20
20
|
{ pattern: /\b(?:ctx|context)\.(?:query|params|request)\b/i, kind: "http-param" },
|
|
@@ -25,7 +25,7 @@ const SOURCE_PATTERNS = [
|
|
|
25
25
|
{ pattern: /\bsearchParams\.get\s*\(/i, kind: "url-param" },
|
|
26
26
|
{ pattern: /\.(?:useSearchParams|useParams)\b/i, kind: "url-param" },
|
|
27
27
|
];
|
|
28
|
-
const SINK_PATTERNS = [
|
|
28
|
+
export const SINK_PATTERNS = [
|
|
29
29
|
{ pattern: /\beval\s*\(/i, kind: "code-execution" },
|
|
30
30
|
{ pattern: /\bnew\s+Function\s*\(/i, kind: "code-execution" },
|
|
31
31
|
{ pattern: /\bvm\.run(?:InContext|InNewContext|InThisContext)?\s*\(/i, kind: "code-execution" },
|
|
@@ -49,7 +49,7 @@ const SINK_PATTERNS = [
|
|
|
49
49
|
];
|
|
50
50
|
// ─── Sanitizer Recognition ──────────────────────────────────────────────────
|
|
51
51
|
/** Known sanitizer/escaping functions that neutralize taint */
|
|
52
|
-
const SANITIZER_PATTERNS = [
|
|
52
|
+
export const SANITIZER_PATTERNS = [
|
|
53
53
|
// DOM / HTML sanitizers
|
|
54
54
|
/\bDOMPurify\.sanitize\s*\(/i,
|
|
55
55
|
/\bsanitizeHtml\s*\(/i,
|
|
@@ -80,7 +80,7 @@ const SANITIZER_PATTERNS = [
|
|
|
80
80
|
/\b(?:html|url)\.EscapeString\s*\(/i,
|
|
81
81
|
];
|
|
82
82
|
/** Check if a code expression passes through a known sanitizer */
|
|
83
|
-
function isSanitized(expression) {
|
|
83
|
+
export function isSanitized(expression) {
|
|
84
84
|
return SANITIZER_PATTERNS.some((p) => p.test(expression));
|
|
85
85
|
}
|
|
86
86
|
// ─── Guard Clause Detection ─────────────────────────────────────────────────
|
|
@@ -761,7 +761,10 @@ export function analyzeCybersecurity(code, language, context) {
|
|
|
761
761
|
if (lang === "rust") {
|
|
762
762
|
const unsafeLines = getLineNumbers(code, /\bunsafe\s*\{/g);
|
|
763
763
|
if (unsafeLines.length > 0) {
|
|
764
|
-
|
|
764
|
+
// Check raw code (not comment-stripped) because SAFETY docs ARE comments
|
|
765
|
+
const safetyDocRe = /\/\/\s*SAFETY:|\/\/\s*UNSAFE:|#\[allow\(unsafe_code\)\]/gi;
|
|
766
|
+
safetyDocRe.lastIndex = 0;
|
|
767
|
+
const hasSafetyDoc = safetyDocRe.test(code);
|
|
765
768
|
if (!hasSafetyDoc) {
|
|
766
769
|
findings.push({
|
|
767
770
|
ruleId: `${prefix}-${String(ruleNum++).padStart(3, "0")}`,
|
package/dist/formatters/html.js
CHANGED
|
@@ -8,32 +8,8 @@
|
|
|
8
8
|
* - Finding details with line numbers and suggested fixes
|
|
9
9
|
* - Dark/light theme auto-detection
|
|
10
10
|
*/
|
|
11
|
-
|
|
12
|
-
function esc(text) {
|
|
13
|
-
return text
|
|
14
|
-
.replace(/&/g, "&amp;")
|
|
15
|
-
.replace(/</g, "&lt;")
|
|
16
|
-
.replace(/>/g, "&gt;")
|
|
17
|
-
.replace(/"/g, "&quot;")
|
|
18
|
-
.replace(/'/g, "&#39;");
|
|
19
|
-
}
|
|
11
|
+
import { esc, severityColor } from "./shared.js";
|
|
20
12
|
// ─── Severity Colors ────────────────────────────────────────────────────────
|
|
21
|
-
function severityColor(severity) {
|
|
22
|
-
switch (severity) {
|
|
23
|
-
case "critical":
|
|
24
|
-
return "#dc2626";
|
|
25
|
-
case "high":
|
|
26
|
-
return "#ea580c";
|
|
27
|
-
case "medium":
|
|
28
|
-
return "#ca8a04";
|
|
29
|
-
case "low":
|
|
30
|
-
return "#2563eb";
|
|
31
|
-
case "info":
|
|
32
|
-
return "#6b7280";
|
|
33
|
-
default:
|
|
34
|
-
return "#6b7280";
|
|
35
|
-
}
|
|
36
|
-
}
|
|
37
13
|
function verdictColor(verdict) {
|
|
38
14
|
switch (verdict) {
|
|
39
15
|
case "pass":
|
package/dist/formatters/pdf.js
CHANGED
|
@@ -8,30 +8,7 @@
|
|
|
8
8
|
* - @media print styles for proper pagination
|
|
9
9
|
* - Executive summary on first page
|
|
10
10
|
*/
|
|
11
|
-
function esc(text) {
|
12
|
-
return text
|
|
13
|
-
.replace(/&/g, "&amp;")
|
|
14
|
-
.replace(/</g, "&lt;")
|
|
15
|
-
.replace(/>/g, "&gt;")
|
|
16
|
-
.replace(/"/g, "&quot;")
|
|
17
|
-
.replace(/'/g, "&#39;");
|
|
18
|
-
}
|
|
19
|
-
function severityColor(severity) {
|
|
20
|
-
switch (severity) {
|
|
21
|
-
case "critical":
|
|
22
|
-
return "#dc2626";
|
|
23
|
-
case "high":
|
|
24
|
-
return "#ea580c";
|
|
25
|
-
case "medium":
|
|
26
|
-
return "#ca8a04";
|
|
27
|
-
case "low":
|
|
28
|
-
return "#2563eb";
|
|
29
|
-
case "info":
|
|
30
|
-
return "#6b7280";
|
|
31
|
-
default:
|
|
32
|
-
return "#6b7280";
|
|
33
|
-
}
|
|
34
|
-
}
|
|
11
|
+
import { esc, severityColor } from "./shared.js";
|
|
35
12
|
function renderFinding(f) {
|
|
36
13
|
const color = severityColor(f.severity);
|
|
37
14
|
return `<tr>
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
// ─── Shared Formatter Utilities ──────────────────────────────────────────────
|
|
2
|
+
// Common helpers used by multiple output formatters (HTML, PDF, etc.).
|
|
3
|
+
// ──────────────────────────────────────────────────────────────────────────────
|
|
4
|
+
/** HTML-escape a string to prevent XSS in generated reports. */
|
|
5
|
+
export function esc(text) {
|
|
6
|
+
return text
|
|
7
|
+
.replace(/&/g, "&amp;")
|
|
8
|
+
.replace(/</g, "&lt;")
|
|
9
|
+
.replace(/>/g, "&gt;")
|
|
10
|
+
.replace(/"/g, "&quot;")
|
|
11
|
+
.replace(/'/g, "&#39;");
|
|
12
|
+
}
|
|
13
|
+
/** Map a severity level to its display hex color. */
|
|
14
|
+
export function severityColor(severity) {
|
|
15
|
+
switch (severity) {
|
|
16
|
+
case "critical":
|
|
17
|
+
return "#dc2626";
|
|
18
|
+
case "high":
|
|
19
|
+
return "#ea580c";
|
|
20
|
+
case "medium":
|
|
21
|
+
return "#ca8a04";
|
|
22
|
+
case "low":
|
|
23
|
+
return "#2563eb";
|
|
24
|
+
case "info":
|
|
25
|
+
return "#6b7280";
|
|
26
|
+
default:
|
|
27
|
+
return "#6b7280";
|
|
28
|
+
}
|
|
29
|
+
}
|
package/package.json
CHANGED
package/server.json
CHANGED
|
@@ -16,12 +16,12 @@
|
|
|
16
16
|
"mimeType": "image/png"
|
|
17
17
|
}
|
|
18
18
|
],
|
|
19
|
-
"version": "3.123.3",
|
|
19
|
+
"version": "3.123.4",
|
|
20
20
|
"packages": [
|
|
21
21
|
{
|
|
22
22
|
"registryType": "npm",
|
|
23
23
|
"identifier": "@kevinrabun/judges",
|
|
24
|
-
"version": "3.123.3",
|
|
24
|
+
"version": "3.123.4",
|
|
25
25
|
"transport": {
|
|
26
26
|
"type": "stdio"
|
|
27
27
|
}
|