@kevinrabun/judges 3.117.2 → 3.117.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -7
- package/dist/api.d.ts +3 -3
- package/dist/api.js +3 -3
- package/dist/evaluators/index.js +14 -10
- package/package.json +1 -1
- package/server.json +2 -2
package/README.md
CHANGED
|
@@ -15,7 +15,7 @@ An MCP (Model Context Protocol) server that provides a panel of **45 specialized
|
|
|
15
15
|
[](https://www.npmjs.com/package/@kevinrabun/judges)
|
|
16
16
|
[](https://www.npmjs.com/package/@kevinrabun/judges)
|
|
17
17
|
[](https://opensource.org/licenses/MIT)
|
|
18
|
-
[](https://github.com/KevinRabun/judges/actions)
|
|
19
19
|
|
|
20
20
|
> ๐ฐ **Packages**
|
|
21
21
|
> - **CLI**: `@kevinrabun/judges-cli` — binary `judges` (use `npx @kevinrabun/judges-cli eval --file app.ts`).
|
|
@@ -84,7 +84,7 @@ npx @kevinrabun/judges mcp
|
|
|
84
84
|
|
|
85
85
|
## Why Judges?
|
|
86
86
|
|
|
87
|
-
AI code generators (Copilot, Cursor, Claude, ChatGPT, etc.) write code fast — but they routinely produce **insecure defaults, missing auth, hardcoded secrets, and poor error handling**. Human reviewers catch some of this, but nobody reviews
|
|
87
|
+
AI code generators (Copilot, Cursor, Claude, ChatGPT, etc.) write code fast — but they routinely produce **insecure defaults, missing auth, hardcoded secrets, and poor error handling**. Human reviewers catch some of this, but nobody reviews 45 dimensions consistently.
|
|
88
88
|
|
|
89
89
|
| | ESLint / Biome | SonarQube | Semgrep / CodeQL | **Judges** |
|
|
90
90
|
|---|---|---|---|---|
|
|
@@ -550,13 +550,13 @@ Evaluate a file with all 45 judges or a single judge.
|
|
|
550
550
|
| `--file <path>` / positional | File to evaluate |
|
|
551
551
|
| `--judge <id>` / `-j <id>` | Single judge mode |
|
|
552
552
|
| `--language <lang>` / `-l <lang>` | Language hint (auto-detected from extension) |
|
|
553
|
-
| `--format <fmt>` / `-f <fmt>` | Output format: `text`, `json`, `sarif`, `markdown`, `html`, `junit`, `codeclimate` |
|
|
553
|
+
| `--format <fmt>` / `-f <fmt>` | Output format: `text`, `json`, `sarif`, `markdown`, `html`, `pdf`, `junit`, `codeclimate`, `github-actions` |
|
|
554
554
|
| `--output <path>` / `-o <path>` | Write output to file |
|
|
555
555
|
| `--fail-on-findings` | Exit with code 1 if verdict is FAIL |
|
|
556
556
|
| `--baseline <path>` / `-b <path>` | JSON baseline file — suppress known findings |
|
|
557
557
|
| `--summary` | Print a single summary line (ideal for scripts) |
|
|
558
558
|
| `--config <path>` | Load a `.judgesrc` / `.judgesrc.json` config file |
|
|
559
|
-
| `--preset <name>` | Use a named preset
|
|
559
|
+
| `--preset <name>` | Use a named preset (see [Named Presets](#named-presets) for all 18 options) |
|
|
560
560
|
| `--min-score <n>` | Exit with code 1 if overall score is below this threshold |
|
|
561
561
|
| `--verbose` | Print timing and debug information |
|
|
562
562
|
| `--quiet` | Suppress non-essential output |
|
|
@@ -696,10 +696,24 @@ Use `--preset` to apply pre-configured evaluation settings:
|
|
|
696
696
|
|--------|-------------|
|
|
697
697
|
| `strict` | All severities, all judges — maximum thoroughness |
|
|
698
698
|
| `lenient` | Only high and critical findings — fast and focused |
|
|
699
|
-
| `security-only` | Security judges
|
|
699
|
+
| `security-only` | Security-focused — disables non-security judges (cost, scalability, docs, a11y, i18n, UX, etc.) |
|
|
700
700
|
| `startup` | Skip compliance, sovereignty, i18n judges — move fast |
|
|
701
701
|
| `compliance` | Only compliance, data-sovereignty, authentication — regulatory focus |
|
|
702
702
|
| `performance` | Only performance, scalability, caching, cost-effectiveness |
|
|
703
|
+
| `react` | Tuned for React/Next.js apps — enables accessibility, XSS protection |
|
|
704
|
+
| `express` | Tuned for Express.js APIs — middleware security, auth, CORS, rate limiting |
|
|
705
|
+
| `fastapi` | Tuned for Python FastAPI — input validation, async patterns, API security |
|
|
706
|
+
| `django` | Tuned for Django apps — template security, ORM misuse, CSRF |
|
|
707
|
+
| `spring-boot` | Tuned for Java Spring Boot — injection, configuration, actuator security |
|
|
708
|
+
| `rails` | Tuned for Ruby on Rails — mass assignment, CSRF, SQL injection |
|
|
709
|
+
| `nextjs` | Tuned for Next.js — server/client security, API routes, SSR/ISR |
|
|
710
|
+
| `terraform` | Tuned for Terraform/OpenTofu IaC — infrastructure security, compliance |
|
|
711
|
+
| `kubernetes` | Tuned for K8s manifests — security contexts, RBAC, resource limits |
|
|
712
|
+
| `onboarding` | Smart defaults for first-time adoption — suppresses noisy rules |
|
|
713
|
+
| `fintech` | Financial services — PCI DSS, cryptography, authentication, audit |
|
|
714
|
+
| `healthtech` | Healthcare — HIPAA compliance, data sovereignty, encryption, audit trails |
|
|
715
|
+
| `saas` | Multi-tenant SaaS — tenant isolation, rate limiting, scalability |
|
|
716
|
+
| `government` | Government/public sector — compliance, sovereignty, authentication |
|
|
703
717
|
|
|
704
718
|
```bash
|
|
705
719
|
judges eval --preset security-only src/api.ts
|
|
@@ -1155,13 +1169,13 @@ Create a `.judgesrc.json` (or `.judgesrc`) file in your project root to customiz
|
|
|
1155
1169
|
| Field | Type | Default | Description |
|
|
1156
1170
|
|-------|------|---------|-------------|
|
|
1157
1171
|
| `$schema` | `string` | — | JSON Schema URL for IDE validation |
|
|
1158
|
-
| `preset` | `string` | — | Named preset
|
|
1172
|
+
| `preset` | `string` | — | Named preset (see [Named Presets](#named-presets) for all 18 options) |
|
|
1159
1173
|
| `minSeverity` | `string` | `"info"` | Minimum severity to report: `critical` · `high` · `medium` · `low` · `info` |
|
|
1160
1174
|
| `disabledRules` | `string[]` | `[]` | Rule IDs or prefix wildcards to suppress (e.g. `"COST-*"`, `"SEC-003"`) |
|
|
1161
1175
|
| `disabledJudges` | `string[]` | `[]` | Judge IDs to skip entirely (e.g. `"cost-effectiveness"`) |
|
|
1162
1176
|
| `ruleOverrides` | `object` | `{}` | Per-rule overrides keyed by rule ID or wildcard — `{ disabled?: boolean, severity?: string }` |
|
|
1163
1177
|
| `languages` | `string[]` | `[]` | Restrict analysis to specific languages (empty = all) |
|
|
1164
|
-
| `format` | `string` | `"text"` | Default output format: `text` · `json` · `sarif` · `markdown` · `html` · `junit` · `codeclimate` |
|
|
1178
|
+
| `format` | `string` | `"text"` | Default output format: `text` · `json` · `sarif` · `markdown` · `html` · `pdf` · `junit` · `codeclimate` · `github-actions` |
|
|
1165
1179
|
| `failOnFindings` | `boolean` | `false` | Exit code 1 when verdict is `fail` — useful for CI gates |
|
|
1166
1180
|
| `baseline` | `string` | `""` | Path to a baseline JSON file — matching findings are suppressed |
|
|
1167
1181
|
|
package/dist/api.d.ts
CHANGED
|
@@ -10,13 +10,13 @@
|
|
|
10
10
|
*/
|
|
11
11
|
export type { Severity, Verdict, Finding, Patch, LangFamily, JudgesConfig, RuleOverride, ProjectFile, ProjectVerdict, DiffVerdict, DependencyEntry, DependencyVerdict, JudgeEvaluation, TribunalVerdict, JudgeDefinition, EvaluationContextV2, EvidenceBundleV2, SpecializedFindingV2, TribunalVerdictV2, MustFixGateOptions, MustFixGateResult, AppBuilderWorkflowResult, PlainLanguageFinding, WorkflowTask, PolicyProfile, SuppressionRecord, SuppressionResult, ExecutionTrace, RuleTrace, StreamingBatch, JudgeSelectionContext, JudgeSelectionResult, SessionContext, } from "./types.js";
|
|
12
12
|
export { JudgesError, ConfigError, EvaluationError, ParseError } from "./errors.js";
|
|
13
|
-
export { parseConfig, defaultConfig, mergeConfigs, discoverCascadingConfigs, loadCascadingConfig, loadPluginJudges, validatePluginSpecifiers, isValidJudgeDefinition, applyOverridesForFile, applyLanguageProfile, resolveExtendsConfig, } from "./config.js";
|
|
13
|
+
export { parseConfig, defaultConfig, mergeConfigs, discoverCascadingConfigs, loadCascadingConfig, loadConfigFile, expandEnvPlaceholders, loadPluginJudges, validatePluginSpecifiers, isValidJudgeDefinition, validateJudgeDefinition, applyOverridesForFile, applyLanguageProfile, resolveExtendsConfig, } from "./config.js";
|
|
14
14
|
export { JUDGES, getJudge, getJudgeSummaries } from "./judges/index.js";
|
|
15
|
-
export { evaluateWithJudge, evaluateWithTribunal, evaluateWithTribunalStreaming, evaluateProject, evaluateDiff, analyzeDependencies, enrichWithPatches, crossEvaluatorDedup, diffFindings, formatFindingDiff, evaluateNetChangeGate, applyInlineSuppressions, applyInlineSuppressionsWithAudit, runAppBuilderWorkflow, formatVerdictAsMarkdown, formatEvaluationAsMarkdown, clearEvaluationCaches, scanProjectWideSecurityPatterns, } from "./evaluators/index.js";
|
|
15
|
+
export { evaluateWithJudge, evaluateWithTribunal, evaluateWithTribunalStreaming, evaluateProject, evaluateDiff, analyzeDependencies, enrichWithPatches, crossEvaluatorDedup, crossFileDedup, diffFindings, formatFindingDiff, evaluateNetChangeGate, applyInlineSuppressions, applyInlineSuppressionsWithAudit, runAppBuilderWorkflow, formatVerdictAsMarkdown, formatEvaluationAsMarkdown, clearEvaluationCaches, scanProjectWideSecurityPatterns, } from "./evaluators/index.js";
|
|
16
16
|
export type { FindingDiff, NetChangeGateOptions, NetChangeGateResult, EvaluationOptions } from "./evaluators/index.js";
|
|
17
17
|
export { selectJudges } from "./evaluators/judge-selector.js";
|
|
18
18
|
export { EvaluationSession, getGlobalSession, resetGlobalSession } from "./evaluation-session.js";
|
|
19
|
-
export { getPreset, composePresets, PRESETS } from "./presets.js";
|
|
19
|
+
export { getPreset, composePresets, listPresets, PRESETS } from "./presets.js";
|
|
20
20
|
export type { Preset } from "./presets.js";
|
|
21
21
|
export { evaluateCodeV2, evaluateProjectV2, getSupportedPolicyProfiles } from "./evaluators/v2.js";
|
|
22
22
|
export { analyzeCrossFileTaint } from "./ast/cross-file-taint.js";
|
package/dist/api.js
CHANGED
|
@@ -11,17 +11,17 @@
|
|
|
11
11
|
// โโโ Errors โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
|
12
12
|
export { JudgesError, ConfigError, EvaluationError, ParseError } from "./errors.js";
|
|
13
13
|
// โโโ Config โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
|
14
|
-
export { parseConfig, defaultConfig, mergeConfigs, discoverCascadingConfigs, loadCascadingConfig, loadPluginJudges, validatePluginSpecifiers, isValidJudgeDefinition, applyOverridesForFile, applyLanguageProfile, resolveExtendsConfig, } from "./config.js";
|
|
14
|
+
export { parseConfig, defaultConfig, mergeConfigs, discoverCascadingConfigs, loadCascadingConfig, loadConfigFile, expandEnvPlaceholders, loadPluginJudges, validatePluginSpecifiers, isValidJudgeDefinition, validateJudgeDefinition, applyOverridesForFile, applyLanguageProfile, resolveExtendsConfig, } from "./config.js";
|
|
15
15
|
// โโโ Judge Registry โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
|
16
16
|
export { JUDGES, getJudge, getJudgeSummaries } from "./judges/index.js";
|
|
17
17
|
// โโโ Core Evaluation Functions โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
|
18
|
-
export { evaluateWithJudge, evaluateWithTribunal, evaluateWithTribunalStreaming, evaluateProject, evaluateDiff, analyzeDependencies, enrichWithPatches, crossEvaluatorDedup, diffFindings, formatFindingDiff, evaluateNetChangeGate, applyInlineSuppressions, applyInlineSuppressionsWithAudit, runAppBuilderWorkflow, formatVerdictAsMarkdown, formatEvaluationAsMarkdown, clearEvaluationCaches, scanProjectWideSecurityPatterns, } from "./evaluators/index.js";
|
|
18
|
+
export { evaluateWithJudge, evaluateWithTribunal, evaluateWithTribunalStreaming, evaluateProject, evaluateDiff, analyzeDependencies, enrichWithPatches, crossEvaluatorDedup, crossFileDedup, diffFindings, formatFindingDiff, evaluateNetChangeGate, applyInlineSuppressions, applyInlineSuppressionsWithAudit, runAppBuilderWorkflow, formatVerdictAsMarkdown, formatEvaluationAsMarkdown, clearEvaluationCaches, scanProjectWideSecurityPatterns, } from "./evaluators/index.js";
|
|
19
19
|
// โโโ Adaptive Judge Selection โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
|
20
20
|
export { selectJudges } from "./evaluators/judge-selector.js";
|
|
21
21
|
// โโโ Evaluation Session โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
|
22
22
|
export { EvaluationSession, getGlobalSession, resetGlobalSession } from "./evaluation-session.js";
|
|
23
23
|
// โโโ Presets โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
|
24
|
-
export { getPreset, composePresets, PRESETS } from "./presets.js";
|
|
24
|
+
export { getPreset, composePresets, listPresets, PRESETS } from "./presets.js";
|
|
25
25
|
// โโโ V2 Policy-Aware API โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
|
26
26
|
export { evaluateCodeV2, evaluateProjectV2, getSupportedPolicyProfiles } from "./evaluators/v2.js";
|
|
27
27
|
// โโโ Cross-File Taint Analysis โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
package/dist/evaluators/index.js
CHANGED
|
@@ -435,16 +435,8 @@ export function evaluateWithJudge(judge, code, language, context, options) {
|
|
|
435
435
|
: undefined;
|
|
436
436
|
findings.push(...judge.analyze(code, language, analyzeCtx));
|
|
437
437
|
}
|
|
438
|
-
//
|
|
439
|
-
|
|
440
|
-
if (boostResult.findings.length > 0) {
|
|
441
|
-
// Deduplicate: only add boost findings whose ruleId isn't already present
|
|
442
|
-
for (const bf of boostResult.findings) {
|
|
443
|
-
if (!findings.some((f) => f.ruleId === bf.ruleId)) {
|
|
444
|
-
findings.push(bf);
|
|
445
|
-
}
|
|
446
|
-
}
|
|
447
|
-
}
|
|
438
|
+
// NOTE: Recall boost (applyRecallBoost) is applied once in evaluateWithTribunal()
|
|
439
|
+
// rather than per-judge, to avoid generating N duplicate boost findings.
|
|
448
440
|
// ── Absence gating ──
|
|
449
441
|
// Absence-based findings ("no rate limiting", "no monitoring", etc.) are
|
|
450
442
|
// project-level concerns that cannot be accurately assessed from a single
|
|
@@ -706,6 +698,18 @@ export function evaluateWithTribunal(code, language, context, options) {
|
|
|
706
698
|
? "warning"
|
|
707
699
|
: "pass";
|
|
708
700
|
const rawFindings = evaluations.flatMap((e) => e.findings);
|
|
701
|
+
// ── Recall boost (once, not per-judge) ──
|
|
702
|
+
// Apply supplementary recall-boost patterns a single time and merge into
|
|
703
|
+
// the raw findings before cross-evaluator dedup. Previously this ran
|
|
704
|
+
// inside evaluateWithJudge(), producing N identical copies per judge.
|
|
705
|
+
const boostResult = applyRecallBoost(code, language);
|
|
706
|
+
if (boostResult.findings.length > 0) {
|
|
707
|
+
for (const bf of boostResult.findings) {
|
|
708
|
+
if (!rawFindings.some((f) => f.ruleId === bf.ruleId)) {
|
|
709
|
+
rawFindings.push(bf);
|
|
710
|
+
}
|
|
711
|
+
}
|
|
712
|
+
}
|
|
709
713
|
const dedupedFindings = crossEvaluatorDedup(rawFindings);
|
|
710
714
|
const { filtered: fpFiltered } = filterFalsePositiveHeuristics(dedupedFindings, code, language, enrichedOptions?.filePath);
|
|
711
715
|
const configFiltered = applyConfig(fpFiltered, options?.config);
|
package/package.json
CHANGED
package/server.json
CHANGED
|
@@ -7,12 +7,12 @@
|
|
|
7
7
|
"url": "https://github.com/kevinrabun/judges",
|
|
8
8
|
"source": "github"
|
|
9
9
|
},
|
|
10
|
-
"version": "3.117.2",
|
|
10
|
+
"version": "3.117.4",
|
|
11
11
|
"packages": [
|
|
12
12
|
{
|
|
13
13
|
"registryType": "npm",
|
|
14
14
|
"identifier": "@kevinrabun/judges",
|
|
15
|
-
"version": "3.117.2",
|
|
15
|
+
"version": "3.117.4",
|
|
16
16
|
"transport": {
|
|
17
17
|
"type": "stdio"
|
|
18
18
|
}
|