@kevinrabun/judges 2.2.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +203 -20
- package/dist/api.d.ts +40 -0
- package/dist/api.d.ts.map +1 -0
- package/dist/api.js +56 -0
- package/dist/api.js.map +1 -0
- package/dist/ast/cross-file-taint.d.ts +43 -0
- package/dist/ast/cross-file-taint.d.ts.map +1 -0
- package/dist/ast/cross-file-taint.js +713 -0
- package/dist/ast/cross-file-taint.js.map +1 -0
- package/dist/ast/index.d.ts +4 -0
- package/dist/ast/index.d.ts.map +1 -1
- package/dist/ast/index.js +5 -0
- package/dist/ast/index.js.map +1 -1
- package/dist/ast/structural-parser.d.ts.map +1 -1
- package/dist/ast/structural-parser.js +66 -11
- package/dist/ast/structural-parser.js.map +1 -1
- package/dist/ast/taint-tracker.d.ts +35 -0
- package/dist/ast/taint-tracker.d.ts.map +1 -0
- package/dist/ast/taint-tracker.js +518 -0
- package/dist/ast/taint-tracker.js.map +1 -0
- package/dist/ast/types.d.ts +2 -0
- package/dist/ast/types.d.ts.map +1 -1
- package/dist/ast/typescript-ast.d.ts.map +1 -1
- package/dist/ast/typescript-ast.js +25 -5
- package/dist/ast/typescript-ast.js.map +1 -1
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +10 -9
- package/dist/config.js.map +1 -1
- package/dist/dedup.d.ts +19 -0
- package/dist/dedup.d.ts.map +1 -0
- package/dist/dedup.js +222 -0
- package/dist/dedup.js.map +1 -0
- package/dist/errors.d.ts +37 -0
- package/dist/errors.d.ts.map +1 -0
- package/dist/errors.js +57 -0
- package/dist/errors.js.map +1 -0
- package/dist/evaluators/accessibility.d.ts +1 -1
- package/dist/evaluators/accessibility.d.ts.map +1 -1
- package/dist/evaluators/accessibility.js +45 -7
- package/dist/evaluators/accessibility.js.map +1 -1
- package/dist/evaluators/agent-instructions.d.ts +1 -1
- package/dist/evaluators/agent-instructions.d.ts.map +1 -1
- package/dist/evaluators/agent-instructions.js +60 -2
- package/dist/evaluators/agent-instructions.js.map +1 -1
- package/dist/evaluators/ai-code-safety.d.ts +9 -0
- package/dist/evaluators/ai-code-safety.d.ts.map +1 -0
- package/dist/evaluators/ai-code-safety.js +507 -0
- package/dist/evaluators/ai-code-safety.js.map +1 -0
- package/dist/evaluators/api-design.d.ts +1 -1
- package/dist/evaluators/api-design.d.ts.map +1 -1
- package/dist/evaluators/api-design.js +33 -17
- package/dist/evaluators/api-design.js.map +1 -1
- package/dist/evaluators/app-builder.d.ts +34 -0
- package/dist/evaluators/app-builder.d.ts.map +1 -0
- package/dist/evaluators/app-builder.js +156 -0
- package/dist/evaluators/app-builder.js.map +1 -0
- package/dist/evaluators/authentication.d.ts +1 -1
- package/dist/evaluators/authentication.d.ts.map +1 -1
- package/dist/evaluators/authentication.js +69 -75
- package/dist/evaluators/authentication.js.map +1 -1
- package/dist/evaluators/backwards-compatibility.d.ts +1 -1
- package/dist/evaluators/backwards-compatibility.d.ts.map +1 -1
- package/dist/evaluators/backwards-compatibility.js +25 -3
- package/dist/evaluators/backwards-compatibility.js.map +1 -1
- package/dist/evaluators/caching.d.ts +1 -1
- package/dist/evaluators/caching.d.ts.map +1 -1
- package/dist/evaluators/caching.js +25 -4
- package/dist/evaluators/caching.js.map +1 -1
- package/dist/evaluators/ci-cd.d.ts +1 -1
- package/dist/evaluators/ci-cd.d.ts.map +1 -1
- package/dist/evaluators/ci-cd.js +34 -12
- package/dist/evaluators/ci-cd.js.map +1 -1
- package/dist/evaluators/cloud-readiness.d.ts +1 -1
- package/dist/evaluators/cloud-readiness.d.ts.map +1 -1
- package/dist/evaluators/cloud-readiness.js +26 -0
- package/dist/evaluators/cloud-readiness.js.map +1 -1
- package/dist/evaluators/code-structure.d.ts +1 -1
- package/dist/evaluators/code-structure.d.ts.map +1 -1
- package/dist/evaluators/code-structure.js +19 -6
- package/dist/evaluators/code-structure.js.map +1 -1
- package/dist/evaluators/compliance.d.ts +1 -1
- package/dist/evaluators/compliance.d.ts.map +1 -1
- package/dist/evaluators/compliance.js +48 -10
- package/dist/evaluators/compliance.js.map +1 -1
- package/dist/evaluators/concurrency.d.ts +1 -1
- package/dist/evaluators/concurrency.d.ts.map +1 -1
- package/dist/evaluators/concurrency.js +29 -4
- package/dist/evaluators/concurrency.js.map +1 -1
- package/dist/evaluators/configuration-management.d.ts +1 -1
- package/dist/evaluators/configuration-management.d.ts.map +1 -1
- package/dist/evaluators/configuration-management.js +57 -13
- package/dist/evaluators/configuration-management.js.map +1 -1
- package/dist/evaluators/cost-effectiveness.d.ts +1 -1
- package/dist/evaluators/cost-effectiveness.d.ts.map +1 -1
- package/dist/evaluators/cost-effectiveness.js +27 -3
- package/dist/evaluators/cost-effectiveness.js.map +1 -1
- package/dist/evaluators/cybersecurity.d.ts +1 -1
- package/dist/evaluators/cybersecurity.d.ts.map +1 -1
- package/dist/evaluators/cybersecurity.js +190 -1
- package/dist/evaluators/cybersecurity.js.map +1 -1
- package/dist/evaluators/data-security.d.ts +1 -1
- package/dist/evaluators/data-security.d.ts.map +1 -1
- package/dist/evaluators/data-security.js +114 -66
- package/dist/evaluators/data-security.js.map +1 -1
- package/dist/evaluators/data-sovereignty.d.ts +1 -1
- package/dist/evaluators/data-sovereignty.d.ts.map +1 -1
- package/dist/evaluators/data-sovereignty.js +89 -2
- package/dist/evaluators/data-sovereignty.js.map +1 -1
- package/dist/evaluators/database.d.ts +1 -1
- package/dist/evaluators/database.d.ts.map +1 -1
- package/dist/evaluators/database.js +35 -9
- package/dist/evaluators/database.js.map +1 -1
- package/dist/evaluators/dependencies.d.ts +6 -0
- package/dist/evaluators/dependencies.d.ts.map +1 -0
- package/dist/evaluators/dependencies.js +204 -0
- package/dist/evaluators/dependencies.js.map +1 -0
- package/dist/evaluators/dependency-health.d.ts +1 -1
- package/dist/evaluators/dependency-health.d.ts.map +1 -1
- package/dist/evaluators/dependency-health.js +265 -11
- package/dist/evaluators/dependency-health.js.map +1 -1
- package/dist/evaluators/documentation.d.ts +1 -1
- package/dist/evaluators/documentation.d.ts.map +1 -1
- package/dist/evaluators/documentation.js +25 -2
- package/dist/evaluators/documentation.js.map +1 -1
- package/dist/evaluators/error-handling.d.ts +1 -1
- package/dist/evaluators/error-handling.d.ts.map +1 -1
- package/dist/evaluators/error-handling.js +89 -24
- package/dist/evaluators/error-handling.js.map +1 -1
- package/dist/evaluators/ethics-bias.d.ts +1 -1
- package/dist/evaluators/ethics-bias.d.ts.map +1 -1
- package/dist/evaluators/ethics-bias.js +30 -5
- package/dist/evaluators/ethics-bias.js.map +1 -1
- package/dist/evaluators/framework-safety.d.ts +13 -0
- package/dist/evaluators/framework-safety.d.ts.map +1 -0
- package/dist/evaluators/framework-safety.js +424 -0
- package/dist/evaluators/framework-safety.js.map +1 -0
- package/dist/evaluators/index.d.ts +21 -24
- package/dist/evaluators/index.d.ts.map +1 -1
- package/dist/evaluators/index.js +297 -677
- package/dist/evaluators/index.js.map +1 -1
- package/dist/evaluators/internationalization.d.ts +1 -1
- package/dist/evaluators/internationalization.d.ts.map +1 -1
- package/dist/evaluators/internationalization.js +55 -4
- package/dist/evaluators/internationalization.js.map +1 -1
- package/dist/evaluators/logging-privacy.d.ts +1 -1
- package/dist/evaluators/logging-privacy.d.ts.map +1 -1
- package/dist/evaluators/logging-privacy.js +68 -30
- package/dist/evaluators/logging-privacy.js.map +1 -1
- package/dist/evaluators/maintainability.d.ts +1 -1
- package/dist/evaluators/maintainability.d.ts.map +1 -1
- package/dist/evaluators/maintainability.js +53 -26
- package/dist/evaluators/maintainability.js.map +1 -1
- package/dist/evaluators/observability.d.ts +1 -1
- package/dist/evaluators/observability.d.ts.map +1 -1
- package/dist/evaluators/observability.js +22 -1
- package/dist/evaluators/observability.js.map +1 -1
- package/dist/evaluators/performance.d.ts +1 -1
- package/dist/evaluators/performance.d.ts.map +1 -1
- package/dist/evaluators/performance.js +209 -2
- package/dist/evaluators/performance.js.map +1 -1
- package/dist/evaluators/portability.d.ts +1 -1
- package/dist/evaluators/portability.d.ts.map +1 -1
- package/dist/evaluators/portability.js +24 -1
- package/dist/evaluators/portability.js.map +1 -1
- package/dist/evaluators/project.d.ts +16 -0
- package/dist/evaluators/project.d.ts.map +1 -0
- package/dist/evaluators/project.js +353 -0
- package/dist/evaluators/project.js.map +1 -0
- package/dist/evaluators/rate-limiting.d.ts +1 -1
- package/dist/evaluators/rate-limiting.d.ts.map +1 -1
- package/dist/evaluators/rate-limiting.js +33 -10
- package/dist/evaluators/rate-limiting.js.map +1 -1
- package/dist/evaluators/reliability.d.ts +1 -1
- package/dist/evaluators/reliability.d.ts.map +1 -1
- package/dist/evaluators/reliability.js +20 -0
- package/dist/evaluators/reliability.js.map +1 -1
- package/dist/evaluators/scalability.d.ts +1 -1
- package/dist/evaluators/scalability.d.ts.map +1 -1
- package/dist/evaluators/scalability.js +27 -1
- package/dist/evaluators/scalability.js.map +1 -1
- package/dist/evaluators/shared.d.ts +24 -2
- package/dist/evaluators/shared.d.ts.map +1 -1
- package/dist/evaluators/shared.js +194 -26
- package/dist/evaluators/shared.js.map +1 -1
- package/dist/evaluators/software-practices.d.ts +1 -1
- package/dist/evaluators/software-practices.d.ts.map +1 -1
- package/dist/evaluators/software-practices.js +50 -3
- package/dist/evaluators/software-practices.js.map +1 -1
- package/dist/evaluators/testing.d.ts +1 -1
- package/dist/evaluators/testing.d.ts.map +1 -1
- package/dist/evaluators/testing.js +32 -4
- package/dist/evaluators/testing.js.map +1 -1
- package/dist/evaluators/ux.d.ts +1 -1
- package/dist/evaluators/ux.d.ts.map +1 -1
- package/dist/evaluators/ux.js +24 -0
- package/dist/evaluators/ux.js.map +1 -1
- package/dist/evaluators/v2.d.ts +1 -1
- package/dist/evaluators/v2.d.ts.map +1 -1
- package/dist/evaluators/v2.js +15 -35
- package/dist/evaluators/v2.js.map +1 -1
- package/dist/formatters/sarif.d.ts +75 -0
- package/dist/formatters/sarif.d.ts.map +1 -0
- package/dist/formatters/sarif.js +93 -0
- package/dist/formatters/sarif.js.map +1 -0
- package/dist/index.d.ts +4 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +9 -782
- package/dist/index.js.map +1 -1
- package/dist/judges/accessibility.d.ts +1 -1
- package/dist/judges/accessibility.d.ts.map +1 -1
- package/dist/judges/agent-instructions.d.ts +1 -1
- package/dist/judges/agent-instructions.d.ts.map +1 -1
- package/dist/judges/ai-code-safety.d.ts +3 -0
- package/dist/judges/ai-code-safety.d.ts.map +1 -0
- package/dist/judges/ai-code-safety.js +45 -0
- package/dist/judges/ai-code-safety.js.map +1 -0
- package/dist/judges/api-design.d.ts +1 -1
- package/dist/judges/api-design.d.ts.map +1 -1
- package/dist/judges/authentication.d.ts +1 -1
- package/dist/judges/authentication.d.ts.map +1 -1
- package/dist/judges/backwards-compatibility.d.ts +1 -1
- package/dist/judges/backwards-compatibility.d.ts.map +1 -1
- package/dist/judges/caching.d.ts +1 -1
- package/dist/judges/caching.d.ts.map +1 -1
- package/dist/judges/ci-cd.d.ts +1 -1
- package/dist/judges/ci-cd.d.ts.map +1 -1
- package/dist/judges/cloud-readiness.d.ts +1 -1
- package/dist/judges/cloud-readiness.d.ts.map +1 -1
- package/dist/judges/code-structure.d.ts +1 -1
- package/dist/judges/code-structure.d.ts.map +1 -1
- package/dist/judges/compliance.d.ts +1 -1
- package/dist/judges/compliance.d.ts.map +1 -1
- package/dist/judges/concurrency.d.ts +1 -1
- package/dist/judges/concurrency.d.ts.map +1 -1
- package/dist/judges/configuration-management.d.ts +1 -1
- package/dist/judges/configuration-management.d.ts.map +1 -1
- package/dist/judges/cost-effectiveness.d.ts +1 -1
- package/dist/judges/cost-effectiveness.d.ts.map +1 -1
- package/dist/judges/cybersecurity.d.ts +1 -1
- package/dist/judges/cybersecurity.d.ts.map +1 -1
- package/dist/judges/data-security.d.ts +1 -1
- package/dist/judges/data-security.d.ts.map +1 -1
- package/dist/judges/data-sovereignty.d.ts +1 -1
- package/dist/judges/data-sovereignty.d.ts.map +1 -1
- package/dist/judges/database.d.ts +1 -1
- package/dist/judges/database.d.ts.map +1 -1
- package/dist/judges/dependency-health.d.ts +1 -1
- package/dist/judges/dependency-health.d.ts.map +1 -1
- package/dist/judges/documentation.d.ts +1 -1
- package/dist/judges/documentation.d.ts.map +1 -1
- package/dist/judges/error-handling.d.ts +1 -1
- package/dist/judges/error-handling.d.ts.map +1 -1
- package/dist/judges/ethics-bias.d.ts +1 -1
- package/dist/judges/ethics-bias.d.ts.map +1 -1
- package/dist/judges/framework-safety.d.ts +3 -0
- package/dist/judges/framework-safety.d.ts.map +1 -0
- package/dist/judges/framework-safety.js +25 -0
- package/dist/judges/framework-safety.js.map +1 -0
- package/dist/judges/index.d.ts +1 -1
- package/dist/judges/index.d.ts.map +1 -1
- package/dist/judges/index.js +76 -0
- package/dist/judges/index.js.map +1 -1
- package/dist/judges/internationalization.d.ts +1 -1
- package/dist/judges/internationalization.d.ts.map +1 -1
- package/dist/judges/logging-privacy.d.ts +1 -1
- package/dist/judges/logging-privacy.d.ts.map +1 -1
- package/dist/judges/maintainability.d.ts +1 -1
- package/dist/judges/maintainability.d.ts.map +1 -1
- package/dist/judges/observability.d.ts +1 -1
- package/dist/judges/observability.d.ts.map +1 -1
- package/dist/judges/performance.d.ts +1 -1
- package/dist/judges/performance.d.ts.map +1 -1
- package/dist/judges/portability.d.ts +1 -1
- package/dist/judges/portability.d.ts.map +1 -1
- package/dist/judges/rate-limiting.d.ts +1 -1
- package/dist/judges/rate-limiting.d.ts.map +1 -1
- package/dist/judges/reliability.d.ts +1 -1
- package/dist/judges/reliability.d.ts.map +1 -1
- package/dist/judges/scalability.d.ts +1 -1
- package/dist/judges/scalability.d.ts.map +1 -1
- package/dist/judges/software-practices.d.ts +1 -1
- package/dist/judges/software-practices.d.ts.map +1 -1
- package/dist/judges/testing.d.ts +1 -1
- package/dist/judges/testing.d.ts.map +1 -1
- package/dist/judges/ux.d.ts +1 -1
- package/dist/judges/ux.d.ts.map +1 -1
- package/dist/language-patterns.d.ts +37 -0
- package/dist/language-patterns.d.ts.map +1 -1
- package/dist/language-patterns.js +59 -4
- package/dist/language-patterns.js.map +1 -1
- package/dist/patches/index.d.ts +10 -0
- package/dist/patches/index.d.ts.map +1 -0
- package/dist/patches/index.js +533 -0
- package/dist/patches/index.js.map +1 -0
- package/dist/reports/public-repo-report.d.ts +3 -1
- package/dist/reports/public-repo-report.d.ts.map +1 -1
- package/dist/reports/public-repo-report.js +41 -0
- package/dist/reports/public-repo-report.js.map +1 -1
- package/dist/scoring.d.ts +18 -0
- package/dist/scoring.d.ts.map +1 -0
- package/dist/scoring.js +178 -0
- package/dist/scoring.js.map +1 -0
- package/dist/tools/deep-review.d.ts +4 -0
- package/dist/tools/deep-review.d.ts.map +1 -0
- package/dist/tools/deep-review.js +56 -0
- package/dist/tools/deep-review.js.map +1 -0
- package/dist/tools/prompts.d.ts +8 -0
- package/dist/tools/prompts.d.ts.map +1 -0
- package/dist/tools/prompts.js +66 -0
- package/dist/tools/prompts.js.map +1 -0
- package/dist/tools/register-evaluation.d.ts +7 -0
- package/dist/tools/register-evaluation.d.ts.map +1 -0
- package/dist/tools/register-evaluation.js +303 -0
- package/dist/tools/register-evaluation.js.map +1 -0
- package/dist/tools/register-workflow.d.ts +7 -0
- package/dist/tools/register-workflow.d.ts.map +1 -0
- package/dist/tools/register-workflow.js +395 -0
- package/dist/tools/register-workflow.js.map +1 -0
- package/dist/tools/register.d.ts +7 -0
- package/dist/tools/register.d.ts.map +1 -0
- package/dist/tools/register.js +14 -0
- package/dist/tools/register.js.map +1 -0
- package/dist/tools/schemas.d.ts +26 -0
- package/dist/tools/schemas.d.ts.map +1 -0
- package/dist/tools/schemas.js +42 -0
- package/dist/tools/schemas.js.map +1 -0
- package/dist/types.d.ts +53 -2
- package/dist/types.d.ts.map +1 -1
- package/package.json +42 -3
- package/server.json +51 -3
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Judges Panel
|
|
2
2
|
|
|
3
|
-
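An MCP (Model Context Protocol) server that provides a panel of **33 specialized judges** to evaluate AI-generated code — acting as an independent quality gate regardless of which project is being reviewed. Includes **built-in AST analysis** powered by the TypeScript Compiler API — no separate parser server needed.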
|
|
3
|
+
An MCP (Model Context Protocol) server that provides a panel of **35 specialized judges** to evaluate AI-generated code — acting as an independent quality gate regardless of which project is being reviewed. Includes **built-in AST analysis** powered by the TypeScript Compiler API — no separate parser server needed.
|
|
4
4
|
|
|
5
5
|
**Highlights:**
|
|
6
6
|
- Includes an **App Builder Workflow (3-step)** demo for release decisions, plain-language risk summaries, and prioritized fixes — see [Try the Demo](#2-try-the-demo).
|
|
@@ -26,7 +26,7 @@ npm run build
|
|
|
26
26
|
|
|
27
27
|
### 2. Try the Demo
|
|
28
28
|
|
|
29
|
-
Run the included demo to see all 33 judges evaluate a purposely flawed API server:
|
|
29
|
+
Run the included demo to see all 35 judges evaluate a purposely flawed API server:
|
|
30
30
|
|
|
31
31
|
```bash
|
|
32
32
|
npm run demo
|
|
@@ -270,6 +270,8 @@ This helps keep Copilot feedback aligned with Judges findings.
|
|
|
270
270
|
| **CI/CD** | CI/CD Pipeline & Deployment Safety | `CICD-` | Test infrastructure, lint config, Docker tags, build scripts |
|
|
271
271
|
| **Code Structure** | Structural Analysis (AST) | `STRUCT-` | Cyclomatic complexity, nesting depth, function length, dead code, type safety |
|
|
272
272
|
| **Agent Instructions** | Agent Instruction Markdown Quality & Safety | `AGENT-` | Instruction hierarchy, conflict detection, unsafe overrides, scope, validation, policy guidance |
|
|
273
|
+
| **AI Code Safety** | AI-Generated Code Safety | `AICS-` | Prompt injection, insecure LLM output handling, debug defaults, missing validation, unsafe deserialization of AI responses |
|
|
274
|
+
| **Framework Safety** | Framework-Specific Safety | `FWSAFE-` | React hooks ordering, Express middleware chains, Next.js SSR/SSG pitfalls, Angular/Vue lifecycle patterns, framework-specific anti-patterns |
|
|
273
275
|
|
|
274
276
|
---
|
|
275
277
|
|
|
@@ -326,7 +328,7 @@ When your AI coding assistant connects to multiple MCP servers, each one contrib
|
|
|
326
328
|
│ Judges │ │ CVE / │ │ Linter │
|
|
327
329
|
│ Panel │ │ SBOM │ │ Server │
|
|
328
330
|
│ ─────────────│ └────────┘ └────────┘
|
|
329
|
-
│
|
|
331
|
+
│ 33 Heuristic │ Vuln DB Style &
|
|
330
332
|
│ judges │ scanning correctness
|
|
331
333
|
│ + AST judge │
|
|
332
334
|
└──────────────┘
|
|
@@ -337,7 +339,7 @@ When your AI coding assistant connects to multiple MCP servers, each one contrib
|
|
|
337
339
|
|
|
338
340
|
| Layer | What It Does | Example Servers |
|
|
339
341
|
|-------|-------------|-----------------|
|
|
340
|
-
| **Judges Panel** |
|
|
342
|
+
| **Judges Panel** | 35-judge quality gate — security patterns, AST analysis, cost, scalability, a11y, compliance, sovereignty, ethics, dependency health, agent instruction governance, AI code safety, framework safety | This server |
|
|
341
343
|
| **CVE / SBOM** | Vulnerability scanning against live databases — known CVEs, license risks, supply chain | OSV, Snyk, Trivy, Grype MCP servers |
|
|
342
344
|
| **Linting** | Language-specific style and correctness rules | ESLint, Ruff, Clippy MCP servers |
|
|
343
345
|
| **Runtime Profiling** | Memory, CPU, latency measurement on running code | Custom profiling MCP servers |
|
|
@@ -408,11 +410,6 @@ Supports:
|
|
|
408
410
|
### `evaluate_public_repo_report`
|
|
409
411
|
Clone a **public repository URL**, run the full judges panel across eligible source files, and generate a consolidated markdown report.
|
|
410
412
|
|
|
411
|
-
Report prioritization behavior includes:
|
|
412
|
-
- weighted risk ranking (`severity × confidence × fixability`)
|
|
413
|
-
- root-cause clustering to collapse duplicate findings across files
|
|
414
|
-
- actionable top-risk output with confidence and suggested-fix snippets when available
|
|
415
|
-
|
|
416
413
|
| Parameter | Type | Required | Description |
|
|
417
414
|
|-----------|------|----------|-------------|
|
|
418
415
|
| `repoUrl` | string | yes | Public repository URL (`https://...`) |
|
|
@@ -424,7 +421,9 @@ Report prioritization behavior includes:
|
|
|
424
421
|
| `credentialMode` | string | no | Credential detection mode: `standard` (default) or `strict` |
|
|
425
422
|
| `includeAstFindings` | boolean | no | Include AST/code-structure findings (default: true) |
|
|
426
423
|
| `minConfidence` | number | no | Minimum finding confidence to include (0-1, default: 0) |
|
|
427
|
-
| `
|
|
424
|
+
| `enableMustFixGate` | boolean | no | Enable must-fix gate summary for high-confidence dangerous findings (default: false) |
|
|
425
|
+
| `mustFixMinConfidence` | number | no | Confidence threshold for must-fix gate triggers (0-1, default: 0.85) |
|
|
426
|
+
| `mustFixDangerousRulePrefixes` | string[] | no | Optional dangerous rule prefixes for gate matching (e.g., `AUTH`, `CYBER`, `DATA`) |
|
|
428
427
|
| `keepClone` | boolean | no | Keep cloned repo on disk for inspection |
|
|
429
428
|
|
|
430
429
|
**Quick examples**
|
|
@@ -443,6 +442,9 @@ npm run report:public-repo -- --repoUrl https://github.com/openclaw/openclaw --i
|
|
|
443
442
|
# show only findings at 80%+ confidence
|
|
444
443
|
npm run report:public-repo -- --repoUrl https://github.com/openclaw/openclaw --minConfidence 0.8 --output reports/openclaw-judges-report-high-confidence.md
|
|
445
444
|
|
|
445
|
+
# include must-fix gate summary in the generated report
|
|
446
|
+
npm run report:public-repo -- --repoUrl https://github.com/openclaw/openclaw --enableMustFixGate true --mustFixMinConfidence 0.9 --mustFixDangerousPrefix AUTH --mustFixDangerousPrefix CYBER --output reports/openclaw-judges-report-mustfix.md
|
|
447
|
+
|
|
446
448
|
# opinionated quick-start mode (recommended first run)
|
|
447
449
|
npm run report:quickstart -- --repoUrl https://github.com/openclaw/openclaw --output reports/openclaw-quickstart.md
|
|
448
450
|
```
|
|
@@ -460,6 +462,9 @@ Call from MCP client:
|
|
|
460
462
|
"credentialMode": "strict",
|
|
461
463
|
"includeAstFindings": false,
|
|
462
464
|
"minConfidence": 0.8,
|
|
465
|
+
"enableMustFixGate": true,
|
|
466
|
+
"mustFixMinConfidence": 0.9,
|
|
467
|
+
"mustFixDangerousRulePrefixes": ["AUTH", "CYBER", "DATA"],
|
|
463
468
|
"outputPath": "reports/vscode-judges-report.md"
|
|
464
469
|
}
|
|
465
470
|
}
|
|
@@ -469,8 +474,7 @@ Typical response summary includes:
|
|
|
469
474
|
- overall verdict and average score
|
|
470
475
|
- analyzed file count and total findings
|
|
471
476
|
- per-judge score table
|
|
472
|
-
- highest-risk findings
|
|
473
|
-
- unique root-cause cluster count and lowest-scoring files
|
|
477
|
+
- highest-risk findings and lowest-scoring files
|
|
474
478
|
|
|
475
479
|
Sample report snippet:
|
|
476
480
|
|
|
@@ -489,7 +493,7 @@ Generated from https://github.com/microsoft/vscode on 2026-02-21T12:00:00.000Z.
|
|
|
489
493
|
List all available judges with their domains and descriptions.
|
|
490
494
|
|
|
491
495
|
### `evaluate_code`
|
|
492
|
-
Submit code to the **full judges panel**. All 33 judges evaluate independently and return a combined verdict.
|
|
496
|
+
Submit code to the **full judges panel**. All 35 judges evaluate independently and return a combined verdict.
|
|
493
497
|
|
|
494
498
|
| Parameter | Type | Required | Description |
|
|
495
499
|
|-----------|------|----------|-------------|
|
|
@@ -498,6 +502,7 @@ Submit code to the **full judges panel**. All 33 judges evaluate independently a
|
|
|
498
502
|
| `context` | string | no | Additional context about the code |
|
|
499
503
|
| `includeAstFindings` | boolean | no | Include AST/code-structure findings (default: true) |
|
|
500
504
|
| `minConfidence` | number | no | Minimum finding confidence to include (0-1, default: 0) |
|
|
505
|
+
| `config` | object | no | Inline configuration (see [Configuration](#configuration)) |
|
|
501
506
|
|
|
502
507
|
### `evaluate_code_single_judge`
|
|
503
508
|
Submit code to a **specific judge** for targeted review.
|
|
@@ -509,9 +514,10 @@ Submit code to a **specific judge** for targeted review.
|
|
|
509
514
|
| `judgeId` | string | yes | See [judge IDs](#judge-ids) below |
|
|
510
515
|
| `context` | string | no | Additional context |
|
|
511
516
|
| `minConfidence` | number | no | Minimum finding confidence to include (0-1, default: 0) |
|
|
517
|
+
| `config` | object | no | Inline configuration (see [Configuration](#configuration)) |
|
|
512
518
|
|
|
513
519
|
### `evaluate_project`
|
|
514
|
-
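Submit multiple files for **project-level analysis**. All 33 judges evaluate each file, plus cross-file architectural analysis detects code duplication, inconsistent error handling, and dependency cycles.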
|
|
520
|
+
Submit multiple files for **project-level analysis**. All 35 judges evaluate each file, plus cross-file architectural analysis detects code duplication, inconsistent error handling, and dependency cycles.
|
|
515
521
|
|
|
516
522
|
| Parameter | Type | Required | Description |
|
|
517
523
|
|-----------|------|----------|-------------|
|
|
@@ -519,9 +525,10 @@ Submit multiple files for **project-level analysis**. All 33 judges evaluate eac
|
|
|
519
525
|
| `context` | string | no | Optional project context |
|
|
520
526
|
| `includeAstFindings` | boolean | no | Include AST/code-structure findings (default: true) |
|
|
521
527
|
| `minConfidence` | number | no | Minimum finding confidence to include (0-1, default: 0) |
|
|
528
|
+
| `config` | object | no | Inline configuration (see [Configuration](#configuration)) |
|
|
522
529
|
|
|
523
530
|
### `evaluate_diff`
|
|
524
|
-
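Evaluate only the **changed lines** in a code diff. Runs all 33 judges on the full file but filters findings to lines you specify. Ideal for PR reviews and incremental analysis.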
|
|
531
|
+
Evaluate only the **changed lines** in a code diff. Runs all 35 judges on the full file but filters findings to lines you specify. Ideal for PR reviews and incremental analysis.
|
|
525
532
|
|
|
526
533
|
| Parameter | Type | Required | Description |
|
|
527
534
|
|-----------|------|----------|-------------|
|
|
@@ -531,6 +538,7 @@ Evaluate only the **changed lines** in a code diff. Runs all 33 judges on the fu
|
|
|
531
538
|
| `context` | string | no | Optional context about the change |
|
|
532
539
|
| `includeAstFindings` | boolean | no | Include AST/code-structure findings (default: true) |
|
|
533
540
|
| `minConfidence` | number | no | Minimum finding confidence to include (0-1, default: 0) |
|
|
541
|
+
| `config` | object | no | Inline configuration (see [Configuration](#configuration)) |
|
|
534
542
|
|
|
535
543
|
### `analyze_dependencies`
|
|
536
544
|
Analyze a dependency manifest file for supply-chain risks, version pinning issues, typosquatting indicators, and dependency hygiene. Supports `package.json`, `requirements.txt`, `Cargo.toml`, `go.mod`, `pom.xml`, and `.csproj` files.
|
|
@@ -543,7 +551,7 @@ Analyze a dependency manifest file for supply-chain risks, version pinning issue
|
|
|
543
551
|
|
|
544
552
|
#### Judge IDs
|
|
545
553
|
|
|
546
|
-
`data-security` · `cybersecurity` · `cost-effectiveness` · `scalability` · `cloud-readiness` · `software-practices` · `accessibility` · `api-design` · `reliability` · `observability` · `performance` · `compliance` · `data-sovereignty` · `testing` · `documentation` · `internationalization` · `dependency-health` · `concurrency` · `ethics-bias` · `maintainability` · `error-handling` · `authentication` · `database` · `caching` · `configuration-management` · `backwards-compatibility` · `portability` · `ux` · `logging-privacy` · `rate-limiting` · `ci-cd` · `code-structure` · `agent-instructions`
|
|
554
|
+
`data-security` · `cybersecurity` · `cost-effectiveness` · `scalability` · `cloud-readiness` · `software-practices` · `accessibility` · `api-design` · `reliability` · `observability` · `performance` · `compliance` · `data-sovereignty` · `testing` · `documentation` · `internationalization` · `dependency-health` · `concurrency` · `ethics-bias` · `maintainability` · `error-handling` · `authentication` · `database` · `caching` · `configuration-management` · `backwards-compatibility` · `portability` · `ux` · `logging-privacy` · `rate-limiting` · `ci-cd` · `code-structure` · `agent-instructions` · `ai-code-safety` · `framework-safety`
|
|
547
555
|
|
|
548
556
|
---
|
|
549
557
|
|
|
@@ -586,7 +594,98 @@ Each judge has a corresponding prompt for LLM-powered deep analysis:
|
|
|
586
594
|
| `judge-ci-cd` | Deep CI/CD pipeline review |
|
|
587
595
|
| `judge-code-structure` | Deep AST-based structural analysis review |
|
|
588
596
|
| `judge-agent-instructions` | Deep review of agent instruction markdown quality and safety |
|
|
589
|
-
| `full-tribunal` | All 33 judges in a single prompt |
|
|
597
|
+
| `judge-ai-code-safety` | Deep review of AI-generated code risks: prompt injection, insecure LLM output handling, debug defaults, missing validation |
|
|
598
|
+
| `judge-framework-safety` | Deep review of framework-specific safety: React hooks, Express middleware, Next.js SSR/SSG, Angular/Vue patterns |
|
|
599
|
+
| `full-tribunal` | All 35 judges in a single prompt |
|
|
600
|
+
|
|
601
|
+
---
|
|
602
|
+
|
|
603
|
+
## Configuration
|
|
604
|
+
|
|
605
|
+
All evaluation tools accept an optional `config` parameter for inline configuration. This is the same format as `.judgesrc` / `.judgesrc.json` project files.
|
|
606
|
+
|
|
607
|
+
```json
|
|
608
|
+
{
|
|
609
|
+
"config": {
|
|
610
|
+
"disabledRules": ["COST-*", "I18N-001"],
|
|
611
|
+
"disabledJudges": ["accessibility", "ethics-bias"],
|
|
612
|
+
"minSeverity": "medium",
|
|
613
|
+
"ruleOverrides": {
|
|
614
|
+
"SEC-003": { "severity": "critical" },
|
|
615
|
+
"DOC-*": { "disabled": true }
|
|
616
|
+
}
|
|
617
|
+
}
|
|
618
|
+
}
|
|
619
|
+
```
|
|
620
|
+
|
|
621
|
+
| Field | Type | Description |
|
|
622
|
+
|-------|------|-------------|
|
|
623
|
+
| `disabledRules` | `string[]` | Rule IDs or prefix wildcards to suppress (e.g., `"COST-*"`, `"SEC-003"`) |
|
|
624
|
+
| `disabledJudges` | `string[]` | Judge IDs to skip entirely (e.g., `"cost-effectiveness"`) |
|
|
625
|
+
| `minSeverity` | `string` | Minimum severity to report: `critical`, `high`, `medium`, `low`, `info` |
|
|
626
|
+
| `ruleOverrides` | `object` | Per-rule overrides keyed by rule ID or wildcard — `{ disabled?: boolean, severity?: string }` |
|
|
627
|
+
|
|
628
|
+
---
|
|
629
|
+
|
|
630
|
+
## Advanced Features
|
|
631
|
+
|
|
632
|
+
### Inline Suppressions
|
|
633
|
+
|
|
634
|
+
Suppress specific findings directly in source code using comment directives:
|
|
635
|
+
|
|
636
|
+
```typescript
|
|
637
|
+
const x = eval(input); // judges-ignore SEC-001
|
|
638
|
+
// judges-ignore-next-line CYBER-002
|
|
639
|
+
const y = dangerousOperation();
|
|
640
|
+
// judges-file-ignore DOC-* ← suppress globally for this file
|
|
641
|
+
```
|
|
642
|
+
|
|
643
|
+
Supported comment styles: `//`, `#`, `/* */`. Supports comma-separated rule IDs and wildcards (`*`, `SEC-*`).
|
|
644
|
+
|
|
645
|
+
### Auto-Fix Patches
|
|
646
|
+
|
|
647
|
+
Certain findings include machine-applicable patches in the `patch` field:
|
|
648
|
+
|
|
649
|
+
| Pattern | Auto-Fix |
|
|
650
|
+
|---------|----------|
|
|
651
|
+
| `new Buffer(x)` | → `Buffer.from(x)` |
|
|
652
|
+
| `http://` URLs (non-localhost) | → `https://` |
|
|
653
|
+
| `Math.random()` | → `crypto.randomUUID()` |
|
|
654
|
+
|
|
655
|
+
Patches include `oldText`, `newText`, `startLine`, and `endLine` for automated application.
|
|
656
|
+
|
|
657
|
+
### Cross-Evaluator Deduplication
|
|
658
|
+
|
|
659
|
+
When multiple judges flag the same issue (e.g., both Data Security and Cybersecurity detect SQL injection on line 15), findings are automatically deduplicated. The highest-severity finding wins, and the description is annotated with cross-references (e.g., *"Also identified by: CYBER-003"*).
|
|
660
|
+
|
|
661
|
+
### Taint Flow Analysis
|
|
662
|
+
|
|
663
|
+
The engine performs inter-procedural taint tracking to trace data from user-controlled sources (e.g., `req.body`, `process.env`) through transformations to security-sensitive sinks (e.g., `eval()`, `exec()`, SQL queries). Taint flows are used to boost confidence on true-positive findings and suppress false positives where sanitization is detected.
|
|
664
|
+
|
|
665
|
+
### Positive Signal Detection
|
|
666
|
+
|
|
667
|
+
Code that demonstrates good practices receives score bonuses (capped at +15):
|
|
668
|
+
|
|
669
|
+
| Signal | Bonus |
|
|
670
|
+
|--------|-------|
|
|
671
|
+
| Parameterized queries | +3 |
|
|
672
|
+
| Security headers (helmet) | +3 |
|
|
673
|
+
| Auth middleware (passport, etc.) | +3 |
|
|
674
|
+
| Proper error handling | +2 |
|
|
675
|
+
| Input validation libs (zod, joi, etc.) | +2 |
|
|
676
|
+
| Rate limiting | +2 |
|
|
677
|
+
| Structured logging (pino, winston) | +2 |
|
|
678
|
+
| CORS configuration | +1 |
|
|
679
|
+
| Strict mode / strictNullChecks | +1 |
|
|
680
|
+
| Test patterns (describe/it/expect) | +1 |
|
|
681
|
+
|
|
682
|
+
### Framework-Aware Rules
|
|
683
|
+
|
|
684
|
+
Judges include framework-specific detection for Express, Django, Flask, FastAPI, Spring, ASP.NET, Rails, and more. Framework middleware (e.g., `helmet()`, `express-rate-limit`, `passport.authenticate()`) is recognized as mitigation, reducing false positives.
|
|
685
|
+
|
|
686
|
+
### Cross-File Import Resolution
|
|
687
|
+
|
|
688
|
+
In project-level analysis, imports are resolved across files. If one file imports a security middleware module from another file in the project, findings about missing security controls are automatically adjusted with reduced confidence.
|
|
590
689
|
|
|
591
690
|
---
|
|
592
691
|
|
|
@@ -607,7 +706,7 @@ Each judge scores the code from **0 to 100**:
|
|
|
607
706
|
- **WARNING** — Any high finding, any medium finding, or score < 80
|
|
608
707
|
- **PASS** — Score ≥ 80 with no critical, high, or medium findings
|
|
609
708
|
|
|
610
|
-
The **overall tribunal score** is the average of all
|
|
709
|
+
The **overall tribunal score** is the average of all 35 judges. The overall verdict fails if **any** judge fails.
|
|
611
710
|
|
|
612
711
|
---
|
|
613
712
|
|
|
@@ -618,6 +717,8 @@ judges/
|
|
|
618
717
|
├── src/
|
|
619
718
|
│ ├── index.ts # MCP server entry point — tools, prompts, transport
|
|
620
719
|
│ ├── types.ts # TypeScript interfaces (Finding, JudgeEvaluation, etc.)
|
|
720
|
+
│ ├── config.ts # .judgesrc configuration parser and validation
|
|
721
|
+
│ ├── language-patterns.ts # Multi-language regex pattern constants and helpers
|
|
621
722
|
│ ├── ast/ # AST analysis engine (built-in, no external deps)
|
|
622
723
|
│ │ ├── index.ts # analyzeStructure() — routes to correct parser
|
|
623
724
|
│ │ ├── types.ts # FunctionInfo, CodeStructure interfaces
|
|
@@ -626,12 +727,12 @@ judges/
|
|
|
626
727
|
│ ├── evaluators/ # Analysis engine for each judge
|
|
627
728
|
│ │ ├── index.ts # evaluateWithJudge(), evaluateWithTribunal(), evaluateProject(), etc.
|
|
628
729
|
│ │ ├── shared.ts # Scoring, verdict logic, markdown formatters
|
|
629
|
-
│ │ └── *.ts # One analyzer per judge (
|
|
730
|
+
│ │ └── *.ts # One analyzer per judge (35 files)
|
|
630
731
|
│ ├── reports/
|
|
631
732
|
│ │ └── public-repo-report.ts # Public repo clone + full tribunal report generation
|
|
632
733
|
│ └── judges/ # Judge definitions (id, name, domain, system prompt)
|
|
633
734
|
│ ├── index.ts # JUDGES array, getJudge(), getJudgeSummaries()
|
|
634
|
-
│ └── *.ts # One definition per judge (
|
|
735
|
+
│ └── *.ts # One definition per judge (35 files)
|
|
635
736
|
├── scripts/
|
|
636
737
|
│ ├── generate-public-repo-report.ts # Run: npm run report:public-repo -- --repoUrl <url>
|
|
637
738
|
│ └── daily-popular-repo-autofix.ts # Run: npm run automation:daily-popular
|
|
@@ -674,6 +775,21 @@ This repo includes a scheduled workflow at `.github/workflows/daily-popular-repo
|
|
|
674
775
|
- skips repositories unless they are public and PR creation is possible with existing GitHub auth (no additional auth flow).
|
|
675
776
|
- enforces hard runtime caps of 10 repositories/day and 5 PRs/repository.
|
|
676
777
|
|
|
778
|
+
Each run writes `daily-autofix-summary.json` (or `SUMMARY_PATH`) with per-repository telemetry, including:
|
|
779
|
+
- `runAggregate` — compact run-level totals and cross-repo top prioritized rules,
|
|
780
|
+
- `runAggregate.totalCandidatesDiscovered` and `runAggregate.totalCandidatesAfterLocationDedupe` — signal how much overlap was removed before attempting fixes,
|
|
781
|
+
- `runAggregate.totalCandidatesAfterPriorityThreshold` — candidates that remain after applying minimum priority score,
|
|
782
|
+
- `runAggregate.dedupeReductionPercent` — percent reduction from location dedupe for quick runtime-efficiency tracking,
|
|
783
|
+
- `runAggregate.priorityThresholdReductionPercent` — percent reduction from minimum-priority filtering after dedupe,
|
|
784
|
+
- `priorityRulePrefixesUsed` — dangerous rule prefixes used during prioritization,
|
|
785
|
+
- `minPriorityScoreUsed` — minimum `candidatePriorityScore` applied for candidate inclusion,
|
|
786
|
+
- `candidatesDiscovered`, `candidatesAfterLocationDedupe`, and `candidatesAfterPriorityThreshold` — per-repo candidate counts after each filter stage,
|
|
787
|
+
- `topPrioritizedRuleCounts` — most common rule IDs among ranked candidates,
|
|
788
|
+
- `topPrioritizedCandidates` — top ranked candidate samples (rule, severity, confidence, file, line, priority score).
|
|
789
|
+
|
|
790
|
+
Optional runtime control:
|
|
791
|
+
- `AUTOFIX_MIN_PRIORITY_SCORE` — minimum candidate priority score required after dedupe (default: `0`, disabled).
|
|
792
|
+
|
|
677
793
|
Required secret:
|
|
678
794
|
- `JUDGES_AUTOFIX_GH_TOKEN` — GitHub token with permission to fork/push/create PRs for target repositories.
|
|
679
795
|
|
|
@@ -684,6 +800,73 @@ gh workflow run "Judges Daily Full-Run Autofix PRs" -f targetRepoUrl=https://git
|
|
|
684
800
|
|
|
685
801
|
---
|
|
686
802
|
|
|
803
|
+
## Programmatic API
|
|
804
|
+
|
|
805
|
+
Judges can be consumed as a library (not just via MCP). Import from `@kevinrabun/judges/api`:
|
|
806
|
+
|
|
807
|
+
```typescript
|
|
808
|
+
import {
|
|
809
|
+
evaluateCode,
|
|
810
|
+
evaluateProject,
|
|
811
|
+
evaluateCodeSingleJudge,
|
|
812
|
+
getJudge,
|
|
813
|
+
JUDGES,
|
|
814
|
+
findingsToSarif,
|
|
815
|
+
} from "@kevinrabun/judges/api";
|
|
816
|
+
|
|
817
|
+
// Full tribunal evaluation
|
|
818
|
+
const verdict = evaluateCode("const x = eval(input);", "typescript");
|
|
819
|
+
console.log(verdict.overallScore, verdict.overallVerdict);
|
|
820
|
+
|
|
821
|
+
// Single judge
|
|
822
|
+
const result = evaluateCodeSingleJudge("cybersecurity", code, "typescript");
|
|
823
|
+
|
|
824
|
+
// SARIF output for CI integration
|
|
825
|
+
const sarif = findingsToSarif(verdict.evaluations.flatMap(e => e.findings));
|
|
826
|
+
```
|
|
827
|
+
|
|
828
|
+
### Package Exports
|
|
829
|
+
|
|
830
|
+
| Entry Point | Description |
|
|
831
|
+
|---|---|
|
|
832
|
+
| `@kevinrabun/judges/api` | Programmatic API (default) |
|
|
833
|
+
| `@kevinrabun/judges/server` | MCP server entry point |
|
|
834
|
+
| `@kevinrabun/judges/sarif` | SARIF 2.1.0 formatter |
|
|
835
|
+
|
|
836
|
+
### SARIF Output
|
|
837
|
+
|
|
838
|
+
Convert findings to [SARIF 2.1.0](https://docs.oasis-open.org/sarif/sarif/v2.1.0/sarif-v2.1.0.html) for GitHub Code Scanning, Azure DevOps, and other CI/CD tools:
|
|
839
|
+
|
|
840
|
+
```typescript
|
|
841
|
+
import { findingsToSarif, evaluationToSarif, verdictToSarif } from "@kevinrabun/judges/sarif";
|
|
842
|
+
|
|
843
|
+
const sarif = verdictToSarif(verdict, "src/app.ts");
|
|
844
|
+
fs.writeFileSync("results.sarif", JSON.stringify(sarif, null, 2));
|
|
845
|
+
```
|
|
846
|
+
|
|
847
|
+
---
|
|
848
|
+
|
|
849
|
+
## Custom Error Types
|
|
850
|
+
|
|
851
|
+
All thrown errors extend `JudgesError` with a machine-readable `code` property:
|
|
852
|
+
|
|
853
|
+
| Error Class | Code | When |
|
|
854
|
+
|---|---|---|
|
|
855
|
+
| `ConfigError` | `JUDGES_CONFIG_INVALID` | Malformed `.judgesrc` or invalid inline config |
|
|
856
|
+
| `EvaluationError` | `JUDGES_EVALUATION_FAILED` | Unknown judge, analyzer crash |
|
|
857
|
+
| `ParseError` | `JUDGES_PARSE_FAILED` | Unparseable source code or input data |
|
|
858
|
+
|
|
859
|
+
```typescript
|
|
860
|
+
import { evaluateCode, ConfigError, EvaluationError } from "@kevinrabun/judges/api";
|
|
861
|
+
try {
|
|
862
|
+
evaluateCode(code, "typescript");
|
|
863
|
+
} catch (e) {
|
|
864
|
+
if (e instanceof ConfigError) console.error("Config issue:", e.code);
|
|
865
|
+
}
|
|
866
|
+
```
|
|
867
|
+
|
|
868
|
+
---
|
|
869
|
+
|
|
687
870
|
## License
|
|
688
871
|
|
|
689
872
|
MIT
|
package/dist/api.d.ts
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Judges Panel — Programmatic API
|
|
3
|
+
*
|
|
4
|
+
* Public entry-point for consuming judges as a library (not via MCP).
|
|
5
|
+
*
|
|
6
|
+
* ```ts
|
|
7
|
+
* import { evaluateCode, evaluateProject, getJudge } from "@kevinrabun/judges/api";
|
|
8
|
+
* const result = evaluateCode("const x = eval(input);", "typescript");
|
|
9
|
+
* ```
|
|
10
|
+
*/
|
|
11
|
+
export type { Severity, Verdict, Finding, Patch, LangFamily, JudgesConfig, RuleOverride, ProjectFile, ProjectVerdict, DiffVerdict, DependencyEntry, DependencyVerdict, JudgeEvaluation, TribunalVerdict, JudgeDefinition, EvaluationContextV2, EvidenceBundleV2, SpecializedFindingV2, TribunalVerdictV2, MustFixGateOptions, MustFixGateResult, AppBuilderWorkflowResult, PlainLanguageFinding, WorkflowTask, PolicyProfile, } from "./types.js";
|
|
12
|
+
export { JudgesError, ConfigError, EvaluationError, ParseError } from "./errors.js";
|
|
13
|
+
export { parseConfig, defaultConfig } from "./config.js";
|
|
14
|
+
export { JUDGES, getJudge, getJudgeSummaries } from "./judges/index.js";
|
|
15
|
+
export { evaluateWithJudge, evaluateWithTribunal, evaluateProject, evaluateDiff, analyzeDependencies, enrichWithPatches, crossEvaluatorDedup, applyInlineSuppressions, runAppBuilderWorkflow, formatVerdictAsMarkdown, formatEvaluationAsMarkdown, } from "./evaluators/index.js";
|
|
16
|
+
export { evaluateCodeV2, evaluateProjectV2, getSupportedPolicyProfiles } from "./evaluators/v2.js";
|
|
17
|
+
export { analyzeCrossFileTaint } from "./ast/cross-file-taint.js";
|
|
18
|
+
export { findingsToSarif, evaluationToSarif, verdictToSarif } from "./formatters/sarif.js";
|
|
19
|
+
import type { EvaluationOptions } from "./evaluators/index.js";
|
|
20
|
+
import type { JudgeEvaluation, TribunalVerdict } from "./types.js";
|
|
21
|
+
/**
|
|
22
|
+
* Evaluate code against the full panel of judges (convenience wrapper).
|
|
23
|
+
*
|
|
24
|
+
* @param code - Source code to evaluate
|
|
25
|
+
* @param language - Programming language (e.g. "typescript", "python")
|
|
26
|
+
* @param options - Optional config, context, target judges, etc.
|
|
27
|
+
* @returns Full tribunal verdict with per-judge evaluations and overall score
|
|
28
|
+
*/
|
|
29
|
+
export declare function evaluateCode(code: string, language: string, options?: EvaluationOptions): TribunalVerdict;
|
|
30
|
+
/**
|
|
31
|
+
* Evaluate code with a single judge by name (convenience wrapper).
|
|
32
|
+
*
|
|
33
|
+
* @param judgeId - The judge identifier (e.g. "cybersecurity", "performance")
|
|
34
|
+
* @param code - Source code to evaluate
|
|
35
|
+
* @param language - Programming language
|
|
36
|
+
* @param options - Optional config
|
|
37
|
+
* @returns Single-judge evaluation with findings and score
|
|
38
|
+
*/
|
|
39
|
+
export declare function evaluateCodeSingleJudge(judgeId: string, code: string, language: string, options?: EvaluationOptions): JudgeEvaluation;
|
|
40
|
+
//# sourceMappingURL=api.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"api.d.ts","sourceRoot":"","sources":["../src/api.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAGH,YAAY,EACV,QAAQ,EACR,OAAO,EACP,OAAO,EACP,KAAK,EACL,UAAU,EACV,YAAY,EACZ,YAAY,EACZ,WAAW,EACX,cAAc,EACd,WAAW,EACX,eAAe,EACf,iBAAiB,EACjB,eAAe,EACf,eAAe,EACf,eAAe,EACf,mBAAmB,EACnB,gBAAgB,EAChB,oBAAoB,EACpB,iBAAiB,EACjB,kBAAkB,EAClB,iBAAiB,EACjB,wBAAwB,EACxB,oBAAoB,EACpB,YAAY,EACZ,aAAa,GACd,MAAM,YAAY,CAAC;AAGpB,OAAO,EAAE,WAAW,EAAE,WAAW,EAAE,eAAe,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAGpF,OAAO,EAAE,WAAW,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAGzD,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAC;AAIxE,OAAO,EACL,iBAAiB,EACjB,oBAAoB,EACpB,eAAe,EACf,YAAY,EACZ,mBAAmB,EACnB,iBAAiB,EACjB,mBAAmB,EACnB,uBAAuB,EACvB,qBAAqB,EACrB,uBAAuB,EACvB,0BAA0B,GAC3B,MAAM,uBAAuB,CAAC;AAG/B,OAAO,EAAE,cAAc,EAAE,iBAAiB,EAAE,0BAA0B,EAAE,MAAM,oBAAoB,CAAC;AAGnG,OAAO,EAAE,qBAAqB,EAAE,MAAM,2BAA2B,CAAC;AAGlE,OAAO,EAAE,eAAe,EAAE,iBAAiB,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;AAK3F,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,uBAAuB,CAAC;AAC/D,OAAO,KAAK,EAAE,eAAe,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAInE;;;;;;;GAOG;AACH,wBAAgB,YAAY,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,iBAAiB,GAAG,eAAe,CAEzG;AAED;;;;;;;;GAQG;AACH,wBAAgB,uBAAuB,CACrC,OAAO,EAAE,MAAM,EACf,IAAI,EAAE,MAAM,EACZ,QAAQ,EAAE,MAAM,EAChB,OAAO,CAAC,EAAE,iBAAiB,GAC1B,eAAe,CAMjB"}
|
package/dist/api.js
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Judges Panel — Programmatic API
|
|
3
|
+
*
|
|
4
|
+
* Public entry-point for consuming judges as a library (not via MCP).
|
|
5
|
+
*
|
|
6
|
+
* ```ts
|
|
7
|
+
* import { evaluateCode, evaluateProject, getJudge } from "@kevinrabun/judges/api";
|
|
8
|
+
* const result = evaluateCode("const x = eval(input);", "typescript");
|
|
9
|
+
* ```
|
|
10
|
+
*/
|
|
11
|
+
// ─── Errors ──────────────────────────────────────────────────────────────────
|
|
12
|
+
export { JudgesError, ConfigError, EvaluationError, ParseError } from "./errors.js";
|
|
13
|
+
// ─── Config ──────────────────────────────────────────────────────────────────
|
|
14
|
+
export { parseConfig, defaultConfig } from "./config.js";
|
|
15
|
+
// ─── Judge Registry ──────────────────────────────────────────────────────────
|
|
16
|
+
export { JUDGES, getJudge, getJudgeSummaries } from "./judges/index.js";
|
|
17
|
+
// ─── Core Evaluation Functions ───────────────────────────────────────────────
|
|
18
|
+
export { evaluateWithJudge, evaluateWithTribunal, evaluateProject, evaluateDiff, analyzeDependencies, enrichWithPatches, crossEvaluatorDedup, applyInlineSuppressions, runAppBuilderWorkflow, formatVerdictAsMarkdown, formatEvaluationAsMarkdown, } from "./evaluators/index.js";
|
|
19
|
+
// ─── V2 Policy-Aware API ────────────────────────────────────────────────────
|
|
20
|
+
export { evaluateCodeV2, evaluateProjectV2, getSupportedPolicyProfiles } from "./evaluators/v2.js";
|
|
21
|
+
// ─── Cross-File Taint Analysis ───────────────────────────────────────────────
|
|
22
|
+
export { analyzeCrossFileTaint } from "./ast/cross-file-taint.js";
|
|
23
|
+
// ─── Formatters ──────────────────────────────────────────────────────────────
|
|
24
|
+
export { findingsToSarif, evaluationToSarif, verdictToSarif } from "./formatters/sarif.js";
|
|
25
|
+
// ─── Convenience Aliases ─────────────────────────────────────────────────────
|
|
26
|
+
import { evaluateWithTribunal, evaluateWithJudge } from "./evaluators/index.js";
|
|
27
|
+
import { getJudge } from "./judges/index.js";
|
|
28
|
+
import { EvaluationError } from "./errors.js";
|
|
29
|
+
/**
|
|
30
|
+
* Evaluate code against the full panel of judges (convenience wrapper).
|
|
31
|
+
*
|
|
32
|
+
* @param code - Source code to evaluate
|
|
33
|
+
* @param language - Programming language (e.g. "typescript", "python")
|
|
34
|
+
* @param options - Optional config, context, target judges, etc.
|
|
35
|
+
* @returns Full tribunal verdict with per-judge evaluations and overall score
|
|
36
|
+
*/
|
|
37
|
+
export function evaluateCode(code, language, options) {
|
|
38
|
+
return evaluateWithTribunal(code, language, undefined, options);
|
|
39
|
+
}
|
|
40
|
+
/**
|
|
41
|
+
* Evaluate code with a single judge by name (convenience wrapper).
|
|
42
|
+
*
|
|
43
|
+
* @param judgeId - The judge identifier (e.g. "cybersecurity", "performance")
|
|
44
|
+
* @param code - Source code to evaluate
|
|
45
|
+
* @param language - Programming language
|
|
46
|
+
* @param options - Optional config
|
|
47
|
+
* @returns Single-judge evaluation with findings and score
|
|
48
|
+
*/
|
|
49
|
+
export function evaluateCodeSingleJudge(judgeId, code, language, options) {
|
|
50
|
+
const judge = getJudge(judgeId);
|
|
51
|
+
if (!judge) {
|
|
52
|
+
throw new EvaluationError(`Unknown judge: "${judgeId}"`, judgeId);
|
|
53
|
+
}
|
|
54
|
+
return evaluateWithJudge(judge, code, language, undefined, options);
|
|
55
|
+
}
|
|
56
|
+
//# sourceMappingURL=api.js.map
|
package/dist/api.js.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"api.js","sourceRoot":"","sources":["../src/api.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AA+BH,gFAAgF;AAChF,OAAO,EAAE,WAAW,EAAE,WAAW,EAAE,eAAe,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAEpF,gFAAgF;AAChF,OAAO,EAAE,WAAW,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAEzD,gFAAgF;AAChF,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAC;AAExE,gFAAgF;AAEhF,OAAO,EACL,iBAAiB,EACjB,oBAAoB,EACpB,eAAe,EACf,YAAY,EACZ,mBAAmB,EACnB,iBAAiB,EACjB,mBAAmB,EACnB,uBAAuB,EACvB,qBAAqB,EACrB,uBAAuB,EACvB,0BAA0B,GAC3B,MAAM,uBAAuB,CAAC;AAE/B,+EAA+E;AAC/E,OAAO,EAAE,cAAc,EAAE,iBAAiB,EAAE,0BAA0B,EAAE,MAAM,oBAAoB,CAAC;AAEnG,gFAAgF;AAChF,OAAO,EAAE,qBAAqB,EAAE,MAAM,2BAA2B,CAAC;AAElE,gFAAgF;AAChF,OAAO,EAAE,eAAe,EAAE,iBAAiB,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;AAE3F,gFAAgF;AAEhF,OAAO,EAAE,oBAAoB,EAAE,iBAAiB,EAAE,MAAM,uBAAuB,CAAC;AAGhF,OAAO,EAAE,QAAQ,EAAE,MAAM,mBAAmB,CAAC;AAC7C,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C;;;;;;;GAOG;AACH,MAAM,UAAU,YAAY,CAAC,IAAY,EAAE,QAAgB,EAAE,OAA2B;IACtF,OAAO,oBAAoB,CAAC,IAAI,EAAE,QAAQ,EAAE,SAAS,EAAE,OAAO,CAAC,CAAC;AAClE,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,UAAU,uBAAuB,CACrC,OAAe,EACf,IAAY,EACZ,QAAgB,EAChB,OAA2B;IAE3B,MAAM,KAAK,GAAG,QAAQ,CAAC,OAAO,CAAC,CAAC;IAChC,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,MAAM,IAAI,eAAe,CAAC,mBAAmB,OAAO,GAAG,EAAE,OAAO,CAAC,CAAC;IACpE,CAAC;IACD,OAAO,iBAAiB,CAAC,KAAK,EAAE,IAAI,EAAE,QAAQ,EAAE,SAAS,EAAE,OAAO,CAAC,CAAC;AACtE,CAAC"}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import { type TaintSourceKind, type TaintSinkKind } from "./taint-tracker.js";
|
|
2
|
+
/**
|
|
3
|
+
* A cross-file taint flow: tainted data originates in one file and reaches
|
|
4
|
+
* a dangerous sink in a different file via an import/export boundary.
|
|
5
|
+
*/
|
|
6
|
+
export interface CrossFileTaintFlow {
|
|
7
|
+
/** File where the untrusted data originates */
|
|
8
|
+
sourceFile: string;
|
|
9
|
+
/** File where the tainted data reaches a dangerous sink */
|
|
10
|
+
sinkFile: string;
|
|
11
|
+
/** The original taint source (e.g., req.body) */
|
|
12
|
+
source: {
|
|
13
|
+
line: number;
|
|
14
|
+
expression: string;
|
|
15
|
+
kind: TaintSourceKind;
|
|
16
|
+
};
|
|
17
|
+
/** The dangerous sink in the consuming file */
|
|
18
|
+
sink: {
|
|
19
|
+
line: number;
|
|
20
|
+
api: string;
|
|
21
|
+
kind: TaintSinkKind;
|
|
22
|
+
};
|
|
23
|
+
/** The exported name that carries taint across the boundary */
|
|
24
|
+
exportedBinding: string;
|
|
25
|
+
/** The imported name in the consuming file */
|
|
26
|
+
importedAs: string;
|
|
27
|
+
/** Confidence score (reduced for indirect flows) */
|
|
28
|
+
confidence: number;
|
|
29
|
+
}
|
|
30
|
+
/**
|
|
31
|
+
* Analyze taint flows across multiple files in a project. Traces tainted data
|
|
32
|
+
* from sources in one file through export/import boundaries to sinks in
|
|
33
|
+
* another file.
|
|
34
|
+
*
|
|
35
|
+
* Returns both intra-file flows (from the standard taint tracker) and
|
|
36
|
+
* cross-file flows where taint crosses module boundaries.
|
|
37
|
+
*/
|
|
38
|
+
export declare function analyzeCrossFileTaint(files: Array<{
|
|
39
|
+
path: string;
|
|
40
|
+
content: string;
|
|
41
|
+
language: string;
|
|
42
|
+
}>): CrossFileTaintFlow[];
|
|
43
|
+
//# sourceMappingURL=cross-file-taint.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cross-file-taint.d.ts","sourceRoot":"","sources":["../../src/ast/cross-file-taint.ts"],"names":[],"mappings":"AAeA,OAAO,EAAqC,KAAK,eAAe,EAAE,KAAK,aAAa,EAAE,MAAM,oBAAoB,CAAC;AAKjH;;;GAGG;AACH,MAAM,WAAW,kBAAkB;IACjC,+CAA+C;IAC/C,UAAU,EAAE,MAAM,CAAC;IACnB,2DAA2D;IAC3D,QAAQ,EAAE,MAAM,CAAC;IACjB,iDAAiD;IACjD,MAAM,EAAE;QACN,IAAI,EAAE,MAAM,CAAC;QACb,UAAU,EAAE,MAAM,CAAC;QACnB,IAAI,EAAE,eAAe,CAAC;KACvB,CAAC;IACF,+CAA+C;IAC/C,IAAI,EAAE;QACJ,IAAI,EAAE,MAAM,CAAC;QACb,GAAG,EAAE,MAAM,CAAC;QACZ,IAAI,EAAE,aAAa,CAAC;KACrB,CAAC;IACF,+DAA+D;IAC/D,eAAe,EAAE,MAAM,CAAC;IACxB,8CAA8C;IAC9C,UAAU,EAAE,MAAM,CAAC;IACnB,oDAAoD;IACpD,UAAU,EAAE,MAAM,CAAC;CACpB;AAkjBD;;;;;;;GAOG;AACH,wBAAgB,qBAAqB,CACnC,KAAK,EAAE,KAAK,CAAC;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,CAAC;IAAC,QAAQ,EAAE,MAAM,CAAA;CAAE,CAAC,GAChE,kBAAkB,EAAE,CA4KtB"}
|