@kevinrabun/judges 1.0.2 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +264 -111
- package/dist/evaluators/shared.js +8 -8
- package/dist/evaluators/shared.js.map +1 -1
- package/dist/index.js +1 -1
- package/dist/judges/accessibility.d.ts.map +1 -1
- package/dist/judges/accessibility.js +7 -1
- package/dist/judges/accessibility.js.map +1 -1
- package/dist/judges/api-design.d.ts.map +1 -1
- package/dist/judges/api-design.js +7 -1
- package/dist/judges/api-design.js.map +1 -1
- package/dist/judges/cloud-readiness.d.ts.map +1 -1
- package/dist/judges/cloud-readiness.js +7 -1
- package/dist/judges/cloud-readiness.js.map +1 -1
- package/dist/judges/compliance.d.ts.map +1 -1
- package/dist/judges/compliance.js +7 -1
- package/dist/judges/compliance.js.map +1 -1
- package/dist/judges/concurrency.d.ts.map +1 -1
- package/dist/judges/concurrency.js +7 -1
- package/dist/judges/concurrency.js.map +1 -1
- package/dist/judges/cost-effectiveness.d.ts.map +1 -1
- package/dist/judges/cost-effectiveness.js +7 -1
- package/dist/judges/cost-effectiveness.js.map +1 -1
- package/dist/judges/cybersecurity.d.ts.map +1 -1
- package/dist/judges/cybersecurity.js +7 -1
- package/dist/judges/cybersecurity.js.map +1 -1
- package/dist/judges/data-security.d.ts.map +1 -1
- package/dist/judges/data-security.js +7 -1
- package/dist/judges/data-security.js.map +1 -1
- package/dist/judges/dependency-health.d.ts.map +1 -1
- package/dist/judges/dependency-health.js +7 -1
- package/dist/judges/dependency-health.js.map +1 -1
- package/dist/judges/documentation.d.ts.map +1 -1
- package/dist/judges/documentation.js +7 -1
- package/dist/judges/documentation.js.map +1 -1
- package/dist/judges/ethics-bias.d.ts.map +1 -1
- package/dist/judges/ethics-bias.js +7 -1
- package/dist/judges/ethics-bias.js.map +1 -1
- package/dist/judges/internationalization.d.ts.map +1 -1
- package/dist/judges/internationalization.js +7 -1
- package/dist/judges/internationalization.js.map +1 -1
- package/dist/judges/observability.d.ts.map +1 -1
- package/dist/judges/observability.js +7 -1
- package/dist/judges/observability.js.map +1 -1
- package/dist/judges/performance.d.ts.map +1 -1
- package/dist/judges/performance.js +7 -1
- package/dist/judges/performance.js.map +1 -1
- package/dist/judges/reliability.d.ts.map +1 -1
- package/dist/judges/reliability.js +7 -1
- package/dist/judges/reliability.js.map +1 -1
- package/dist/judges/scalability.d.ts.map +1 -1
- package/dist/judges/scalability.js +7 -1
- package/dist/judges/scalability.js.map +1 -1
- package/dist/judges/software-practices.d.ts.map +1 -1
- package/dist/judges/software-practices.js +8 -2
- package/dist/judges/software-practices.js.map +1 -1
- package/dist/judges/testing.d.ts.map +1 -1
- package/dist/judges/testing.js +7 -1
- package/dist/judges/testing.js.map +1 -1
- package/package.json +4 -1
- package/server.json +2 -2
package/README.md
CHANGED
|
@@ -2,173 +2,326 @@
|
|
|
2
2
|
|
|
3
3
|
An MCP (Model Context Protocol) server that provides a panel of **18 specialized judges** to evaluate AI-generated code — acting as an independent quality gate regardless of which project is being reviewed.
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|-------|--------|-------------|-------------------|
|
|
9
|
-
| **Judge Data Security** | Data Security & Privacy | `DATA-` | Encryption, PII handling, secrets management, access controls, GDPR/CCPA/HIPAA compliance |
|
|
10
|
-
| **Judge Cybersecurity** | Cybersecurity & Threat Defense | `CYBER-` | Injection attacks, XSS, CSRF, auth flaws, dependency CVEs, OWASP Top 10 |
|
|
11
|
-
| **Judge Cost Effectiveness** | Cost Optimization | `COST-` | Algorithm efficiency, N+1 queries, memory waste, caching strategy, cloud spend |
|
|
12
|
-
| **Judge Scalability** | Scalability & Performance | `SCALE-` | Statelessness, horizontal scaling, concurrency, bottlenecks, rate limiting |
|
|
13
|
-
| **Judge Cloud Readiness** | Cloud-Native & DevOps | `CLOUD-` | 12-Factor compliance, containerization, observability, graceful shutdown, IaC |
|
|
14
|
-
| **Judge Software Practices** | Engineering Best Practices | `SWDEV-` | SOLID principles, type safety, error handling, testing, input validation, clean code |
|
|
15
|
-
| **Judge Accessibility** | Accessibility (a11y) | `A11Y-` | WCAG compliance, screen reader support, keyboard navigation, ARIA, color contrast |
|
|
16
|
-
| **Judge API Design** | API Design & Contracts | `API-` | REST conventions, versioning, pagination, error responses, consistency |
|
|
17
|
-
| **Judge Reliability** | Reliability & Resilience | `REL-` | Error handling, timeouts, retries, circuit breakers, graceful degradation |
|
|
18
|
-
| **Judge Observability** | Observability & Monitoring | `OBS-` | Structured logging, health checks, metrics, tracing, correlation IDs |
|
|
19
|
-
| **Judge Performance** | Performance & Efficiency | `PERF-` | N+1 queries, sync I/O, caching, memory leaks, algorithmic complexity |
|
|
20
|
-
| **Judge Compliance** | Regulatory Compliance | `COMP-` | GDPR/CCPA, PII protection, consent, data retention, audit trails |
|
|
21
|
-
| **Judge Testing** | Testing & Quality Assurance | `TEST-` | Test coverage, assertions, test isolation, naming, external dependencies |
|
|
22
|
-
| **Judge Documentation** | Documentation & Readability | `DOC-` | JSDoc/docstrings, magic numbers, TODOs, code comments, module docs |
|
|
23
|
-
| **Judge Internationalization** | Internationalization (i18n) | `I18N-` | Hardcoded strings, locale handling, currency formatting, RTL support |
|
|
24
|
-
| **Judge Dependency Health** | Dependency Management | `DEPS-` | Version pinning, deprecated packages, supply chain, import hygiene |
|
|
25
|
-
| **Judge Concurrency** | Concurrency & Async Safety | `CONC-` | Race conditions, unbounded parallelism, missing await, resource cleanup |
|
|
26
|
-
| **Judge Ethics & Bias** | Ethics & Bias | `ETHICS-` | Demographic logic, explainability, dark patterns, inclusive language |
|
|
27
|
-
|
|
28
|
-
## How It Works
|
|
5
|
+
[CI](https://github.com/KevinRabun/judges/actions/workflows/ci.yml)
|
|
6
|
+
[npm package](https://www.npmjs.com/package/@kevinrabun/judges)
|
|
7
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
29
8
|
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
1. **Pattern-Based Analysis (Tools)** — The `evaluate_code` and `evaluate_code_single_judge` tools perform heuristic analysis using pattern matching to catch common anti-patterns. This works entirely offline with zero external API calls.
|
|
9
|
+
---
|
|
33
10
|
|
|
34
|
-
|
|
11
|
+
## Quick Start
|
|
35
12
|
|
|
36
|
-
|
|
13
|
+
### 1. Install and Build
|
|
37
14
|
|
|
38
|
-
|
|
39
|
-
|
|
15
|
+
```bash
|
|
16
|
+
git clone https://github.com/KevinRabun/judges.git
|
|
17
|
+
cd judges
|
|
18
|
+
npm install
|
|
19
|
+
npm run build
|
|
20
|
+
```
|
|
40
21
|
|
|
41
|
-
###
|
|
42
|
-
Submit code to the **full judges panel**. All 18 judges evaluate independently and return a combined verdict.
|
|
22
|
+
### 2. Try the Demo
|
|
43
23
|
|
|
44
|
-
|
|
45
|
-
- `code` (string, required) — The source code to evaluate
|
|
46
|
-
- `language` (string, required) — Programming language (e.g., "typescript", "python")
|
|
47
|
-
- `context` (string, optional) — Additional context about the code
|
|
24
|
+
Run the included demo to see all 18 judges evaluate a purposely flawed API server:
|
|
48
25
|
|
|
49
|
-
|
|
26
|
+
```bash
|
|
27
|
+
npm run demo
|
|
28
|
+
```
|
|
50
29
|
|
|
51
|
-
|
|
52
|
-
Submit code to a **specific judge** for targeted review.
|
|
30
|
+
This evaluates [`examples/sample-vulnerable-api.ts`](examples/sample-vulnerable-api.ts) — a file intentionally packed with security holes, performance anti-patterns, and code quality issues — and prints a full verdict with per-judge scores and findings.
|
|
53
31
|
|
|
54
|
-
**
|
|
55
|
-
- `code` (string, required) — The source code to evaluate
|
|
56
|
-
- `language` (string, required) — Programming language
|
|
57
|
-
- `judgeId` (string, required) — One of: `data-security`, `cybersecurity`, `cost-effectiveness`, `scalability`, `cloud-readiness`, `software-practices`, `accessibility`, `api-design`, `reliability`, `observability`, `performance`, `compliance`, `testing`, `documentation`, `internationalization`, `dependency-health`, `concurrency`, `ethics-bias`
|
|
58
|
-
- `context` (string, optional) — Additional context
|
|
32
|
+
**What you'll see:**
|
|
59
33
|
|
|
60
|
-
|
|
34
|
+
```
|
|
35
|
+
╔══════════════════════════════════════════════════════════════╗
|
|
36
|
+
║ Judges Panel — Full Tribunal Demo ║
|
|
37
|
+
╚══════════════════════════════════════════════════════════════╝
|
|
38
|
+
|
|
39
|
+
Overall Verdict : FAIL
|
|
40
|
+
Overall Score : 43/100
|
|
41
|
+
Critical Issues : 15
|
|
42
|
+
High Issues : 17
|
|
43
|
+
Total Findings : 83
|
|
44
|
+
Judges Run : 18
|
|
45
|
+
|
|
46
|
+
Per-Judge Breakdown:
|
|
47
|
+
────────────────────────────────────────────────────────────────
|
|
48
|
+
❌ Judge Data Security 0/100 7 finding(s)
|
|
49
|
+
❌ Judge Cybersecurity 0/100 7 finding(s)
|
|
50
|
+
❌ Judge Cost Effectiveness 52/100 5 finding(s)
|
|
51
|
+
⚠️ Judge Scalability 65/100 4 finding(s)
|
|
52
|
+
❌ Judge Cloud Readiness 61/100 4 finding(s)
|
|
53
|
+
❌ Judge Software Practices 45/100 6 finding(s)
|
|
54
|
+
❌ Judge Accessibility 0/100 8 finding(s)
|
|
55
|
+
❌ Judge API Design 0/100 9 finding(s)
|
|
56
|
+
❌ Judge Reliability 54/100 3 finding(s)
|
|
57
|
+
❌ Judge Observability 45/100 5 finding(s)
|
|
58
|
+
❌ Judge Performance 27/100 5 finding(s)
|
|
59
|
+
❌ Judge Compliance 0/100 4 finding(s)
|
|
60
|
+
⚠️ Judge Testing 90/100 1 finding(s)
|
|
61
|
+
⚠️ Judge Documentation 70/100 4 finding(s)
|
|
62
|
+
⚠️ Judge Internationalization 65/100 4 finding(s)
|
|
63
|
+
⚠️ Judge Dependency Health 90/100 1 finding(s)
|
|
64
|
+
❌ Judge Concurrency 44/100 4 finding(s)
|
|
65
|
+
❌ Judge Ethics & Bias 65/100 2 finding(s)
|
|
66
|
+
```
|
|
61
67
|
|
|
62
|
-
|
|
63
|
-
- `judge-cybersecurity` — Deep cybersecurity review via LLM
|
|
64
|
-
- `judge-cost-effectiveness` — Deep cost optimization review via LLM
|
|
65
|
-
- `judge-scalability` — Deep scalability review via LLM
|
|
66
|
-
- `judge-cloud-readiness` — Deep cloud readiness review via LLM
|
|
67
|
-
- `judge-software-practices` — Deep software practices review via LLM
|
|
68
|
-
- `judge-accessibility` — Deep accessibility/WCAG review via LLM
|
|
69
|
-
- `judge-api-design` — Deep API design review via LLM
|
|
70
|
-
- `judge-reliability` — Deep reliability & resilience review via LLM
|
|
71
|
-
- `judge-observability` — Deep observability & monitoring review via LLM
|
|
72
|
-
- `judge-performance` — Deep performance optimization review via LLM
|
|
73
|
-
- `judge-compliance` — Deep regulatory compliance review via LLM
|
|
74
|
-
- `judge-testing` — Deep testing quality review via LLM
|
|
75
|
-
- `judge-documentation` — Deep documentation quality review via LLM
|
|
76
|
-
- `judge-internationalization` — Deep i18n review via LLM
|
|
77
|
-
- `judge-dependency-health` — Deep dependency health review via LLM
|
|
78
|
-
- `judge-concurrency` — Deep concurrency & async safety review via LLM
|
|
79
|
-
- `judge-ethics-bias` — Deep ethics & bias review via LLM
|
|
80
|
-
- `full-tribunal` — All 18 judges via LLM in a single prompt
|
|
81
|
-
|
|
82
|
-
## Setup
|
|
83
|
-
|
|
84
|
-
### Build
|
|
68
|
+
### 3. Run the Tests
|
|
85
69
|
|
|
86
70
|
```bash
|
|
87
|
-
npm
|
|
88
|
-
npm run build
|
|
71
|
+
npm test
|
|
89
72
|
```
|
|
90
73
|
|
|
91
|
-
|
|
74
|
+
Runs 184 automated tests covering all 18 judges, markdown formatters, and edge cases.
|
|
92
75
|
|
|
93
|
-
|
|
76
|
+
### 4. Connect to Your Editor
|
|
77
|
+
|
|
78
|
+
Add the Judges Panel as an MCP server so your AI coding assistant can use it automatically.
|
|
79
|
+
|
|
80
|
+
**VS Code** — create `.vscode/mcp.json` in your project:
|
|
94
81
|
|
|
95
82
|
```json
|
|
96
83
|
{
|
|
97
|
-
"
|
|
84
|
+
"servers": {
|
|
98
85
|
"judges": {
|
|
99
86
|
"command": "node",
|
|
100
|
-
"args": ["
|
|
87
|
+
"args": ["/absolute/path/to/judges/dist/index.js"]
|
|
101
88
|
}
|
|
102
89
|
}
|
|
103
90
|
}
|
|
104
91
|
```
|
|
105
92
|
|
|
106
|
-
|
|
93
|
+
**Claude Desktop** — add to `claude_desktop_config.json`:
|
|
107
94
|
|
|
108
95
|
```json
|
|
109
96
|
{
|
|
110
|
-
"
|
|
111
|
-
"
|
|
112
|
-
"
|
|
113
|
-
|
|
114
|
-
"args": ["<path-to>/judges/dist/index.js"]
|
|
115
|
-
}
|
|
97
|
+
"mcpServers": {
|
|
98
|
+
"judges": {
|
|
99
|
+
"command": "node",
|
|
100
|
+
"args": ["/absolute/path/to/judges/dist/index.js"]
|
|
116
101
|
}
|
|
117
102
|
}
|
|
118
103
|
}
|
|
119
104
|
```
|
|
120
105
|
|
|
106
|
+
**Or install from npm** instead of cloning:
|
|
107
|
+
|
|
108
|
+
```bash
|
|
109
|
+
npm install -g @kevinrabun/judges
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
Then use `judges` as the command in your MCP config (no `args` needed).
|
|
113
|
+
|
|
114
|
+
---
|
|
115
|
+
|
|
116
|
+
## The Judge Panel
|
|
117
|
+
|
|
118
|
+
| Judge | Domain | Rule Prefix | What It Evaluates |
|
|
119
|
+
|-------|--------|-------------|-------------------|
|
|
120
|
+
| **Data Security** | Data Security & Privacy | `DATA-` | Encryption, PII handling, secrets management, access controls |
|
|
121
|
+
| **Cybersecurity** | Cybersecurity & Threat Defense | `CYBER-` | Injection attacks, XSS, CSRF, auth flaws, OWASP Top 10 |
|
|
122
|
+
| **Cost Effectiveness** | Cost Optimization | `COST-` | Algorithm efficiency, N+1 queries, memory waste, caching strategy |
|
|
123
|
+
| **Scalability** | Scalability & Performance | `SCALE-` | Statelessness, horizontal scaling, concurrency, bottlenecks |
|
|
124
|
+
| **Cloud Readiness** | Cloud-Native & DevOps | `CLOUD-` | 12-Factor compliance, containerization, graceful shutdown, IaC |
|
|
125
|
+
| **Software Practices** | Engineering Best Practices | `SWDEV-` | SOLID principles, type safety, error handling, input validation |
|
|
126
|
+
| **Accessibility** | Accessibility (a11y) | `A11Y-` | WCAG compliance, screen reader support, keyboard navigation, ARIA |
|
|
127
|
+
| **API Design** | API Design & Contracts | `API-` | REST conventions, versioning, pagination, error responses |
|
|
128
|
+
| **Reliability** | Reliability & Resilience | `REL-` | Error handling, timeouts, retries, circuit breakers |
|
|
129
|
+
| **Observability** | Observability & Monitoring | `OBS-` | Structured logging, health checks, metrics, tracing |
|
|
130
|
+
| **Performance** | Performance & Efficiency | `PERF-` | N+1 queries, sync I/O, caching, memory leaks |
|
|
131
|
+
| **Compliance** | Regulatory Compliance | `COMP-` | GDPR/CCPA, PII protection, consent, data retention, audit trails |
|
|
132
|
+
| **Testing** | Testing & Quality Assurance | `TEST-` | Test coverage, assertions, test isolation, naming |
|
|
133
|
+
| **Documentation** | Documentation & Readability | `DOC-` | JSDoc/docstrings, magic numbers, TODOs, code comments |
|
|
134
|
+
| **Internationalization** | Internationalization (i18n) | `I18N-` | Hardcoded strings, locale handling, currency formatting |
|
|
135
|
+
| **Dependency Health** | Dependency Management | `DEPS-` | Version pinning, deprecated packages, supply chain |
|
|
136
|
+
| **Concurrency** | Concurrency & Async Safety | `CONC-` | Race conditions, unbounded parallelism, missing await |
|
|
137
|
+
| **Ethics & Bias** | Ethics & Bias | `ETHICS-` | Demographic logic, dark patterns, inclusive language |
|
|
138
|
+
|
|
139
|
+
---
|
|
140
|
+
|
|
141
|
+
## How It Works
|
|
142
|
+
|
|
143
|
+
The tribunal operates in two modes:
|
|
144
|
+
|
|
145
|
+
1. **Pattern-Based Analysis (Tools)** — The `evaluate_code` and `evaluate_code_single_judge` tools perform heuristic analysis using pattern matching to catch common anti-patterns. This works entirely offline with zero external API calls.
|
|
146
|
+
|
|
147
|
+
2. **LLM-Powered Deep Analysis (Prompts)** — The server exposes MCP prompts (e.g., `judge-data-security`, `full-tribunal`) that provide each judge's expert persona as a system prompt. When used by an LLM-based client, this enables deeper, context-aware analysis beyond what pattern matching can detect.
|
|
148
|
+
|
|
149
|
+
---
|
|
150
|
+
|
|
151
|
+
## Composable by Design
|
|
152
|
+
|
|
153
|
+
Judges Panel is intentionally focused on **heuristic pattern detection** — fast, offline, zero-dependency. It does not try to be an AST parser, a CVE scanner, or a linter. Those capabilities belong in dedicated MCP servers that an AI agent can orchestrate alongside Judges.
|
|
154
|
+
|
|
155
|
+
### Recommended MCP Stack
|
|
156
|
+
|
|
157
|
+
When your AI coding assistant connects to multiple MCP servers, each one contributes its specialty:
|
|
158
|
+
|
|
159
|
+
```
|
|
160
|
+
┌─────────────────────────────────────────────────────────┐
|
|
161
|
+
│ AI Coding Assistant │
|
|
162
|
+
│ (Claude, Copilot, Cursor, etc.) │
|
|
163
|
+
└──────┬──────────┬──────────┬──────────┬────────────────┘
|
|
164
|
+
│ │ │ │
|
|
165
|
+
▼ ▼ ▼ ▼
|
|
166
|
+
┌─────────┐ ┌────────┐ ┌────────┐ ┌────────┐
|
|
167
|
+
│ Judges │ │ AST │ │ CVE / │ │ Linter │
|
|
168
|
+
│ Panel │ │ Server │ │ SBOM │ │ Server │
|
|
169
|
+
└─────────┘ └────────┘ └────────┘ └────────┘
|
|
170
|
+
Heuristic Structural Vuln DB Style &
|
|
171
|
+
patterns analysis scanning correctness
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
| Layer | What It Does | Example Servers |
|
|
175
|
+
|-------|-------------|-----------------|
|
|
176
|
+
| **Judges Panel** | 18-judge quality gate — security patterns, cost, scalability, a11y, compliance, ethics | This server |
|
|
177
|
+
| **AST Analysis** | Deep structural analysis — data flow, complexity metrics, dead code, type tracking | Tree-sitter, Semgrep, SonarQube MCP servers |
|
|
178
|
+
| **CVE / SBOM** | Vulnerability scanning against live databases — known CVEs, license risks, supply chain | OSV, Snyk, Trivy, Grype MCP servers |
|
|
179
|
+
| **Linting** | Language-specific style and correctness rules | ESLint, Ruff, Clippy MCP servers |
|
|
180
|
+
| **Runtime Profiling** | Memory, CPU, latency measurement on running code | Custom profiling MCP servers |
|
|
181
|
+
|
|
182
|
+
### Why Orchestration Beats a Monolith
|
|
183
|
+
|
|
184
|
+
| | Monolith | Orchestrated MCP Stack |
|
|
185
|
+
|---|---|---|
|
|
186
|
+
| **Maintenance** | One team owns everything | Each server evolves independently |
|
|
187
|
+
| **Depth** | Shallow coverage of many domains | Deep expertise per server |
|
|
188
|
+
| **Updates** | CVE data stale = full redeploy | CVE server updates on its own |
|
|
189
|
+
| **Language support** | Must embed parsers for every language | AST server handles this |
|
|
190
|
+
| **User choice** | All or nothing | Pick the servers you need |
|
|
191
|
+
| **Offline capability** | Hard to achieve with CVE deps | Judges runs fully offline; CVE server handles network |
|
|
192
|
+
|
|
193
|
+
### What This Means in Practice
|
|
194
|
+
|
|
195
|
+
When you ask your AI assistant *"Is this code production-ready?"*, the agent can:
|
|
196
|
+
|
|
197
|
+
1. **Judges Panel** → Scan for hardcoded secrets, missing error handling, N+1 queries, accessibility gaps, compliance issues
|
|
198
|
+
2. **AST Server** → Analyze cyclomatic complexity, detect unreachable code, trace tainted data flows
|
|
199
|
+
3. **CVE Server** → Check every dependency in `package.json` against known vulnerabilities
|
|
200
|
+
4. **Linter Server** → Enforce team style rules, catch language-specific gotchas
|
|
201
|
+
|
|
202
|
+
Each server returns structured findings. The AI synthesizes everything into a single, actionable review — no single server needs to do it all.
|
|
203
|
+
|
|
204
|
+
---
|
|
205
|
+
|
|
206
|
+
## MCP Tools
|
|
207
|
+
|
|
208
|
+
### `get_judges`
|
|
209
|
+
List all available judges with their domains and descriptions.
|
|
210
|
+
|
|
211
|
+
### `evaluate_code`
|
|
212
|
+
Submit code to the **full judges panel**. All 18 judges evaluate independently and return a combined verdict.
|
|
213
|
+
|
|
214
|
+
| Parameter | Type | Required | Description |
|
|
215
|
+
|-----------|------|----------|-------------|
|
|
216
|
+
| `code` | string | yes | The source code to evaluate |
|
|
217
|
+
| `language` | string | yes | Programming language (e.g., `typescript`, `python`) |
|
|
218
|
+
| `context` | string | no | Additional context about the code |
|
|
219
|
+
|
|
220
|
+
### `evaluate_code_single_judge`
|
|
221
|
+
Submit code to a **specific judge** for targeted review.
|
|
222
|
+
|
|
223
|
+
| Parameter | Type | Required | Description |
|
|
224
|
+
|-----------|------|----------|-------------|
|
|
225
|
+
| `code` | string | yes | The source code to evaluate |
|
|
226
|
+
| `language` | string | yes | Programming language |
|
|
227
|
+
| `judgeId` | string | yes | See [judge IDs](#judge-ids) below |
|
|
228
|
+
| `context` | string | no | Additional context |
|
|
229
|
+
|
|
230
|
+
#### Judge IDs
|
|
231
|
+
|
|
232
|
+
`data-security` · `cybersecurity` · `cost-effectiveness` · `scalability` · `cloud-readiness` · `software-practices` · `accessibility` · `api-design` · `reliability` · `observability` · `performance` · `compliance` · `testing` · `documentation` · `internationalization` · `dependency-health` · `concurrency` · `ethics-bias`
|
|
233
|
+
|
|
234
|
+
---
|
|
235
|
+
|
|
236
|
+
## MCP Prompts
|
|
237
|
+
|
|
238
|
+
Each judge has a corresponding prompt for LLM-powered deep analysis:
|
|
239
|
+
|
|
240
|
+
| Prompt | Description |
|
|
241
|
+
|--------|-------------|
|
|
242
|
+
| `judge-data-security` | Deep data security review |
|
|
243
|
+
| `judge-cybersecurity` | Deep cybersecurity review |
|
|
244
|
+
| `judge-cost-effectiveness` | Deep cost optimization review |
|
|
245
|
+
| `judge-scalability` | Deep scalability review |
|
|
246
|
+
| `judge-cloud-readiness` | Deep cloud readiness review |
|
|
247
|
+
| `judge-software-practices` | Deep software practices review |
|
|
248
|
+
| `judge-accessibility` | Deep accessibility/WCAG review |
|
|
249
|
+
| `judge-api-design` | Deep API design review |
|
|
250
|
+
| `judge-reliability` | Deep reliability & resilience review |
|
|
251
|
+
| `judge-observability` | Deep observability & monitoring review |
|
|
252
|
+
| `judge-performance` | Deep performance optimization review |
|
|
253
|
+
| `judge-compliance` | Deep regulatory compliance review |
|
|
254
|
+
| `judge-testing` | Deep testing quality review |
|
|
255
|
+
| `judge-documentation` | Deep documentation quality review |
|
|
256
|
+
| `judge-internationalization` | Deep i18n review |
|
|
257
|
+
| `judge-dependency-health` | Deep dependency health review |
|
|
258
|
+
| `judge-concurrency` | Deep concurrency & async safety review |
|
|
259
|
+
| `judge-ethics-bias` | Deep ethics & bias review |
|
|
260
|
+
| `full-tribunal` | All 18 judges in a single prompt |
|
|
261
|
+
|
|
262
|
+
---
|
|
263
|
+
|
|
121
264
|
## Scoring
|
|
122
265
|
|
|
123
266
|
Each judge scores the code from **0 to 100**:
|
|
124
267
|
|
|
125
268
|
| Severity | Score Deduction |
|
|
126
269
|
|----------|----------------|
|
|
127
|
-
| Critical |
|
|
128
|
-
| High |
|
|
129
|
-
| Medium |
|
|
130
|
-
| Low |
|
|
131
|
-
| Info |
|
|
270
|
+
| Critical | −30 points |
|
|
271
|
+
| High | −18 points |
|
|
272
|
+
| Medium | −10 points |
|
|
273
|
+
| Low | −5 points |
|
|
274
|
+
| Info | −2 points |
|
|
132
275
|
|
|
133
276
|
**Verdict logic:**
|
|
134
|
-
- **FAIL** — Any critical finding, or score <
|
|
135
|
-
- **WARNING** — Any high finding, or score <
|
|
136
|
-
- **PASS** — Score ≥
|
|
277
|
+
- **FAIL** — Any critical finding, or score < 60
|
|
278
|
+
- **WARNING** — Any high finding, any medium finding, or score < 80
|
|
279
|
+
- **PASS** — Score ≥ 80 with no critical, high, or medium findings
|
|
137
280
|
|
|
138
281
|
The **overall tribunal score** is the average of all 18 judges. The overall verdict fails if **any** judge fails.
|
|
139
282
|
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
```
|
|
143
|
-
# Judges Panel — Verdict
|
|
144
|
-
|
|
145
|
-
**Overall Verdict: WARNING** | **Score: 68/100**
|
|
146
|
-
Total critical findings: 1 | Total high findings: 3
|
|
147
|
-
|
|
148
|
-
## Individual Judge Results
|
|
149
|
-
|
|
150
|
-
❌ **Judge Data Security** (FAIL, 60/100) — 3 finding(s)
|
|
151
|
-
⚠️ **Judge Cybersecurity** (WARNING, 76/100) — 2 finding(s)
|
|
152
|
-
✅ **Judge Cost Effectiveness** (PASS, 88/100) — 1 finding(s)
|
|
153
|
-
⚠️ **Judge Scalability** (WARNING, 70/100) — 2 finding(s)
|
|
154
|
-
✅ **Judge Cloud Readiness** (PASS, 82/100) — 1 finding(s)
|
|
155
|
-
⚠️ **Judge Software Practices** (WARNING, 72/100) — 3 finding(s)
|
|
156
|
-
```
|
|
283
|
+
---
|
|
157
284
|
|
|
158
285
|
## Project Structure
|
|
159
286
|
|
|
160
287
|
```
|
|
161
288
|
judges/
|
|
162
289
|
├── src/
|
|
163
|
-
│ ├── index.ts
|
|
164
|
-
│ ├── types.ts
|
|
165
|
-
│ ├──
|
|
166
|
-
│
|
|
290
|
+
│ ├── index.ts # MCP server entry point — tools, prompts, transport
|
|
291
|
+
│ ├── types.ts # TypeScript interfaces (Finding, JudgeEvaluation, etc.)
|
|
292
|
+
│ ├── evaluators/ # Pattern-based analysis engine for each judge
|
|
293
|
+
│ │ ├── index.ts # evaluateWithJudge(), evaluateWithTribunal()
|
|
294
|
+
│ │ ├── shared.ts # Scoring, verdict logic, markdown formatters
|
|
295
|
+
│ │ └── *.ts # One analyzer per judge (18 files)
|
|
296
|
+
│ └── judges/ # Judge definitions (id, name, domain, system prompt)
|
|
297
|
+
│ ├── index.ts # JUDGES array, getJudge(), getJudgeSummaries()
|
|
298
|
+
│ └── *.ts # One definition per judge (18 files)
|
|
299
|
+
├── examples/
|
|
300
|
+
│ ├── sample-vulnerable-api.ts # Intentionally flawed code (triggers all 18 judges)
|
|
301
|
+
│ └── demo.ts # Run: npm run demo
|
|
302
|
+
├── tests/
|
|
303
|
+
│ └── judges.test.ts # Run: npm test (184 tests)
|
|
304
|
+
├── server.json # MCP Registry manifest
|
|
167
305
|
├── package.json
|
|
168
306
|
├── tsconfig.json
|
|
169
307
|
└── README.md
|
|
170
308
|
```
|
|
171
309
|
|
|
310
|
+
---
|
|
311
|
+
|
|
312
|
+
## Scripts
|
|
313
|
+
|
|
314
|
+
| Command | Description |
|
|
315
|
+
|---------|-------------|
|
|
316
|
+
| `npm run build` | Compile TypeScript to `dist/` |
|
|
317
|
+
| `npm run dev` | Watch mode — recompile on save |
|
|
318
|
+
| `npm test` | Run the full test suite (184 tests) |
|
|
319
|
+
| `npm run demo` | Run the sample tribunal demo |
|
|
320
|
+
| `npm start` | Start the MCP server |
|
|
321
|
+
| `npm run clean` | Remove `dist/` |
|
|
322
|
+
|
|
323
|
+
---
|
|
324
|
+
|
|
172
325
|
## License
|
|
173
326
|
|
|
174
327
|
MIT
|
|
@@ -20,19 +20,19 @@ export function calculateScore(findings) {
|
|
|
20
20
|
for (const f of findings) {
|
|
21
21
|
switch (f.severity) {
|
|
22
22
|
case "critical":
|
|
23
|
-
score -=
|
|
23
|
+
score -= 30;
|
|
24
24
|
break;
|
|
25
25
|
case "high":
|
|
26
|
-
score -=
|
|
26
|
+
score -= 18;
|
|
27
27
|
break;
|
|
28
28
|
case "medium":
|
|
29
|
-
score -=
|
|
29
|
+
score -= 10;
|
|
30
30
|
break;
|
|
31
31
|
case "low":
|
|
32
|
-
score -=
|
|
32
|
+
score -= 5;
|
|
33
33
|
break;
|
|
34
34
|
case "info":
|
|
35
|
-
score -=
|
|
35
|
+
score -= 2;
|
|
36
36
|
break;
|
|
37
37
|
}
|
|
38
38
|
}
|
|
@@ -41,9 +41,9 @@ export function calculateScore(findings) {
|
|
|
41
41
|
export function deriveVerdict(findings, score) {
|
|
42
42
|
if (findings.some((f) => f.severity === "critical"))
|
|
43
43
|
return "fail";
|
|
44
|
-
if (score <
|
|
44
|
+
if (score < 60)
|
|
45
45
|
return "fail";
|
|
46
|
-
if (findings.some((f) => f.severity === "high") || score <
|
|
46
|
+
if (findings.some((f) => f.severity === "high") || findings.some((f) => f.severity === "medium") || score < 80)
|
|
47
47
|
return "warning";
|
|
48
48
|
return "pass";
|
|
49
49
|
}
|
|
@@ -57,7 +57,7 @@ export function buildSummary(judge, findings, score, verdict) {
|
|
|
57
57
|
summary += `Verdict: **${verdict.toUpperCase()}** | Score: **${score}/100**\n`;
|
|
58
58
|
summary += `Findings: ${critical} critical, ${high} high, ${medium} medium, ${low} low\n\n`;
|
|
59
59
|
if (findings.length === 0) {
|
|
60
|
-
summary += "No issues detected.
|
|
60
|
+
summary += "No pattern-based issues detected. Heuristic analysis has inherent limits — absence of findings does not guarantee the code is free of defects. Manual expert review is strongly recommended.";
|
|
61
61
|
}
|
|
62
62
|
else {
|
|
63
63
|
summary += "Key issues:\n";
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"shared.js","sourceRoot":"","sources":["../../src/evaluators/shared.ts"],"names":[],"mappings":"AASA,gFAAgF;AAChF,2EAA2E;AAC3E,iFAAiF;AAEjF;;GAEG;AACH,MAAM,UAAU,cAAc,CAAC,IAAY,EAAE,OAAe;IAC1D,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAC/B,MAAM,OAAO,GAAa,EAAE,CAAC;IAC7B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,IAAI,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAC3B,OAAO,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QACtB,CAAC;IACH,CAAC;IACD,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,gFAAgF;AAEhF,MAAM,UAAU,cAAc,CAAC,QAAmB;IAChD,IAAI,KAAK,GAAG,GAAG,CAAC;IAChB,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;QACzB,QAAQ,CAAC,CAAC,QAAQ,EAAE,CAAC;YACnB,KAAK,UAAU;gBACb,KAAK,IAAI,EAAE,CAAC;gBACZ,MAAM;YACR,KAAK,MAAM;gBACT,KAAK,IAAI,EAAE,CAAC;gBACZ,MAAM;YACR,KAAK,QAAQ;gBACX,KAAK,IAAI,
|
|
1
|
+
{"version":3,"file":"shared.js","sourceRoot":"","sources":["../../src/evaluators/shared.ts"],"names":[],"mappings":"AASA,gFAAgF;AAChF,2EAA2E;AAC3E,iFAAiF;AAEjF;;GAEG;AACH,MAAM,UAAU,cAAc,CAAC,IAAY,EAAE,OAAe;IAC1D,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAC/B,MAAM,OAAO,GAAa,EAAE,CAAC;IAC7B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,IAAI,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAC3B,OAAO,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QACtB,CAAC;IACH,CAAC;IACD,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,gFAAgF;AAEhF,MAAM,UAAU,cAAc,CAAC,QAAmB;IAChD,IAAI,KAAK,GAAG,GAAG,CAAC;IAChB,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;QACzB,QAAQ,CAAC,CAAC,QAAQ,EAAE,CAAC;YACnB,KAAK,UAAU;gBACb,KAAK,IAAI,EAAE,CAAC;gBACZ,MAAM;YACR,KAAK,MAAM;gBACT,KAAK,IAAI,EAAE,CAAC;gBACZ,MAAM;YACR,KAAK,QAAQ;gBACX,KAAK,IAAI,EAAE,CAAC;gBACZ,MAAM;YACR,KAAK,KAAK;gBACR,KAAK,IAAI,CAAC,CAAC;gBACX,MAAM;YACR,KAAK,MAAM;gBACT,KAAK,IAAI,CAAC,CAAC;gBACX,MAAM;QACV,CAAC;IACH,CAAC;IACD,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC,CAAC;AAC3C,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,QAAmB,EAAE,KAAa;IAC9D,IAAI,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,UAAU,CAAC;QAAE,OAAO,MAAM,CAAC;IACnE,IAAI,KAAK,GAAG,EAAE;QAAE,OAAO,MAAM,CAAC;IAC9B,IAAI,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,MAAM,CAAC,IAAI,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,QAAQ,CAAC,IAAI,KAAK,GAAG,EAAE;QAAE,OAAO,SAAS,CAAC;IACjI,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,gFAAgF;AAEhF,MAAM,UAAU,YAAY,CAC1B,KAAsB,EACtB,QAAmB,EACnB,KAAa,EACb,OAAgB;IAEhB,MAAM,QAAQ,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,UAAU,CAAC,CAAC,MAAM,CAAC;IAC1E,MAAM,IAAI,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,MAAM,CAAC,CAAC,MAAM,CAAC;IAClE,MAAM,MAAM,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,QAAQ,CAAC,CAAC,MAAM,CAAC;IACtE,MAAM,GAAG,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC
,QAAQ,KAAK,KAAK,CAAC,CAAC,MAAM,CAAC;IAEhE,IAAI,OAAO,GAAG,KAAK,KAAK,CAAC,IAAI,QAAQ,KAAK,CAAC,MAAM,IAAI,CAAC;IACtD,OAAO,IAAI,cAAc,OAAO,CAAC,WAAW,EAAE,iBAAiB,KAAK,UAAU,CAAC;IAC/E,OAAO,IAAI,aAAa,QAAQ,cAAc,IAAI,UAAU,MAAM,YAAY,GAAG,UAAU,CAAC;IAE5F,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC1B,OAAO,IAAI,8LAA8L,CAAC;IAC5M,CAAC;SAAM,CAAC;QACN,OAAO,IAAI,eAAe,CAAC;QAC3B,KAAK,MAAM,CAAC,IAAI,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CACpC,CAAC,UAAU,EAAE,MAAM,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,CAC1C,EAAE,CAAC;YACF,OAAO,IAAI,MAAM,CAAC,CAAC,MAAM,MAAM,CAAC,CAAC,QAAQ,KAAK,CAAC,CAAC,KAAK,KAAK,CAAC,CAAC,WAAW,IAAI,CAAC;QAC9E,CAAC;IACH,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,MAAM,UAAU,oBAAoB,CAClC,WAA8B,EAC9B,OAAgB,EAChB,KAAa,EACb,aAAqB,EACrB,SAAiB;IAEjB,IAAI,OAAO,GAAG,8BAA8B,CAAC;IAC7C,OAAO,IAAI,sBAAsB,OAAO,CAAC,WAAW,EAAE,iBAAiB,KAAK,UAAU,CAAC;IACvF,OAAO,IAAI,4BAA4B,aAAa,2BAA2B,SAAS,MAAM,CAAC;IAC/F,OAAO,IAAI,iCAAiC,CAAC;IAE7C,KAAK,MAAM,CAAC,IAAI,WAAW,EAAE,CAAC;QAC5B,MAAM,IAAI,GACR,CAAC,CAAC,OAAO,KAAK,MAAM,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,KAAK,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC;QACpE,OAAO,IAAI,GAAG,IAAI,MAAM,CAAC,CAAC,SAAS,OAAO,CAAC,CAAC,OAAO,CAAC,WAAW,EAAE,KAAK,CAAC,CAAC,KAAK,WAAW,CAAC,CAAC,QAAQ,CAAC,MAAM,eAAe,CAAC;IAC3H,CAAC;IAED,OAAO,IAAI,WAAW,CAAC;IAEvB,6BAA6B;IAC7B,KAAK,MAAM,CAAC,IAAI,WAAW,EAAE,CAAC;QAC5B,OAAO,IAAI,CAAC,CAAC,OAAO,GAAG,MAAM,CAAC;IAChC,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,gFAAgF;AAEhF;;GAEG;AACH,MAAM,UAAU,uBAAuB,CAAC,OAAwB;IAC9D,IAAI,EAAE,GAAG,OAAO,CAAC,OAAO,CAAC;IAEzB,EAAE,IAAI,4BAA4B,CAAC;IAEnC,KAAK,MAAM,UAAU,IAAI,OAAO,CAAC,WAAW,EAAE,CAAC;QAC7C,KAAK,MAAM,OAAO,IAAI,UAAU,CAAC,QAAQ,EAAE,CAAC;YAC1C,MAAM,aAAa,GACjB,OAAO,CAAC,QAAQ,KAAK,UAAU;gBAC7B,CAAC,CAAC,aAAa;gBACf,CAAC,CAAC,OAAO,CAAC,QAAQ,KAAK,MAAM;oBAC7B,CAAC,CAAC,SAAS;oBACX,CAAC,CAAC,OAAO,CAAC,QAAQ,KAAK,QAAQ;wBAC/B,CAAC,CAAC,WAAW;wBACb,CAAC,CAAC,OAAO,CAAC,QAAQ,KAAK,KAAK;4BAC5B,CAAC,CAAC,QAAQ;4BACV,CAAC,CAAC,SAAS,CAAC;YAEhB,EAAE,IAAI,OAAO,aAAa,OAAO,OAAO,CAAC,MAAM,KAAK,OAAO,CAAC
,KAAK,MAAM,CAAC;YACxE,EAAE,IAAI,GAAG,OAAO,CAAC,WAAW,MAAM,CAAC;YACnC,IAAI,OAAO,CAAC,WAAW,IAAI,OAAO,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC1D,EAAE,IAAI,uBAAuB,OAAO,CAAC,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC;YACpE,CAAC;YACD,EAAE,IAAI,uBAAuB,OAAO,CAAC,cAAc,MAAM,CAAC;YAC1D,IAAI,OAAO,CAAC,SAAS,EAAE,CAAC;gBACtB,EAAE,IAAI,kBAAkB,OAAO,CAAC,SAAS,MAAM,CAAC;YAClD,CAAC;YACD,EAAE,IAAI,SAAS,CAAC;QAClB,CAAC;IACH,CAAC;IAED,OAAO,EAAE,CAAC;AACZ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,0BAA0B,CAAC,UAA2B;IACpE,IAAI,EAAE,GAAG,UAAU,CAAC,OAAO,GAAG,MAAM,CAAC;IAErC,EAAE,IAAI,0BAA0B,CAAC;IAEjC,KAAK,MAAM,OAAO,IAAI,UAAU,CAAC,QAAQ,EAAE,CAAC;QAC1C,MAAM,aAAa,GACjB,OAAO,CAAC,QAAQ,KAAK,UAAU;YAC7B,CAAC,CAAC,aAAa;YACf,CAAC,CAAC,OAAO,CAAC,QAAQ,KAAK,MAAM;gBAC7B,CAAC,CAAC,SAAS;gBACX,CAAC,CAAC,OAAO,CAAC,QAAQ,KAAK,QAAQ;oBAC/B,CAAC,CAAC,WAAW;oBACb,CAAC,CAAC,OAAO,CAAC,QAAQ,KAAK,KAAK;wBAC5B,CAAC,CAAC,QAAQ;wBACV,CAAC,CAAC,SAAS,CAAC;QAEhB,EAAE,IAAI,OAAO,aAAa,OAAO,OAAO,CAAC,MAAM,KAAK,OAAO,CAAC,KAAK,MAAM,CAAC;QACxE,EAAE,IAAI,GAAG,OAAO,CAAC,WAAW,MAAM,CAAC;QACnC,IAAI,OAAO,CAAC,WAAW,IAAI,OAAO,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC1D,EAAE,IAAI,uBAAuB,OAAO,CAAC,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC;QACpE,CAAC;QACD,EAAE,IAAI,uBAAuB,OAAO,CAAC,cAAc,MAAM,CAAC;QAC1D,IAAI,OAAO,CAAC,SAAS,EAAE,CAAC;YACtB,EAAE,IAAI,kBAAkB,OAAO,CAAC,SAAS,MAAM,CAAC;QAClD,CAAC;QACD,EAAE,IAAI,SAAS,CAAC;IAClB,CAAC;IAED,OAAO,EAAE,CAAC;AACZ,CAAC"}
|
package/dist/index.js
CHANGED
|
@@ -20,7 +20,7 @@ import { evaluateWithJudge, evaluateWithTribunal, formatVerdictAsMarkdown, forma
|
|
|
20
20
|
// ─── Create MCP Server ──────────────────────────────────────────────────────
|
|
21
21
|
const server = new McpServer({
|
|
22
22
|
name: "judges",
|
|
23
|
-
version: "1.
|
|
23
|
+
version: "1.2.0",
|
|
24
24
|
});
|
|
25
25
|
// ─── Tool: get_judges ────────────────────────────────────────────────────────
|
|
26
26
|
server.tool("get_judges", "List all available judges on the Agent Tribunal panel, including their areas of expertise and what they evaluate.", {}, async () => {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"accessibility.d.ts","sourceRoot":"","sources":["../../src/judges/accessibility.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,kBAAkB,EAAE,
|
|
1
|
+
{"version":3,"file":"accessibility.d.ts","sourceRoot":"","sources":["../../src/judges/accessibility.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,kBAAkB,EAAE,eAiChC,CAAC"}
|
|
@@ -23,6 +23,12 @@ RULES FOR YOUR EVALUATION:
|
|
|
23
23
|
- Reference specific WCAG 2.2 success criteria (e.g., "1.1.1 Non-text Content", "2.1.1 Keyboard").
|
|
24
24
|
- Indicate the WCAG conformance level impacted (A, AA, or AAA).
|
|
25
25
|
- Recommend fixes with code examples using proper ARIA patterns.
|
|
26
|
-
- Score from 0-100 where 100 means fully WCAG 2.2 AA compliant
|
|
26
|
+
- Score from 0-100 where 100 means fully WCAG 2.2 AA compliant.
|
|
27
|
+
|
|
28
|
+
ADVERSARIAL MANDATE:
|
|
29
|
+
- Your role is adversarial: assume the code has accessibility defects and actively hunt for them. Do not give the benefit of the doubt.
|
|
30
|
+
- Never praise or compliment the code. Report only problems, risks, and deficiencies.
|
|
31
|
+
- If you are uncertain whether something is an issue, flag it — false positives are preferred over missed barriers.
|
|
32
|
+
- Absence of findings does not mean the code is accessible. It means your analysis reached its limits. State this explicitly.`,
|
|
27
33
|
};
|
|
28
34
|
//# sourceMappingURL=accessibility.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"accessibility.js","sourceRoot":"","sources":["../../src/judges/accessibility.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,kBAAkB,GAAoB;IACjD,EAAE,EAAE,eAAe;IACnB,IAAI,EAAE,qBAAqB;IAC3B,MAAM,EAAE,sBAAsB;IAC9B,WAAW,EACT,gKAAgK;IAClK,UAAU,EAAE,MAAM;IAClB,YAAY,EAAE
|
|
1
|
+
{"version":3,"file":"accessibility.js","sourceRoot":"","sources":["../../src/judges/accessibility.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,kBAAkB,GAAoB;IACjD,EAAE,EAAE,eAAe;IACnB,IAAI,EAAE,qBAAqB;IAC3B,MAAM,EAAE,sBAAsB;IAC9B,WAAW,EACT,gKAAgK;IAClK,UAAU,EAAE,MAAM;IAClB,YAAY,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;8HAyB8G;CAC7H,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"api-design.d.ts","sourceRoot":"","sources":["../../src/judges/api-design.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,cAAc,EAAE,
|
|
1
|
+
{"version":3,"file":"api-design.d.ts","sourceRoot":"","sources":["../../src/judges/api-design.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,cAAc,EAAE,eAmC5B,CAAC"}
|
|
@@ -25,6 +25,12 @@ RULES FOR YOUR EVALUATION:
|
|
|
25
25
|
- Reference REST API design guides (Google, Microsoft, Zalando API guidelines).
|
|
26
26
|
- Show corrected URL structures and response schemas in examples.
|
|
27
27
|
- Consider both API producer and consumer perspectives.
|
|
28
|
-
- Score from 0-100 where 100 means exemplary API design
|
|
28
|
+
- Score from 0-100 where 100 means exemplary API design.
|
|
29
|
+
|
|
30
|
+
ADVERSARIAL MANDATE:
|
|
31
|
+
- Your role is adversarial: assume the API has design flaws and actively hunt for them. Do not give the benefit of the doubt.
|
|
32
|
+
- Never praise or compliment the code. Report only problems, risks, and deficiencies.
|
|
33
|
+
- If you are uncertain whether something is an issue, flag it — false positives are preferred over missed design flaws.
|
|
34
|
+
- Absence of findings does not mean the API is well-designed. It means your analysis reached its limits. State this explicitly.`,
|
|
29
35
|
};
|
|
30
36
|
//# sourceMappingURL=api-design.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"api-design.js","sourceRoot":"","sources":["../../src/judges/api-design.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,cAAc,GAAoB;IAC7C,EAAE,EAAE,YAAY;IAChB,IAAI,EAAE,kBAAkB;IACxB,MAAM,EAAE,wBAAwB;IAChC,WAAW,EACT,6KAA6K;IAC/K,UAAU,EAAE,KAAK;IACjB,YAAY,EAAE
|
|
1
|
+
{"version":3,"file":"api-design.js","sourceRoot":"","sources":["../../src/judges/api-design.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,cAAc,GAAoB;IAC7C,EAAE,EAAE,YAAY;IAChB,IAAI,EAAE,kBAAkB;IACxB,MAAM,EAAE,wBAAwB;IAChC,WAAW,EACT,6KAA6K;IAC/K,UAAU,EAAE,KAAK;IACjB,YAAY,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;gIA2BgH;CAC/H,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cloud-readiness.d.ts","sourceRoot":"","sources":["../../src/judges/cloud-readiness.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,mBAAmB,EAAE,
|
|
1
|
+
{"version":3,"file":"cloud-readiness.d.ts","sourceRoot":"","sources":["../../src/judges/cloud-readiness.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,mBAAmB,EAAE,eAiCjC,CAAC"}
|
|
@@ -23,6 +23,12 @@ RULES FOR YOUR EVALUATION:
|
|
|
23
23
|
- Reference the 12-Factor App methodology, CNCF patterns, and Well-Architected Framework principles.
|
|
24
24
|
- Distinguish between "can run in the cloud" and "cloud-native."
|
|
25
25
|
- Recommend specific services or patterns (e.g., "Use Azure Key Vault instead of .env files in production").
|
|
26
|
-
- Score from 0-100 where 100 means fully cloud-native
|
|
26
|
+
- Score from 0-100 where 100 means fully cloud-native.
|
|
27
|
+
|
|
28
|
+
ADVERSARIAL MANDATE:
|
|
29
|
+
- Your role is adversarial: assume the code is not cloud-ready and actively hunt for problems. Do not give the benefit of the doubt.
|
|
30
|
+
- Never praise or compliment the code. Report only problems, risks, and deficiencies.
|
|
31
|
+
- If you are uncertain whether something is an issue, flag it — false positives are preferred over missed cloud-readiness gaps.
|
|
32
|
+
- Absence of findings does not mean the code is cloud-native. It means your analysis reached its limits. State this explicitly.`,
|
|
27
33
|
};
|
|
28
34
|
//# sourceMappingURL=cloud-readiness.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cloud-readiness.js","sourceRoot":"","sources":["../../src/judges/cloud-readiness.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,mBAAmB,GAAoB;IAClD,EAAE,EAAE,iBAAiB;IACrB,IAAI,EAAE,uBAAuB;IAC7B,MAAM,EAAE,oCAAoC;IAC5C,WAAW,EACT,4JAA4J;IAC9J,UAAU,EAAE,OAAO;IACnB,YAAY,EAAE
|
|
1
|
+
{"version":3,"file":"cloud-readiness.js","sourceRoot":"","sources":["../../src/judges/cloud-readiness.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,mBAAmB,GAAoB;IAClD,EAAE,EAAE,iBAAiB;IACrB,IAAI,EAAE,uBAAuB;IAC7B,MAAM,EAAE,oCAAoC;IAC5C,WAAW,EACT,4JAA4J;IAC9J,UAAU,EAAE,OAAO;IACnB,YAAY,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;gIAyBgH;CAC/H,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"compliance.d.ts","sourceRoot":"","sources":["../../src/judges/compliance.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,eAAe,EAAE,
|
|
1
|
+
{"version":3,"file":"compliance.d.ts","sourceRoot":"","sources":["../../src/judges/compliance.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,eAAe,EAAE,eAiC7B,CAAC"}
|
|
@@ -23,6 +23,12 @@ RULES FOR YOUR EVALUATION:
|
|
|
23
23
|
- Reference specific regulations and standards (SOC 2 CC6.1, PCI-DSS Req 3.4, GDPR Art. 17).
|
|
24
24
|
- Distinguish between "must comply" (legal obligation) and "should comply" (best practice).
|
|
25
25
|
- Recommend both code changes and process changes where applicable.
|
|
26
|
-
- Score from 0-100 where 100 means fully compliant
|
|
26
|
+
- Score from 0-100 where 100 means fully compliant.
|
|
27
|
+
|
|
28
|
+
ADVERSARIAL MANDATE:
|
|
29
|
+
- Your role is adversarial: assume the code has compliance gaps and actively hunt for them. Do not give the benefit of the doubt.
|
|
30
|
+
- Never praise or compliment the code. Report only problems, risks, and deficiencies.
|
|
31
|
+
- If you are uncertain whether something is an issue, flag it — false positives are preferred over missed compliance violations.
|
|
32
|
+
- Absence of findings does not mean the code is compliant. It means your analysis reached its limits. State this explicitly.`,
|
|
27
33
|
};
|
|
28
34
|
//# sourceMappingURL=compliance.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"compliance.js","sourceRoot":"","sources":["../../src/judges/compliance.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,eAAe,GAAoB;IAC9C,EAAE,EAAE,YAAY;IAChB,IAAI,EAAE,kBAAkB;IACxB,MAAM,EAAE,iCAAiC;IACzC,WAAW,EACT,6JAA6J;IAC/J,UAAU,EAAE,MAAM;IAClB,YAAY,EAAE
|
|
1
|
+
{"version":3,"file":"compliance.js","sourceRoot":"","sources":["../../src/judges/compliance.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,eAAe,GAAoB;IAC9C,EAAE,EAAE,YAAY;IAChB,IAAI,EAAE,kBAAkB;IACxB,MAAM,EAAE,iCAAiC;IACzC,WAAW,EACT,6JAA6J;IAC/J,UAAU,EAAE,MAAM;IAClB,YAAY,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;6HAyB6G;CAC5H,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"concurrency.d.ts","sourceRoot":"","sources":["../../src/judges/concurrency.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,gBAAgB,EAAE,
|
|
1
|
+
{"version":3,"file":"concurrency.d.ts","sourceRoot":"","sources":["../../src/judges/concurrency.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,gBAAgB,EAAE,eAmC9B,CAAC"}
|
|
@@ -25,6 +25,12 @@ RULES FOR YOUR EVALUATION:
|
|
|
25
25
|
- Describe the exact sequence of events that could trigger a race condition or deadlock.
|
|
26
26
|
- Recommend specific concurrency primitives or patterns for each issue.
|
|
27
27
|
- Reference Java Concurrency in Practice, Go concurrency patterns, or Rust ownership model as applicable.
|
|
28
|
-
- Score from 0-100 where 100 means thread-safe and correctly concurrent
|
|
28
|
+
- Score from 0-100 where 100 means thread-safe and correctly concurrent.
|
|
29
|
+
|
|
30
|
+
ADVERSARIAL MANDATE:
|
|
31
|
+
- Your role is adversarial: assume the code has concurrency bugs and actively hunt for them. Do not give the benefit of the doubt.
|
|
32
|
+
- Never praise or compliment the code. Report only problems, risks, and deficiencies.
|
|
33
|
+
- If you are uncertain whether something is an issue, flag it — false positives are preferred over missed race conditions.
|
|
34
|
+
- Absence of findings does not mean the code is thread-safe. It means your analysis reached its limits. State this explicitly.`,
|
|
29
35
|
};
|
|
30
36
|
//# sourceMappingURL=concurrency.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"concurrency.js","sourceRoot":"","sources":["../../src/judges/concurrency.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,gBAAgB,GAAoB;IAC/C,EAAE,EAAE,aAAa;IACjB,IAAI,EAAE,mBAAmB;IACzB,MAAM,EAAE,6BAA6B;IACrC,WAAW,EACT,uIAAuI;IACzI,UAAU,EAAE,MAAM;IAClB,YAAY,EAAE
|
|
1
|
+
{"version":3,"file":"concurrency.js","sourceRoot":"","sources":["../../src/judges/concurrency.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,gBAAgB,GAAoB;IAC/C,EAAE,EAAE,aAAa;IACjB,IAAI,EAAE,mBAAmB;IACzB,MAAM,EAAE,6BAA6B;IACrC,WAAW,EACT,uIAAuI;IACzI,UAAU,EAAE,MAAM;IAClB,YAAY,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;+HA2B+G;CAC9H,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cost-effectiveness.d.ts","sourceRoot":"","sources":["../../src/judges/cost-effectiveness.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,sBAAsB,EAAE,
|
|
1
|
+
{"version":3,"file":"cost-effectiveness.d.ts","sourceRoot":"","sources":["../../src/judges/cost-effectiveness.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,sBAAsB,EAAE,eAgCpC,CAAC"}
|
|
@@ -22,6 +22,12 @@ RULES FOR YOUR EVALUATION:
|
|
|
22
22
|
- Quantify impact where possible (e.g. "This N+1 pattern will generate ~1000 extra queries per request at scale").
|
|
23
23
|
- Recommend specific optimizations with estimated savings.
|
|
24
24
|
- Consider both runtime cost and developer productivity cost.
|
|
25
|
-
- Score from 0-100 where 100 means optimally cost-effective
|
|
25
|
+
- Score from 0-100 where 100 means optimally cost-effective.
|
|
26
|
+
|
|
27
|
+
ADVERSARIAL MANDATE:
|
|
28
|
+
- Your role is adversarial: assume the code wastes resources and actively hunt for inefficiencies. Do not give the benefit of the doubt.
|
|
29
|
+
- Never praise or compliment the code. Report only problems, risks, and deficiencies.
|
|
30
|
+
- If you are uncertain whether something is an issue, flag it — false positives are preferred over missed cost waste.
|
|
31
|
+
- Absence of findings does not mean the code is cost-effective. It means your analysis reached its limits. State this explicitly.`,
|
|
26
32
|
};
|
|
27
33
|
//# sourceMappingURL=cost-effectiveness.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cost-effectiveness.js","sourceRoot":"","sources":["../../src/judges/cost-effectiveness.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,sBAAsB,GAAoB;IACrD,EAAE,EAAE,oBAAoB;IACxB,IAAI,EAAE,0BAA0B;IAChC,MAAM,EAAE,yCAAyC;IACjD,WAAW,EACT,sJAAsJ;IACxJ,UAAU,EAAE,MAAM;IAClB,YAAY,EAAE
|
|
1
|
+
{"version":3,"file":"cost-effectiveness.js","sourceRoot":"","sources":["../../src/judges/cost-effectiveness.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,sBAAsB,GAAoB;IACrD,EAAE,EAAE,oBAAoB;IACxB,IAAI,EAAE,0BAA0B;IAChC,MAAM,EAAE,yCAAyC;IACjD,WAAW,EACT,sJAAsJ;IACxJ,UAAU,EAAE,MAAM;IAClB,YAAY,EAAE;;;;;;;;;;;;;;;;;;;;;;;;kIAwBkH;CACjI,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cybersecurity.d.ts","sourceRoot":"","sources":["../../src/judges/cybersecurity.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,kBAAkB,EAAE,
|
|
1
|
+
{"version":3,"file":"cybersecurity.d.ts","sourceRoot":"","sources":["../../src/judges/cybersecurity.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,kBAAkB,EAAE,eAgChC,CAAC"}
|
|
@@ -22,6 +22,12 @@ RULES FOR YOUR EVALUATION:
|
|
|
22
22
|
- Think like an attacker: describe how each vulnerability could be exploited.
|
|
23
23
|
- Provide concrete remediation steps with code examples where possible.
|
|
24
24
|
- Reference OWASP, CWE IDs, and CVE IDs where applicable.
|
|
25
|
-
- Score from 0-100 where 100 means no exploitable vulnerabilities found
|
|
25
|
+
- Score from 0-100 where 100 means no exploitable vulnerabilities found.
|
|
26
|
+
|
|
27
|
+
ADVERSARIAL MANDATE:
|
|
28
|
+
- Your role is adversarial: assume the code is vulnerable and actively hunt for exploits. Do not give the benefit of the doubt.
|
|
29
|
+
- Never praise or compliment the code. Report only problems, risks, and deficiencies.
|
|
30
|
+
- If you are uncertain whether something is an issue, flag it — false positives are preferred over missed vulnerabilities.
|
|
31
|
+
- Absence of findings does not mean the code is secure. It means your analysis reached its limits. State this explicitly.`,
|
|
26
32
|
};
|
|
27
33
|
//# sourceMappingURL=cybersecurity.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cybersecurity.js","sourceRoot":"","sources":["../../src/judges/cybersecurity.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,kBAAkB,GAAoB;IACjD,EAAE,EAAE,eAAe;IACnB,IAAI,EAAE,qBAAqB;IAC3B,MAAM,EAAE,gCAAgC;IACxC,WAAW,EACT,0KAA0K;IAC5K,UAAU,EAAE,OAAO;IACnB,YAAY,EAAE
|
|
1
|
+
{"version":3,"file":"cybersecurity.js","sourceRoot":"","sources":["../../src/judges/cybersecurity.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,kBAAkB,GAAoB;IACjD,EAAE,EAAE,eAAe;IACnB,IAAI,EAAE,qBAAqB;IAC3B,MAAM,EAAE,gCAAgC;IACxC,WAAW,EACT,0KAA0K;IAC5K,UAAU,EAAE,OAAO;IACnB,YAAY,EAAE;;;;;;;;;;;;;;;;;;;;;;;;0HAwB0G;CACzH,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"data-security.d.ts","sourceRoot":"","sources":["../../src/judges/data-security.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,iBAAiB,EAAE,
|
|
1
|
+
{"version":3,"file":"data-security.d.ts","sourceRoot":"","sources":["../../src/judges/data-security.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,iBAAiB,EAAE,eA8B/B,CAAC"}
|
|
@@ -20,6 +20,12 @@ RULES FOR YOUR EVALUATION:
|
|
|
20
20
|
- Be specific: cite exact lines, variable names, or patterns.
|
|
21
21
|
- Always recommend a concrete fix, not just "fix this."
|
|
22
22
|
- Reference standards where applicable (OWASP, NIST 800-53, GDPR Article numbers).
|
|
23
|
-
- Score from 0-100 where 100 means fully compliant with no findings
|
|
23
|
+
- Score from 0-100 where 100 means fully compliant with no findings.
|
|
24
|
+
|
|
25
|
+
ADVERSARIAL MANDATE:
|
|
26
|
+
- Your role is adversarial: assume the code leaks or mishandles data and actively hunt for exposures. Do not give the benefit of the doubt.
|
|
27
|
+
- Never praise or compliment the code. Report only problems, risks, and deficiencies.
|
|
28
|
+
- If you are uncertain whether something is an issue, flag it — false positives are preferred over missed data breaches.
|
|
29
|
+
- Absence of findings does not mean data is secure. It means your analysis reached its limits. State this explicitly.`,
|
|
24
30
|
};
|
|
25
31
|
//# sourceMappingURL=data-security.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"data-security.js","sourceRoot":"","sources":["../../src/judges/data-security.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,iBAAiB,GAAoB;IAChD,EAAE,EAAE,eAAe;IACnB,IAAI,EAAE,qBAAqB;IAC3B,MAAM,EAAE,yBAAyB;IACjC,WAAW,EACT,8LAA8L;IAChM,UAAU,EAAE,MAAM;IAClB,YAAY,EAAE
|
|
1
|
+
{"version":3,"file":"data-security.js","sourceRoot":"","sources":["../../src/judges/data-security.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,iBAAiB,GAAoB;IAChD,EAAE,EAAE,eAAe;IACnB,IAAI,EAAE,qBAAqB;IAC3B,MAAM,EAAE,yBAAyB;IACjC,WAAW,EACT,8LAA8L;IAChM,UAAU,EAAE,MAAM;IAClB,YAAY,EAAE;;;;;;;;;;;;;;;;;;;;;;sHAsBsG;CACrH,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"dependency-health.d.ts","sourceRoot":"","sources":["../../src/judges/dependency-health.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,qBAAqB,EAAE,
|
|
1
|
+
{"version":3,"file":"dependency-health.d.ts","sourceRoot":"","sources":["../../src/judges/dependency-health.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,qBAAqB,EAAE,eAmCnC,CAAC"}
|
|
@@ -25,6 +25,12 @@ RULES FOR YOUR EVALUATION:
|
|
|
25
25
|
- Reference OWASP Dependency-Check, OpenSSF Scorecard, and supply chain security best practices.
|
|
26
26
|
- Recommend specific alternatives for problematic dependencies.
|
|
27
27
|
- Distinguish between direct dependency risk and transitive dependency risk.
|
|
28
|
-
- Score from 0-100 where 100 means healthy, secure dependency tree
|
|
28
|
+
- Score from 0-100 where 100 means healthy, secure dependency tree.
|
|
29
|
+
|
|
30
|
+
ADVERSARIAL MANDATE:
|
|
31
|
+
- Your role is adversarial: assume the dependency tree has risks and actively hunt for them. Do not give the benefit of the doubt.
|
|
32
|
+
- Never praise or compliment the code. Report only problems, risks, and deficiencies.
|
|
33
|
+
- If you are uncertain whether something is an issue, flag it — false positives are preferred over missed supply chain risks.
|
|
34
|
+
- Absence of findings does not mean dependencies are healthy. It means your analysis reached its limits. State this explicitly.`,
|
|
29
35
|
};
|
|
30
36
|
//# sourceMappingURL=dependency-health.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"dependency-health.js","sourceRoot":"","sources":["../../src/judges/dependency-health.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,qBAAqB,GAAoB;IACpD,EAAE,EAAE,mBAAmB;IACvB,IAAI,EAAE,yBAAyB;IAC/B,MAAM,EAAE,6BAA6B;IACrC,WAAW,EACT,uJAAuJ;IACzJ,UAAU,EAAE,MAAM;IAClB,YAAY,EAAE
|
|
1
|
+
{"version":3,"file":"dependency-health.js","sourceRoot":"","sources":["../../src/judges/dependency-health.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,qBAAqB,GAAoB;IACpD,EAAE,EAAE,mBAAmB;IACvB,IAAI,EAAE,yBAAyB;IAC/B,MAAM,EAAE,6BAA6B;IACrC,WAAW,EACT,uJAAuJ;IACzJ,UAAU,EAAE,MAAM;IAClB,YAAY,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;gIA2BgH;CAC/H,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"documentation.d.ts","sourceRoot":"","sources":["../../src/judges/documentation.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,kBAAkB,EAAE,
|
|
1
|
+
{"version":3,"file":"documentation.d.ts","sourceRoot":"","sources":["../../src/judges/documentation.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,kBAAkB,EAAE,eAmChC,CAAC"}
|
|
@@ -25,6 +25,12 @@ RULES FOR YOUR EVALUATION:
|
|
|
25
25
|
- Reference documentation best practices (Diátaxis framework, Google developer documentation style guide).
|
|
26
26
|
- Provide example documentation snippets in recommendations.
|
|
27
27
|
- Evaluate from the perspective of a new developer encountering the code for the first time.
|
|
28
|
-
- Score from 0-100 where 100 means exemplary documentation
|
|
28
|
+
- Score from 0-100 where 100 means exemplary documentation.
|
|
29
|
+
|
|
30
|
+
ADVERSARIAL MANDATE:
|
|
31
|
+
- Your role is adversarial: assume the documentation is inadequate and actively hunt for gaps. Do not give the benefit of the doubt.
|
|
32
|
+
- Never praise or compliment the code. Report only problems, risks, and deficiencies.
|
|
33
|
+
- If you are uncertain whether something is an issue, flag it — false positives are preferred over missed documentation gaps.
|
|
34
|
+
- Absence of findings does not mean the documentation is good. It means your analysis reached its limits. State this explicitly.`,
|
|
29
35
|
};
|
|
30
36
|
//# sourceMappingURL=documentation.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"documentation.js","sourceRoot":"","sources":["../../src/judges/documentation.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,kBAAkB,GAAoB;IACjD,EAAE,EAAE,eAAe;IACnB,IAAI,EAAE,qBAAqB;IAC3B,MAAM,EAAE,sCAAsC;IAC9C,WAAW,EACT,6JAA6J;IAC/J,UAAU,EAAE,KAAK;IACjB,YAAY,EAAE
|
|
1
|
+
{"version":3,"file":"documentation.js","sourceRoot":"","sources":["../../src/judges/documentation.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,kBAAkB,GAAoB;IACjD,EAAE,EAAE,eAAe;IACnB,IAAI,EAAE,qBAAqB;IAC3B,MAAM,EAAE,sCAAsC;IAC9C,WAAW,EACT,6JAA6J;IAC/J,UAAU,EAAE,KAAK;IACjB,YAAY,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;iIA2BiH;CAChI,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ethics-bias.d.ts","sourceRoot":"","sources":["../../src/judges/ethics-bias.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,eAAe,EAAE,
|
|
1
|
+
{"version":3,"file":"ethics-bias.d.ts","sourceRoot":"","sources":["../../src/judges/ethics-bias.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,eAAe,EAAE,eAmC7B,CAAC"}
|
|
@@ -25,6 +25,12 @@ RULES FOR YOUR EVALUATION:
|
|
|
25
25
|
- Reference the EU AI Act, NIST AI RMF (AI 100-1), IEEE Ethically Aligned Design.
|
|
26
26
|
- Recommend specific fairness tools (Fairlearn, AI Fairness 360, What-If Tool).
|
|
27
27
|
- Evaluate proportionally: not all code involves AI/ML — score based on relevance.
|
|
28
|
-
- Score from 0-100 where 100 means fully ethical and bias-aware
|
|
28
|
+
- Score from 0-100 where 100 means fully ethical and bias-aware.
|
|
29
|
+
|
|
30
|
+
ADVERSARIAL MANDATE:
|
|
31
|
+
- Your role is adversarial: assume the code has ethical risks or bias and actively hunt for them. Do not give the benefit of the doubt.
|
|
32
|
+
- Never praise or compliment the code. Report only problems, risks, and deficiencies.
|
|
33
|
+
- If you are uncertain whether something is an issue, flag it — false positives are preferred over missed ethical violations.
|
|
34
|
+
- Absence of findings does not mean the code is ethical. It means your analysis reached its limits. State this explicitly.`,
|
|
29
35
|
};
|
|
30
36
|
//# sourceMappingURL=ethics-bias.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ethics-bias.js","sourceRoot":"","sources":["../../src/judges/ethics-bias.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,eAAe,GAAoB;IAC9C,EAAE,EAAE,aAAa;IACjB,IAAI,EAAE,qBAAqB;IAC3B,MAAM,EAAE,yBAAyB;IACjC,WAAW,EACT,0JAA0J;IAC5J,UAAU,EAAE,QAAQ;IACpB,YAAY,EAAE
|
|
1
|
+
{"version":3,"file":"ethics-bias.js","sourceRoot":"","sources":["../../src/judges/ethics-bias.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,eAAe,GAAoB;IAC9C,EAAE,EAAE,aAAa;IACjB,IAAI,EAAE,qBAAqB;IAC3B,MAAM,EAAE,yBAAyB;IACjC,WAAW,EACT,0JAA0J;IAC5J,UAAU,EAAE,QAAQ;IACpB,YAAY,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;2HA2B2G;CAC1H,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"internationalization.d.ts","sourceRoot":"","sources":["../../src/judges/internationalization.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,yBAAyB,EAAE,
|
|
1
|
+
{"version":3,"file":"internationalization.d.ts","sourceRoot":"","sources":["../../src/judges/internationalization.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,yBAAyB,EAAE,eAiCvC,CAAC"}
|
|
@@ -23,6 +23,12 @@ RULES FOR YOUR EVALUATION:
|
|
|
23
23
|
- Reference Unicode standards, CLDR, W3C i18n best practices.
|
|
24
24
|
- Show corrected code using Intl APIs, ICU message format, or i18n library patterns.
|
|
25
25
|
- Consider the impact on languages with different scripts (CJK, Arabic, Thai, Devanagari).
|
|
26
|
-
- Score from 0-100 where 100 means fully internationalization-ready
|
|
26
|
+
- Score from 0-100 where 100 means fully internationalization-ready.
|
|
27
|
+
|
|
28
|
+
ADVERSARIAL MANDATE:
|
|
29
|
+
- Your role is adversarial: assume the code will break in non-English locales and actively hunt for i18n defects. Do not give the benefit of the doubt.
|
|
30
|
+
- Never praise or compliment the code. Report only problems, risks, and deficiencies.
|
|
31
|
+
- If you are uncertain whether something is an issue, flag it — false positives are preferred over missed i18n problems.
|
|
32
|
+
- Absence of findings does not mean the code is internationalization-ready. It means your analysis reached its limits. State this explicitly.`,
|
|
27
33
|
};
|
|
28
34
|
//# sourceMappingURL=internationalization.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"internationalization.js","sourceRoot":"","sources":["../../src/judges/internationalization.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,yBAAyB,GAAoB;IACxD,EAAE,EAAE,sBAAsB;IAC1B,IAAI,EAAE,4BAA4B;IAClC,MAAM,EAAE,qBAAqB;IAC7B,WAAW,EACT,oJAAoJ;IACtJ,UAAU,EAAE,MAAM;IAClB,YAAY,EAAE
|
|
1
|
+
{"version":3,"file":"internationalization.js","sourceRoot":"","sources":["../../src/judges/internationalization.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,yBAAyB,GAAoB;IACxD,EAAE,EAAE,sBAAsB;IAC1B,IAAI,EAAE,4BAA4B;IAClC,MAAM,EAAE,qBAAqB;IAC7B,WAAW,EACT,oJAAoJ;IACtJ,UAAU,EAAE,MAAM;IAClB,YAAY,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;8IAyB8H;CAC7I,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"observability.d.ts","sourceRoot":"","sources":["../../src/judges/observability.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,kBAAkB,EAAE,
|
|
1
|
+
{"version":3,"file":"observability.d.ts","sourceRoot":"","sources":["../../src/judges/observability.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,kBAAkB,EAAE,eAiChC,CAAC"}
|
|
@@ -23,6 +23,12 @@ RULES FOR YOUR EVALUATION:
|
|
|
23
23
|
- Reference OpenTelemetry semantic conventions and Prometheus best practices.
|
|
24
24
|
- Recommend specific instrumentation code snippets.
|
|
25
25
|
- Evaluate whether the observability data would be useful during a production incident.
|
|
26
|
-
- Score from 0-100 where 100 means fully observable and debuggable in production
|
|
26
|
+
- Score from 0-100 where 100 means fully observable and debuggable in production.
|
|
27
|
+
|
|
28
|
+
ADVERSARIAL MANDATE:
|
|
29
|
+
- Your role is adversarial: assume the code is unobservable and will be impossible to debug in production. Actively hunt for monitoring gaps. Do not give the benefit of the doubt.
|
|
30
|
+
- Never praise or compliment the code. Report only problems, risks, and deficiencies.
|
|
31
|
+
- If you are uncertain whether something is an issue, flag it — false positives are preferred over missed observability gaps.
|
|
32
|
+
- Absence of findings does not mean the code is observable. It means your analysis reached its limits. State this explicitly.`,
|
|
27
33
|
};
|
|
28
34
|
//# sourceMappingURL=observability.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"observability.js","sourceRoot":"","sources":["../../src/judges/observability.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,kBAAkB,GAAoB;IACjD,EAAE,EAAE,eAAe;IACnB,IAAI,EAAE,qBAAqB;IAC3B,MAAM,EAAE,0BAA0B;IAClC,WAAW,EACT,8JAA8J;IAChK,UAAU,EAAE,KAAK;IACjB,YAAY,EAAE
|
|
1
|
+
{"version":3,"file":"observability.js","sourceRoot":"","sources":["../../src/judges/observability.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,kBAAkB,GAAoB;IACjD,EAAE,EAAE,eAAe;IACnB,IAAI,EAAE,qBAAqB;IAC3B,MAAM,EAAE,0BAA0B;IAClC,WAAW,EACT,8JAA8J;IAChK,UAAU,EAAE,KAAK;IACjB,YAAY,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;8HAyB8G;CAC7H,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"performance.d.ts","sourceRoot":"","sources":["../../src/judges/performance.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,gBAAgB,EAAE,
|
|
1
|
+
{"version":3,"file":"performance.d.ts","sourceRoot":"","sources":["../../src/judges/performance.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,gBAAgB,EAAE,eAmC9B,CAAC"}
|
|
@@ -25,6 +25,12 @@ RULES FOR YOUR EVALUATION:
|
|
|
25
25
|
- Quantify impact where possible (e.g., "This creates ~10,000 objects per request that will pressure GC").
|
|
26
26
|
- Recommend specific optimizations with before/after code examples.
|
|
27
27
|
- Distinguish between premature optimization and genuine hot-path issues.
|
|
28
|
-
- Score from 0-100 where 100 means optimally performant
|
|
28
|
+
- Score from 0-100 where 100 means optimally performant.
|
|
29
|
+
|
|
30
|
+
ADVERSARIAL MANDATE:
|
|
31
|
+
- Your role is adversarial: assume the code has performance problems and actively hunt for bottlenecks. Do not give the benefit of the doubt.
|
|
32
|
+
- Never praise or compliment the code. Report only problems, risks, and deficiencies.
|
|
33
|
+
- If you are uncertain whether something is an issue, flag it — false positives are preferred over missed performance regressions.
|
|
34
|
+
- Absence of findings does not mean the code is performant. It means your analysis reached its limits. State this explicitly.`,
|
|
29
35
|
};
|
|
30
36
|
//# sourceMappingURL=performance.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"performance.js","sourceRoot":"","sources":["../../src/judges/performance.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,gBAAgB,GAAoB;IAC/C,EAAE,EAAE,aAAa;IACjB,IAAI,EAAE,mBAAmB;IACzB,MAAM,EAAE,qBAAqB;IAC7B,WAAW,EACT,kKAAkK;IACpK,UAAU,EAAE,MAAM;IAClB,YAAY,EAAE
|
|
1
|
+
{"version":3,"file":"performance.js","sourceRoot":"","sources":["../../src/judges/performance.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,gBAAgB,GAAoB;IAC/C,EAAE,EAAE,aAAa;IACjB,IAAI,EAAE,mBAAmB;IACzB,MAAM,EAAE,qBAAqB;IAC7B,WAAW,EACT,kKAAkK;IACpK,UAAU,EAAE,MAAM;IAClB,YAAY,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;8HA2B8G;CAC7H,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"reliability.d.ts","sourceRoot":"","sources":["../../src/judges/reliability.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,gBAAgB,EAAE,
|
|
1
|
+
{"version":3,"file":"reliability.d.ts","sourceRoot":"","sources":["../../src/judges/reliability.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,gBAAgB,EAAE,eAmC9B,CAAC"}
|
|
@@ -25,6 +25,12 @@ RULES FOR YOUR EVALUATION:
|
|
|
25
25
|
- Reference patterns from "Release It!" (Michael Nygard) and the SRE book (Google).
|
|
26
26
|
- Describe failure scenarios: "If X fails, then Y happens, causing Z impact."
|
|
27
27
|
- Recommend specific resilience libraries or patterns with configuration examples.
|
|
28
|
-
- Score from 0-100 where 100 means highly resilient and fault-tolerant
|
|
28
|
+
- Score from 0-100 where 100 means highly resilient and fault-tolerant.
|
|
29
|
+
|
|
30
|
+
ADVERSARIAL MANDATE:
|
|
31
|
+
- Your role is adversarial: assume the code will fail in production and actively hunt for reliability gaps. Do not give the benefit of the doubt.
|
|
32
|
+
- Never praise or compliment the code. Report only problems, risks, and deficiencies.
|
|
33
|
+
- If you are uncertain whether something is an issue, flag it — false positives are preferred over missed reliability risks.
|
|
34
|
+
- Absence of findings does not mean the code is reliable. It means your analysis reached its limits. State this explicitly.`,
|
|
29
35
|
};
|
|
30
36
|
//# sourceMappingURL=reliability.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"reliability.js","sourceRoot":"","sources":["../../src/judges/reliability.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,gBAAgB,GAAoB;IAC/C,EAAE,EAAE,aAAa;IACjB,IAAI,EAAE,mBAAmB;IACzB,MAAM,EAAE,0BAA0B;IAClC,WAAW,EACT,gKAAgK;IAClK,UAAU,EAAE,KAAK;IACjB,YAAY,EAAE
|
|
1
|
+
{"version":3,"file":"reliability.js","sourceRoot":"","sources":["../../src/judges/reliability.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,gBAAgB,GAAoB;IAC/C,EAAE,EAAE,aAAa;IACjB,IAAI,EAAE,mBAAmB;IACzB,MAAM,EAAE,0BAA0B;IAClC,WAAW,EACT,gKAAgK;IAClK,UAAU,EAAE,KAAK;IACjB,YAAY,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;4HA2B4G;CAC3H,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scalability.d.ts","sourceRoot":"","sources":["../../src/judges/scalability.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,gBAAgB,EAAE,
|
|
1
|
+
{"version":3,"file":"scalability.d.ts","sourceRoot":"","sources":["../../src/judges/scalability.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,gBAAgB,EAAE,eAiC9B,CAAC"}
|
|
@@ -23,6 +23,12 @@ RULES FOR YOUR EVALUATION:
|
|
|
23
23
|
- Think about what breaks first when traffic increases 10x or 100x.
|
|
24
24
|
- Distinguish between "works now" and "will work at scale."
|
|
25
25
|
- Recommend specific architectural patterns (CQRS, event sourcing, circuit breakers, etc.).
|
|
26
|
-
- Score from 0-100 where 100 means fully scalable with no bottlenecks
|
|
26
|
+
- Score from 0-100 where 100 means fully scalable with no bottlenecks.
|
|
27
|
+
|
|
28
|
+
ADVERSARIAL MANDATE:
|
|
29
|
+
- Your role is adversarial: assume the code will not scale and actively hunt for bottlenecks. Do not give the benefit of the doubt.
|
|
30
|
+
- Never praise or compliment the code. Report only problems, risks, and deficiencies.
|
|
31
|
+
- If you are uncertain whether something is an issue, flag it — false positives are preferred over missed scalability limits.
|
|
32
|
+
- Absence of findings does not mean the code will scale. It means your analysis reached its limits. State this explicitly.`,
|
|
27
33
|
};
|
|
28
34
|
//# sourceMappingURL=scalability.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scalability.js","sourceRoot":"","sources":["../../src/judges/scalability.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,gBAAgB,GAAoB;IAC/C,EAAE,EAAE,aAAa;IACjB,IAAI,EAAE,mBAAmB;IACzB,MAAM,EAAE,2BAA2B;IACnC,WAAW,EACT,+JAA+J;IACjK,UAAU,EAAE,OAAO;IACnB,YAAY,EAAE
|
|
1
|
+
{"version":3,"file":"scalability.js","sourceRoot":"","sources":["../../src/judges/scalability.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,gBAAgB,GAAoB;IAC/C,EAAE,EAAE,aAAa;IACjB,IAAI,EAAE,mBAAmB;IACzB,MAAM,EAAE,2BAA2B;IACnC,WAAW,EACT,+JAA+J;IACjK,UAAU,EAAE,OAAO;IACnB,YAAY,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;2HAyB2G;CAC1H,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"software-practices.d.ts","sourceRoot":"","sources":["../../src/judges/software-practices.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,sBAAsB,EAAE,
|
|
1
|
+
{"version":3,"file":"software-practices.d.ts","sourceRoot":"","sources":["../../src/judges/software-practices.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,sBAAsB,EAAE,eAmCpC,CAAC"}
|
|
@@ -22,9 +22,15 @@ YOUR EVALUATION CRITERIA:
|
|
|
22
22
|
|
|
23
23
|
RULES FOR YOUR EVALUATION:
|
|
24
24
|
- Assign rule IDs with prefix "SWDEV-" (e.g. SWDEV-001).
|
|
25
|
-
- Be
|
|
25
|
+
- Be direct: explain why the practice is a problem and what risk it introduces.
|
|
26
26
|
- Provide refactored code examples when recommending improvements.
|
|
27
27
|
- Reference Clean Code (Robert Martin), SOLID, DRY, KISS, YAGNI where applicable.
|
|
28
|
-
- Score from 0-100 where 100 means exemplary software engineering
|
|
28
|
+
- Score from 0-100 where 100 means exemplary software engineering.
|
|
29
|
+
|
|
30
|
+
ADVERSARIAL MANDATE:
|
|
31
|
+
- Your role is adversarial: assume the code has engineering quality problems and actively hunt for them. Do not give the benefit of the doubt.
|
|
32
|
+
- Never praise or compliment the code. Report only problems, risks, and deficiencies.
|
|
33
|
+
- If you are uncertain whether something is an issue, flag it — false positives are preferred over missed code quality issues.
|
|
34
|
+
- Absence of findings does not mean the code follows best practices. It means your analysis reached its limits. State this explicitly.`,
|
|
29
35
|
};
|
|
30
36
|
//# sourceMappingURL=software-practices.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"software-practices.js","sourceRoot":"","sources":["../../src/judges/software-practices.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,sBAAsB,GAAoB;IACrD,EAAE,EAAE,oBAAoB;IACxB,IAAI,EAAE,0BAA0B;IAChC,MAAM,EAAE,mDAAmD;IAC3D,WAAW,EACT,6LAA6L;IAC/L,UAAU,EAAE,OAAO;IACnB,YAAY,EAAE
|
|
1
|
+
{"version":3,"file":"software-practices.js","sourceRoot":"","sources":["../../src/judges/software-practices.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,sBAAsB,GAAoB;IACrD,EAAE,EAAE,oBAAoB;IACxB,IAAI,EAAE,0BAA0B;IAChC,MAAM,EAAE,mDAAmD;IAC3D,WAAW,EACT,6LAA6L;IAC/L,UAAU,EAAE,OAAO;IACnB,YAAY,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;uIA2BuH;CACtI,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"testing.d.ts","sourceRoot":"","sources":["../../src/judges/testing.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,YAAY,EAAE,
|
|
1
|
+
{"version":3,"file":"testing.d.ts","sourceRoot":"","sources":["../../src/judges/testing.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,YAAY,EAAE,eAmC1B,CAAC"}
|
package/dist/judges/testing.js
CHANGED
|
@@ -25,6 +25,12 @@ RULES FOR YOUR EVALUATION:
|
|
|
25
25
|
- Reference testing best practices (Kent Beck, Martin Fowler's Test Pyramid, FIRST principles).
|
|
26
26
|
- Recommend specific test cases that should be written, with example test code.
|
|
27
27
|
- Evaluate both the tests AND the testability of the code under test.
|
|
28
|
-
- Score from 0-100 where 100 means comprehensive, well-structured test suite
|
|
28
|
+
- Score from 0-100 where 100 means comprehensive, well-structured test suite.
|
|
29
|
+
|
|
30
|
+
ADVERSARIAL MANDATE:
|
|
31
|
+
- Your role is adversarial: assume the test coverage is insufficient and actively hunt for gaps. Do not give the benefit of the doubt.
|
|
32
|
+
- Never praise or compliment the code. Report only problems, risks, and deficiencies.
|
|
33
|
+
- If you are uncertain whether something is an issue, flag it — false positives are preferred over missed testing gaps.
|
|
34
|
+
- Absence of findings does not mean the code is well-tested. It means your analysis reached its limits. State this explicitly.`,
|
|
29
35
|
};
|
|
30
36
|
//# sourceMappingURL=testing.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"testing.js","sourceRoot":"","sources":["../../src/judges/testing.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,YAAY,GAAoB;IAC3C,EAAE,EAAE,SAAS;IACb,IAAI,EAAE,eAAe;IACrB,MAAM,EAAE,yBAAyB;IACjC,WAAW,EACT,oKAAoK;IACtK,UAAU,EAAE,MAAM;IAClB,YAAY,EAAE
|
|
1
|
+
{"version":3,"file":"testing.js","sourceRoot":"","sources":["../../src/judges/testing.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,YAAY,GAAoB;IAC3C,EAAE,EAAE,SAAS;IACb,IAAI,EAAE,eAAe;IACrB,MAAM,EAAE,yBAAyB;IACjC,WAAW,EACT,oKAAoK;IACtK,UAAU,EAAE,MAAM;IAClB,YAAY,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;+HA2B+G;CAC9H,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@kevinrabun/judges",
|
|
3
|
-
"version": "1.0.2",
|
|
3
|
+
"version": "1.2.0",
|
|
4
4
|
"description": "18 specialized judges that evaluate AI-generated code for security, cost, and quality.",
|
|
5
5
|
"mcpName": "io.github.KevinRabun/judges",
|
|
6
6
|
"type": "module",
|
|
@@ -19,6 +19,8 @@
|
|
|
19
19
|
"start": "node dist/index.js",
|
|
20
20
|
"dev": "tsc --watch",
|
|
21
21
|
"clean": "rimraf dist",
|
|
22
|
+
"test": "npx tsx --test tests/judges.test.ts",
|
|
23
|
+
"demo": "npx tsx examples/demo.ts",
|
|
22
24
|
"prepublishOnly": "npm run build"
|
|
23
25
|
},
|
|
24
26
|
"keywords": [
|
|
@@ -48,6 +50,7 @@
|
|
|
48
50
|
},
|
|
49
51
|
"devDependencies": {
|
|
50
52
|
"@types/node": "^25.3.0",
|
|
53
|
+
"tsx": "^4.19.4",
|
|
51
54
|
"typescript": "^5.9.3"
|
|
52
55
|
}
|
|
53
56
|
}
|
package/server.json
CHANGED
|
@@ -7,12 +7,12 @@
|
|
|
7
7
|
"url": "https://github.com/kevinrabun/judges",
|
|
8
8
|
"source": "github"
|
|
9
9
|
},
|
|
10
|
-
"version": "1.0.2",
|
|
10
|
+
"version": "1.2.0",
|
|
11
11
|
"packages": [
|
|
12
12
|
{
|
|
13
13
|
"registryType": "npm",
|
|
14
14
|
"identifier": "@kevinrabun/judges",
|
|
15
|
-
"version": "1.0.2",
|
|
15
|
+
"version": "1.2.0",
|
|
16
16
|
"transport": {
|
|
17
17
|
"type": "stdio"
|
|
18
18
|
}
|