@kevinrabun/judges 1.0.2 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. package/README.md +264 -111
  2. package/dist/evaluators/shared.js +8 -8
  3. package/dist/evaluators/shared.js.map +1 -1
  4. package/dist/index.js +1 -1
  5. package/dist/judges/accessibility.d.ts.map +1 -1
  6. package/dist/judges/accessibility.js +7 -1
  7. package/dist/judges/accessibility.js.map +1 -1
  8. package/dist/judges/api-design.d.ts.map +1 -1
  9. package/dist/judges/api-design.js +7 -1
  10. package/dist/judges/api-design.js.map +1 -1
  11. package/dist/judges/cloud-readiness.d.ts.map +1 -1
  12. package/dist/judges/cloud-readiness.js +7 -1
  13. package/dist/judges/cloud-readiness.js.map +1 -1
  14. package/dist/judges/compliance.d.ts.map +1 -1
  15. package/dist/judges/compliance.js +7 -1
  16. package/dist/judges/compliance.js.map +1 -1
  17. package/dist/judges/concurrency.d.ts.map +1 -1
  18. package/dist/judges/concurrency.js +7 -1
  19. package/dist/judges/concurrency.js.map +1 -1
  20. package/dist/judges/cost-effectiveness.d.ts.map +1 -1
  21. package/dist/judges/cost-effectiveness.js +7 -1
  22. package/dist/judges/cost-effectiveness.js.map +1 -1
  23. package/dist/judges/cybersecurity.d.ts.map +1 -1
  24. package/dist/judges/cybersecurity.js +7 -1
  25. package/dist/judges/cybersecurity.js.map +1 -1
  26. package/dist/judges/data-security.d.ts.map +1 -1
  27. package/dist/judges/data-security.js +7 -1
  28. package/dist/judges/data-security.js.map +1 -1
  29. package/dist/judges/dependency-health.d.ts.map +1 -1
  30. package/dist/judges/dependency-health.js +7 -1
  31. package/dist/judges/dependency-health.js.map +1 -1
  32. package/dist/judges/documentation.d.ts.map +1 -1
  33. package/dist/judges/documentation.js +7 -1
  34. package/dist/judges/documentation.js.map +1 -1
  35. package/dist/judges/ethics-bias.d.ts.map +1 -1
  36. package/dist/judges/ethics-bias.js +7 -1
  37. package/dist/judges/ethics-bias.js.map +1 -1
  38. package/dist/judges/internationalization.d.ts.map +1 -1
  39. package/dist/judges/internationalization.js +7 -1
  40. package/dist/judges/internationalization.js.map +1 -1
  41. package/dist/judges/observability.d.ts.map +1 -1
  42. package/dist/judges/observability.js +7 -1
  43. package/dist/judges/observability.js.map +1 -1
  44. package/dist/judges/performance.d.ts.map +1 -1
  45. package/dist/judges/performance.js +7 -1
  46. package/dist/judges/performance.js.map +1 -1
  47. package/dist/judges/reliability.d.ts.map +1 -1
  48. package/dist/judges/reliability.js +7 -1
  49. package/dist/judges/reliability.js.map +1 -1
  50. package/dist/judges/scalability.d.ts.map +1 -1
  51. package/dist/judges/scalability.js +7 -1
  52. package/dist/judges/scalability.js.map +1 -1
  53. package/dist/judges/software-practices.d.ts.map +1 -1
  54. package/dist/judges/software-practices.js +8 -2
  55. package/dist/judges/software-practices.js.map +1 -1
  56. package/dist/judges/testing.d.ts.map +1 -1
  57. package/dist/judges/testing.js +7 -1
  58. package/dist/judges/testing.js.map +1 -1
  59. package/package.json +4 -1
  60. package/server.json +2 -2
package/README.md CHANGED
@@ -2,173 +2,326 @@
2
2
 
3
3
  An MCP (Model Context Protocol) server that provides a panel of **18 specialized judges** to evaluate AI-generated code — acting as an independent quality gate regardless of which project is being reviewed.
4
4
 
5
- ## The Judge Panel
6
-
7
- | Judge | Domain | Rule Prefix | What It Evaluates |
8
- |-------|--------|-------------|-------------------|
9
- | **Judge Data Security** | Data Security & Privacy | `DATA-` | Encryption, PII handling, secrets management, access controls, GDPR/CCPA/HIPAA compliance |
10
- | **Judge Cybersecurity** | Cybersecurity & Threat Defense | `CYBER-` | Injection attacks, XSS, CSRF, auth flaws, dependency CVEs, OWASP Top 10 |
11
- | **Judge Cost Effectiveness** | Cost Optimization | `COST-` | Algorithm efficiency, N+1 queries, memory waste, caching strategy, cloud spend |
12
- | **Judge Scalability** | Scalability & Performance | `SCALE-` | Statelessness, horizontal scaling, concurrency, bottlenecks, rate limiting |
13
- | **Judge Cloud Readiness** | Cloud-Native & DevOps | `CLOUD-` | 12-Factor compliance, containerization, observability, graceful shutdown, IaC |
14
- | **Judge Software Practices** | Engineering Best Practices | `SWDEV-` | SOLID principles, type safety, error handling, testing, input validation, clean code |
15
- | **Judge Accessibility** | Accessibility (a11y) | `A11Y-` | WCAG compliance, screen reader support, keyboard navigation, ARIA, color contrast |
16
- | **Judge API Design** | API Design & Contracts | `API-` | REST conventions, versioning, pagination, error responses, consistency |
17
- | **Judge Reliability** | Reliability & Resilience | `REL-` | Error handling, timeouts, retries, circuit breakers, graceful degradation |
18
- | **Judge Observability** | Observability & Monitoring | `OBS-` | Structured logging, health checks, metrics, tracing, correlation IDs |
19
- | **Judge Performance** | Performance & Efficiency | `PERF-` | N+1 queries, sync I/O, caching, memory leaks, algorithmic complexity |
20
- | **Judge Compliance** | Regulatory Compliance | `COMP-` | GDPR/CCPA, PII protection, consent, data retention, audit trails |
21
- | **Judge Testing** | Testing & Quality Assurance | `TEST-` | Test coverage, assertions, test isolation, naming, external dependencies |
22
- | **Judge Documentation** | Documentation & Readability | `DOC-` | JSDoc/docstrings, magic numbers, TODOs, code comments, module docs |
23
- | **Judge Internationalization** | Internationalization (i18n) | `I18N-` | Hardcoded strings, locale handling, currency formatting, RTL support |
24
- | **Judge Dependency Health** | Dependency Management | `DEPS-` | Version pinning, deprecated packages, supply chain, import hygiene |
25
- | **Judge Concurrency** | Concurrency & Async Safety | `CONC-` | Race conditions, unbounded parallelism, missing await, resource cleanup |
26
- | **Judge Ethics & Bias** | Ethics & Bias | `ETHICS-` | Demographic logic, explainability, dark patterns, inclusive language |
27
-
28
- ## How It Works
5
+ [![CI](https://github.com/KevinRabun/judges/actions/workflows/ci.yml/badge.svg)](https://github.com/KevinRabun/judges/actions/workflows/ci.yml)
6
+ [![npm](https://img.shields.io/npm/v/@kevinrabun/judges)](https://www.npmjs.com/package/@kevinrabun/judges)
7
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
29
8
 
30
- The tribunal operates in two modes:
31
-
32
- 1. **Pattern-Based Analysis (Tools)** — The `evaluate_code` and `evaluate_code_single_judge` tools perform heuristic analysis using pattern matching to catch common anti-patterns. This works entirely offline with zero external API calls.
9
+ ---
33
10
 
34
- 2. **LLM-Powered Deep Analysis (Prompts)** — The server exposes MCP prompts (`judge-data-security`, `judge-cybersecurity`, etc., and `full-tribunal`) that provide each judge's expert persona as a system prompt. When used by an LLM-based client, this enables much deeper, context-aware analysis.
11
+ ## Quick Start
35
12
 
36
- ## MCP Tools
13
+ ### 1. Install and Build
37
14
 
38
- ### `get_judges`
39
- List all available judges with their domains and descriptions.
15
+ ```bash
16
+ git clone https://github.com/KevinRabun/judges.git
17
+ cd judges
18
+ npm install
19
+ npm run build
20
+ ```
40
21
 
41
- ### `evaluate_code`
42
- Submit code to the **full judges panel**. All 18 judges evaluate independently and return a combined verdict.
22
+ ### 2. Try the Demo
43
23
 
44
- **Parameters:**
45
- - `code` (string, required) — The source code to evaluate
46
- - `language` (string, required) — Programming language (e.g., "typescript", "python")
47
- - `context` (string, optional) — Additional context about the code
24
+ Run the included demo to see all 18 judges evaluate a purposely flawed API server:
48
25
 
49
- **Returns:** Combined verdict with overall score, per-judge scores, all findings, and recommendations.
26
+ ```bash
27
+ npm run demo
28
+ ```
50
29
 
51
- ### `evaluate_code_single_judge`
52
- Submit code to a **specific judge** for targeted review.
30
+ This evaluates [`examples/sample-vulnerable-api.ts`](examples/sample-vulnerable-api.ts) — a file intentionally packed with security holes, performance anti-patterns, and code quality issues — and prints a full verdict with per-judge scores and findings.
53
31
 
54
- **Parameters:**
55
- - `code` (string, required) — The source code to evaluate
56
- - `language` (string, required) — Programming language
57
- - `judgeId` (string, required) — One of: `data-security`, `cybersecurity`, `cost-effectiveness`, `scalability`, `cloud-readiness`, `software-practices`, `accessibility`, `api-design`, `reliability`, `observability`, `performance`, `compliance`, `testing`, `documentation`, `internationalization`, `dependency-health`, `concurrency`, `ethics-bias`
58
- - `context` (string, optional) — Additional context
32
+ **What you'll see:**
59
33
 
60
- ## MCP Prompts
34
+ ```
35
+ ╔══════════════════════════════════════════════════════════════╗
36
+ ║ Judges Panel — Full Tribunal Demo ║
37
+ ╚══════════════════════════════════════════════════════════════╝
38
+
39
+ Overall Verdict : FAIL
40
+ Overall Score : 43/100
41
+ Critical Issues : 15
42
+ High Issues : 17
43
+ Total Findings : 83
44
+ Judges Run : 18
45
+
46
+ Per-Judge Breakdown:
47
+ ────────────────────────────────────────────────────────────────
48
+ ❌ Judge Data Security 0/100 7 finding(s)
49
+ ❌ Judge Cybersecurity 0/100 7 finding(s)
50
+ ❌ Judge Cost Effectiveness 52/100 5 finding(s)
51
+ ⚠️ Judge Scalability 65/100 4 finding(s)
52
+ ❌ Judge Cloud Readiness 61/100 4 finding(s)
53
+ ❌ Judge Software Practices 45/100 6 finding(s)
54
+ ❌ Judge Accessibility 0/100 8 finding(s)
55
+ ❌ Judge API Design 0/100 9 finding(s)
56
+ ❌ Judge Reliability 54/100 3 finding(s)
57
+ ❌ Judge Observability 45/100 5 finding(s)
58
+ ❌ Judge Performance 27/100 5 finding(s)
59
+ ❌ Judge Compliance 0/100 4 finding(s)
60
+ ⚠️ Judge Testing 90/100 1 finding(s)
61
+ ⚠️ Judge Documentation 70/100 4 finding(s)
62
+ ⚠️ Judge Internationalization 65/100 4 finding(s)
63
+ ⚠️ Judge Dependency Health 90/100 1 finding(s)
64
+ ❌ Judge Concurrency 44/100 4 finding(s)
65
+ ❌ Judge Ethics & Bias 65/100 2 finding(s)
66
+ ```
61
67
 
62
- `judge-data-security` — Deep data security review via LLM
63
- - `judge-cybersecurity` — Deep cybersecurity review via LLM
64
- - `judge-cost-effectiveness` — Deep cost optimization review via LLM
65
- - `judge-scalability` — Deep scalability review via LLM
66
- - `judge-cloud-readiness` — Deep cloud readiness review via LLM
67
- - `judge-software-practices` — Deep software practices review via LLM
68
- - `judge-accessibility` — Deep accessibility/WCAG review via LLM
69
- - `judge-api-design` — Deep API design review via LLM
70
- - `judge-reliability` — Deep reliability & resilience review via LLM
71
- - `judge-observability` — Deep observability & monitoring review via LLM
72
- - `judge-performance` — Deep performance optimization review via LLM
73
- - `judge-compliance` — Deep regulatory compliance review via LLM
74
- - `judge-testing` — Deep testing quality review via LLM
75
- - `judge-documentation` — Deep documentation quality review via LLM
76
- - `judge-internationalization` — Deep i18n review via LLM
77
- - `judge-dependency-health` — Deep dependency health review via LLM
78
- - `judge-concurrency` — Deep concurrency & async safety review via LLM
79
- - `judge-ethics-bias` — Deep ethics & bias review via LLM
80
- - `full-tribunal` — All 18 judges via LLM in a single prompt
81
-
82
- ## Setup
83
-
84
- ### Build
68
+ ### 3. Run the Tests
85
69
 
86
70
  ```bash
87
- npm install
88
- npm run build
71
+ npm test
89
72
  ```
90
73
 
91
- ### Configure in VS Code (GitHub Copilot / Claude Desktop)
74
+ Runs 184 automated tests covering all 18 judges, markdown formatters, and edge cases.
92
75
 
93
- Add to your MCP settings (`.vscode/mcp.json`, `claude_desktop_config.json`, etc.):
76
+ ### 4. Connect to Your Editor
77
+
78
+ Add the Judges Panel as an MCP server so your AI coding assistant can use it automatically.
79
+
80
+ **VS Code** — create `.vscode/mcp.json` in your project:
94
81
 
95
82
  ```json
96
83
  {
97
- "mcpServers": {
84
+ "servers": {
98
85
  "judges": {
99
86
  "command": "node",
100
- "args": ["<path-to>/judges/dist/index.js"]
87
+ "args": ["/absolute/path/to/judges/dist/index.js"]
101
88
  }
102
89
  }
103
90
  }
104
91
  ```
105
92
 
106
- ### Configure in VS Code Settings (settings.json)
93
+ **Claude Desktop** — add to `claude_desktop_config.json`:
107
94
 
108
95
  ```json
109
96
  {
110
- "mcp": {
111
- "servers": {
112
- "judges": {
113
- "command": "node",
114
- "args": ["<path-to>/judges/dist/index.js"]
115
- }
97
+ "mcpServers": {
98
+ "judges": {
99
+ "command": "node",
100
+ "args": ["/absolute/path/to/judges/dist/index.js"]
116
101
  }
117
102
  }
118
103
  }
119
104
  ```
120
105
 
106
+ **Or install from npm** instead of cloning:
107
+
108
+ ```bash
109
+ npm install -g @kevinrabun/judges
110
+ ```
111
+
112
+ Then use `judges` as the command in your MCP config (no `args` needed).
113
+
114
+ ---
115
+
116
+ ## The Judge Panel
117
+
118
+ | Judge | Domain | Rule Prefix | What It Evaluates |
119
+ |-------|--------|-------------|-------------------|
120
+ | **Data Security** | Data Security & Privacy | `DATA-` | Encryption, PII handling, secrets management, access controls |
121
+ | **Cybersecurity** | Cybersecurity & Threat Defense | `CYBER-` | Injection attacks, XSS, CSRF, auth flaws, OWASP Top 10 |
122
+ | **Cost Effectiveness** | Cost Optimization | `COST-` | Algorithm efficiency, N+1 queries, memory waste, caching strategy |
123
+ | **Scalability** | Scalability & Performance | `SCALE-` | Statelessness, horizontal scaling, concurrency, bottlenecks |
124
+ | **Cloud Readiness** | Cloud-Native & DevOps | `CLOUD-` | 12-Factor compliance, containerization, graceful shutdown, IaC |
125
+ | **Software Practices** | Engineering Best Practices | `SWDEV-` | SOLID principles, type safety, error handling, input validation |
126
+ | **Accessibility** | Accessibility (a11y) | `A11Y-` | WCAG compliance, screen reader support, keyboard navigation, ARIA |
127
+ | **API Design** | API Design & Contracts | `API-` | REST conventions, versioning, pagination, error responses |
128
+ | **Reliability** | Reliability & Resilience | `REL-` | Error handling, timeouts, retries, circuit breakers |
129
+ | **Observability** | Observability & Monitoring | `OBS-` | Structured logging, health checks, metrics, tracing |
130
+ | **Performance** | Performance & Efficiency | `PERF-` | N+1 queries, sync I/O, caching, memory leaks |
131
+ | **Compliance** | Regulatory Compliance | `COMP-` | GDPR/CCPA, PII protection, consent, data retention, audit trails |
132
+ | **Testing** | Testing & Quality Assurance | `TEST-` | Test coverage, assertions, test isolation, naming |
133
+ | **Documentation** | Documentation & Readability | `DOC-` | JSDoc/docstrings, magic numbers, TODOs, code comments |
134
+ | **Internationalization** | Internationalization (i18n) | `I18N-` | Hardcoded strings, locale handling, currency formatting |
135
+ | **Dependency Health** | Dependency Management | `DEPS-` | Version pinning, deprecated packages, supply chain |
136
+ | **Concurrency** | Concurrency & Async Safety | `CONC-` | Race conditions, unbounded parallelism, missing await |
137
+ | **Ethics & Bias** | Ethics & Bias | `ETHICS-` | Demographic logic, dark patterns, inclusive language |
138
+
139
+ ---
140
+
141
+ ## How It Works
142
+
143
+ The tribunal operates in two modes:
144
+
145
+ 1. **Pattern-Based Analysis (Tools)** — The `evaluate_code` and `evaluate_code_single_judge` tools perform heuristic analysis using pattern matching to catch common anti-patterns. This works entirely offline with zero external API calls.
146
+
147
+ 2. **LLM-Powered Deep Analysis (Prompts)** — The server exposes MCP prompts (e.g., `judge-data-security`, `full-tribunal`) that provide each judge's expert persona as a system prompt. When used by an LLM-based client, this enables deeper, context-aware analysis beyond what pattern matching can detect.
148
+
149
+ ---
150
+
151
+ ## Composable by Design
152
+
153
+ Judges Panel is intentionally focused on **heuristic pattern detection** — fast, offline, zero-dependency. It does not try to be an AST parser, a CVE scanner, or a linter. Those capabilities belong in dedicated MCP servers that an AI agent can orchestrate alongside Judges.
154
+
155
+ ### Recommended MCP Stack
156
+
157
+ When your AI coding assistant connects to multiple MCP servers, each one contributes its specialty:
158
+
159
+ ```
160
+ ┌─────────────────────────────────────────────────────────┐
161
+ │ AI Coding Assistant │
162
+ │ (Claude, Copilot, Cursor, etc.) │
163
+ └──────┬──────────┬──────────┬──────────┬────────────────┘
164
+ │ │ │ │
165
+ ▼ ▼ ▼ ▼
166
+ ┌─────────┐ ┌────────┐ ┌────────┐ ┌────────┐
167
+ │ Judges │ │ AST │ │ CVE / │ │ Linter │
168
+ │ Panel │ │ Server │ │ SBOM │ │ Server │
169
+ └─────────┘ └────────┘ └────────┘ └────────┘
170
+ Heuristic Structural Vuln DB Style &
171
+ patterns analysis scanning correctness
172
+ ```
173
+
174
+ | Layer | What It Does | Example Servers |
175
+ |-------|-------------|-----------------|
176
+ | **Judges Panel** | 18-judge quality gate — security patterns, cost, scalability, a11y, compliance, ethics | This server |
177
+ | **AST Analysis** | Deep structural analysis — data flow, complexity metrics, dead code, type tracking | Tree-sitter, Semgrep, SonarQube MCP servers |
178
+ | **CVE / SBOM** | Vulnerability scanning against live databases — known CVEs, license risks, supply chain | OSV, Snyk, Trivy, Grype MCP servers |
179
+ | **Linting** | Language-specific style and correctness rules | ESLint, Ruff, Clippy MCP servers |
180
+ | **Runtime Profiling** | Memory, CPU, latency measurement on running code | Custom profiling MCP servers |
181
+
182
+ ### Why Orchestration Beats a Monolith
183
+
184
+ | | Monolith | Orchestrated MCP Stack |
185
+ |---|---|---|
186
+ | **Maintenance** | One team owns everything | Each server evolves independently |
187
+ | **Depth** | Shallow coverage of many domains | Deep expertise per server |
188
+ | **Updates** | CVE data stale = full redeploy | CVE server updates on its own |
189
+ | **Language support** | Must embed parsers for every language | AST server handles this |
190
+ | **User choice** | All or nothing | Pick the servers you need |
191
+ | **Offline capability** | Hard to achieve with CVE deps | Judges runs fully offline; CVE server handles network |
192
+
193
+ ### What This Means in Practice
194
+
195
+ When you ask your AI assistant *"Is this code production-ready?"*, the agent can:
196
+
197
+ 1. **Judges Panel** → Scan for hardcoded secrets, missing error handling, N+1 queries, accessibility gaps, compliance issues
198
+ 2. **AST Server** → Analyze cyclomatic complexity, detect unreachable code, trace tainted data flows
199
+ 3. **CVE Server** → Check every dependency in `package.json` against known vulnerabilities
200
+ 4. **Linter Server** → Enforce team style rules, catch language-specific gotchas
201
+
202
+ Each server returns structured findings. The AI synthesizes everything into a single, actionable review — no single server needs to do it all.
203
+
204
+ ---
205
+
206
+ ## MCP Tools
207
+
208
+ ### `get_judges`
209
+ List all available judges with their domains and descriptions.
210
+
211
+ ### `evaluate_code`
212
+ Submit code to the **full judges panel**. All 18 judges evaluate independently and return a combined verdict.
213
+
214
+ | Parameter | Type | Required | Description |
215
+ |-----------|------|----------|-------------|
216
+ | `code` | string | yes | The source code to evaluate |
217
+ | `language` | string | yes | Programming language (e.g., `typescript`, `python`) |
218
+ | `context` | string | no | Additional context about the code |
219
+
220
+ ### `evaluate_code_single_judge`
221
+ Submit code to a **specific judge** for targeted review.
222
+
223
+ | Parameter | Type | Required | Description |
224
+ |-----------|------|----------|-------------|
225
+ | `code` | string | yes | The source code to evaluate |
226
+ | `language` | string | yes | Programming language |
227
+ | `judgeId` | string | yes | See [judge IDs](#judge-ids) below |
228
+ | `context` | string | no | Additional context |
229
+
230
+ #### Judge IDs
231
+
232
+ `data-security` · `cybersecurity` · `cost-effectiveness` · `scalability` · `cloud-readiness` · `software-practices` · `accessibility` · `api-design` · `reliability` · `observability` · `performance` · `compliance` · `testing` · `documentation` · `internationalization` · `dependency-health` · `concurrency` · `ethics-bias`
233
+
234
+ ---
235
+
236
+ ## MCP Prompts
237
+
238
+ Each judge has a corresponding prompt for LLM-powered deep analysis:
239
+
240
+ | Prompt | Description |
241
+ |--------|-------------|
242
+ | `judge-data-security` | Deep data security review |
243
+ | `judge-cybersecurity` | Deep cybersecurity review |
244
+ | `judge-cost-effectiveness` | Deep cost optimization review |
245
+ | `judge-scalability` | Deep scalability review |
246
+ | `judge-cloud-readiness` | Deep cloud readiness review |
247
+ | `judge-software-practices` | Deep software practices review |
248
+ | `judge-accessibility` | Deep accessibility/WCAG review |
249
+ | `judge-api-design` | Deep API design review |
250
+ | `judge-reliability` | Deep reliability & resilience review |
251
+ | `judge-observability` | Deep observability & monitoring review |
252
+ | `judge-performance` | Deep performance optimization review |
253
+ | `judge-compliance` | Deep regulatory compliance review |
254
+ | `judge-testing` | Deep testing quality review |
255
+ | `judge-documentation` | Deep documentation quality review |
256
+ | `judge-internationalization` | Deep i18n review |
257
+ | `judge-dependency-health` | Deep dependency health review |
258
+ | `judge-concurrency` | Deep concurrency & async safety review |
259
+ | `judge-ethics-bias` | Deep ethics & bias review |
260
+ | `full-tribunal` | All 18 judges in a single prompt |
261
+
262
+ ---
263
+
121
264
  ## Scoring
122
265
 
123
266
  Each judge scores the code from **0 to 100**:
124
267
 
125
268
  | Severity | Score Deduction |
126
269
  |----------|----------------|
127
- | Critical | -20 points |
128
- | High | -12 points |
129
- | Medium | -6 points |
130
- | Low | -3 points |
131
- | Info | 0 points |
270
+ | Critical | −30 points |
271
+ | High | −18 points |
272
+ | Medium | −10 points |
273
+ | Low | −5 points |
274
+ | Info | −2 points |
132
275
 
133
276
  **Verdict logic:**
134
- - **FAIL** — Any critical finding, or score < 50
135
- - **WARNING** — Any high finding, or score < 75
136
- - **PASS** — Score ≥ 75 with no critical or high findings
277
+ - **FAIL** — Any critical finding, or score < 60
278
+ - **WARNING** — Any high finding, any medium finding, or score < 80
279
+ - **PASS** — Score ≥ 80 with no critical, high, or medium findings
137
280
 
138
281
  The **overall tribunal score** is the average of all 18 judges. The overall verdict fails if **any** judge fails.
139
282
 
140
- ## Example Output
141
-
142
- ```
143
- # Judges Panel — Verdict
144
-
145
- **Overall Verdict: WARNING** | **Score: 68/100**
146
- Total critical findings: 1 | Total high findings: 3
147
-
148
- ## Individual Judge Results
149
-
150
- ❌ **Judge Data Security** (FAIL, 60/100) — 3 finding(s)
151
- ⚠️ **Judge Cybersecurity** (WARNING, 76/100) — 2 finding(s)
152
- ✅ **Judge Cost Effectiveness** (PASS, 88/100) — 1 finding(s)
153
- ⚠️ **Judge Scalability** (WARNING, 70/100) — 2 finding(s)
154
- ✅ **Judge Cloud Readiness** (PASS, 82/100) — 1 finding(s)
155
- ⚠️ **Judge Software Practices** (WARNING, 72/100) — 3 finding(s)
156
- ```
283
+ ---
157
284
 
158
285
  ## Project Structure
159
286
 
160
287
  ```
161
288
  judges/
162
289
  ├── src/
163
- │ ├── index.ts # MCP server entry point — tools, prompts, transport
164
- │ ├── types.ts # TypeScript interfaces for judges, findings, verdicts
165
- │ ├── judges.ts # Judge definitions with expert system prompts
166
- │ └── evaluator.ts # Pattern-based analysis engine + scoring
290
+ │ ├── index.ts # MCP server entry point — tools, prompts, transport
291
+ │ ├── types.ts # TypeScript interfaces (Finding, JudgeEvaluation, etc.)
292
+ │ ├── evaluators/ # Pattern-based analysis engine for each judge
293
+ │ │ ├── index.ts # evaluateWithJudge(), evaluateWithTribunal()
294
+ │ │ ├── shared.ts # Scoring, verdict logic, markdown formatters
295
+ │ │ └── *.ts # One analyzer per judge (18 files)
296
+ │ └── judges/ # Judge definitions (id, name, domain, system prompt)
297
+ │ ├── index.ts # JUDGES array, getJudge(), getJudgeSummaries()
298
+ │ └── *.ts # One definition per judge (18 files)
299
+ ├── examples/
300
+ │ ├── sample-vulnerable-api.ts # Intentionally flawed code (triggers all 18 judges)
301
+ │ └── demo.ts # Run: npm run demo
302
+ ├── tests/
303
+ │ └── judges.test.ts # Run: npm test (184 tests)
304
+ ├── server.json # MCP Registry manifest
167
305
  ├── package.json
168
306
  ├── tsconfig.json
169
307
  └── README.md
170
308
  ```
171
309
 
310
+ ---
311
+
312
+ ## Scripts
313
+
314
+ | Command | Description |
315
+ |---------|-------------|
316
+ | `npm run build` | Compile TypeScript to `dist/` |
317
+ | `npm run dev` | Watch mode — recompile on save |
318
+ | `npm test` | Run the full test suite (184 tests) |
319
+ | `npm run demo` | Run the sample tribunal demo |
320
+ | `npm start` | Start the MCP server |
321
+ | `npm run clean` | Remove `dist/` |
322
+
323
+ ---
324
+
172
325
  ## License
173
326
 
174
327
  MIT
@@ -20,19 +20,19 @@ export function calculateScore(findings) {
20
20
  for (const f of findings) {
21
21
  switch (f.severity) {
22
22
  case "critical":
23
- score -= 20;
23
+ score -= 30;
24
24
  break;
25
25
  case "high":
26
- score -= 12;
26
+ score -= 18;
27
27
  break;
28
28
  case "medium":
29
- score -= 6;
29
+ score -= 10;
30
30
  break;
31
31
  case "low":
32
- score -= 3;
32
+ score -= 5;
33
33
  break;
34
34
  case "info":
35
- score -= 0;
35
+ score -= 2;
36
36
  break;
37
37
  }
38
38
  }
@@ -41,9 +41,9 @@ export function calculateScore(findings) {
41
41
  export function deriveVerdict(findings, score) {
42
42
  if (findings.some((f) => f.severity === "critical"))
43
43
  return "fail";
44
- if (score < 50)
44
+ if (score < 60)
45
45
  return "fail";
46
- if (findings.some((f) => f.severity === "high") || score < 75)
46
+ if (findings.some((f) => f.severity === "high") || findings.some((f) => f.severity === "medium") || score < 80)
47
47
  return "warning";
48
48
  return "pass";
49
49
  }
@@ -57,7 +57,7 @@ export function buildSummary(judge, findings, score, verdict) {
57
57
  summary += `Verdict: **${verdict.toUpperCase()}** | Score: **${score}/100**\n`;
58
58
  summary += `Findings: ${critical} critical, ${high} high, ${medium} medium, ${low} low\n\n`;
59
59
  if (findings.length === 0) {
60
- summary += "No issues detected. The code meets the expected standards for this domain.";
60
+ summary += "No pattern-based issues detected. Heuristic analysis has inherent limits — absence of findings does not guarantee the code is free of defects. Manual expert review is strongly recommended.";
61
61
  }
62
62
  else {
63
63
  summary += "Key issues:\n";
@@ -1 +1 @@
1
- {"version":3,"file":"shared.js","sourceRoot":"","sources":["../../src/evaluators/shared.ts"],"names":[],"mappings":"AASA,gFAAgF;AAChF,2EAA2E;AAC3E,iFAAiF;AAEjF;;GAEG;AACH,MAAM,UAAU,cAAc,CAAC,IAAY,EAAE,OAAe;IAC1D,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAC/B,MAAM,OAAO,GAAa,EAAE,CAAC;IAC7B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,IAAI,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAC3B,OAAO,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QACtB,CAAC;IACH,CAAC;IACD,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,gFAAgF;AAEhF,MAAM,UAAU,cAAc,CAAC,QAAmB;IAChD,IAAI,KAAK,GAAG,GAAG,CAAC;IAChB,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;QACzB,QAAQ,CAAC,CAAC,QAAQ,EAAE,CAAC;YACnB,KAAK,UAAU;gBACb,KAAK,IAAI,EAAE,CAAC;gBACZ,MAAM;YACR,KAAK,MAAM;gBACT,KAAK,IAAI,EAAE,CAAC;gBACZ,MAAM;YACR,KAAK,QAAQ;gBACX,KAAK,IAAI,CAAC,CAAC;gBACX,MAAM;YACR,KAAK,KAAK;gBACR,KAAK,IAAI,CAAC,CAAC;gBACX,MAAM;YACR,KAAK,MAAM;gBACT,KAAK,IAAI,CAAC,CAAC;gBACX,MAAM;QACV,CAAC;IACH,CAAC;IACD,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC,CAAC;AAC3C,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,QAAmB,EAAE,KAAa;IAC9D,IAAI,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,UAAU,CAAC;QAAE,OAAO,MAAM,CAAC;IACnE,IAAI,KAAK,GAAG,EAAE;QAAE,OAAO,MAAM,CAAC;IAC9B,IAAI,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,MAAM,CAAC,IAAI,KAAK,GAAG,EAAE;QAAE,OAAO,SAAS,CAAC;IAChF,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,gFAAgF;AAEhF,MAAM,UAAU,YAAY,CAC1B,KAAsB,EACtB,QAAmB,EACnB,KAAa,EACb,OAAgB;IAEhB,MAAM,QAAQ,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,UAAU,CAAC,CAAC,MAAM,CAAC;IAC1E,MAAM,IAAI,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,MAAM,CAAC,CAAC,MAAM,CAAC;IAClE,MAAM,MAAM,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,QAAQ,CAAC,CAAC,MAAM,CAAC;IACtE,MAAM,GAAG,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,KAAK,CAAC,CAAC,MAAM,CAAC;IAEhE,IAAI,OAAO,GAAG,KAAK,KAAK,CAAC,IAAI,Q
AAQ,KAAK,CAAC,MAAM,IAAI,CAAC;IACtD,OAAO,IAAI,cAAc,OAAO,CAAC,WAAW,EAAE,iBAAiB,KAAK,UAAU,CAAC;IAC/E,OAAO,IAAI,aAAa,QAAQ,cAAc,IAAI,UAAU,MAAM,YAAY,GAAG,UAAU,CAAC;IAE5F,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC1B,OAAO,IAAI,4EAA4E,CAAC;IAC1F,CAAC;SAAM,CAAC;QACN,OAAO,IAAI,eAAe,CAAC;QAC3B,KAAK,MAAM,CAAC,IAAI,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CACpC,CAAC,UAAU,EAAE,MAAM,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,CAC1C,EAAE,CAAC;YACF,OAAO,IAAI,MAAM,CAAC,CAAC,MAAM,MAAM,CAAC,CAAC,QAAQ,KAAK,CAAC,CAAC,KAAK,KAAK,CAAC,CAAC,WAAW,IAAI,CAAC;QAC9E,CAAC;IACH,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,MAAM,UAAU,oBAAoB,CAClC,WAA8B,EAC9B,OAAgB,EAChB,KAAa,EACb,aAAqB,EACrB,SAAiB;IAEjB,IAAI,OAAO,GAAG,8BAA8B,CAAC;IAC7C,OAAO,IAAI,sBAAsB,OAAO,CAAC,WAAW,EAAE,iBAAiB,KAAK,UAAU,CAAC;IACvF,OAAO,IAAI,4BAA4B,aAAa,2BAA2B,SAAS,MAAM,CAAC;IAC/F,OAAO,IAAI,iCAAiC,CAAC;IAE7C,KAAK,MAAM,CAAC,IAAI,WAAW,EAAE,CAAC;QAC5B,MAAM,IAAI,GACR,CAAC,CAAC,OAAO,KAAK,MAAM,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,KAAK,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC;QACpE,OAAO,IAAI,GAAG,IAAI,MAAM,CAAC,CAAC,SAAS,OAAO,CAAC,CAAC,OAAO,CAAC,WAAW,EAAE,KAAK,CAAC,CAAC,KAAK,WAAW,CAAC,CAAC,QAAQ,CAAC,MAAM,eAAe,CAAC;IAC3H,CAAC;IAED,OAAO,IAAI,WAAW,CAAC;IAEvB,6BAA6B;IAC7B,KAAK,MAAM,CAAC,IAAI,WAAW,EAAE,CAAC;QAC5B,OAAO,IAAI,CAAC,CAAC,OAAO,GAAG,MAAM,CAAC;IAChC,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,gFAAgF;AAEhF;;GAEG;AACH,MAAM,UAAU,uBAAuB,CAAC,OAAwB;IAC9D,IAAI,EAAE,GAAG,OAAO,CAAC,OAAO,CAAC;IAEzB,EAAE,IAAI,4BAA4B,CAAC;IAEnC,KAAK,MAAM,UAAU,IAAI,OAAO,CAAC,WAAW,EAAE,CAAC;QAC7C,KAAK,MAAM,OAAO,IAAI,UAAU,CAAC,QAAQ,EAAE,CAAC;YAC1C,MAAM,aAAa,GACjB,OAAO,CAAC,QAAQ,KAAK,UAAU;gBAC7B,CAAC,CAAC,aAAa;gBACf,CAAC,CAAC,OAAO,CAAC,QAAQ,KAAK,MAAM;oBAC7B,CAAC,CAAC,SAAS;oBACX,CAAC,CAAC,OAAO,CAAC,QAAQ,KAAK,QAAQ;wBAC/B,CAAC,CAAC,WAAW;wBACb,CAAC,CAAC,OAAO,CAAC,QAAQ,KAAK,KAAK;4BAC5B,CAAC,CAAC,QAAQ;4BACV,CAAC,CAAC,SAAS,CAAC;YAEhB,EAAE,IAAI,OAAO,aAAa,OAAO,OAAO,CAAC,MAAM,KAAK,OAAO,CAAC,KAAK,MAAM,CAAC;YACxE,EAAE,IAAI,GAAG,OAAO,CAAC,WAAW,MAAM,CAAC;YACnC,IAAI,OAAO,
CAAC,WAAW,IAAI,OAAO,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC1D,EAAE,IAAI,uBAAuB,OAAO,CAAC,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC;YACpE,CAAC;YACD,EAAE,IAAI,uBAAuB,OAAO,CAAC,cAAc,MAAM,CAAC;YAC1D,IAAI,OAAO,CAAC,SAAS,EAAE,CAAC;gBACtB,EAAE,IAAI,kBAAkB,OAAO,CAAC,SAAS,MAAM,CAAC;YAClD,CAAC;YACD,EAAE,IAAI,SAAS,CAAC;QAClB,CAAC;IACH,CAAC;IAED,OAAO,EAAE,CAAC;AACZ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,0BAA0B,CAAC,UAA2B;IACpE,IAAI,EAAE,GAAG,UAAU,CAAC,OAAO,GAAG,MAAM,CAAC;IAErC,EAAE,IAAI,0BAA0B,CAAC;IAEjC,KAAK,MAAM,OAAO,IAAI,UAAU,CAAC,QAAQ,EAAE,CAAC;QAC1C,MAAM,aAAa,GACjB,OAAO,CAAC,QAAQ,KAAK,UAAU;YAC7B,CAAC,CAAC,aAAa;YACf,CAAC,CAAC,OAAO,CAAC,QAAQ,KAAK,MAAM;gBAC7B,CAAC,CAAC,SAAS;gBACX,CAAC,CAAC,OAAO,CAAC,QAAQ,KAAK,QAAQ;oBAC/B,CAAC,CAAC,WAAW;oBACb,CAAC,CAAC,OAAO,CAAC,QAAQ,KAAK,KAAK;wBAC5B,CAAC,CAAC,QAAQ;wBACV,CAAC,CAAC,SAAS,CAAC;QAEhB,EAAE,IAAI,OAAO,aAAa,OAAO,OAAO,CAAC,MAAM,KAAK,OAAO,CAAC,KAAK,MAAM,CAAC;QACxE,EAAE,IAAI,GAAG,OAAO,CAAC,WAAW,MAAM,CAAC;QACnC,IAAI,OAAO,CAAC,WAAW,IAAI,OAAO,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC1D,EAAE,IAAI,uBAAuB,OAAO,CAAC,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC;QACpE,CAAC;QACD,EAAE,IAAI,uBAAuB,OAAO,CAAC,cAAc,MAAM,CAAC;QAC1D,IAAI,OAAO,CAAC,SAAS,EAAE,CAAC;YACtB,EAAE,IAAI,kBAAkB,OAAO,CAAC,SAAS,MAAM,CAAC;QAClD,CAAC;QACD,EAAE,IAAI,SAAS,CAAC;IAClB,CAAC;IAED,OAAO,EAAE,CAAC;AACZ,CAAC"}
1
+ {"version":3,"file":"shared.js","sourceRoot":"","sources":["../../src/evaluators/shared.ts"],"names":[],"mappings":"AASA,gFAAgF;AAChF,2EAA2E;AAC3E,iFAAiF;AAEjF;;GAEG;AACH,MAAM,UAAU,cAAc,CAAC,IAAY,EAAE,OAAe;IAC1D,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAC/B,MAAM,OAAO,GAAa,EAAE,CAAC;IAC7B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,IAAI,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAC3B,OAAO,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QACtB,CAAC;IACH,CAAC;IACD,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,gFAAgF;AAEhF,MAAM,UAAU,cAAc,CAAC,QAAmB;IAChD,IAAI,KAAK,GAAG,GAAG,CAAC;IAChB,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;QACzB,QAAQ,CAAC,CAAC,QAAQ,EAAE,CAAC;YACnB,KAAK,UAAU;gBACb,KAAK,IAAI,EAAE,CAAC;gBACZ,MAAM;YACR,KAAK,MAAM;gBACT,KAAK,IAAI,EAAE,CAAC;gBACZ,MAAM;YACR,KAAK,QAAQ;gBACX,KAAK,IAAI,EAAE,CAAC;gBACZ,MAAM;YACR,KAAK,KAAK;gBACR,KAAK,IAAI,CAAC,CAAC;gBACX,MAAM;YACR,KAAK,MAAM;gBACT,KAAK,IAAI,CAAC,CAAC;gBACX,MAAM;QACV,CAAC;IACH,CAAC;IACD,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC,CAAC;AAC3C,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,QAAmB,EAAE,KAAa;IAC9D,IAAI,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,UAAU,CAAC;QAAE,OAAO,MAAM,CAAC;IACnE,IAAI,KAAK,GAAG,EAAE;QAAE,OAAO,MAAM,CAAC;IAC9B,IAAI,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,MAAM,CAAC,IAAI,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,QAAQ,CAAC,IAAI,KAAK,GAAG,EAAE;QAAE,OAAO,SAAS,CAAC;IACjI,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,gFAAgF;AAEhF,MAAM,UAAU,YAAY,CAC1B,KAAsB,EACtB,QAAmB,EACnB,KAAa,EACb,OAAgB;IAEhB,MAAM,QAAQ,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,UAAU,CAAC,CAAC,MAAM,CAAC;IAC1E,MAAM,IAAI,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,MAAM,CAAC,CAAC,MAAM,CAAC;IAClE,MAAM,MAAM,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,QAAQ,CAAC,CAAC,MAAM,CAAC;IACtE,MAAM,GAAG,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CA
AC,QAAQ,KAAK,KAAK,CAAC,CAAC,MAAM,CAAC;IAEhE,IAAI,OAAO,GAAG,KAAK,KAAK,CAAC,IAAI,QAAQ,KAAK,CAAC,MAAM,IAAI,CAAC;IACtD,OAAO,IAAI,cAAc,OAAO,CAAC,WAAW,EAAE,iBAAiB,KAAK,UAAU,CAAC;IAC/E,OAAO,IAAI,aAAa,QAAQ,cAAc,IAAI,UAAU,MAAM,YAAY,GAAG,UAAU,CAAC;IAE5F,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC1B,OAAO,IAAI,8LAA8L,CAAC;IAC5M,CAAC;SAAM,CAAC;QACN,OAAO,IAAI,eAAe,CAAC;QAC3B,KAAK,MAAM,CAAC,IAAI,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CACpC,CAAC,UAAU,EAAE,MAAM,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,CAC1C,EAAE,CAAC;YACF,OAAO,IAAI,MAAM,CAAC,CAAC,MAAM,MAAM,CAAC,CAAC,QAAQ,KAAK,CAAC,CAAC,KAAK,KAAK,CAAC,CAAC,WAAW,IAAI,CAAC;QAC9E,CAAC;IACH,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,MAAM,UAAU,oBAAoB,CAClC,WAA8B,EAC9B,OAAgB,EAChB,KAAa,EACb,aAAqB,EACrB,SAAiB;IAEjB,IAAI,OAAO,GAAG,8BAA8B,CAAC;IAC7C,OAAO,IAAI,sBAAsB,OAAO,CAAC,WAAW,EAAE,iBAAiB,KAAK,UAAU,CAAC;IACvF,OAAO,IAAI,4BAA4B,aAAa,2BAA2B,SAAS,MAAM,CAAC;IAC/F,OAAO,IAAI,iCAAiC,CAAC;IAE7C,KAAK,MAAM,CAAC,IAAI,WAAW,EAAE,CAAC;QAC5B,MAAM,IAAI,GACR,CAAC,CAAC,OAAO,KAAK,MAAM,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,KAAK,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC;QACpE,OAAO,IAAI,GAAG,IAAI,MAAM,CAAC,CAAC,SAAS,OAAO,CAAC,CAAC,OAAO,CAAC,WAAW,EAAE,KAAK,CAAC,CAAC,KAAK,WAAW,CAAC,CAAC,QAAQ,CAAC,MAAM,eAAe,CAAC;IAC3H,CAAC;IAED,OAAO,IAAI,WAAW,CAAC;IAEvB,6BAA6B;IAC7B,KAAK,MAAM,CAAC,IAAI,WAAW,EAAE,CAAC;QAC5B,OAAO,IAAI,CAAC,CAAC,OAAO,GAAG,MAAM,CAAC;IAChC,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,gFAAgF;AAEhF;;GAEG;AACH,MAAM,UAAU,uBAAuB,CAAC,OAAwB;IAC9D,IAAI,EAAE,GAAG,OAAO,CAAC,OAAO,CAAC;IAEzB,EAAE,IAAI,4BAA4B,CAAC;IAEnC,KAAK,MAAM,UAAU,IAAI,OAAO,CAAC,WAAW,EAAE,CAAC;QAC7C,KAAK,MAAM,OAAO,IAAI,UAAU,CAAC,QAAQ,EAAE,CAAC;YAC1C,MAAM,aAAa,GACjB,OAAO,CAAC,QAAQ,KAAK,UAAU;gBAC7B,CAAC,CAAC,aAAa;gBACf,CAAC,CAAC,OAAO,CAAC,QAAQ,KAAK,MAAM;oBAC7B,CAAC,CAAC,SAAS;oBACX,CAAC,CAAC,OAAO,CAAC,QAAQ,KAAK,QAAQ;wBAC/B,CAAC,CAAC,WAAW;wBACb,CAAC,CAAC,OAAO,CAAC,QAAQ,KAAK,KAAK;4BAC5B,CAAC,CAAC,QAAQ;4BACV,CAAC,CAAC,SAAS,CAAC;YAEhB,EAAE,IAAI,OAAO,aAAa,OAAO,OAAO,CAAC,MAAM,KAAK,OAAO,CA
AC,KAAK,MAAM,CAAC;YACxE,EAAE,IAAI,GAAG,OAAO,CAAC,WAAW,MAAM,CAAC;YACnC,IAAI,OAAO,CAAC,WAAW,IAAI,OAAO,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC1D,EAAE,IAAI,uBAAuB,OAAO,CAAC,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC;YACpE,CAAC;YACD,EAAE,IAAI,uBAAuB,OAAO,CAAC,cAAc,MAAM,CAAC;YAC1D,IAAI,OAAO,CAAC,SAAS,EAAE,CAAC;gBACtB,EAAE,IAAI,kBAAkB,OAAO,CAAC,SAAS,MAAM,CAAC;YAClD,CAAC;YACD,EAAE,IAAI,SAAS,CAAC;QAClB,CAAC;IACH,CAAC;IAED,OAAO,EAAE,CAAC;AACZ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,0BAA0B,CAAC,UAA2B;IACpE,IAAI,EAAE,GAAG,UAAU,CAAC,OAAO,GAAG,MAAM,CAAC;IAErC,EAAE,IAAI,0BAA0B,CAAC;IAEjC,KAAK,MAAM,OAAO,IAAI,UAAU,CAAC,QAAQ,EAAE,CAAC;QAC1C,MAAM,aAAa,GACjB,OAAO,CAAC,QAAQ,KAAK,UAAU;YAC7B,CAAC,CAAC,aAAa;YACf,CAAC,CAAC,OAAO,CAAC,QAAQ,KAAK,MAAM;gBAC7B,CAAC,CAAC,SAAS;gBACX,CAAC,CAAC,OAAO,CAAC,QAAQ,KAAK,QAAQ;oBAC/B,CAAC,CAAC,WAAW;oBACb,CAAC,CAAC,OAAO,CAAC,QAAQ,KAAK,KAAK;wBAC5B,CAAC,CAAC,QAAQ;wBACV,CAAC,CAAC,SAAS,CAAC;QAEhB,EAAE,IAAI,OAAO,aAAa,OAAO,OAAO,CAAC,MAAM,KAAK,OAAO,CAAC,KAAK,MAAM,CAAC;QACxE,EAAE,IAAI,GAAG,OAAO,CAAC,WAAW,MAAM,CAAC;QACnC,IAAI,OAAO,CAAC,WAAW,IAAI,OAAO,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC1D,EAAE,IAAI,uBAAuB,OAAO,CAAC,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC;QACpE,CAAC;QACD,EAAE,IAAI,uBAAuB,OAAO,CAAC,cAAc,MAAM,CAAC;QAC1D,IAAI,OAAO,CAAC,SAAS,EAAE,CAAC;YACtB,EAAE,IAAI,kBAAkB,OAAO,CAAC,SAAS,MAAM,CAAC;QAClD,CAAC;QACD,EAAE,IAAI,SAAS,CAAC;IAClB,CAAC;IAED,OAAO,EAAE,CAAC;AACZ,CAAC"}
package/dist/index.js CHANGED
@@ -20,7 +20,7 @@ import { evaluateWithJudge, evaluateWithTribunal, formatVerdictAsMarkdown, forma
20
20
  // ─── Create MCP Server ──────────────────────────────────────────────────────
21
21
  const server = new McpServer({
22
22
  name: "judges",
23
- version: "1.0.0",
23
+ version: "1.2.0",
24
24
  });
25
25
  // ─── Tool: get_judges ────────────────────────────────────────────────────────
26
26
  server.tool("get_judges", "List all available judges on the Agent Tribunal panel, including their areas of expertise and what they evaluate.", {}, async () => {
@@ -1 +1 @@
1
- {"version":3,"file":"accessibility.d.ts","sourceRoot":"","sources":["../../src/judges/accessibility.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,kBAAkB,EAAE,eA2BhC,CAAC"}
1
+ {"version":3,"file":"accessibility.d.ts","sourceRoot":"","sources":["../../src/judges/accessibility.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,kBAAkB,EAAE,eAiChC,CAAC"}
@@ -23,6 +23,12 @@ RULES FOR YOUR EVALUATION:
23
23
  - Reference specific WCAG 2.2 success criteria (e.g., "1.1.1 Non-text Content", "2.1.1 Keyboard").
24
24
  - Indicate the WCAG conformance level impacted (A, AA, or AAA).
25
25
  - Recommend fixes with code examples using proper ARIA patterns.
26
- - Score from 0-100 where 100 means fully WCAG 2.2 AA compliant.`,
26
+ - Score from 0-100 where 100 means fully WCAG 2.2 AA compliant.
27
+
28
+ ADVERSARIAL MANDATE:
29
+ - Your role is adversarial: assume the code has accessibility defects and actively hunt for them. Do not give the benefit of the doubt.
30
+ - Never praise or compliment the code. Report only problems, risks, and deficiencies.
31
+ - If you are uncertain whether something is an issue, flag it — false positives are preferred over missed barriers.
32
+ - Absence of findings does not mean the code is accessible. It means your analysis reached its limits. State this explicitly.`,
27
33
  };
28
34
  //# sourceMappingURL=accessibility.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"accessibility.js","sourceRoot":"","sources":["../../src/judges/accessibility.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,kBAAkB,GAAoB;IACjD,EAAE,EAAE,eAAe;IACnB,IAAI,EAAE,qBAAqB;IAC3B,MAAM,EAAE,sBAAsB;IAC9B,WAAW,EACT,gKAAgK;IAClK,UAAU,EAAE,MAAM;IAClB,YAAY,EAAE;;;;;;;;;;;;;;;;;;;gEAmBgD;CAC/D,CAAC"}
1
+ {"version":3,"file":"accessibility.js","sourceRoot":"","sources":["../../src/judges/accessibility.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,kBAAkB,GAAoB;IACjD,EAAE,EAAE,eAAe;IACnB,IAAI,EAAE,qBAAqB;IAC3B,MAAM,EAAE,sBAAsB;IAC9B,WAAW,EACT,gKAAgK;IAClK,UAAU,EAAE,MAAM;IAClB,YAAY,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;8HAyB8G;CAC7H,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"api-design.d.ts","sourceRoot":"","sources":["../../src/judges/api-design.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,cAAc,EAAE,eA6B5B,CAAC"}
1
+ {"version":3,"file":"api-design.d.ts","sourceRoot":"","sources":["../../src/judges/api-design.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,cAAc,EAAE,eAmC5B,CAAC"}
@@ -25,6 +25,12 @@ RULES FOR YOUR EVALUATION:
25
25
  - Reference REST API design guides (Google, Microsoft, Zalando API guidelines).
26
26
  - Show corrected URL structures and response schemas in examples.
27
27
  - Consider both API producer and consumer perspectives.
28
- - Score from 0-100 where 100 means exemplary API design.`,
28
+ - Score from 0-100 where 100 means exemplary API design.
29
+
30
+ ADVERSARIAL MANDATE:
31
+ - Your role is adversarial: assume the API has design flaws and actively hunt for them. Do not give the benefit of the doubt.
32
+ - Never praise or compliment the code. Report only problems, risks, and deficiencies.
33
+ - If you are uncertain whether something is an issue, flag it — false positives are preferred over missed design flaws.
34
+ - Absence of findings does not mean the API is well-designed. It means your analysis reached its limits. State this explicitly.`,
29
35
  };
30
36
  //# sourceMappingURL=api-design.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"api-design.js","sourceRoot":"","sources":["../../src/judges/api-design.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,cAAc,GAAoB;IAC7C,EAAE,EAAE,YAAY;IAChB,IAAI,EAAE,kBAAkB;IACxB,MAAM,EAAE,wBAAwB;IAChC,WAAW,EACT,6KAA6K;IAC/K,UAAU,EAAE,KAAK;IACjB,YAAY,EAAE;;;;;;;;;;;;;;;;;;;;;yDAqByC;CACxD,CAAC"}
1
+ {"version":3,"file":"api-design.js","sourceRoot":"","sources":["../../src/judges/api-design.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,cAAc,GAAoB;IAC7C,EAAE,EAAE,YAAY;IAChB,IAAI,EAAE,kBAAkB;IACxB,MAAM,EAAE,wBAAwB;IAChC,WAAW,EACT,6KAA6K;IAC/K,UAAU,EAAE,KAAK;IACjB,YAAY,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;gIA2BgH;CAC/H,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"cloud-readiness.d.ts","sourceRoot":"","sources":["../../src/judges/cloud-readiness.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,mBAAmB,EAAE,eA2BjC,CAAC"}
1
+ {"version":3,"file":"cloud-readiness.d.ts","sourceRoot":"","sources":["../../src/judges/cloud-readiness.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,mBAAmB,EAAE,eAiCjC,CAAC"}
@@ -23,6 +23,12 @@ RULES FOR YOUR EVALUATION:
23
23
  - Reference the 12-Factor App methodology, CNCF patterns, and Well-Architected Framework principles.
24
24
  - Distinguish between "can run in the cloud" and "cloud-native."
25
25
  - Recommend specific services or patterns (e.g., "Use Azure Key Vault instead of .env files in production").
26
- - Score from 0-100 where 100 means fully cloud-native.`,
26
+ - Score from 0-100 where 100 means fully cloud-native.
27
+
28
+ ADVERSARIAL MANDATE:
29
+ - Your role is adversarial: assume the code is not cloud-ready and actively hunt for problems. Do not give the benefit of the doubt.
30
+ - Never praise or compliment the code. Report only problems, risks, and deficiencies.
31
+ - If you are uncertain whether something is an issue, flag it — false positives are preferred over missed cloud-readiness gaps.
32
+ - Absence of findings does not mean the code is cloud-native. It means your analysis reached its limits. State this explicitly.`,
27
33
  };
28
34
  //# sourceMappingURL=cloud-readiness.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"cloud-readiness.js","sourceRoot":"","sources":["../../src/judges/cloud-readiness.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,mBAAmB,GAAoB;IAClD,EAAE,EAAE,iBAAiB;IACrB,IAAI,EAAE,uBAAuB;IAC7B,MAAM,EAAE,oCAAoC;IAC5C,WAAW,EACT,4JAA4J;IAC9J,UAAU,EAAE,OAAO;IACnB,YAAY,EAAE;;;;;;;;;;;;;;;;;;;uDAmBuC;CACtD,CAAC"}
1
+ {"version":3,"file":"cloud-readiness.js","sourceRoot":"","sources":["../../src/judges/cloud-readiness.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,mBAAmB,GAAoB;IAClD,EAAE,EAAE,iBAAiB;IACrB,IAAI,EAAE,uBAAuB;IAC7B,MAAM,EAAE,oCAAoC;IAC5C,WAAW,EACT,4JAA4J;IAC9J,UAAU,EAAE,OAAO;IACnB,YAAY,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;gIAyBgH;CAC/H,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"compliance.d.ts","sourceRoot":"","sources":["../../src/judges/compliance.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,eAAe,EAAE,eA2B7B,CAAC"}
1
+ {"version":3,"file":"compliance.d.ts","sourceRoot":"","sources":["../../src/judges/compliance.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,eAAe,EAAE,eAiC7B,CAAC"}
@@ -23,6 +23,12 @@ RULES FOR YOUR EVALUATION:
23
23
  - Reference specific regulations and standards (SOC 2 CC6.1, PCI-DSS Req 3.4, GDPR Art. 17).
24
24
  - Distinguish between "must comply" (legal obligation) and "should comply" (best practice).
25
25
  - Recommend both code changes and process changes where applicable.
26
- - Score from 0-100 where 100 means fully compliant.`,
26
+ - Score from 0-100 where 100 means fully compliant.
27
+
28
+ ADVERSARIAL MANDATE:
29
+ - Your role is adversarial: assume the code has compliance gaps and actively hunt for them. Do not give the benefit of the doubt.
30
+ - Never praise or compliment the code. Report only problems, risks, and deficiencies.
31
+ - If you are uncertain whether something is an issue, flag it — false positives are preferred over missed compliance violations.
32
+ - Absence of findings does not mean the code is compliant. It means your analysis reached its limits. State this explicitly.`,
27
33
  };
28
34
  //# sourceMappingURL=compliance.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"compliance.js","sourceRoot":"","sources":["../../src/judges/compliance.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,eAAe,GAAoB;IAC9C,EAAE,EAAE,YAAY;IAChB,IAAI,EAAE,kBAAkB;IACxB,MAAM,EAAE,iCAAiC;IACzC,WAAW,EACT,6JAA6J;IAC/J,UAAU,EAAE,MAAM;IAClB,YAAY,EAAE;;;;;;;;;;;;;;;;;;;oDAmBoC;CACnD,CAAC"}
1
+ {"version":3,"file":"compliance.js","sourceRoot":"","sources":["../../src/judges/compliance.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,eAAe,GAAoB;IAC9C,EAAE,EAAE,YAAY;IAChB,IAAI,EAAE,kBAAkB;IACxB,MAAM,EAAE,iCAAiC;IACzC,WAAW,EACT,6JAA6J;IAC/J,UAAU,EAAE,MAAM;IAClB,YAAY,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;6HAyB6G;CAC5H,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"concurrency.d.ts","sourceRoot":"","sources":["../../src/judges/concurrency.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,gBAAgB,EAAE,eA6B9B,CAAC"}
1
+ {"version":3,"file":"concurrency.d.ts","sourceRoot":"","sources":["../../src/judges/concurrency.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,gBAAgB,EAAE,eAmC9B,CAAC"}
@@ -25,6 +25,12 @@ RULES FOR YOUR EVALUATION:
25
25
  - Describe the exact sequence of events that could trigger a race condition or deadlock.
26
26
  - Recommend specific concurrency primitives or patterns for each issue.
27
27
  - Reference Java Concurrency in Practice, Go concurrency patterns, or Rust ownership model as applicable.
28
- - Score from 0-100 where 100 means thread-safe and correctly concurrent.`,
28
+ - Score from 0-100 where 100 means thread-safe and correctly concurrent.
29
+
30
+ ADVERSARIAL MANDATE:
31
+ - Your role is adversarial: assume the code has concurrency bugs and actively hunt for them. Do not give the benefit of the doubt.
32
+ - Never praise or compliment the code. Report only problems, risks, and deficiencies.
33
+ - If you are uncertain whether something is an issue, flag it — false positives are preferred over missed race conditions.
34
+ - Absence of findings does not mean the code is thread-safe. It means your analysis reached its limits. State this explicitly.`,
29
35
  };
30
36
  //# sourceMappingURL=concurrency.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"concurrency.js","sourceRoot":"","sources":["../../src/judges/concurrency.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,gBAAgB,GAAoB;IAC/C,EAAE,EAAE,aAAa;IACjB,IAAI,EAAE,mBAAmB;IACzB,MAAM,EAAE,6BAA6B;IACrC,WAAW,EACT,uIAAuI;IACzI,UAAU,EAAE,MAAM;IAClB,YAAY,EAAE;;;;;;;;;;;;;;;;;;;;;yEAqByD;CACxE,CAAC"}
1
+ {"version":3,"file":"concurrency.js","sourceRoot":"","sources":["../../src/judges/concurrency.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,gBAAgB,GAAoB;IAC/C,EAAE,EAAE,aAAa;IACjB,IAAI,EAAE,mBAAmB;IACzB,MAAM,EAAE,6BAA6B;IACrC,WAAW,EACT,uIAAuI;IACzI,UAAU,EAAE,MAAM;IAClB,YAAY,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;+HA2B+G;CAC9H,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"cost-effectiveness.d.ts","sourceRoot":"","sources":["../../src/judges/cost-effectiveness.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,sBAAsB,EAAE,eA0BpC,CAAC"}
1
+ {"version":3,"file":"cost-effectiveness.d.ts","sourceRoot":"","sources":["../../src/judges/cost-effectiveness.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,sBAAsB,EAAE,eAgCpC,CAAC"}
@@ -22,6 +22,12 @@ RULES FOR YOUR EVALUATION:
22
22
  - Quantify impact where possible (e.g. "This N+1 pattern will generate ~1000 extra queries per request at scale").
23
23
  - Recommend specific optimizations with estimated savings.
24
24
  - Consider both runtime cost and developer productivity cost.
25
- - Score from 0-100 where 100 means optimally cost-effective.`,
25
+ - Score from 0-100 where 100 means optimally cost-effective.
26
+
27
+ ADVERSARIAL MANDATE:
28
+ - Your role is adversarial: assume the code wastes resources and actively hunt for inefficiencies. Do not give the benefit of the doubt.
29
+ - Never praise or compliment the code. Report only problems, risks, and deficiencies.
30
+ - If you are uncertain whether something is an issue, flag it — false positives are preferred over missed cost waste.
31
+ - Absence of findings does not mean the code is cost-effective. It means your analysis reached its limits. State this explicitly.`,
26
32
  };
27
33
  //# sourceMappingURL=cost-effectiveness.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"cost-effectiveness.js","sourceRoot":"","sources":["../../src/judges/cost-effectiveness.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,sBAAsB,GAAoB;IACrD,EAAE,EAAE,oBAAoB;IACxB,IAAI,EAAE,0BAA0B;IAChC,MAAM,EAAE,yCAAyC;IACjD,WAAW,EACT,sJAAsJ;IACxJ,UAAU,EAAE,MAAM;IAClB,YAAY,EAAE;;;;;;;;;;;;;;;;;;6DAkB6C;CAC5D,CAAC"}
1
+ {"version":3,"file":"cost-effectiveness.js","sourceRoot":"","sources":["../../src/judges/cost-effectiveness.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,sBAAsB,GAAoB;IACrD,EAAE,EAAE,oBAAoB;IACxB,IAAI,EAAE,0BAA0B;IAChC,MAAM,EAAE,yCAAyC;IACjD,WAAW,EACT,sJAAsJ;IACxJ,UAAU,EAAE,MAAM;IAClB,YAAY,EAAE;;;;;;;;;;;;;;;;;;;;;;;;kIAwBkH;CACjI,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"cybersecurity.d.ts","sourceRoot":"","sources":["../../src/judges/cybersecurity.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,kBAAkB,EAAE,eA0BhC,CAAC"}
1
+ {"version":3,"file":"cybersecurity.d.ts","sourceRoot":"","sources":["../../src/judges/cybersecurity.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,kBAAkB,EAAE,eAgChC,CAAC"}
@@ -22,6 +22,12 @@ RULES FOR YOUR EVALUATION:
22
22
  - Think like an attacker: describe how each vulnerability could be exploited.
23
23
  - Provide concrete remediation steps with code examples where possible.
24
24
  - Reference OWASP, CWE IDs, and CVE IDs where applicable.
25
- - Score from 0-100 where 100 means no exploitable vulnerabilities found.`,
25
+ - Score from 0-100 where 100 means no exploitable vulnerabilities found.
26
+
27
+ ADVERSARIAL MANDATE:
28
+ - Your role is adversarial: assume the code is vulnerable and actively hunt for exploits. Do not give the benefit of the doubt.
29
+ - Never praise or compliment the code. Report only problems, risks, and deficiencies.
30
+ - If you are uncertain whether something is an issue, flag it — false positives are preferred over missed vulnerabilities.
31
+ - Absence of findings does not mean the code is secure. It means your analysis reached its limits. State this explicitly.`,
26
32
  };
27
33
  //# sourceMappingURL=cybersecurity.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"cybersecurity.js","sourceRoot":"","sources":["../../src/judges/cybersecurity.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,kBAAkB,GAAoB;IACjD,EAAE,EAAE,eAAe;IACnB,IAAI,EAAE,qBAAqB;IAC3B,MAAM,EAAE,gCAAgC;IACxC,WAAW,EACT,0KAA0K;IAC5K,UAAU,EAAE,OAAO;IACnB,YAAY,EAAE;;;;;;;;;;;;;;;;;;yEAkByD;CACxE,CAAC"}
1
+ {"version":3,"file":"cybersecurity.js","sourceRoot":"","sources":["../../src/judges/cybersecurity.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,kBAAkB,GAAoB;IACjD,EAAE,EAAE,eAAe;IACnB,IAAI,EAAE,qBAAqB;IAC3B,MAAM,EAAE,gCAAgC;IACxC,WAAW,EACT,0KAA0K;IAC5K,UAAU,EAAE,OAAO;IACnB,YAAY,EAAE;;;;;;;;;;;;;;;;;;;;;;;;0HAwB0G;CACzH,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"data-security.d.ts","sourceRoot":"","sources":["../../src/judges/data-security.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,iBAAiB,EAAE,eAwB/B,CAAC"}
1
+ {"version":3,"file":"data-security.d.ts","sourceRoot":"","sources":["../../src/judges/data-security.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,iBAAiB,EAAE,eA8B/B,CAAC"}
@@ -20,6 +20,12 @@ RULES FOR YOUR EVALUATION:
20
20
  - Be specific: cite exact lines, variable names, or patterns.
21
21
  - Always recommend a concrete fix, not just "fix this."
22
22
  - Reference standards where applicable (OWASP, NIST 800-53, GDPR Article numbers).
23
- - Score from 0-100 where 100 means fully compliant with no findings.`,
23
+ - Score from 0-100 where 100 means fully compliant with no findings.
24
+
25
+ ADVERSARIAL MANDATE:
26
+ - Your role is adversarial: assume the code leaks or mishandles data and actively hunt for exposures. Do not give the benefit of the doubt.
27
+ - Never praise or compliment the code. Report only problems, risks, and deficiencies.
28
+ - If you are uncertain whether something is an issue, flag it — false positives are preferred over missed data breaches.
29
+ - Absence of findings does not mean data is secure. It means your analysis reached its limits. State this explicitly.`,
24
30
  };
25
31
  //# sourceMappingURL=data-security.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"data-security.js","sourceRoot":"","sources":["../../src/judges/data-security.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,iBAAiB,GAAoB;IAChD,EAAE,EAAE,eAAe;IACnB,IAAI,EAAE,qBAAqB;IAC3B,MAAM,EAAE,yBAAyB;IACjC,WAAW,EACT,8LAA8L;IAChM,UAAU,EAAE,MAAM;IAClB,YAAY,EAAE;;;;;;;;;;;;;;;;qEAgBqD;CACpE,CAAC"}
1
+ {"version":3,"file":"data-security.js","sourceRoot":"","sources":["../../src/judges/data-security.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,iBAAiB,GAAoB;IAChD,EAAE,EAAE,eAAe;IACnB,IAAI,EAAE,qBAAqB;IAC3B,MAAM,EAAE,yBAAyB;IACjC,WAAW,EACT,8LAA8L;IAChM,UAAU,EAAE,MAAM;IAClB,YAAY,EAAE;;;;;;;;;;;;;;;;;;;;;;sHAsBsG;CACrH,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"dependency-health.d.ts","sourceRoot":"","sources":["../../src/judges/dependency-health.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,qBAAqB,EAAE,eA6BnC,CAAC"}
1
+ {"version":3,"file":"dependency-health.d.ts","sourceRoot":"","sources":["../../src/judges/dependency-health.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,qBAAqB,EAAE,eAmCnC,CAAC"}
@@ -25,6 +25,12 @@ RULES FOR YOUR EVALUATION:
25
25
  - Reference OWASP Dependency-Check, OpenSSF Scorecard, and supply chain security best practices.
26
26
  - Recommend specific alternatives for problematic dependencies.
27
27
  - Distinguish between direct dependency risk and transitive dependency risk.
28
- - Score from 0-100 where 100 means healthy, secure dependency tree.`,
28
+ - Score from 0-100 where 100 means healthy, secure dependency tree.
29
+
30
+ ADVERSARIAL MANDATE:
31
+ - Your role is adversarial: assume the dependency tree has risks and actively hunt for them. Do not give the benefit of the doubt.
32
+ - Never praise or compliment the code. Report only problems, risks, and deficiencies.
33
+ - If you are uncertain whether something is an issue, flag it — false positives are preferred over missed supply chain risks.
34
+ - Absence of findings does not mean dependencies are healthy. It means your analysis reached its limits. State this explicitly.`,
29
35
  };
30
36
  //# sourceMappingURL=dependency-health.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"dependency-health.js","sourceRoot":"","sources":["../../src/judges/dependency-health.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,qBAAqB,GAAoB;IACpD,EAAE,EAAE,mBAAmB;IACvB,IAAI,EAAE,yBAAyB;IAC/B,MAAM,EAAE,6BAA6B;IACrC,WAAW,EACT,uJAAuJ;IACzJ,UAAU,EAAE,MAAM;IAClB,YAAY,EAAE;;;;;;;;;;;;;;;;;;;;;oEAqBoD;CACnE,CAAC"}
1
+ {"version":3,"file":"dependency-health.js","sourceRoot":"","sources":["../../src/judges/dependency-health.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,qBAAqB,GAAoB;IACpD,EAAE,EAAE,mBAAmB;IACvB,IAAI,EAAE,yBAAyB;IAC/B,MAAM,EAAE,6BAA6B;IACrC,WAAW,EACT,uJAAuJ;IACzJ,UAAU,EAAE,MAAM;IAClB,YAAY,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;gIA2BgH;CAC/H,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"documentation.d.ts","sourceRoot":"","sources":["../../src/judges/documentation.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,kBAAkB,EAAE,eA6BhC,CAAC"}
1
+ {"version":3,"file":"documentation.d.ts","sourceRoot":"","sources":["../../src/judges/documentation.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,kBAAkB,EAAE,eAmChC,CAAC"}
@@ -25,6 +25,12 @@ RULES FOR YOUR EVALUATION:
25
25
  - Reference documentation best practices (Diátaxis framework, Google developer documentation style guide).
26
26
  - Provide example documentation snippets in recommendations.
27
27
  - Evaluate from the perspective of a new developer encountering the code for the first time.
28
- - Score from 0-100 where 100 means exemplary documentation.`,
28
+ - Score from 0-100 where 100 means exemplary documentation.
29
+
30
+ ADVERSARIAL MANDATE:
31
+ - Your role is adversarial: assume the documentation is inadequate and actively hunt for gaps. Do not give the benefit of the doubt.
32
+ - Never praise or compliment the code. Report only problems, risks, and deficiencies.
33
+ - If you are uncertain whether something is an issue, flag it — false positives are preferred over missed documentation gaps.
34
+ - Absence of findings does not mean the documentation is good. It means your analysis reached its limits. State this explicitly.`,
29
35
  };
30
36
  //# sourceMappingURL=documentation.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"documentation.js","sourceRoot":"","sources":["../../src/judges/documentation.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,kBAAkB,GAAoB;IACjD,EAAE,EAAE,eAAe;IACnB,IAAI,EAAE,qBAAqB;IAC3B,MAAM,EAAE,sCAAsC;IAC9C,WAAW,EACT,6JAA6J;IAC/J,UAAU,EAAE,KAAK;IACjB,YAAY,EAAE;;;;;;;;;;;;;;;;;;;;;4DAqB4C;CAC3D,CAAC"}
1
+ {"version":3,"file":"documentation.js","sourceRoot":"","sources":["../../src/judges/documentation.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,kBAAkB,GAAoB;IACjD,EAAE,EAAE,eAAe;IACnB,IAAI,EAAE,qBAAqB;IAC3B,MAAM,EAAE,sCAAsC;IAC9C,WAAW,EACT,6JAA6J;IAC/J,UAAU,EAAE,KAAK;IACjB,YAAY,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;iIA2BiH;CAChI,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"ethics-bias.d.ts","sourceRoot":"","sources":["../../src/judges/ethics-bias.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,eAAe,EAAE,eA6B7B,CAAC"}
1
+ {"version":3,"file":"ethics-bias.d.ts","sourceRoot":"","sources":["../../src/judges/ethics-bias.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,eAAe,EAAE,eAmC7B,CAAC"}
@@ -25,6 +25,12 @@ RULES FOR YOUR EVALUATION:
25
25
  - Reference the EU AI Act, NIST AI RMF (AI 100-1), IEEE Ethically Aligned Design.
26
26
  - Recommend specific fairness tools (Fairlearn, AI Fairness 360, What-If Tool).
27
27
  - Evaluate proportionally: not all code involves AI/ML — score based on relevance.
28
- - Score from 0-100 where 100 means fully ethical and bias-aware.`,
28
+ - Score from 0-100 where 100 means fully ethical and bias-aware.
29
+
30
+ ADVERSARIAL MANDATE:
31
+ - Your role is adversarial: assume the code has ethical risks or bias and actively hunt for them. Do not give the benefit of the doubt.
32
+ - Never praise or compliment the code. Report only problems, risks, and deficiencies.
33
+ - If you are uncertain whether something is an issue, flag it — false positives are preferred over missed ethical violations.
34
+ - Absence of findings does not mean the code is ethical. It means your analysis reached its limits. State this explicitly.`,
29
35
  };
30
36
  //# sourceMappingURL=ethics-bias.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"ethics-bias.js","sourceRoot":"","sources":["../../src/judges/ethics-bias.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,eAAe,GAAoB;IAC9C,EAAE,EAAE,aAAa;IACjB,IAAI,EAAE,qBAAqB;IAC3B,MAAM,EAAE,yBAAyB;IACjC,WAAW,EACT,0JAA0J;IAC5J,UAAU,EAAE,QAAQ;IACpB,YAAY,EAAE;;;;;;;;;;;;;;;;;;;;;iEAqBiD;CAChE,CAAC"}
1
+ {"version":3,"file":"ethics-bias.js","sourceRoot":"","sources":["../../src/judges/ethics-bias.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,eAAe,GAAoB;IAC9C,EAAE,EAAE,aAAa;IACjB,IAAI,EAAE,qBAAqB;IAC3B,MAAM,EAAE,yBAAyB;IACjC,WAAW,EACT,0JAA0J;IAC5J,UAAU,EAAE,QAAQ;IACpB,YAAY,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;2HA2B2G;CAC1H,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"internationalization.d.ts","sourceRoot":"","sources":["../../src/judges/internationalization.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,yBAAyB,EAAE,eA2BvC,CAAC"}
1
+ {"version":3,"file":"internationalization.d.ts","sourceRoot":"","sources":["../../src/judges/internationalization.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,yBAAyB,EAAE,eAiCvC,CAAC"}
@@ -23,6 +23,12 @@ RULES FOR YOUR EVALUATION:
23
23
  - Reference Unicode standards, CLDR, W3C i18n best practices.
24
24
  - Show corrected code using Intl APIs, ICU message format, or i18n library patterns.
25
25
  - Consider the impact on languages with different scripts (CJK, Arabic, Thai, Devanagari).
26
- - Score from 0-100 where 100 means fully internationalization-ready.`,
26
+ - Score from 0-100 where 100 means fully internationalization-ready.
27
+
28
+ ADVERSARIAL MANDATE:
29
+ - Your role is adversarial: assume the code will break in non-English locales and actively hunt for i18n defects. Do not give the benefit of the doubt.
30
+ - Never praise or compliment the code. Report only problems, risks, and deficiencies.
31
+ - If you are uncertain whether something is an issue, flag it — false positives are preferred over missed i18n problems.
32
+ - Absence of findings does not mean the code is internationalization-ready. It means your analysis reached its limits. State this explicitly.`,
27
33
  };
28
34
  //# sourceMappingURL=internationalization.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"internationalization.js","sourceRoot":"","sources":["../../src/judges/internationalization.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,yBAAyB,GAAoB;IACxD,EAAE,EAAE,sBAAsB;IAC1B,IAAI,EAAE,4BAA4B;IAClC,MAAM,EAAE,qBAAqB;IAC7B,WAAW,EACT,oJAAoJ;IACtJ,UAAU,EAAE,MAAM;IAClB,YAAY,EAAE;;;;;;;;;;;;;;;;;;;qEAmBqD;CACpE,CAAC"}
1
+ {"version":3,"file":"internationalization.js","sourceRoot":"","sources":["../../src/judges/internationalization.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,yBAAyB,GAAoB;IACxD,EAAE,EAAE,sBAAsB;IAC1B,IAAI,EAAE,4BAA4B;IAClC,MAAM,EAAE,qBAAqB;IAC7B,WAAW,EACT,oJAAoJ;IACtJ,UAAU,EAAE,MAAM;IAClB,YAAY,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;8IAyB8H;CAC7I,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"observability.d.ts","sourceRoot":"","sources":["../../src/judges/observability.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,kBAAkB,EAAE,eA2BhC,CAAC"}
1
+ {"version":3,"file":"observability.d.ts","sourceRoot":"","sources":["../../src/judges/observability.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,kBAAkB,EAAE,eAiChC,CAAC"}
@@ -23,6 +23,12 @@ RULES FOR YOUR EVALUATION:
23
23
  - Reference OpenTelemetry semantic conventions and Prometheus best practices.
24
24
  - Recommend specific instrumentation code snippets.
25
25
  - Evaluate whether the observability data would be useful during a production incident.
26
- - Score from 0-100 where 100 means fully observable and debuggable in production.`,
26
+ - Score from 0-100 where 100 means fully observable and debuggable in production.
27
+
28
+ ADVERSARIAL MANDATE:
29
+ - Your role is adversarial: assume the code is unobservable and will be impossible to debug in production. Actively hunt for monitoring gaps. Do not give the benefit of the doubt.
30
+ - Never praise or compliment the code. Report only problems, risks, and deficiencies.
31
+ - If you are uncertain whether something is an issue, flag it — false positives are preferred over missed observability gaps.
32
+ - Absence of findings does not mean the code is observable. It means your analysis reached its limits. State this explicitly.`,
27
33
  };
28
34
  //# sourceMappingURL=observability.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"observability.js","sourceRoot":"","sources":["../../src/judges/observability.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,kBAAkB,GAAoB;IACjD,EAAE,EAAE,eAAe;IACnB,IAAI,EAAE,qBAAqB;IAC3B,MAAM,EAAE,0BAA0B;IAClC,WAAW,EACT,8JAA8J;IAChK,UAAU,EAAE,KAAK;IACjB,YAAY,EAAE;;;;;;;;;;;;;;;;;;;kFAmBkE;CACjF,CAAC"}
1
+ {"version":3,"file":"observability.js","sourceRoot":"","sources":["../../src/judges/observability.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,kBAAkB,GAAoB;IACjD,EAAE,EAAE,eAAe;IACnB,IAAI,EAAE,qBAAqB;IAC3B,MAAM,EAAE,0BAA0B;IAClC,WAAW,EACT,8JAA8J;IAChK,UAAU,EAAE,KAAK;IACjB,YAAY,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;8HAyB8G;CAC7H,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"performance.d.ts","sourceRoot":"","sources":["../../src/judges/performance.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,gBAAgB,EAAE,eA6B9B,CAAC"}
1
+ {"version":3,"file":"performance.d.ts","sourceRoot":"","sources":["../../src/judges/performance.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,gBAAgB,EAAE,eAmC9B,CAAC"}
@@ -25,6 +25,12 @@ RULES FOR YOUR EVALUATION:
25
25
  - Quantify impact where possible (e.g., "This creates ~10,000 objects per request that will pressure GC").
26
26
  - Recommend specific optimizations with before/after code examples.
27
27
  - Distinguish between premature optimization and genuine hot-path issues.
28
- - Score from 0-100 where 100 means optimally performant.`,
28
+ - Score from 0-100 where 100 means optimally performant.
29
+
30
+ ADVERSARIAL MANDATE:
31
+ - Your role is adversarial: assume the code has performance problems and actively hunt for bottlenecks. Do not give the benefit of the doubt.
32
+ - Never praise or compliment the code. Report only problems, risks, and deficiencies.
33
+ - If you are uncertain whether something is an issue, flag it — false positives are preferred over missed performance regressions.
34
+ - Absence of findings does not mean the code is performant. It means your analysis reached its limits. State this explicitly.`,
29
35
  };
30
36
  //# sourceMappingURL=performance.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"performance.js","sourceRoot":"","sources":["../../src/judges/performance.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,gBAAgB,GAAoB;IAC/C,EAAE,EAAE,aAAa;IACjB,IAAI,EAAE,mBAAmB;IACzB,MAAM,EAAE,qBAAqB;IAC7B,WAAW,EACT,kKAAkK;IACpK,UAAU,EAAE,MAAM;IAClB,YAAY,EAAE;;;;;;;;;;;;;;;;;;;;;yDAqByC;CACxD,CAAC"}
1
+ {"version":3,"file":"performance.js","sourceRoot":"","sources":["../../src/judges/performance.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,gBAAgB,GAAoB;IAC/C,EAAE,EAAE,aAAa;IACjB,IAAI,EAAE,mBAAmB;IACzB,MAAM,EAAE,qBAAqB;IAC7B,WAAW,EACT,kKAAkK;IACpK,UAAU,EAAE,MAAM;IAClB,YAAY,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;8HA2B8G;CAC7H,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"reliability.d.ts","sourceRoot":"","sources":["../../src/judges/reliability.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,gBAAgB,EAAE,eA6B9B,CAAC"}
1
+ {"version":3,"file":"reliability.d.ts","sourceRoot":"","sources":["../../src/judges/reliability.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,gBAAgB,EAAE,eAmC9B,CAAC"}
@@ -25,6 +25,12 @@ RULES FOR YOUR EVALUATION:
25
25
  - Reference patterns from "Release It!" (Michael Nygard) and the SRE book (Google).
26
26
  - Describe failure scenarios: "If X fails, then Y happens, causing Z impact."
27
27
  - Recommend specific resilience libraries or patterns with configuration examples.
28
- - Score from 0-100 where 100 means highly resilient and fault-tolerant.`,
28
+ - Score from 0-100 where 100 means highly resilient and fault-tolerant.
29
+
30
+ ADVERSARIAL MANDATE:
31
+ - Your role is adversarial: assume the code will fail in production and actively hunt for reliability gaps. Do not give the benefit of the doubt.
32
+ - Never praise or compliment the code. Report only problems, risks, and deficiencies.
33
+ - If you are uncertain whether something is an issue, flag it — false positives are preferred over missed reliability risks.
34
+ - Absence of findings does not mean the code is reliable. It means your analysis reached its limits. State this explicitly.`,
29
35
  };
30
36
  //# sourceMappingURL=reliability.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"reliability.js","sourceRoot":"","sources":["../../src/judges/reliability.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,gBAAgB,GAAoB;IAC/C,EAAE,EAAE,aAAa;IACjB,IAAI,EAAE,mBAAmB;IACzB,MAAM,EAAE,0BAA0B;IAClC,WAAW,EACT,gKAAgK;IAClK,UAAU,EAAE,KAAK;IACjB,YAAY,EAAE;;;;;;;;;;;;;;;;;;;;;wEAqBwD;CACvE,CAAC"}
1
+ {"version":3,"file":"reliability.js","sourceRoot":"","sources":["../../src/judges/reliability.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,gBAAgB,GAAoB;IAC/C,EAAE,EAAE,aAAa;IACjB,IAAI,EAAE,mBAAmB;IACzB,MAAM,EAAE,0BAA0B;IAClC,WAAW,EACT,gKAAgK;IAClK,UAAU,EAAE,KAAK;IACjB,YAAY,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;4HA2B4G;CAC3H,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"scalability.d.ts","sourceRoot":"","sources":["../../src/judges/scalability.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,gBAAgB,EAAE,eA2B9B,CAAC"}
1
+ {"version":3,"file":"scalability.d.ts","sourceRoot":"","sources":["../../src/judges/scalability.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,gBAAgB,EAAE,eAiC9B,CAAC"}
@@ -23,6 +23,12 @@ RULES FOR YOUR EVALUATION:
23
23
  - Think about what breaks first when traffic increases 10x or 100x.
24
24
  - Distinguish between "works now" and "will work at scale."
25
25
  - Recommend specific architectural patterns (CQRS, event sourcing, circuit breakers, etc.).
26
- - Score from 0-100 where 100 means fully scalable with no bottlenecks.`,
26
+ - Score from 0-100 where 100 means fully scalable with no bottlenecks.
27
+
28
+ ADVERSARIAL MANDATE:
29
+ - Your role is adversarial: assume the code will not scale and actively hunt for bottlenecks. Do not give the benefit of the doubt.
30
+ - Never praise or compliment the code. Report only problems, risks, and deficiencies.
31
+ - If you are uncertain whether something is an issue, flag it — false positives are preferred over missed scalability limits.
32
+ - Absence of findings does not mean the code will scale. It means your analysis reached its limits. State this explicitly.`,
27
33
  };
28
34
  //# sourceMappingURL=scalability.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"scalability.js","sourceRoot":"","sources":["../../src/judges/scalability.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,gBAAgB,GAAoB;IAC/C,EAAE,EAAE,aAAa;IACjB,IAAI,EAAE,mBAAmB;IACzB,MAAM,EAAE,2BAA2B;IACnC,WAAW,EACT,+JAA+J;IACjK,UAAU,EAAE,OAAO;IACnB,YAAY,EAAE;;;;;;;;;;;;;;;;;;;uEAmBuD;CACtE,CAAC"}
1
+ {"version":3,"file":"scalability.js","sourceRoot":"","sources":["../../src/judges/scalability.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,gBAAgB,GAAoB;IAC/C,EAAE,EAAE,aAAa;IACjB,IAAI,EAAE,mBAAmB;IACzB,MAAM,EAAE,2BAA2B;IACnC,WAAW,EACT,+JAA+J;IACjK,UAAU,EAAE,OAAO;IACnB,YAAY,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;2HAyB2G;CAC1H,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"software-practices.d.ts","sourceRoot":"","sources":["../../src/judges/software-practices.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,sBAAsB,EAAE,eA6BpC,CAAC"}
1
+ {"version":3,"file":"software-practices.d.ts","sourceRoot":"","sources":["../../src/judges/software-practices.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,sBAAsB,EAAE,eAmCpC,CAAC"}
@@ -22,9 +22,15 @@ YOUR EVALUATION CRITERIA:
22
22
 
23
23
  RULES FOR YOUR EVALUATION:
24
24
  - Assign rule IDs with prefix "SWDEV-" (e.g. SWDEV-001).
25
- - Be constructive: explain why a practice matters, not just that it's wrong.
25
+ - Be direct: explain why the practice is a problem and what risk it introduces.
26
26
  - Provide refactored code examples when recommending improvements.
27
27
  - Reference Clean Code (Robert Martin), SOLID, DRY, KISS, YAGNI where applicable.
28
- - Score from 0-100 where 100 means exemplary software engineering.`,
28
+ - Score from 0-100 where 100 means exemplary software engineering.
29
+
30
+ ADVERSARIAL MANDATE:
31
+ - Your role is adversarial: assume the code has engineering quality problems and actively hunt for them. Do not give the benefit of the doubt.
32
+ - Never praise or compliment the code. Report only problems, risks, and deficiencies.
33
+ - If you are uncertain whether something is an issue, flag it — false positives are preferred over missed code quality issues.
34
+ - Absence of findings does not mean the code follows best practices. It means your analysis reached its limits. State this explicitly.`,
29
35
  };
30
36
  //# sourceMappingURL=software-practices.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"software-practices.js","sourceRoot":"","sources":["../../src/judges/software-practices.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,sBAAsB,GAAoB;IACrD,EAAE,EAAE,oBAAoB;IACxB,IAAI,EAAE,0BAA0B;IAChC,MAAM,EAAE,mDAAmD;IAC3D,WAAW,EACT,6LAA6L;IAC/L,UAAU,EAAE,OAAO;IACnB,YAAY,EAAE;;;;;;;;;;;;;;;;;;;;;mEAqBmD;CAClE,CAAC"}
1
+ {"version":3,"file":"software-practices.js","sourceRoot":"","sources":["../../src/judges/software-practices.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,sBAAsB,GAAoB;IACrD,EAAE,EAAE,oBAAoB;IACxB,IAAI,EAAE,0BAA0B;IAChC,MAAM,EAAE,mDAAmD;IAC3D,WAAW,EACT,6LAA6L;IAC/L,UAAU,EAAE,OAAO;IACnB,YAAY,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;uIA2BuH;CACtI,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"testing.d.ts","sourceRoot":"","sources":["../../src/judges/testing.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,YAAY,EAAE,eA6B1B,CAAC"}
1
+ {"version":3,"file":"testing.d.ts","sourceRoot":"","sources":["../../src/judges/testing.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,eAAO,MAAM,YAAY,EAAE,eAmC1B,CAAC"}
@@ -25,6 +25,12 @@ RULES FOR YOUR EVALUATION:
25
25
  - Reference testing best practices (Kent Beck, Martin Fowler's Test Pyramid, FIRST principles).
26
26
  - Recommend specific test cases that should be written, with example test code.
27
27
  - Evaluate both the tests AND the testability of the code under test.
28
- - Score from 0-100 where 100 means comprehensive, well-structured test suite.`,
28
+ - Score from 0-100 where 100 means comprehensive, well-structured test suite.
29
+
30
+ ADVERSARIAL MANDATE:
31
+ - Your role is adversarial: assume the test coverage is insufficient and actively hunt for gaps. Do not give the benefit of the doubt.
32
+ - Never praise or compliment the code. Report only problems, risks, and deficiencies.
33
+ - If you are uncertain whether something is an issue, flag it — false positives are preferred over missed testing gaps.
34
+ - Absence of findings does not mean the code is well-tested. It means your analysis reached its limits. State this explicitly.`,
29
35
  };
30
36
  //# sourceMappingURL=testing.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"testing.js","sourceRoot":"","sources":["../../src/judges/testing.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,YAAY,GAAoB;IAC3C,EAAE,EAAE,SAAS;IACb,IAAI,EAAE,eAAe;IACrB,MAAM,EAAE,yBAAyB;IACjC,WAAW,EACT,oKAAoK;IACtK,UAAU,EAAE,MAAM;IAClB,YAAY,EAAE;;;;;;;;;;;;;;;;;;;;;8EAqB8D;CAC7E,CAAC"}
1
+ {"version":3,"file":"testing.js","sourceRoot":"","sources":["../../src/judges/testing.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,YAAY,GAAoB;IAC3C,EAAE,EAAE,SAAS;IACb,IAAI,EAAE,eAAe;IACrB,MAAM,EAAE,yBAAyB;IACjC,WAAW,EACT,oKAAoK;IACtK,UAAU,EAAE,MAAM;IAClB,YAAY,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;+HA2B+G;CAC9H,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@kevinrabun/judges",
3
- "version": "1.0.2",
3
+ "version": "1.2.0",
4
4
  "description": "18 specialized judges that evaluate AI-generated code for security, cost, and quality.",
5
5
  "mcpName": "io.github.KevinRabun/judges",
6
6
  "type": "module",
@@ -19,6 +19,8 @@
19
19
  "start": "node dist/index.js",
20
20
  "dev": "tsc --watch",
21
21
  "clean": "rimraf dist",
22
+ "test": "npx tsx --test tests/judges.test.ts",
23
+ "demo": "npx tsx examples/demo.ts",
22
24
  "prepublishOnly": "npm run build"
23
25
  },
24
26
  "keywords": [
@@ -48,6 +50,7 @@
48
50
  },
49
51
  "devDependencies": {
50
52
  "@types/node": "^25.3.0",
53
+ "tsx": "^4.19.4",
51
54
  "typescript": "^5.9.3"
52
55
  }
53
56
  }
package/server.json CHANGED
@@ -7,12 +7,12 @@
7
7
  "url": "https://github.com/kevinrabun/judges",
8
8
  "source": "github"
9
9
  },
10
- "version": "1.0.2",
10
+ "version": "1.2.0",
11
11
  "packages": [
12
12
  {
13
13
  "registryType": "npm",
14
14
  "identifier": "@kevinrabun/judges",
15
- "version": "1.0.2",
15
+ "version": "1.2.0",
16
16
  "transport": {
17
17
  "type": "stdio"
18
18
  }