@kevinrabun/judges 3.115.4 → 3.117.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. package/agents/accessibility.judge.md +7 -0
  2. package/agents/agent-instructions.judge.md +7 -0
  3. package/agents/ai-code-safety.judge.md +7 -0
  4. package/agents/api-contract.judge.md +7 -0
  5. package/agents/api-design.judge.md +7 -0
  6. package/agents/authentication.judge.md +7 -0
  7. package/agents/backwards-compatibility.judge.md +7 -0
  8. package/agents/caching.judge.md +7 -0
  9. package/agents/ci-cd.judge.md +7 -0
  10. package/agents/cloud-readiness.judge.md +7 -0
  11. package/agents/concurrency.judge.md +7 -0
  12. package/agents/configuration-management.judge.md +7 -0
  13. package/agents/cybersecurity.judge.md +7 -0
  14. package/agents/data-security.judge.md +7 -0
  15. package/agents/dependency-health.judge.md +7 -0
  16. package/agents/documentation.judge.md +7 -0
  17. package/agents/error-handling.judge.md +7 -0
  18. package/agents/ethics-bias.judge.md +7 -0
  19. package/agents/false-positive-review.judge.md +12 -0
  20. package/agents/framework-safety.judge.md +7 -0
  21. package/agents/hallucination-detection.judge.md +13 -0
  22. package/agents/iac-security.judge.md +7 -0
  23. package/agents/intent-alignment.judge.md +13 -0
  24. package/agents/logging-privacy.judge.md +7 -0
  25. package/agents/maintainability.judge.md +7 -0
  26. package/agents/multi-turn-coherence.judge.md +7 -0
  27. package/agents/observability.judge.md +7 -0
  28. package/agents/portability.judge.md +7 -0
  29. package/agents/rate-limiting.judge.md +7 -0
  30. package/agents/reliability.judge.md +7 -0
  31. package/agents/security.judge.md +13 -0
  32. package/agents/testing.judge.md +7 -0
  33. package/agents/ux.judge.md +7 -0
  34. package/dist/a2a-protocol.d.ts +136 -0
  35. package/dist/a2a-protocol.js +218 -0
  36. package/dist/api.d.ts +21 -3
  37. package/dist/api.js +21 -1
  38. package/dist/audit-trail.d.ts +245 -0
  39. package/dist/audit-trail.js +257 -0
  40. package/dist/commands/benchmark-advanced.js +51 -51
  41. package/dist/commands/benchmark-ai-agents.js +16 -16
  42. package/dist/commands/benchmark-compliance-ethics.js +12 -12
  43. package/dist/commands/benchmark-expanded-2.js +2 -2
  44. package/dist/commands/benchmark-expanded.js +2 -2
  45. package/dist/commands/benchmark-infrastructure.js +12 -12
  46. package/dist/commands/benchmark-languages.js +11 -11
  47. package/dist/commands/benchmark-quality-ops.js +7 -7
  48. package/dist/commands/benchmark-security-deep.js +9 -9
  49. package/dist/commands/benchmark.js +1 -1
  50. package/dist/commands/llm-benchmark-optimizer.d.ts +78 -0
  51. package/dist/commands/llm-benchmark-optimizer.js +241 -0
  52. package/dist/commands/llm-benchmark.d.ts +4 -2
  53. package/dist/commands/llm-benchmark.js +40 -12
  54. package/dist/escalation.d.ts +100 -0
  55. package/dist/escalation.js +292 -0
  56. package/dist/evaluation-session.d.ts +74 -0
  57. package/dist/evaluation-session.js +152 -0
  58. package/dist/evaluators/index.d.ts +23 -1
  59. package/dist/evaluators/index.js +192 -3
  60. package/dist/evaluators/judge-selector.d.ts +19 -0
  61. package/dist/evaluators/judge-selector.js +141 -0
  62. package/dist/evaluators/recall-boost.d.ts +27 -0
  63. package/dist/evaluators/recall-boost.js +409 -0
  64. package/dist/feedback-loop.d.ts +62 -0
  65. package/dist/feedback-loop.js +179 -0
  66. package/dist/index.js +2 -0
  67. package/dist/judges/accessibility.js +7 -0
  68. package/dist/judges/agent-instructions.js +7 -0
  69. package/dist/judges/ai-code-safety.js +7 -0
  70. package/dist/judges/api-contract.js +7 -0
  71. package/dist/judges/api-design.js +7 -0
  72. package/dist/judges/authentication.js +7 -0
  73. package/dist/judges/backwards-compatibility.js +7 -0
  74. package/dist/judges/caching.js +7 -0
  75. package/dist/judges/ci-cd.js +7 -0
  76. package/dist/judges/cloud-readiness.js +7 -0
  77. package/dist/judges/concurrency.js +7 -0
  78. package/dist/judges/configuration-management.js +7 -0
  79. package/dist/judges/cybersecurity.js +7 -0
  80. package/dist/judges/data-security.js +7 -0
  81. package/dist/judges/dependency-health.js +7 -0
  82. package/dist/judges/documentation.js +7 -0
  83. package/dist/judges/error-handling.js +7 -0
  84. package/dist/judges/ethics-bias.js +7 -0
  85. package/dist/judges/false-positive-review.js +13 -1
  86. package/dist/judges/framework-safety.js +7 -0
  87. package/dist/judges/hallucination-detection.js +14 -1
  88. package/dist/judges/iac-security.js +7 -0
  89. package/dist/judges/intent-alignment.js +14 -1
  90. package/dist/judges/logging-privacy.js +7 -0
  91. package/dist/judges/maintainability.js +7 -0
  92. package/dist/judges/multi-turn-coherence.js +7 -0
  93. package/dist/judges/observability.js +7 -0
  94. package/dist/judges/portability.js +7 -0
  95. package/dist/judges/rate-limiting.js +7 -0
  96. package/dist/judges/reliability.js +7 -0
  97. package/dist/judges/security.js +14 -1
  98. package/dist/judges/testing.js +7 -0
  99. package/dist/judges/ux.js +7 -0
  100. package/dist/review-conversation.d.ts +87 -0
  101. package/dist/review-conversation.js +307 -0
  102. package/dist/sast-integration.d.ts +112 -0
  103. package/dist/sast-integration.js +215 -0
  104. package/dist/tools/register-evaluation.js +208 -8
  105. package/dist/tools/register-fix.js +24 -1
  106. package/dist/tools/register-resources.d.ts +6 -0
  107. package/dist/tools/register-resources.js +177 -0
  108. package/dist/tools/register-review.js +26 -1
  109. package/dist/tools/register-workflow.js +384 -11
  110. package/dist/tools/validation.d.ts +13 -0
  111. package/dist/tools/validation.js +77 -0
  112. package/dist/types.d.ts +122 -0
  113. package/package.json +25 -12
  114. package/server.json +2 -2
@@ -0,0 +1,307 @@
1
+ /**
2
+ * Multi-Turn Review Conversation
3
+ *
4
+ * Enables interactive, stateful review sessions where developers and
5
+ * the tribunal engage in a conversation about findings. This transforms
6
+ * the evaluation from a one-shot report into a collaborative review:
7
+ *
8
+ * - Developer asks "why?" about a finding → tribunal explains reasoning
9
+ * - Developer provides context → tribunal adjusts confidence
10
+ * - Developer requests re-evaluation → tribunal focuses on specific areas
11
+ * - Developer disputes a finding → tribunal logs disagreement and adjusts
12
+ *
13
+ * Designed for MCP tool sessions and VS Code extension interactions.
14
+ */
15
+ // ─── Conversation Management ─────────────────────────────────────────────────
16
// Per-process sequence number; keeps ids distinct when several messages
// are created within the same millisecond.
let messageCounter = 0;

/**
 * Create a message id of the form `msg_<time>_<sequence>`, where both
 * parts are base-36 encoded.
 *
 * @returns {string} A new message id.
 */
function generateMessageId() {
  messageCounter += 1;
  const timePart = Date.now().toString(36);
  const sequencePart = messageCounter.toString(36);
  return `msg_${timePart}_${sequencePart}`;
}
21
/**
 * Create a conversation id of the form `conv_<time>_<random>`: a base-36
 * timestamp followed by up to six base-36 characters from Math.random().
 * Not cryptographically secure.
 *
 * @returns {string} A new conversation id.
 */
function generateConversationId() {
  const stamp = Date.now().toString(36);
  const nonce = Math.random().toString(36).slice(2, 8);
  return `conv_${stamp}_${nonce}`;
}
24
/**
 * Start a new review conversation for a file evaluation.
 *
 * The conversation keeps its own copy of each finding. Handlers later lower
 * `confidence` on those copies; copying keeps `originalVerdict` an untouched
 * record of what the tribunal first reported.
 *
 * @param {string} filePath - Path of the file that was evaluated.
 * @param {string} language - Language of the evaluated file.
 * @param {object} verdict - Tribunal verdict; must expose a `findings` array.
 * @returns {object} The new conversation, already holding the system opening message.
 */
export function startReviewConversation(filePath, language, verdict) {
  const now = new Date().toISOString();
  const conversation = {
    conversationId: generateConversationId(),
    filePath,
    language,
    state: "active",
    messages: [],
    // Copy each finding (one level deep) so confidence adjustments made during
    // the conversation do not leak into `originalVerdict.findings`.
    findings: verdict.findings.map((finding) => ({ ...finding })),
    originalVerdict: verdict,
    developerContext: {
      explanations: new Map(),
      disputed: new Set(),
      accepted: new Set(),
      additionalContext: [],
      focusAreas: [],
    },
    startedAt: now,
    lastActivityAt: now,
  };
  // Add system opening message
  addMessage(conversation, "system", "general", buildOpeningMessage(verdict));
  return conversation;
}
51
/**
 * Record a developer message, produce the tribunal's reply for its intent,
 * record that reply, and return it.
 *
 * Recognised intents: `explain`, `context`, `dispute`, `accept`,
 * `re-evaluate`, `focus`, `summary`. Any other intent gets the generic
 * status reply.
 *
 * @param {object} conversation - Conversation to update (mutated in place).
 * @param {string} content - The developer's message text.
 * @param {string} intent - What the developer wants to do.
 * @param {string} [findingRef] - Rule id or title of the finding concerned.
 * @returns {object} The tribunal message that was appended.
 */
export function processMessage(conversation, content, intent, findingRef) {
  // The developer's message is logged before any handler runs.
  addMessage(conversation, "developer", intent, content, findingRef);

  const responders = new Map([
    ["explain", () => handleExplainRequest(conversation, findingRef)],
    ["context", () => handleContextProvided(conversation, content, findingRef)],
    ["dispute", () => handleDispute(conversation, content, findingRef)],
    ["accept", () => handleAcceptance(conversation, findingRef)],
    ["re-evaluate", () => handleReEvaluateRequest(conversation, content)],
    ["focus", () => handleFocusRequest(conversation, content)],
    ["summary", () => buildConversationSummary(conversation)],
  ]);
  const fallback = () => handleGeneralMessage(conversation, content);

  const respond = responders.get(intent) ?? fallback;
  const response = respond();

  return addMessage(conversation, "tribunal", intent, response, findingRef);
}
88
/**
 * Partition the conversation's findings into accepted, disputed and
 * unaddressed.
 *
 * A finding counts as accepted/disputed when either its `ruleId` or its
 * `title` is in the corresponding set: developers may reference a finding by
 * title, and that reference must not leave the finding stuck as unaddressed.
 * Acceptance takes precedence when a finding is in both sets.
 *
 * @param {object} conversation - Conversation with `findings` and `developerContext`.
 * @returns {{unaddressed: object[], accepted: object[], disputed: object[]}}
 */
export function getOutstandingFindings(conversation) {
  const { accepted: acceptedRefs, disputed: disputedRefs } = conversation.developerContext;
  const isMarked = (refs, finding) =>
    refs.has(finding.ruleId) || (finding.title != null && refs.has(finding.title));

  const accepted = [];
  const disputed = [];
  const unaddressed = [];
  for (const finding of conversation.findings) {
    if (isMarked(acceptedRefs, finding)) {
      accepted.push(finding);
    } else if (isMarked(disputedRefs, finding)) {
      disputed.push(finding);
    } else {
      unaddressed.push(finding);
    }
  }
  return { unaddressed, accepted, disputed };
}
109
/**
 * Report whether every finding has been accepted or disputed.
 * A conversation with no findings is considered resolved.
 *
 * @param {object} conversation - Conversation to inspect.
 * @returns {boolean} True when no finding is left unaddressed.
 */
export function isConversationResolved(conversation) {
  const remaining = getOutstandingFindings(conversation).unaddressed;
  return remaining.length === 0;
}
116
/**
 * Render the conversation as a markdown report: a header, one section per
 * message in chronological order, and a closing summary of finding counts.
 *
 * @param {object} conversation - Conversation to export.
 * @returns {string} Markdown text with lines joined by "\n".
 */
export function exportConversationAsMarkdown(conversation) {
  const roleLabel = (role) => {
    if (role === "developer") {
      return "**Developer**";
    }
    if (role === "tribunal") {
      return "**Tribunal**";
    }
    return "*System*";
  };

  const report = [
    `# Review Conversation: ${conversation.filePath}`,
    "",
    `**Started**: ${conversation.startedAt}`,
    `**State**: ${conversation.state}`,
    `**Findings**: ${conversation.findings.length}`,
    "",
    "---",
    "",
  ];

  for (const message of conversation.messages) {
    const reference = message.findingRef ? ` (re: ${message.findingRef})` : "";
    report.push(
      `### ${roleLabel(message.role)}${reference}`,
      `*${message.timestamp}* — \`${message.intent}\``,
      "",
      message.content,
      "",
      "---",
      "",
    );
  }

  const tally = getOutstandingFindings(conversation);
  report.push(
    "## Summary",
    "",
    `- **Accepted**: ${tally.accepted.length} finding(s)`,
    `- **Disputed**: ${tally.disputed.length} finding(s)`,
    `- **Unaddressed**: ${tally.unaddressed.length} finding(s)`,
  );
  return report.join("\n");
}
149
+ // ─── Internal Helpers ────────────────────────────────────────────────────────
150
/**
 * Append a message to the conversation and refresh its bookkeeping.
 *
 * Updates `lastActivityAt` to the message timestamp and flips `state` to
 * "resolved" once no finding is left unaddressed. The state is never moved
 * back to "active" here.
 *
 * @param {object} conversation - Conversation to mutate.
 * @param {string} role - "developer", "tribunal" or "system".
 * @param {string} intent - Intent the message belongs to.
 * @param {string} content - Message text.
 * @param {string} [findingRef] - Finding the message refers to, if any.
 * @returns {object} The message that was appended.
 */
function addMessage(conversation, role, intent, content, findingRef) {
  const id = generateMessageId();
  const timestamp = new Date().toISOString();
  const entry = { id, role, intent, content, timestamp, findingRef };

  conversation.messages.push(entry);
  conversation.lastActivityAt = timestamp;

  const resolved = isConversationResolved(conversation);
  if (resolved) {
    conversation.state = "resolved";
  }
  return entry;
}
167
/**
 * Look up a finding by rule id or by title.
 *
 * @param {object} conversation - Conversation whose findings are searched.
 * @param {string} [ref] - Rule id or title; a falsy value matches nothing.
 * @returns {object|undefined} The first matching finding, if any.
 */
function findFinding(conversation, ref) {
  if (!ref) {
    return undefined;
  }
  for (const candidate of conversation.findings) {
    if (candidate.ruleId === ref || candidate.title === ref) {
      return candidate;
    }
  }
  return undefined;
}
172
/**
 * Compose the system message that opens a review conversation: the total
 * number of findings, a per-severity breakdown (critical, high, medium, low;
 * zero counts and other severities are omitted) and the list of available
 * actions.
 *
 * @param {object} verdict - Verdict exposing a `findings` array.
 * @returns {string} Markdown text with lines joined by "\n".
 */
function buildOpeningMessage(verdict) {
  // Insertion order fixes the order in which severities are listed.
  const tally = new Map([
    ["critical", 0],
    ["high", 0],
    ["medium", 0],
    ["low", 0],
  ]);
  for (const finding of verdict.findings) {
    const level = finding.severity;
    if (tally.has(level)) {
      tally.set(level, tally.get(level) + 1);
    }
  }

  const message = [`Code review complete. Found **${verdict.findings.length}** finding(s):`];
  for (const [level, count] of tally) {
    if (count > 0) {
      message.push(`- ${count} ${level}`);
    }
  }
  message.push(
    "",
    "You can:",
    "- Ask **why** a finding was flagged (`explain`)",
    "- Provide **context** about your code (`context`)",
    "- **Dispute** a false positive (`dispute`)",
    "- **Accept** a finding to fix (`accept`)",
    "- Request **re-evaluation** with new context (`re-evaluate`)",
  );
  return message.join("\n");
}
195
/**
 * Answer an `explain` request.
 *
 * When `ref` resolves to a finding, returns a markdown card with severity,
 * confidence (defaulting to 50% when unset), line numbers, description and
 * any recommendation, reference and detection method. Otherwise returns a
 * numbered list of all findings so the developer can pick one.
 *
 * @param {object} conversation - Conversation holding the findings.
 * @param {string} [ref] - Rule id or title of the finding to explain.
 * @returns {string} Markdown reply.
 */
function handleExplainRequest(conversation, ref) {
  const finding = findFinding(conversation, ref);

  if (finding) {
    const confidencePct = ((finding.confidence ?? 0.5) * 100).toFixed(0);
    const lineList = finding.lineNumbers?.join(", ") || "N/A";
    const card = [
      `## ${finding.ruleId}: ${finding.title}`,
      "",
      `**Severity**: ${finding.severity}`,
      `**Confidence**: ${confidencePct}%`,
      `**Lines**: ${lineList}`,
      "",
      finding.description,
    ];
    const optionalSections = [
      ["Recommendation", finding.recommendation],
      ["Reference", finding.reference],
      ["Detection method", finding.provenance],
    ];
    for (const [label, value] of optionalSections) {
      if (value) {
        card.push("", `**${label}**: ${value}`);
      }
    }
    return card.join("\n");
  }

  // No (or unknown) reference: list everything for the developer to pick from.
  const menu = [];
  for (const [index, item] of conversation.findings.entries()) {
    menu.push(`${index + 1}. **${item.ruleId}** (${item.severity}): ${item.title}`);
  }
  return `Which finding would you like explained?\n\n${menu.join("\n")}`;
}
224
/**
 * Record developer-supplied context.
 *
 * The note is always appended to `additionalContext`. When `ref` is given it
 * is also stored in `explanations` under that reference, and if the reference
 * resolves to a finding, that finding's confidence is multiplied by 0.7 (with
 * a floor of 0.1; an unset confidence is treated as 0.5).
 *
 * @param {object} conversation - Conversation to mutate.
 * @param {string} content - The context note.
 * @param {string} [ref] - Rule id or title the note applies to.
 * @returns {string} Reply describing what was recorded.
 */
function handleContextProvided(conversation, content, ref) {
  const ctx = conversation.developerContext;
  ctx.additionalContext.push(content);

  let target;
  if (ref) {
    ctx.explanations.set(ref, content);
    target = findFinding(conversation, ref);
  }

  if (!target) {
    return `Context recorded. This will be factored into any re-evaluation. (${ctx.additionalContext.length} context note(s) total)`;
  }

  // Reduce confidence since developer provided context
  const before = target.confidence ?? 0.5;
  target.confidence = Math.max(0.1, before * 0.7);
  return `Context noted for **${ref}**. Confidence adjusted from ${(before * 100).toFixed(0)}% to ${(target.confidence * 100).toFixed(0)}%. This finding will be weighted less heavily.`;
}
238
/**
 * Record a dispute against a finding.
 *
 * The dispute and the developer's reasoning are stored under the finding's
 * `ruleId` when the reference resolves to a finding (even if the developer
 * referenced it by title), because the summary and the outstanding-findings
 * check look entries up by `ruleId`. A reference that matches no finding is
 * stored as given.
 *
 * A matched finding has its confidence multiplied by 0.3 (floor 0.05; an
 * unset confidence is treated as 0.5).
 *
 * @param {object} conversation - Conversation to mutate.
 * @param {string} content - The developer's reasoning.
 * @param {string} [ref] - Rule id or title of the disputed finding.
 * @returns {string} Reply describing the outcome.
 */
function handleDispute(conversation, content, ref) {
  if (!ref) {
    return "Please specify which finding you're disputing (e.g., by rule ID like SEC-001).";
  }
  const finding = findFinding(conversation, ref);
  // Canonical key: a title reference must land on the same key that readers use.
  const key = finding?.ruleId ?? ref;
  conversation.developerContext.disputed.add(key);
  conversation.developerContext.explanations.set(key, content);
  if (finding) {
    finding.confidence = Math.max(0.05, (finding.confidence ?? 0.5) * 0.3);
    return `Finding **${ref}** marked as disputed. Your reasoning has been recorded. Confidence reduced to ${(finding.confidence * 100).toFixed(0)}%. This will feed back into calibration to reduce false positives for similar patterns.`;
  }
  return `Dispute recorded for **${ref}**. Finding was not found in current results — it may have already been resolved.`;
}
251
/**
 * Record that the developer accepts a finding (or all findings).
 *
 * `ref === "all"` accepts every finding by rule id. Otherwise the acceptance
 * is stored under the finding's `ruleId` when the reference resolves to a
 * finding (even if it was referenced by title), because the outstanding-
 * findings check looks entries up by `ruleId`. A reference that matches no
 * finding is stored as given.
 *
 * @param {object} conversation - Conversation to mutate.
 * @param {string} [ref] - Rule id or title of the finding, or "all".
 * @returns {string} Reply describing the outcome.
 */
function handleAcceptance(conversation, ref) {
  if (!ref) {
    return "Please specify which finding you accept (e.g., by rule ID like SEC-001), or say 'all' to accept all.";
  }
  const { accepted } = conversation.developerContext;
  if (ref === "all") {
    for (const f of conversation.findings) {
      accepted.add(f.ruleId);
    }
    return `All ${conversation.findings.length} findings accepted. Good luck with the fixes!`;
  }
  // Canonical key: a title reference must land on the same key that readers use.
  const finding = findFinding(conversation, ref);
  accepted.add(finding?.ruleId ?? ref);
  const { unaddressed } = getOutstandingFindings(conversation);
  return `Finding **${ref}** accepted. ${unaddressed.length} finding(s) remaining.`;
}
265
/**
 * Build the reply to a `re-evaluate` request.
 *
 * Lists the context notes recorded so far, or says explicitly that none have
 * been provided instead of printing an empty list. Any focus areas recorded
 * via `focus` are listed too, since that handler promises they will be
 * prioritised on re-evaluation.
 *
 * @param {object} conversation - Conversation whose developer context is reported.
 * @param {string} content - The developer's message; currently not used.
 * @returns {string} Markdown reply.
 */
function handleReEvaluateRequest(conversation, content) {
  const notes = conversation.developerContext.additionalContext ?? [];
  const focusAreas = conversation.developerContext.focusAreas ?? [];

  const parts = [];
  if (notes.length > 0) {
    const numbered = notes.map((note, i) => `${i + 1}. ${note}`).join("\n");
    parts.push(`Re-evaluation requested. The following developer context will be applied:\n\n${numbered}`);
  } else {
    parts.push("Re-evaluation requested. No developer context has been provided yet.");
  }
  if (focusAreas.length > 0) {
    parts.push(`Focus areas: ${focusAreas.join(", ")}`);
  }
  parts.push("Re-run the evaluation with this conversation's context for updated results.");
  return parts.join("\n\n");
}
268
/**
 * Record a focus area and confirm it, listing every focus area recorded so
 * far (including the new one).
 *
 * @param {object} conversation - Conversation to mutate.
 * @param {string} content - The focus area requested by the developer.
 * @returns {string} Confirmation reply.
 */
function handleFocusRequest(conversation, content) {
  const { focusAreas } = conversation.developerContext;
  focusAreas.push(content);
  const priorities = focusAreas.join(", ");
  return `Focus area recorded: "${content}". Re-evaluation will prioritize ${priorities}.`;
}
272
/**
 * Build a markdown summary of the conversation: message and finding counts,
 * the current state, the list of unaddressed findings, and each disputed
 * finding with the reason recorded under its rule id.
 *
 * @param {object} conversation - Conversation to summarise.
 * @returns {string} Markdown text with lines joined by "\n".
 */
function buildConversationSummary(conversation) {
  const outstanding = getOutstandingFindings(conversation);
  const ctx = conversation.developerContext;

  const summary = [
    `## Conversation Summary`,
    "",
    `- **Total messages**: ${conversation.messages.length}`,
    `- **Findings**: ${conversation.findings.length}`,
    `- **Accepted**: ${outstanding.accepted.length}`,
    `- **Disputed**: ${outstanding.disputed.length}`,
    `- **Unaddressed**: ${outstanding.unaddressed.length}`,
    `- **Context notes**: ${ctx.additionalContext.length}`,
    `- **State**: ${conversation.state}`,
  ];

  if (outstanding.unaddressed.length > 0) {
    summary.push("", "### Unaddressed Findings");
    summary.push(
      ...outstanding.unaddressed.map((item) => `- **${item.ruleId}** (${item.severity}): ${item.title}`),
    );
  }

  if (outstanding.disputed.length > 0) {
    summary.push("", "### Disputed Findings");
    summary.push(
      ...outstanding.disputed.map((item) => {
        const reason = ctx.explanations.get(item.ruleId);
        return `- **${item.ruleId}**: ${reason || "(no reason given)"}`;
      }),
    );
  }

  return summary.join("\n");
}
301
/**
 * Reply to a message with no recognised intent by reporting how many
 * findings still await a decision.
 *
 * @param {object} conversation - Conversation to inspect.
 * @param {string} _content - The developer's message; not used.
 * @returns {string} Status reply.
 */
function handleGeneralMessage(conversation, _content) {
  const remaining = getOutstandingFindings(conversation).unaddressed.length;
  return remaining === 0
    ? "All findings have been addressed. The review is complete."
    : `${remaining} finding(s) still need attention. You can \`explain\`, \`dispute\`, or \`accept\` each one.`;
}
@@ -0,0 +1,112 @@
1
+ /**
2
+ * SAST Integration Layer
3
+ *
4
+ * Bridges external Static Application Security Testing tools (CodeQL, Semgrep,
5
+ * Bandit, ESLint security rules, etc.) into the Judges evaluation pipeline.
6
+ *
7
+ * External SAST tools complement Judges' LLM-powered tribunal by providing:
8
+ * - Data-flow / taint analysis (CodeQL, Semgrep Pro)
9
+ * - Known CVE pattern matching
10
+ * - Language-specific semantic analysis
11
+ *
12
+ * This module:
13
+ * 1. Ingests SARIF (Static Analysis Results Interchange Format) reports
14
+ * 2. Normalizes external findings into Judges' Finding type
15
+ * 3. Deduplicates against existing Judges findings
16
+ * 4. Merges as supplementary evidence into tribunal verdicts
17
+ */
18
+ import type { Finding, Severity, TribunalVerdict } from "./types.js";
19
+ interface SarifLog { // Module-private shape of a parsed SARIF log; this is what SastProvider.parseSarif returns.
20
+ $schema?: string; // Optional; not read by the ingestion code in sast-integration.js.
21
+ version: string; // Presumably the SARIF spec version (e.g. "2.1.0") — not validated by the ingestion code.
22
+ runs: SarifRun[]; // Iterated in order by ingestSarifContent; findings from all runs are concatenated.
23
+ }
24
+ interface SarifRun { // One tool invocation inside a SARIF log: the tool description plus its results.
25
+ tool: {
26
+ driver: {
27
+ name: string; // Reported back as `toolName` and used in each finding's reference/provenance.
28
+ version?: string; // Reported back as `toolVersion` when present.
29
+ rules?: SarifRule[]; // Rule metadata, looked up by `id` to enrich each result.
30
+ };
31
+ };
32
+ results: SarifResult[]; // Each result becomes one Judges finding.
33
+ }
34
+ interface SarifRule { // Rule metadata declared by the tool driver; matched to results via `id`.
35
+ id: string; // Key that SarifResult.ruleId refers to.
36
+ name?: string; // Second choice for a finding's title.
37
+ shortDescription?: { // First choice for a finding's title.
38
+ text: string;
39
+ };
40
+ fullDescription?: { // Used as the finding description when the result message text is empty.
41
+ text: string;
42
+ };
43
+ defaultConfiguration?: {
44
+ level?: "none" | "note" | "warning" | "error"; // Fallback level when the result itself carries none.
45
+ };
46
+ properties?: Record<string, unknown>; // Free-form bag; not read by the ingestion code.
47
+ }
48
+ interface SarifResult { // A single reported issue; converted into one Judges finding by ingestSarifContent.
49
+ ruleId: string; // Passed through the provider's mapRuleId and used to look up rule metadata.
50
+ message: {
51
+ text: string; // Primary source for the finding description.
52
+ };
53
+ level?: "none" | "note" | "warning" | "error"; // Falls back to the rule's default level, then to "warning".
54
+ locations?: Array<{
55
+ physicalLocation?: {
56
+ artifactLocation?: {
57
+ uri?: string; // Copied to the finding's `filePath`; with several locations the last uri wins.
58
+ };
59
+ region?: {
60
+ startLine?: number; // Collected into the finding's `lineNumbers`.
61
+ endLine?: number; // Not read by the ingestion code.
62
+ startColumn?: number; // Not read by the ingestion code.
63
+ endColumn?: number; // Not read by the ingestion code.
64
+ };
65
+ };
66
+ }>;
67
+ fixes?: Array<{ // Only the first fix is consulted.
68
+ description?: {
69
+ text: string; // Used as the finding's recommendation when present.
70
+ };
71
+ }>;
72
+ properties?: Record<string, unknown>; // Free-form bag; not read by the ingestion code.
73
+ }
74
+ export interface SastProvider { // Adapter that tells the ingestion code how to read one SAST tool's output.
75
+ /** Provider name (e.g., "codeql", "semgrep", "bandit"); this is the key it is registered and looked up under. */
76
+ name: string;
77
+ /** Parse the provider's raw output text into a SARIF log. May throw on malformed input; ingestSarifContent catches that and returns an empty result. */
78
+ parseSarif(content: string): SarifLog;
79
+ /** Map a provider rule ID to the rule ID used on the resulting Judges finding (the built-in providers prefix it with "SAST-"). */
80
+ mapRuleId(providerRuleId: string): string;
81
+ /** Map a SARIF level ("error", "warning", "note", ...) to a Judges severity. */
82
+ mapSeverity(level: string): Severity;
83
+ }
84
+ /** Register a SAST provider for integration. A provider already registered under the same name is replaced. */
85
+ export declare function registerSastProvider(provider: SastProvider): void;
86
+ /** Get a registered SAST provider by name, or undefined when no provider is registered under that name. */
87
+ export declare function getSastProvider(name: string): SastProvider | undefined;
88
+ /** List the names of all registered SAST providers. */
89
+ export declare function listSastProviders(): string[];
90
+ /**
91
+ * Parse a SARIF file and convert results into Judges Finding objects.
+ *
+ * Reads the file as UTF-8 and delegates to ingestSarifContent. When the file
+ * does not exist, no error is raised: the result has an empty `findings`
+ * array and `toolName` set to "unknown".
+ *
+ * @param filePath Path of the SARIF file to read.
+ * @param providerName Name of a registered provider; see ingestSarifContent.
92
+ */
93
+ export declare function ingestSarifFile(filePath: string, providerName?: string): {
94
+ findings: Finding[];
95
+ toolName: string;
96
+ toolVersion?: string;
97
+ };
98
+ /**
99
+ * Parse SARIF content string and convert results into Judges Finding objects.
+ *
+ * When `providerName` is omitted the generic "sarif" provider is used. An
+ * unknown provider name, or content the provider fails to parse, yields an
+ * empty `findings` array with `toolName` set to "unknown" rather than an error.
+ * `toolName` / `toolVersion` describe the last run in the log.
+ *
+ * @param content Raw SARIF text.
+ * @param providerName Name of a registered provider (e.g. "codeql", "semgrep").
100
+ */
101
+ export declare function ingestSarifContent(content: string, providerName?: string): {
102
+ findings: Finding[];
103
+ toolName: string;
104
+ toolVersion?: string;
105
+ };
106
+ /**
107
+ * Merge external SAST findings into a tribunal verdict.
108
+ * SAST findings judged to overlap with a finding that is already present are
109
+ * dropped. Overlap is a heuristic on approximate line position and severity
+ * (see the implementation for the exact rule), not an exact match.
+ *
+ * Returns the input verdict unchanged when nothing is added; otherwise a new
+ * verdict with the extra findings appended and a "SAST Supplement" note
+ * added to the summary.
110
+ */
111
+ export declare function mergeSastFindings(verdict: TribunalVerdict, sastFindings: Finding[]): TribunalVerdict;
112
+ export {};
@@ -0,0 +1,215 @@
1
+ /**
2
+ * SAST Integration Layer
3
+ *
4
+ * Bridges external Static Application Security Testing tools (CodeQL, Semgrep,
5
+ * Bandit, ESLint security rules, etc.) into the Judges evaluation pipeline.
6
+ *
7
+ * External SAST tools complement Judges' LLM-powered tribunal by providing:
8
+ * - Data-flow / taint analysis (CodeQL, Semgrep Pro)
9
+ * - Known CVE pattern matching
10
+ * - Language-specific semantic analysis
11
+ *
12
+ * This module:
13
+ * 1. Ingests SARIF (Static Analysis Results Interchange Format) reports
14
+ * 2. Normalizes external findings into Judges' Finding type
15
+ * 3. Deduplicates against existing Judges findings
16
+ * 4. Merges as supplementary evidence into tribunal verdicts
17
+ */
18
+ import { readFileSync, existsSync } from "fs";
19
// Registered SAST providers, keyed by provider name.
const providers = new Map();

/**
 * Register a SAST provider for integration. Registering a second provider
 * under an existing name replaces the first.
 *
 * @param {object} provider - Provider exposing `name`, `parseSarif`, `mapRuleId`, `mapSeverity`.
 * @returns {void}
 */
export function registerSastProvider(provider) {
  const { name } = provider;
  providers.set(name, provider);
}

/**
 * Get a registered SAST provider by name.
 *
 * @param {string} name - Provider name.
 * @returns {object|undefined} The provider, or undefined when none is registered.
 */
export function getSastProvider(name) {
  const registered = providers.get(name);
  return registered;
}

/**
 * List the names of all registered SAST providers, in registration order.
 *
 * @returns {string[]} Provider names.
 */
export function listSastProviders() {
  return [...providers.keys()];
}
32
+ // ─── Default Providers ───────────────────────────────────────────────────────
33
/**
 * Generic SARIF provider — works with any SARIF 2.1.0 output.
 * Rule ids are prefixed with "SAST-"; levels map error → high,
 * note → low, and everything else (including warning) → medium.
 */
const genericSarifProvider = {
  name: "sarif",
  parseSarif(content) {
    const parsed = JSON.parse(content);
    return parsed;
  },
  mapRuleId(providerRuleId) {
    return `SAST-${providerRuleId}`;
  },
  mapSeverity(level) {
    if (level === "error") {
      return "high";
    }
    if (level === "note") {
      return "low";
    }
    // "warning" and any unrecognised level
    return "medium";
  },
};
55
/**
 * CodeQL-specific provider.
 * Rule ids such as "js/xss" become "SAST-CODEQL-JS-XSS". Levels are rated
 * one step higher than the generic provider: error → critical,
 * warning → high, note and anything else → medium.
 */
const codeqlProvider = {
  name: "codeql",
  parseSarif(content) {
    const parsed = JSON.parse(content);
    return parsed;
  },
  mapRuleId(providerRuleId) {
    // Every "/" separator becomes "-", then the whole id is upper-cased.
    const normalized = providerRuleId.split("/").join("-").toUpperCase();
    return `SAST-CODEQL-${normalized}`;
  },
  mapSeverity(level) {
    if (level === "error") {
      return "critical";
    }
    if (level === "warning") {
      return "high";
    }
    // "note" and any unrecognised level
    return "medium";
  },
};
79
/**
 * Semgrep-specific provider.
 * Rule ids keep only their last two dot-separated segments, e.g.
 * "python.lang.security.audit.exec-detected" → "SAST-SEMGREP-AUDIT-EXEC-DETECTED".
 * Levels map error → high, note → low, everything else → medium.
 */
const semgrepProvider = {
  name: "semgrep",
  parseSarif(content) {
    const parsed = JSON.parse(content);
    return parsed;
  },
  mapRuleId(providerRuleId) {
    const segments = providerRuleId.split(".");
    const tail = segments.slice(-2);
    return `SAST-SEMGREP-${tail.join("-").toUpperCase()}`;
  },
  mapSeverity(level) {
    if (level === "error") {
      return "high";
    }
    if (level === "note") {
      return "low";
    }
    // "warning" and any unrecognised level
    return "medium";
  },
};
104
// Make the built-in providers available as soon as the module is loaded.
// Order matters only for the listing order returned by listSastProviders().
for (const builtIn of [genericSarifProvider, codeqlProvider, semgrepProvider]) {
  registerSastProvider(builtIn);
}
108
+ // ─── SARIF Ingestion ─────────────────────────────────────────────────────────
109
/**
 * Parse a SARIF file and convert results into Judges Finding objects.
 *
 * A missing file is not an error: the result then has no findings and
 * `toolName` "unknown". This also holds when the file disappears between the
 * existence check and the read. Any other read failure is rethrown.
 *
 * @param {string} filePath - Path of the SARIF file.
 * @param {string} [providerName] - Registered provider to parse with.
 * @returns {{findings: object[], toolName: string, toolVersion?: string}}
 */
export function ingestSarifFile(filePath, providerName) {
  if (!existsSync(filePath)) {
    return { findings: [], toolName: "unknown" };
  }
  let content;
  try {
    content = readFileSync(filePath, "utf-8");
  } catch (err) {
    // The file can vanish after existsSync(); treat that like "not found".
    if (err?.code === "ENOENT") {
      return { findings: [], toolName: "unknown" };
    }
    throw err;
  }
  return ingestSarifContent(content, providerName);
}
119
/**
 * Parse SARIF content string and convert results into Judges Finding objects.
 *
 * Never throws on bad input: an unknown provider, unparsable content, or
 * parsed content without a `runs` array yields an empty result with
 * `toolName` "unknown". Inside a well-formed log, missing optional pieces
 * (no `results`, no `tool.driver`, a result without `ruleId`, `message` or
 * `locations`) are tolerated instead of raising a TypeError.
 *
 * @param {string} content - Raw SARIF text.
 * @param {string} [providerName] - Registered provider; defaults to the generic "sarif" provider.
 * @returns {{findings: object[], toolName: string, toolVersion?: string}}
 *   Findings from all runs; `toolName` / `toolVersion` describe the last run.
 */
export function ingestSarifContent(content, providerName) {
  const emptyResult = () => ({ findings: [], toolName: "unknown" });

  const provider = providerName ? providers.get(providerName) : genericSarifProvider;
  if (!provider) {
    return emptyResult();
  }
  let sarif;
  try {
    sarif = provider.parseSarif(content);
  } catch {
    // Unparsable input is reported as "no findings" by design.
    return emptyResult();
  }
  // Valid JSON is not necessarily SARIF (e.g. `null`, `[]`, `{}`).
  if (!Array.isArray(sarif?.runs)) {
    return emptyResult();
  }

  const findings = [];
  let toolName = "unknown";
  let toolVersion;
  for (const run of sarif.runs) {
    const driver = run?.tool?.driver;
    toolName = typeof driver?.name === "string" && driver.name !== "" ? driver.name : "unknown";
    toolVersion = driver?.version;

    const rules = Array.isArray(driver?.rules) ? driver.rules : [];
    const ruleMap = new Map();
    for (const rule of rules) {
      if (rule?.id != null) {
        ruleMap.set(rule.id, rule);
      }
    }

    // `results` may be absent when the tool failed before producing any.
    const results = Array.isArray(run?.results) ? run.results : [];
    for (const result of results) {
      if (result === null || typeof result !== "object") {
        continue;
      }
      // SARIF allows a result to identify its rule by index instead of by id.
      const indexedRule = Number.isInteger(result.ruleIndex) ? rules[result.ruleIndex] : undefined;
      const rule = ruleMap.get(result.ruleId) ?? indexedRule;
      const rawRuleId = result.ruleId ?? result.rule?.id ?? rule?.id ?? "unknown";

      const level = result.level || rule?.defaultConfiguration?.level || "warning";
      const severity = provider.mapSeverity(level);
      const ruleId = provider.mapRuleId(rawRuleId);

      const lineNumbers = [];
      let sourceFile;
      const locations = Array.isArray(result.locations) ? result.locations : [];
      for (const loc of locations) {
        const physical = loc?.physicalLocation;
        if (physical?.region?.startLine) {
          lineNumbers.push(physical.region.startLine);
        }
        if (physical?.artifactLocation?.uri) {
          // With several locations the last uri wins.
          sourceFile = physical.artifactLocation.uri;
        }
      }

      const title = rule?.shortDescription?.text || rule?.name || rawRuleId;
      const description = result.message?.text || rule?.fullDescription?.text || "";
      const fix = result.fixes?.[0]?.description?.text;
      findings.push({
        ruleId,
        severity,
        title,
        description,
        lineNumbers: lineNumbers.length > 0 ? lineNumbers : undefined,
        recommendation: fix || `Address ${rawRuleId} finding from ${toolName}.`,
        reference: `${toolName}: ${rawRuleId}`,
        confidence: 0.9, // External SAST tools have high deterministic confidence
        provenance: `sast-${toolName.toLowerCase()}`,
        ...(sourceFile ? { filePath: sourceFile } : {}),
      });
    }
  }
  return { findings, toolName, toolVersion };
}
183
+ // ─── Merge with Tribunal Verdicts ────────────────────────────────────────────
184
/**
 * Merge external SAST findings into a tribunal verdict.
 *
 * A SAST finding is dropped as a duplicate when a finding that is already
 * present (from the verdict, or an earlier SAST finding) overlaps with it:
 *
 * - located findings overlap when their first line numbers fall in the same
 *   3-line bucket, they have the same severity, and their files are not
 *   known to differ (a finding without `filePath` matches any file);
 * - findings without line numbers have no location to compare, so they only
 *   overlap with another unlocated finding that has the same rule id and
 *   severity (and a compatible file).
 *
 * @param {object} verdict - Tribunal verdict with `findings` and `summary`.
 * @param {object[]} sastFindings - Findings produced by SARIF ingestion.
 * @returns {object} The same verdict object when nothing is added; otherwise
 *   a new verdict with the extra findings appended and a note in the summary.
 */
export function mergeSastFindings(verdict, sastFindings) {
  if (sastFindings.length === 0) {
    return verdict;
  }
  const BUCKET_SIZE = 3;

  const keyOf = (finding) => {
    const line = finding.lineNumbers?.[0];
    if (!line) {
      // No location: never lump unrelated rules together.
      return `unlocated::${finding.ruleId}::${finding.severity}`;
    }
    const bucket = Math.floor(line / BUCKET_SIZE) * BUCKET_SIZE;
    return `${bucket}::${finding.severity}`;
  };

  // key → file paths (undefined = unknown) of the findings kept under that key
  const seen = new Map();
  const remember = (finding) => {
    const key = keyOf(finding);
    const files = seen.get(key);
    if (files) {
      files.push(finding.filePath);
    } else {
      seen.set(key, [finding.filePath]);
    }
  };
  const overlapsExisting = (finding) => {
    const files = seen.get(keyOf(finding)) ?? [];
    return files.some((file) => !file || !finding.filePath || file === finding.filePath);
  };

  for (const existing of verdict.findings) {
    remember(existing);
  }

  const newFindings = [];
  for (const sf of sastFindings) {
    if (!overlapsExisting(sf)) {
      newFindings.push(sf);
      remember(sf);
    }
  }
  if (newFindings.length === 0) {
    return verdict;
  }
  return {
    ...verdict,
    findings: [...verdict.findings, ...newFindings],
    summary: `${verdict.summary}\n\n**SAST Supplement**: ${newFindings.length} additional finding(s) from external static analysis.`,
  };
}