@kevinrabun/judges 2.3.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +177 -12
- package/dist/api.d.ts +40 -0
- package/dist/api.d.ts.map +1 -0
- package/dist/api.js +56 -0
- package/dist/api.js.map +1 -0
- package/dist/ast/cross-file-taint.d.ts +43 -0
- package/dist/ast/cross-file-taint.d.ts.map +1 -0
- package/dist/ast/cross-file-taint.js +713 -0
- package/dist/ast/cross-file-taint.js.map +1 -0
- package/dist/ast/index.d.ts +4 -0
- package/dist/ast/index.d.ts.map +1 -1
- package/dist/ast/index.js +5 -0
- package/dist/ast/index.js.map +1 -1
- package/dist/ast/structural-parser.d.ts.map +1 -1
- package/dist/ast/structural-parser.js +66 -11
- package/dist/ast/structural-parser.js.map +1 -1
- package/dist/ast/taint-tracker.d.ts +35 -0
- package/dist/ast/taint-tracker.d.ts.map +1 -0
- package/dist/ast/taint-tracker.js +518 -0
- package/dist/ast/taint-tracker.js.map +1 -0
- package/dist/ast/types.d.ts +2 -0
- package/dist/ast/types.d.ts.map +1 -1
- package/dist/ast/typescript-ast.d.ts.map +1 -1
- package/dist/ast/typescript-ast.js +25 -5
- package/dist/ast/typescript-ast.js.map +1 -1
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +10 -9
- package/dist/config.js.map +1 -1
- package/dist/dedup.d.ts +19 -0
- package/dist/dedup.d.ts.map +1 -0
- package/dist/dedup.js +222 -0
- package/dist/dedup.js.map +1 -0
- package/dist/errors.d.ts +37 -0
- package/dist/errors.d.ts.map +1 -0
- package/dist/errors.js +57 -0
- package/dist/errors.js.map +1 -0
- package/dist/evaluators/accessibility.d.ts +1 -1
- package/dist/evaluators/accessibility.d.ts.map +1 -1
- package/dist/evaluators/accessibility.js +22 -16
- package/dist/evaluators/accessibility.js.map +1 -1
- package/dist/evaluators/agent-instructions.d.ts +1 -1
- package/dist/evaluators/agent-instructions.d.ts.map +1 -1
- package/dist/evaluators/agent-instructions.js +1 -2
- package/dist/evaluators/agent-instructions.js.map +1 -1
- package/dist/evaluators/ai-code-safety.d.ts +1 -1
- package/dist/evaluators/ai-code-safety.d.ts.map +1 -1
- package/dist/evaluators/ai-code-safety.js +2 -6
- package/dist/evaluators/ai-code-safety.js.map +1 -1
- package/dist/evaluators/api-design.d.ts +1 -1
- package/dist/evaluators/api-design.d.ts.map +1 -1
- package/dist/evaluators/api-design.js +2 -1
- package/dist/evaluators/api-design.js.map +1 -1
- package/dist/evaluators/app-builder.d.ts +34 -0
- package/dist/evaluators/app-builder.d.ts.map +1 -0
- package/dist/evaluators/app-builder.js +156 -0
- package/dist/evaluators/app-builder.js.map +1 -0
- package/dist/evaluators/authentication.d.ts +1 -1
- package/dist/evaluators/authentication.d.ts.map +1 -1
- package/dist/evaluators/authentication.js +2 -66
- package/dist/evaluators/authentication.js.map +1 -1
- package/dist/evaluators/backwards-compatibility.d.ts +1 -1
- package/dist/evaluators/backwards-compatibility.d.ts.map +1 -1
- package/dist/evaluators/backwards-compatibility.js.map +1 -1
- package/dist/evaluators/caching.d.ts +1 -1
- package/dist/evaluators/caching.d.ts.map +1 -1
- package/dist/evaluators/caching.js.map +1 -1
- package/dist/evaluators/ci-cd.d.ts +1 -1
- package/dist/evaluators/ci-cd.d.ts.map +1 -1
- package/dist/evaluators/ci-cd.js +4 -4
- package/dist/evaluators/ci-cd.js.map +1 -1
- package/dist/evaluators/cloud-readiness.d.ts +1 -1
- package/dist/evaluators/cloud-readiness.d.ts.map +1 -1
- package/dist/evaluators/cloud-readiness.js.map +1 -1
- package/dist/evaluators/code-structure.d.ts +1 -1
- package/dist/evaluators/code-structure.d.ts.map +1 -1
- package/dist/evaluators/code-structure.js +2 -6
- package/dist/evaluators/code-structure.js.map +1 -1
- package/dist/evaluators/compliance.d.ts +1 -1
- package/dist/evaluators/compliance.d.ts.map +1 -1
- package/dist/evaluators/compliance.js +15 -6
- package/dist/evaluators/compliance.js.map +1 -1
- package/dist/evaluators/concurrency.d.ts +1 -1
- package/dist/evaluators/concurrency.d.ts.map +1 -1
- package/dist/evaluators/concurrency.js +9 -4
- package/dist/evaluators/concurrency.js.map +1 -1
- package/dist/evaluators/configuration-management.d.ts +1 -1
- package/dist/evaluators/configuration-management.d.ts.map +1 -1
- package/dist/evaluators/configuration-management.js +7 -2
- package/dist/evaluators/configuration-management.js.map +1 -1
- package/dist/evaluators/cost-effectiveness.d.ts +1 -1
- package/dist/evaluators/cost-effectiveness.d.ts.map +1 -1
- package/dist/evaluators/cost-effectiveness.js +1 -3
- package/dist/evaluators/cost-effectiveness.js.map +1 -1
- package/dist/evaluators/cybersecurity.d.ts +1 -1
- package/dist/evaluators/cybersecurity.d.ts.map +1 -1
- package/dist/evaluators/cybersecurity.js +50 -1
- package/dist/evaluators/cybersecurity.js.map +1 -1
- package/dist/evaluators/data-security.d.ts +1 -1
- package/dist/evaluators/data-security.d.ts.map +1 -1
- package/dist/evaluators/data-security.js +9 -66
- package/dist/evaluators/data-security.js.map +1 -1
- package/dist/evaluators/data-sovereignty.d.ts +1 -1
- package/dist/evaluators/data-sovereignty.d.ts.map +1 -1
- package/dist/evaluators/data-sovereignty.js +4 -2
- package/dist/evaluators/data-sovereignty.js.map +1 -1
- package/dist/evaluators/database.d.ts +1 -1
- package/dist/evaluators/database.d.ts.map +1 -1
- package/dist/evaluators/database.js +3 -1
- package/dist/evaluators/database.js.map +1 -1
- package/dist/evaluators/dependencies.d.ts +6 -0
- package/dist/evaluators/dependencies.d.ts.map +1 -0
- package/dist/evaluators/dependencies.js +204 -0
- package/dist/evaluators/dependencies.js.map +1 -0
- package/dist/evaluators/dependency-health.d.ts +1 -1
- package/dist/evaluators/dependency-health.d.ts.map +1 -1
- package/dist/evaluators/dependency-health.js +198 -6
- package/dist/evaluators/dependency-health.js.map +1 -1
- package/dist/evaluators/documentation.d.ts +1 -1
- package/dist/evaluators/documentation.d.ts.map +1 -1
- package/dist/evaluators/documentation.js +5 -2
- package/dist/evaluators/documentation.js.map +1 -1
- package/dist/evaluators/error-handling.d.ts +1 -1
- package/dist/evaluators/error-handling.d.ts.map +1 -1
- package/dist/evaluators/error-handling.js.map +1 -1
- package/dist/evaluators/ethics-bias.d.ts +1 -1
- package/dist/evaluators/ethics-bias.d.ts.map +1 -1
- package/dist/evaluators/ethics-bias.js +10 -5
- package/dist/evaluators/ethics-bias.js.map +1 -1
- package/dist/evaluators/framework-safety.d.ts +13 -0
- package/dist/evaluators/framework-safety.d.ts.map +1 -0
- package/dist/evaluators/framework-safety.js +424 -0
- package/dist/evaluators/framework-safety.js.map +1 -0
- package/dist/evaluators/index.d.ts +20 -24
- package/dist/evaluators/index.d.ts.map +1 -1
- package/dist/evaluators/index.js +294 -728
- package/dist/evaluators/index.js.map +1 -1
- package/dist/evaluators/internationalization.d.ts +1 -1
- package/dist/evaluators/internationalization.d.ts.map +1 -1
- package/dist/evaluators/internationalization.js +14 -6
- package/dist/evaluators/internationalization.js.map +1 -1
- package/dist/evaluators/logging-privacy.d.ts +1 -1
- package/dist/evaluators/logging-privacy.d.ts.map +1 -1
- package/dist/evaluators/logging-privacy.js +3 -1
- package/dist/evaluators/logging-privacy.js.map +1 -1
- package/dist/evaluators/maintainability.d.ts +1 -1
- package/dist/evaluators/maintainability.d.ts.map +1 -1
- package/dist/evaluators/maintainability.js +15 -9
- package/dist/evaluators/maintainability.js.map +1 -1
- package/dist/evaluators/observability.d.ts +1 -1
- package/dist/evaluators/observability.d.ts.map +1 -1
- package/dist/evaluators/observability.js +2 -1
- package/dist/evaluators/observability.js.map +1 -1
- package/dist/evaluators/performance.d.ts +1 -1
- package/dist/evaluators/performance.d.ts.map +1 -1
- package/dist/evaluators/performance.js +181 -4
- package/dist/evaluators/performance.js.map +1 -1
- package/dist/evaluators/portability.d.ts +1 -1
- package/dist/evaluators/portability.d.ts.map +1 -1
- package/dist/evaluators/portability.js +2 -1
- package/dist/evaluators/portability.js.map +1 -1
- package/dist/evaluators/project.d.ts +16 -0
- package/dist/evaluators/project.d.ts.map +1 -0
- package/dist/evaluators/project.js +353 -0
- package/dist/evaluators/project.js.map +1 -0
- package/dist/evaluators/rate-limiting.d.ts +1 -1
- package/dist/evaluators/rate-limiting.d.ts.map +1 -1
- package/dist/evaluators/rate-limiting.js.map +1 -1
- package/dist/evaluators/reliability.d.ts +1 -1
- package/dist/evaluators/reliability.d.ts.map +1 -1
- package/dist/evaluators/reliability.js.map +1 -1
- package/dist/evaluators/scalability.d.ts +1 -1
- package/dist/evaluators/scalability.d.ts.map +1 -1
- package/dist/evaluators/scalability.js +3 -1
- package/dist/evaluators/scalability.js.map +1 -1
- package/dist/evaluators/shared.d.ts +24 -2
- package/dist/evaluators/shared.d.ts.map +1 -1
- package/dist/evaluators/shared.js +190 -2
- package/dist/evaluators/shared.js.map +1 -1
- package/dist/evaluators/software-practices.d.ts +1 -1
- package/dist/evaluators/software-practices.d.ts.map +1 -1
- package/dist/evaluators/software-practices.js +3 -3
- package/dist/evaluators/software-practices.js.map +1 -1
- package/dist/evaluators/testing.d.ts +1 -1
- package/dist/evaluators/testing.d.ts.map +1 -1
- package/dist/evaluators/testing.js +12 -4
- package/dist/evaluators/testing.js.map +1 -1
- package/dist/evaluators/ux.d.ts +1 -1
- package/dist/evaluators/ux.d.ts.map +1 -1
- package/dist/evaluators/ux.js.map +1 -1
- package/dist/evaluators/v2.d.ts +1 -1
- package/dist/evaluators/v2.d.ts.map +1 -1
- package/dist/evaluators/v2.js +13 -35
- package/dist/evaluators/v2.js.map +1 -1
- package/dist/formatters/sarif.d.ts +75 -0
- package/dist/formatters/sarif.d.ts.map +1 -0
- package/dist/formatters/sarif.js +93 -0
- package/dist/formatters/sarif.js.map +1 -0
- package/dist/index.d.ts +4 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +9 -806
- package/dist/index.js.map +1 -1
- package/dist/judges/accessibility.d.ts +1 -1
- package/dist/judges/accessibility.d.ts.map +1 -1
- package/dist/judges/agent-instructions.d.ts +1 -1
- package/dist/judges/agent-instructions.d.ts.map +1 -1
- package/dist/judges/ai-code-safety.d.ts +1 -1
- package/dist/judges/ai-code-safety.d.ts.map +1 -1
- package/dist/judges/api-design.d.ts +1 -1
- package/dist/judges/api-design.d.ts.map +1 -1
- package/dist/judges/authentication.d.ts +1 -1
- package/dist/judges/authentication.d.ts.map +1 -1
- package/dist/judges/backwards-compatibility.d.ts +1 -1
- package/dist/judges/backwards-compatibility.d.ts.map +1 -1
- package/dist/judges/caching.d.ts +1 -1
- package/dist/judges/caching.d.ts.map +1 -1
- package/dist/judges/ci-cd.d.ts +1 -1
- package/dist/judges/ci-cd.d.ts.map +1 -1
- package/dist/judges/cloud-readiness.d.ts +1 -1
- package/dist/judges/cloud-readiness.d.ts.map +1 -1
- package/dist/judges/code-structure.d.ts +1 -1
- package/dist/judges/code-structure.d.ts.map +1 -1
- package/dist/judges/compliance.d.ts +1 -1
- package/dist/judges/compliance.d.ts.map +1 -1
- package/dist/judges/concurrency.d.ts +1 -1
- package/dist/judges/concurrency.d.ts.map +1 -1
- package/dist/judges/configuration-management.d.ts +1 -1
- package/dist/judges/configuration-management.d.ts.map +1 -1
- package/dist/judges/cost-effectiveness.d.ts +1 -1
- package/dist/judges/cost-effectiveness.d.ts.map +1 -1
- package/dist/judges/cybersecurity.d.ts +1 -1
- package/dist/judges/cybersecurity.d.ts.map +1 -1
- package/dist/judges/data-security.d.ts +1 -1
- package/dist/judges/data-security.d.ts.map +1 -1
- package/dist/judges/data-sovereignty.d.ts +1 -1
- package/dist/judges/data-sovereignty.d.ts.map +1 -1
- package/dist/judges/database.d.ts +1 -1
- package/dist/judges/database.d.ts.map +1 -1
- package/dist/judges/dependency-health.d.ts +1 -1
- package/dist/judges/dependency-health.d.ts.map +1 -1
- package/dist/judges/documentation.d.ts +1 -1
- package/dist/judges/documentation.d.ts.map +1 -1
- package/dist/judges/error-handling.d.ts +1 -1
- package/dist/judges/error-handling.d.ts.map +1 -1
- package/dist/judges/ethics-bias.d.ts +1 -1
- package/dist/judges/ethics-bias.d.ts.map +1 -1
- package/dist/judges/framework-safety.d.ts +3 -0
- package/dist/judges/framework-safety.d.ts.map +1 -0
- package/dist/judges/framework-safety.js +25 -0
- package/dist/judges/framework-safety.js.map +1 -0
- package/dist/judges/index.d.ts +1 -1
- package/dist/judges/index.d.ts.map +1 -1
- package/dist/judges/index.js +74 -0
- package/dist/judges/index.js.map +1 -1
- package/dist/judges/internationalization.d.ts +1 -1
- package/dist/judges/internationalization.d.ts.map +1 -1
- package/dist/judges/logging-privacy.d.ts +1 -1
- package/dist/judges/logging-privacy.d.ts.map +1 -1
- package/dist/judges/maintainability.d.ts +1 -1
- package/dist/judges/maintainability.d.ts.map +1 -1
- package/dist/judges/observability.d.ts +1 -1
- package/dist/judges/observability.d.ts.map +1 -1
- package/dist/judges/performance.d.ts +1 -1
- package/dist/judges/performance.d.ts.map +1 -1
- package/dist/judges/portability.d.ts +1 -1
- package/dist/judges/portability.d.ts.map +1 -1
- package/dist/judges/rate-limiting.d.ts +1 -1
- package/dist/judges/rate-limiting.d.ts.map +1 -1
- package/dist/judges/reliability.d.ts +1 -1
- package/dist/judges/reliability.d.ts.map +1 -1
- package/dist/judges/scalability.d.ts +1 -1
- package/dist/judges/scalability.d.ts.map +1 -1
- package/dist/judges/software-practices.d.ts +1 -1
- package/dist/judges/software-practices.d.ts.map +1 -1
- package/dist/judges/testing.d.ts +1 -1
- package/dist/judges/testing.d.ts.map +1 -1
- package/dist/judges/ux.d.ts +1 -1
- package/dist/judges/ux.d.ts.map +1 -1
- package/dist/language-patterns.d.ts +37 -0
- package/dist/language-patterns.d.ts.map +1 -1
- package/dist/language-patterns.js +58 -3
- package/dist/language-patterns.js.map +1 -1
- package/dist/patches/index.d.ts +10 -0
- package/dist/patches/index.d.ts.map +1 -0
- package/dist/patches/index.js +533 -0
- package/dist/patches/index.js.map +1 -0
- package/dist/reports/public-repo-report.d.ts +1 -1
- package/dist/reports/public-repo-report.d.ts.map +1 -1
- package/dist/scoring.d.ts +18 -0
- package/dist/scoring.d.ts.map +1 -0
- package/dist/scoring.js +178 -0
- package/dist/scoring.js.map +1 -0
- package/dist/tools/deep-review.d.ts +4 -0
- package/dist/tools/deep-review.d.ts.map +1 -0
- package/dist/tools/deep-review.js +56 -0
- package/dist/tools/deep-review.js.map +1 -0
- package/dist/tools/prompts.d.ts +8 -0
- package/dist/tools/prompts.d.ts.map +1 -0
- package/dist/tools/prompts.js +66 -0
- package/dist/tools/prompts.js.map +1 -0
- package/dist/tools/register-evaluation.d.ts +7 -0
- package/dist/tools/register-evaluation.d.ts.map +1 -0
- package/dist/tools/register-evaluation.js +303 -0
- package/dist/tools/register-evaluation.js.map +1 -0
- package/dist/tools/register-workflow.d.ts +7 -0
- package/dist/tools/register-workflow.d.ts.map +1 -0
- package/dist/tools/register-workflow.js +395 -0
- package/dist/tools/register-workflow.js.map +1 -0
- package/dist/tools/register.d.ts +7 -0
- package/dist/tools/register.d.ts.map +1 -0
- package/dist/tools/register.js +14 -0
- package/dist/tools/register.js.map +1 -0
- package/dist/tools/schemas.d.ts +26 -0
- package/dist/tools/schemas.d.ts.map +1 -0
- package/dist/tools/schemas.js +42 -0
- package/dist/tools/schemas.js.map +1 -0
- package/dist/types.d.ts +29 -2
- package/dist/types.d.ts.map +1 -1
- package/package.json +42 -3
- package/server.json +51 -3
package/dist/index.js
CHANGED
|
@@ -8,825 +8,28 @@
|
|
|
8
8
|
* to perform thorough contextual analysis beyond what static patterns catch.
|
|
9
9
|
*
|
|
10
10
|
* Tools exposed:
|
|
11
|
+
* - get_judges: List all available judges
|
|
11
12
|
* - evaluate_v2: Context/evidence-aware V2 evaluation
|
|
12
13
|
* - evaluate_app_builder_flow: 3-step workflow (review, translate, tasks)
|
|
13
14
|
* - evaluate_public_repo_report: Clone public repo and generate full report
|
|
14
15
|
* - evaluate_code: Full panel review (all judges)
|
|
15
16
|
* - evaluate_code_single_judge: Review by a specific judge
|
|
16
|
-
* -
|
|
17
|
+
* - evaluate_project: Multi-file project analysis
|
|
18
|
+
* - evaluate_diff: Changed-line-only diff analysis
|
|
19
|
+
* - analyze_dependencies: Supply-chain manifest analysis
|
|
17
20
|
*/
|
|
18
21
|
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
19
22
|
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
20
|
-
import {
|
|
21
|
-
import {
|
|
22
|
-
import { evaluateWithJudge, evaluateWithTribunal, evaluateProject, evaluateDiff, analyzeDependencies, runAppBuilderWorkflow, formatVerdictAsMarkdown, formatEvaluationAsMarkdown, } from "./evaluators/index.js";
|
|
23
|
-
import { evaluateCodeV2, evaluateProjectV2, getSupportedPolicyProfiles, } from "./evaluators/v2.js";
|
|
24
|
-
import { generatePublicRepoReport } from "./reports/public-repo-report.js";
|
|
23
|
+
import { registerTools } from "./tools/register.js";
|
|
24
|
+
import { registerPrompts } from "./tools/prompts.js";
|
|
25
25
|
// ─── Create MCP Server ──────────────────────────────────────────────────────
|
|
26
26
|
const server = new McpServer({
|
|
27
27
|
name: "judges",
|
|
28
28
|
version: "2.0.0",
|
|
29
29
|
});
|
|
30
|
-
// ───
|
|
31
|
-
server
|
|
32
|
-
|
|
33
|
-
const text = judges
|
|
34
|
-
.map((j) => `**${j.name}** (id: \`${j.id}\`)\n Domain: ${j.domain}\n ${j.description}`)
|
|
35
|
-
.join("\n\n");
|
|
36
|
-
return {
|
|
37
|
-
content: [
|
|
38
|
-
{
|
|
39
|
-
type: "text",
|
|
40
|
-
text: `# Judges Panel\n\n${text}`,
|
|
41
|
-
},
|
|
42
|
-
],
|
|
43
|
-
};
|
|
44
|
-
});
|
|
45
|
-
// ─── Tool: evaluate_public_repo_report ──────────────────────────────────────
|
|
46
|
-
server.tool("evaluate_public_repo_report", "Clone a public repository URL, run the full judges panel across source files, and generate a consolidated markdown report.", {
|
|
47
|
-
repoUrl: z
|
|
48
|
-
.string()
|
|
49
|
-
.describe("Public repository URL (HTTP/HTTPS)"),
|
|
50
|
-
branch: z
|
|
51
|
-
.string()
|
|
52
|
-
.optional()
|
|
53
|
-
.describe("Optional branch name (defaults to repository default branch)"),
|
|
54
|
-
outputPath: z
|
|
55
|
-
.string()
|
|
56
|
-
.optional()
|
|
57
|
-
.describe("Optional path to write the markdown report"),
|
|
58
|
-
maxFiles: z
|
|
59
|
-
.number()
|
|
60
|
-
.int()
|
|
61
|
-
.positive()
|
|
62
|
-
.optional()
|
|
63
|
-
.describe("Maximum number of source files to analyze (default: 600)"),
|
|
64
|
-
maxFileBytes: z
|
|
65
|
-
.number()
|
|
66
|
-
.int()
|
|
67
|
-
.positive()
|
|
68
|
-
.optional()
|
|
69
|
-
.describe("Maximum single file size in bytes (default: 300000)"),
|
|
70
|
-
maxFindingsInReport: z
|
|
71
|
-
.number()
|
|
72
|
-
.int()
|
|
73
|
-
.positive()
|
|
74
|
-
.optional()
|
|
75
|
-
.describe("Maximum number of detailed findings in report (default: 150)"),
|
|
76
|
-
credentialMode: z
|
|
77
|
-
.enum(["standard", "strict"])
|
|
78
|
-
.optional()
|
|
79
|
-
.describe("Credential detection mode: standard (default) or strict"),
|
|
80
|
-
includeAstFindings: z
|
|
81
|
-
.boolean()
|
|
82
|
-
.optional()
|
|
83
|
-
.describe("Include AST/code-structure findings (default: true)"),
|
|
84
|
-
minConfidence: z
|
|
85
|
-
.number()
|
|
86
|
-
.min(0)
|
|
87
|
-
.max(1)
|
|
88
|
-
.optional()
|
|
89
|
-
.describe("Minimum finding confidence to include (0-1, default: 0)"),
|
|
90
|
-
enableMustFixGate: z
|
|
91
|
-
.boolean()
|
|
92
|
-
.optional()
|
|
93
|
-
.describe("Enable must-fix gate for high-confidence dangerous findings (default: false)"),
|
|
94
|
-
mustFixMinConfidence: z
|
|
95
|
-
.number()
|
|
96
|
-
.min(0)
|
|
97
|
-
.max(1)
|
|
98
|
-
.optional()
|
|
99
|
-
.describe("Minimum confidence threshold for must-fix gate triggers (0-1, default: 0.85)"),
|
|
100
|
-
mustFixDangerousRulePrefixes: z
|
|
101
|
-
.array(z.string())
|
|
102
|
-
.optional()
|
|
103
|
-
.describe("Optional rule prefixes considered dangerous for must-fix gate"),
|
|
104
|
-
keepClone: z
|
|
105
|
-
.boolean()
|
|
106
|
-
.optional()
|
|
107
|
-
.describe("Keep cloned repository on disk for inspection"),
|
|
108
|
-
}, async ({ repoUrl, branch, outputPath, maxFiles, maxFileBytes, maxFindingsInReport, credentialMode, includeAstFindings, minConfidence, enableMustFixGate, mustFixMinConfidence, mustFixDangerousRulePrefixes, keepClone, }) => {
|
|
109
|
-
try {
|
|
110
|
-
const report = generatePublicRepoReport({
|
|
111
|
-
repoUrl,
|
|
112
|
-
branch,
|
|
113
|
-
outputPath,
|
|
114
|
-
maxFiles,
|
|
115
|
-
maxFileBytes,
|
|
116
|
-
maxFindingsInReport,
|
|
117
|
-
credentialMode,
|
|
118
|
-
includeAstFindings,
|
|
119
|
-
minConfidence,
|
|
120
|
-
mustFixGate: enableMustFixGate
|
|
121
|
-
? {
|
|
122
|
-
enabled: true,
|
|
123
|
-
minConfidence: mustFixMinConfidence,
|
|
124
|
-
dangerousRulePrefixes: mustFixDangerousRulePrefixes,
|
|
125
|
-
}
|
|
126
|
-
: undefined,
|
|
127
|
-
keepClone,
|
|
128
|
-
});
|
|
129
|
-
let summary = `# Public Repo Report Generated\n\n`;
|
|
130
|
-
summary += `- Repository: ${repoUrl}\n`;
|
|
131
|
-
summary += `- Overall verdict: ${report.overallVerdict.toUpperCase()}\n`;
|
|
132
|
-
summary += `- Average score: ${report.averageScore}/100\n`;
|
|
133
|
-
summary += `- Files analyzed: ${report.analyzedFileCount}\n`;
|
|
134
|
-
summary += `- Total findings: ${report.totalFindings}\n`;
|
|
135
|
-
summary += `- Credential mode: ${(credentialMode ?? "standard").toUpperCase()}\n`;
|
|
136
|
-
summary += `- AST findings: ${(includeAstFindings ?? true) ? "INCLUDED" : "EXCLUDED"}\n`;
|
|
137
|
-
summary += `- Min confidence: ${minConfidence ?? 0}\n`;
|
|
138
|
-
if (enableMustFixGate) {
|
|
139
|
-
summary += `- Must-fix gate: ENABLED (min confidence: ${mustFixMinConfidence ?? 0.85})\n`;
|
|
140
|
-
}
|
|
141
|
-
if (report.outputPath) {
|
|
142
|
-
summary += `- Report path: ${report.outputPath}\n`;
|
|
143
|
-
}
|
|
144
|
-
if (keepClone) {
|
|
145
|
-
summary += `- Clone path: ${report.clonePath}\n`;
|
|
146
|
-
}
|
|
147
|
-
return {
|
|
148
|
-
content: [
|
|
149
|
-
{
|
|
150
|
-
type: "text",
|
|
151
|
-
text: `${summary}\n---\n\n${report.markdown}`,
|
|
152
|
-
},
|
|
153
|
-
],
|
|
154
|
-
};
|
|
155
|
-
}
|
|
156
|
-
catch (error) {
|
|
157
|
-
return {
|
|
158
|
-
content: [
|
|
159
|
-
{
|
|
160
|
-
type: "text",
|
|
161
|
-
text: error instanceof Error
|
|
162
|
-
? `Error: ${error.message}`
|
|
163
|
-
: "Error: Failed to generate public repository report",
|
|
164
|
-
},
|
|
165
|
-
],
|
|
166
|
-
isError: true,
|
|
167
|
-
};
|
|
168
|
-
}
|
|
169
|
-
});
|
|
170
|
-
// ─── Tool: evaluate_code ─────────────────────────────────────────────────────
|
|
171
|
-
server.tool("evaluate_v2", "Run V2 context-aware tribunal evaluation with policy profiles, evidence calibration, specialty feedback, confidence scoring, and uncertainty reporting.", {
|
|
172
|
-
code: z
|
|
173
|
-
.string()
|
|
174
|
-
.optional()
|
|
175
|
-
.describe("Source code for single-file mode"),
|
|
176
|
-
language: z
|
|
177
|
-
.string()
|
|
178
|
-
.optional()
|
|
179
|
-
.describe("Language for single-file mode"),
|
|
180
|
-
files: z
|
|
181
|
-
.array(z.object({
|
|
182
|
-
path: z.string().describe("Relative file path"),
|
|
183
|
-
content: z.string().describe("File content"),
|
|
184
|
-
language: z.string().describe("Programming language"),
|
|
185
|
-
}))
|
|
186
|
-
.optional()
|
|
187
|
-
.describe("Project files for multi-file mode"),
|
|
188
|
-
context: z
|
|
189
|
-
.string()
|
|
190
|
-
.optional()
|
|
191
|
-
.describe("Optional high-level context"),
|
|
192
|
-
includeAstFindings: z
|
|
193
|
-
.boolean()
|
|
194
|
-
.optional()
|
|
195
|
-
.describe("Include AST/code-structure findings (default: true)"),
|
|
196
|
-
minConfidence: z
|
|
197
|
-
.number()
|
|
198
|
-
.min(0)
|
|
199
|
-
.max(1)
|
|
200
|
-
.optional()
|
|
201
|
-
.describe("Minimum finding confidence to include (0-1, default: 0)"),
|
|
202
|
-
policyProfile: z
|
|
203
|
-
.enum([
|
|
204
|
-
"default",
|
|
205
|
-
"startup",
|
|
206
|
-
"regulated",
|
|
207
|
-
"healthcare",
|
|
208
|
-
"fintech",
|
|
209
|
-
"public-sector",
|
|
210
|
-
])
|
|
211
|
-
.optional()
|
|
212
|
-
.describe("Policy profile for domain-specific severity calibration"),
|
|
213
|
-
evaluationContext: z
|
|
214
|
-
.object({
|
|
215
|
-
architectureNotes: z.string().optional(),
|
|
216
|
-
constraints: z.array(z.string()).optional(),
|
|
217
|
-
standards: z.array(z.string()).optional(),
|
|
218
|
-
knownRisks: z.array(z.string()).optional(),
|
|
219
|
-
dataBoundaryModel: z.string().optional(),
|
|
220
|
-
})
|
|
221
|
-
.optional()
|
|
222
|
-
.describe("Structured context to improve semantic relevance"),
|
|
223
|
-
evidence: z
|
|
224
|
-
.object({
|
|
225
|
-
testSummary: z.string().optional(),
|
|
226
|
-
coveragePercent: z.number().optional(),
|
|
227
|
-
p95LatencyMs: z.number().optional(),
|
|
228
|
-
errorRatePercent: z.number().optional(),
|
|
229
|
-
dependencyVulnerabilityCount: z.number().optional(),
|
|
230
|
-
deploymentNotes: z.string().optional(),
|
|
231
|
-
})
|
|
232
|
-
.optional()
|
|
233
|
-
.describe("Runtime/operational evidence used for confidence calibration"),
|
|
234
|
-
}, async ({ code, language, files, context, includeAstFindings, minConfidence, policyProfile, evaluationContext, evidence }) => {
|
|
235
|
-
try {
|
|
236
|
-
if (!code && (!files || files.length === 0)) {
|
|
237
|
-
return {
|
|
238
|
-
content: [
|
|
239
|
-
{
|
|
240
|
-
type: "text",
|
|
241
|
-
text: "Error: provide either code+language for single-file mode, or files[] for project mode.",
|
|
242
|
-
},
|
|
243
|
-
],
|
|
244
|
-
isError: true,
|
|
245
|
-
};
|
|
246
|
-
}
|
|
247
|
-
if (code && !language) {
|
|
248
|
-
return {
|
|
249
|
-
content: [
|
|
250
|
-
{
|
|
251
|
-
type: "text",
|
|
252
|
-
text: "Error: language is required when code is provided.",
|
|
253
|
-
},
|
|
254
|
-
],
|
|
255
|
-
isError: true,
|
|
256
|
-
};
|
|
257
|
-
}
|
|
258
|
-
if (code && files && files.length > 0) {
|
|
259
|
-
return {
|
|
260
|
-
content: [
|
|
261
|
-
{
|
|
262
|
-
type: "text",
|
|
263
|
-
text: "Error: provide either code+language OR files[], not both.",
|
|
264
|
-
},
|
|
265
|
-
],
|
|
266
|
-
isError: true,
|
|
267
|
-
};
|
|
268
|
-
}
|
|
269
|
-
const supportedProfiles = getSupportedPolicyProfiles();
|
|
270
|
-
const result = files && files.length > 0
|
|
271
|
-
? evaluateProjectV2({
|
|
272
|
-
files,
|
|
273
|
-
context,
|
|
274
|
-
includeAstFindings,
|
|
275
|
-
minConfidence,
|
|
276
|
-
policyProfile,
|
|
277
|
-
evaluationContext,
|
|
278
|
-
evidence,
|
|
279
|
-
})
|
|
280
|
-
: evaluateCodeV2({
|
|
281
|
-
code: code,
|
|
282
|
-
language: language,
|
|
283
|
-
context,
|
|
284
|
-
includeAstFindings,
|
|
285
|
-
minConfidence,
|
|
286
|
-
policyProfile,
|
|
287
|
-
evaluationContext,
|
|
288
|
-
evidence,
|
|
289
|
-
});
|
|
290
|
-
let md = `# V2 Tribunal Evaluation\n\n`;
|
|
291
|
-
md += `**Policy Profile:** ${result.policyProfile}\n`;
|
|
292
|
-
md += `**Calibrated Verdict:** ${result.calibratedVerdict.toUpperCase()} (${result.calibratedScore}/100)\n`;
|
|
293
|
-
md += `**Base Verdict:** ${result.baseVerdict.overallVerdict.toUpperCase()} (${result.baseVerdict.overallScore}/100)\n`;
|
|
294
|
-
md += `**Confidence:** ${Math.round(result.confidence * 100)}%\n`;
|
|
295
|
-
md += `**Findings:** ${result.findings.length}\n\n`;
|
|
296
|
-
md += `${result.summary}\n\n`;
|
|
297
|
-
md += `## Specialty Feedback\n\n`;
|
|
298
|
-
for (const block of result.specialtyFeedback.slice(0, 10)) {
|
|
299
|
-
md += `### ${block.judgeName} — ${block.domain}\n`;
|
|
300
|
-
md += `Confidence: ${Math.round(block.confidence * 100)}% | Findings: ${block.findings.length}\n\n`;
|
|
301
|
-
for (const finding of block.findings.slice(0, 3)) {
|
|
302
|
-
md += `- [${finding.severity.toUpperCase()}] ${finding.ruleId} ${finding.title} (confidence ${Math.round(finding.confidence * 100)}%)\n`;
|
|
303
|
-
}
|
|
304
|
-
md += `\n`;
|
|
305
|
-
}
|
|
306
|
-
md += `## Uncertainty Report\n\n`;
|
|
307
|
-
md += `**Assumptions**\n`;
|
|
308
|
-
if (result.uncertainty.assumptions.length === 0) {
|
|
309
|
-
md += `- None\n`;
|
|
310
|
-
}
|
|
311
|
-
else {
|
|
312
|
-
for (const item of result.uncertainty.assumptions) {
|
|
313
|
-
md += `- ${item}\n`;
|
|
314
|
-
}
|
|
315
|
-
}
|
|
316
|
-
md += `\n**Missing Evidence**\n`;
|
|
317
|
-
if (result.uncertainty.missingEvidence.length === 0) {
|
|
318
|
-
md += `- None\n`;
|
|
319
|
-
}
|
|
320
|
-
else {
|
|
321
|
-
for (const item of result.uncertainty.missingEvidence) {
|
|
322
|
-
md += `- ${item}\n`;
|
|
323
|
-
}
|
|
324
|
-
}
|
|
325
|
-
md += `\n**Escalation Recommendations**\n`;
|
|
326
|
-
if (result.uncertainty.escalationRecommendations.length === 0) {
|
|
327
|
-
md += `- None\n`;
|
|
328
|
-
}
|
|
329
|
-
else {
|
|
330
|
-
for (const item of result.uncertainty.escalationRecommendations) {
|
|
331
|
-
md += `- ${item}\n`;
|
|
332
|
-
}
|
|
333
|
-
}
|
|
334
|
-
md += `\n## Supported Policy Profiles\n\n`;
|
|
335
|
-
md += supportedProfiles.map((profile) => `- ${profile}`).join("\n");
|
|
336
|
-
md += "\n";
|
|
337
|
-
return {
|
|
338
|
-
content: [{ type: "text", text: md }],
|
|
339
|
-
};
|
|
340
|
-
}
|
|
341
|
-
catch (error) {
|
|
342
|
-
return {
|
|
343
|
-
content: [
|
|
344
|
-
{
|
|
345
|
-
type: "text",
|
|
346
|
-
text: error instanceof Error
|
|
347
|
-
? `Error: ${error.message}`
|
|
348
|
-
: "Error: Failed to run V2 evaluation",
|
|
349
|
-
},
|
|
350
|
-
],
|
|
351
|
-
isError: true,
|
|
352
|
-
};
|
|
353
|
-
}
|
|
354
|
-
});
|
|
355
|
-
// ─── Tool: evaluate_code ─────────────────────────────────────────────────────
|
|
356
|
-
// Registers the "evaluate_app_builder_flow" tool. The handler forwards the
// validated arguments to runAppBuilderWorkflow and renders the result as a
// Markdown report. Anything thrown while running or rendering is converted
// into an `isError` text result rather than propagated.
server.tool("evaluate_app_builder_flow", "Run a 3-step app-builder workflow: tribunal review, plain-language risk translation, and prioritized remediation tasks with AI-fixable P0/P1 items.", {
    code: z.string().optional().describe("Source code to evaluate (use with language for single-file mode)"),
    language: z.string().optional().describe("Programming language for single-file or diff mode"),
    files: z
        .array(z.object({
            path: z.string().describe("Relative file path"),
            content: z.string().describe("File content"),
            language: z.string().describe("Programming language"),
        }))
        .optional()
        .describe("Project files for multi-file mode"),
    changedLines: z.array(z.number()).optional().describe("1-based changed line numbers for diff mode"),
    context: z.string().optional().describe("Optional context about business purpose or constraints"),
    includeAstFindings: z.boolean().optional().describe("Include AST/code-structure findings (default: true)"),
    minConfidence: z.number().min(0).max(1).optional().describe("Minimum finding confidence to include (0-1, default: 0)"),
    maxFindings: z.number().optional().describe("Maximum number of translated top findings to return (default: 10)"),
    maxTasks: z.number().optional().describe("Maximum number of remediation tasks to return (default: 20)"),
}, async ({ code, language, files, changedLines, context, includeAstFindings, minConfidence, maxFindings, maxTasks }) => {
    try {
        const report = runAppBuilderWorkflow({
            code,
            language,
            files,
            changedLines,
            context,
            includeAstFindings,
            minConfidence,
            maxFindings,
            maxTasks,
        });
        // Any decision other than the two listed here is reported as "Ship now".
        const decisionLabels = new Map([
            ["do-not-ship", "Do not ship"],
            ["ship-with-caution", "Ship with caution"],
        ]);
        const decision = decisionLabels.get(report.releaseDecision) ?? "Ship now";
        // Report fragments are collected in order and concatenated once at the end.
        const out = [
            `# App Builder Workflow Report\n\n`,
            `**Mode:** ${report.mode}\n`,
            `**Decision:** ${decision}\n`,
            `**Verdict:** ${report.verdict.toUpperCase()} (${report.score}/100)\n`,
            `**Findings:** Critical ${report.criticalCount} | High ${report.highCount} | Medium ${report.mediumCount}\n\n`,
            `${report.summary}\n\n`,
            `## Plain-Language Summary\n\n`,
        ];
        if (report.plainLanguageFindings.length !== 0) {
            for (const item of report.plainLanguageFindings) {
                out.push(`### [${item.severity.toUpperCase()}] ${item.ruleId}: ${item.title}\n`, `- **What is wrong:** ${item.whatIsWrong}\n`, `- **Why it matters:** ${item.whyItMatters}\n`, `- **Next action:** ${item.nextAction}\n\n`);
            }
        }
        else {
            out.push(`No critical/high/medium findings were identified in this run.\n\n`);
        }
        out.push(`## Prioritized Task List\n\n`);
        if (report.tasks.length !== 0) {
            for (const todo of report.tasks) {
                out.push(`- **${todo.priority}** | Owner: ${todo.owner.toUpperCase()} | Effort: ${todo.effort} | ${todo.ruleId}\n`, ` - Task: ${todo.task}\n`, ` - Done when: ${todo.doneWhen}\n`);
            }
            out.push(`\n`);
        }
        else {
            out.push(`No remediation tasks generated.\n\n`);
        }
        out.push(`## AI-Fixable Now (P0/P1)\n\n`);
        if (report.aiFixableNow.length !== 0) {
            for (const todo of report.aiFixableNow) {
                out.push(`- **${todo.priority} ${todo.ruleId}** ${todo.task}\n`);
            }
        }
        else {
            out.push(`No AI-fixable P0/P1 items detected in this run.\n`);
        }
        return {
            content: [{ type: "text", text: out.join("") }],
        };
    }
    catch (error) {
        // Non-Error throwables get a generic message.
        const message = error instanceof Error
            ? `Error: ${error.message}`
            : "Error: Failed to run app builder workflow";
        return {
            content: [{ type: "text", text: message }],
            isError: true,
        };
    }
});
|
|
474
|
-
// ─── Tool: evaluate_code ─────────────────────────────────────────────────────
|
|
475
|
-
// Registers the "evaluate_code" tool. The handler runs evaluateWithTribunal on
// the submitted code, formats the verdict as Markdown, and appends the
// tribunal deep-review section so the reply carries both parts in one text item.
server.tool("evaluate_code", `Submit code to the full Judges Panel for evaluation. All ${JUDGES.length} judges will independently review the code using both automated pattern detection and deep contextual analysis criteria. Returns a combined verdict with scores, findings, and expert review guidance for thorough evaluation.`, {
    code: z.string().describe("The source code to evaluate. Include the full file content for best results."),
    language: z.string().describe("The programming language of the code (e.g., 'typescript', 'python', 'javascript', 'csharp', 'java')."),
    context: z.string().optional().describe("Optional additional context about the code — e.g., what the code does, which framework it uses, or the deployment target."),
    includeAstFindings: z.boolean().optional().describe("Include AST/code-structure findings (default: true)"),
    minConfidence: z.number().min(0).max(1).optional().describe("Minimum finding confidence to include (0-1, default: 0)"),
}, async ({ code, language, context, includeAstFindings, minConfidence }) => {
    const options = { includeAstFindings, minConfidence };
    const tribunalVerdict = evaluateWithTribunal(code, language, context, options);
    // Pattern-based results first, then the deep-review instructions.
    const text = formatVerdictAsMarkdown(tribunalVerdict) + buildTribunalDeepReviewSection(JUDGES, language, context);
    return {
        content: [{ type: "text", text }],
    };
});
|
|
512
|
-
// ─── Tool: evaluate_code_single_judge ────────────────────────────────────────
|
|
513
|
-
// Judge IDs are collected once; they are embedded in the tool description,
// in the judgeId parameter help text, and in the unknown-judge error message.
const judgeIds = JUDGES.map(({ id }) => id);
// Registers the "evaluate_code_single_judge" tool. The handler looks the judge
// up via getJudge; an unknown ID yields an `isError` result listing the valid
// IDs, otherwise the judge's evaluation plus its deep-review section is returned.
server.tool("evaluate_code_single_judge", `Submit code to a specific judge on the Judges Panel. Use get_judges to see available judges. Available judge IDs: ${judgeIds.join(", ")}`, {
    code: z.string().describe("The source code to evaluate. Include the full file content for best results."),
    language: z.string().describe("The programming language of the code (e.g., 'typescript', 'python', 'javascript', 'csharp', 'java')."),
    judgeId: z.string().describe(`The ID of the judge to use. One of: ${judgeIds.join(", ")}`),
    context: z.string().optional().describe("Optional additional context about the code — e.g., what the code does, which framework it uses, or the deployment target."),
    minConfidence: z.number().min(0).max(1).optional().describe("Minimum finding confidence to include (0-1, default: 0)"),
}, async ({ code, language, judgeId, context, minConfidence }) => {
    const selected = getJudge(judgeId);
    if (selected) {
        const evaluation = evaluateWithJudge(selected, code, language, context, { minConfidence });
        // Pattern-based results first, then the deep-review instructions.
        const text = formatEvaluationAsMarkdown(evaluation) + buildSingleJudgeDeepReviewSection(selected, language, context);
        return {
            content: [{ type: "text", text }],
        };
    }
    // No judge matched the requested ID.
    return {
        content: [
            {
                type: "text",
                text: `Error: Unknown judge ID "${judgeId}". Available judges: ${judgeIds.join(", ")}`,
            },
        ],
        isError: true,
    };
});
|
|
561
|
-
// ─── Tool: evaluate_project ──────────────────────────────────────────────────
|
|
562
|
-
// Registers the "evaluate_project" tool. The handler runs evaluateProject over
// the submitted files and renders a Markdown summary: overall verdict, one
// section per file (at most ten findings listed each), and any architectural
// findings.
server.tool("evaluate_project", `Submit multiple files for project-level analysis. All ${JUDGES.length} judges evaluate each file, plus cross-file architectural analysis detects issues like code duplication, inconsistent error handling, and dependency cycles.`, {
    files: z
        .array(z.object({
            path: z.string().describe("Relative file path"),
            content: z.string().describe("File content"),
            language: z.string().describe("Programming language"),
        }))
        .describe("Array of project files to analyze"),
    context: z.string().optional().describe("Optional context about the project"),
    includeAstFindings: z.boolean().optional().describe("Include AST/code-structure findings (default: true)"),
    minConfidence: z.number().min(0).max(1).optional().describe("Minimum finding confidence to include (0-1, default: 0)"),
}, async ({ files, context, includeAstFindings, minConfidence }) => {
    const analysis = evaluateProject(files, context, { includeAstFindings, minConfidence });
    // Report fragments are collected in order and concatenated once at the end.
    const out = [
        `# Project Analysis\n\n`,
        `**Overall:** ${analysis.overallVerdict.toUpperCase()} (${analysis.overallScore}/100)\n`,
        `**Files:** ${analysis.fileResults.length} | **Critical:** ${analysis.criticalCount} | **High:** ${analysis.highCount}\n\n`,
    ];
    for (const fileResult of analysis.fileResults) {
        out.push(`## ${fileResult.path} (${fileResult.language}) — ${fileResult.score}/100\n`);
        if (fileResult.findings.length === 0) {
            out.push(`No findings.\n\n`);
            continue;
        }
        // Only the first ten findings are spelled out; the rest are counted.
        for (const finding of fileResult.findings.slice(0, 10)) {
            out.push(`- **[${finding.severity.toUpperCase()}]** ${finding.ruleId}: ${finding.title}\n`);
        }
        if (fileResult.findings.length > 10) {
            out.push(`- ... and ${fileResult.findings.length - 10} more\n`);
        }
        out.push(`\n`);
    }
    if (analysis.architecturalFindings.length > 0) {
        out.push(`## Architectural Findings\n\n`);
        for (const finding of analysis.architecturalFindings) {
            out.push(`- **[${finding.severity.toUpperCase()}]** ${finding.ruleId}: ${finding.title}\n ${finding.description}\n`);
        }
    }
    return {
        content: [{ type: "text", text: out.join("") }],
    };
});
|
|
615
|
-
// ─── Tool: evaluate_diff ─────────────────────────────────────────────────────
|
|
616
|
-
// Registers the "evaluate_diff" tool. The handler runs evaluateDiff on the
// post-change file content with the caller-supplied changed line numbers and
// renders the verdict, the analyzed-line count, and each finding as Markdown.
server.tool("evaluate_diff", `Evaluate only the changed lines in a code diff. Runs all ${JUDGES.length} judges on the full file but filters findings to only those affecting the specified changed lines. Ideal for PR reviews and incremental analysis.`, {
    code: z
        .string()
        .describe("The full file content (post-change)"),
    language: z
        .string()
        .describe("The programming language"),
    changedLines: z
        .array(z.number())
        .describe("Array of 1-based line numbers that were changed (added or modified)"),
    context: z
        .string()
        .optional()
        .describe("Optional context about the change"),
    includeAstFindings: z
        .boolean()
        .optional()
        .describe("Include AST/code-structure findings (default: true)"),
    minConfidence: z
        .number()
        .min(0)
        .max(1)
        .optional()
        .describe("Minimum finding confidence to include (0-1, default: 0)"),
}, async ({ code, language, changedLines, context, includeAstFindings, minConfidence }) => {
    const result = evaluateDiff(code, language, changedLines, context, {
        includeAstFindings,
        minConfidence,
    });
    let md = `# Diff Analysis\n\n`;
    md += `**Verdict:** ${result.verdict.toUpperCase()} (${result.score}/100)\n`;
    md += `**Changed lines analyzed:** ${result.linesAnalyzed}\n`;
    md += `**Findings in changed code:** ${result.findings.length}\n\n`;
    if (result.findings.length === 0) {
        md += `No issues found in the changed lines.\n`;
    }
    else {
        for (const f of result.findings) {
            // An empty array joins to "", which `??` would not replace, so a
            // missing AND an empty line list both fall back to "N/A".
            const lines = f.lineNumbers?.length ? f.lineNumbers.join(", ") : "N/A";
            md += `### ${f.ruleId}: ${f.title}\n`;
            md += `**Severity:** ${f.severity} | **Lines:** ${lines}\n\n`;
            md += `${f.description}\n\n`;
            md += `**Recommendation:** ${f.recommendation}\n\n`;
        }
    }
    return {
        content: [{ type: "text", text: md }],
    };
});
|
|
664
|
-
// ─── Tool: analyze_dependencies ──────────────────────────────────────────────
|
|
665
|
-
// Registers the "analyze_dependencies" tool. The handler passes the manifest
// text and its declared type to analyzeDependencies, then renders the verdict,
// each finding, and the dependency list split by the `isDev` flag.
server.tool("analyze_dependencies", "Analyze a dependency manifest file for supply-chain risks, version pinning issues, typosquatting indicators, and dependency hygiene. Supports package.json, requirements.txt, Cargo.toml, go.mod, pom.xml, and .csproj files.", {
    manifest: z.string().describe("The full content of the manifest file"),
    manifestType: z
        .enum(["package.json", "requirements.txt", "Cargo.toml", "go.mod", "pom.xml", "csproj"])
        .describe("The type of manifest file"),
}, async ({ manifest, manifestType }) => {
    const analysis = analyzeDependencies(manifest, manifestType);
    // Renders a dependency group as "name@version, name@version, ...".
    const listDeps = (group) => group.map((dep) => `${dep.name}@${dep.version}`).join(", ");
    // Report fragments are collected in order and concatenated once at the end.
    const out = [
        `# Dependency Analysis (${manifestType})\n\n`,
        `**Verdict:** ${analysis.verdict.toUpperCase()} (${analysis.score}/100)\n`,
        `**Total dependencies:** ${analysis.totalDependencies}\n`,
        `**Findings:** ${analysis.findings.length}\n\n`,
    ];
    if (analysis.findings.length > 0) {
        for (const finding of analysis.findings) {
            out.push(`### ${finding.ruleId}: ${finding.title}\n`, `**Severity:** ${finding.severity}\n\n`, `${finding.description}\n\n`, `**Recommendation:** ${finding.recommendation}\n\n`);
        }
    }
    if (analysis.dependencies.length > 0) {
        out.push(`## Dependencies (${analysis.dependencies.length})\n\n`);
        const production = analysis.dependencies.filter((dep) => !dep.isDev);
        const development = analysis.dependencies.filter((dep) => dep.isDev);
        if (production.length > 0) {
            out.push(`**Production (${production.length}):** ${listDeps(production)}\n\n`);
        }
        if (development.length > 0) {
            out.push(`**Development (${development.length}):** ${listDeps(development)}\n\n`);
        }
    }
    return {
        content: [{ type: "text", text: out.join("") }],
    };
});
|
|
708
|
-
// ─── Deep Review Builders ────────────────────────────────────────────────────
|
|
709
|
-
// These functions append each judge's expert criteria to the tool output,
|
|
710
|
-
// turning pattern-only results into hybrid evaluations. The calling LLM
|
|
711
|
-
// uses these criteria to perform deep contextual analysis that regex can't.
|
|
712
|
-
// ──────────────────────────────────────────────────────────────────────────────
|
|
713
|
-
/**
 * Builds the Markdown section appended after a single judge's pattern-based
 * results, instructing the reader to perform a deeper contextual review.
 *
 * @param judge - Judge object; `name`, `domain`, `systemPrompt` and
 *   `rulePrefix` are interpolated into the text.
 * @param language - Language name interpolated into the instructions.
 * @param context - Optional caller context; the "Context provided" line is
 *   emitted only when this value is truthy.
 * @returns {string} The section text, starting with a `---` separator.
 */
function buildSingleJudgeDeepReviewSection(judge, language, context) {
    // Fragments are listed in output order and concatenated without separators.
    return [
        `\n\n---\n\n`,
        `## 🔍 Deep Contextual Review Required\n\n`,
        `> **The pattern-based findings above are a starting point only.** `,
        `Static pattern matching cannot evaluate semantic correctness, architectural coherence, `,
        `business logic validity, cross-module data flow, or context-dependent implications.\n>\n`,
        `> **You MUST now perform a thorough contextual review** of the ${language} code using the expert criteria below. `,
        `Identify issues that pattern matching cannot catch. Incorporate both the pattern findings above AND your own deep analysis into a final, unified evaluation.\n\n`,
        context ? `**Context provided:** ${context}\n\n` : ``,
        `### ${judge.name} — ${judge.domain}\n\n`,
        `${judge.systemPrompt}\n\n`,
        `### Response Format\n\n`,
        `Provide your deep review as additional findings using the same format:\n`,
        `- Rule ID prefix: \`${judge.rulePrefix}-\`\n`,
        `- Severity levels: critical / high / medium / low / info\n`,
        `- Include: title, description, affected lines, recommendation, and reference\n`,
        `- After all findings, provide an updated score (0-100) and final verdict (PASS/WARNING/FAIL)\n`,
        `- The final verdict must account for BOTH the pattern findings AND your contextual findings\n`,
    ].join("");
}
|
|
735
|
-
/**
 * Builds the Markdown section appended after the tribunal's pattern-based
 * results, instructing the reader to perform a deeper contextual review from
 * the perspective of every judge passed in.
 *
 * @param judges - Judges to list; `length` is interpolated and each entry's
 *   `name`, `domain` and `systemPrompt` are rendered, followed by a `---` rule.
 * @param language - Language name interpolated into the instructions.
 * @param context - Optional caller context; the "Context provided" line is
 *   emitted only when this value is truthy.
 * @returns {string} The section text, starting with a `---` separator.
 */
function buildTribunalDeepReviewSection(judges, language, context) {
    // Fragments are pushed in output order and concatenated without separators.
    const parts = [
        `\n\n---\n\n`,
        `## 🔍 Deep Contextual Review Required\n\n`,
        `> **The pattern-based tribunal findings above are a starting point only.** `,
        `Static pattern matching cannot evaluate semantic correctness, architectural coherence, `,
        `business logic validity, cross-module data flow, or context-dependent implications.\n>\n`,
        `> **You MUST now perform a thorough contextual review** of the ${language} code from the perspective of ALL ${judges.length} judges below. `,
        `Identify issues that pattern matching cannot catch. Incorporate both the pattern findings above AND your own deep analysis into a final, unified tribunal verdict.\n\n`,
    ];
    if (context) {
        parts.push(`**Context provided:** ${context}\n\n`);
    }
    for (const member of judges) {
        parts.push(`### ${member.name} — ${member.domain}\n\n`, `${member.systemPrompt}\n\n`, `---\n\n`);
    }
    parts.push(`### Response Format\n\n`, `For each judge, provide any additional findings your contextual analysis uncovers using:\n`, `- The judge's rule ID prefix\n`, `- Severity levels: critical / high / medium / low / info\n`, `- Include: title, description, affected lines, recommendation, and reference\n\n`, `Then provide an **OVERALL UPDATED TRIBUNAL VERDICT** that accounts for BOTH the pattern findings AND your contextual findings:\n`, `- Per-judge scores (0-100) and verdicts\n`, `- Overall score and verdict (PASS/WARNING/FAIL)\n`, `- Executive summary of the most critical issues\n`);
    return parts.join("");
}
|
|
762
|
-
// ─── Prompts ─────────────────────────────────────────────────────────────────
|
|
763
|
-
// Expose the judges' system prompts as MCP prompts so that an LLM-based
|
|
764
|
-
// client can use them for deeper, AI-powered analysis beyond pattern matching.
|
|
765
|
-
// Registers one prompt per judge, named `judge-<id>`. Each prompt returns a
// single user message asking for an evaluation of the supplied code, with the
// optional context appended and the judge's rule prefix named in the
// formatting instructions.
for (const judge of JUDGES) {
    const promptName = `judge-${judge.id}`;
    const promptSummary = `Use the ${judge.name} persona to perform a deep ${judge.domain} review of code. This prompt provides the judge's expert criteria for LLM-powered analysis that goes beyond pattern matching.`;
    server.prompt(promptName, promptSummary, {
        code: z.string().describe("The source code to evaluate"),
        language: z.string().describe("The programming language"),
        context: z.string().optional().describe("Additional context about the code"),
    }, async ({ code, language, context }) => {
        // Request, then optional context, then the response-format instructions.
        const text = [
            `Please evaluate the following ${language} code:\n\n\`\`\`${language}\n${code}\n\`\`\``,
            context ? `\n\nAdditional context: ${context}` : "",
            `\n\nProvide your evaluation as structured findings with rule IDs (prefix: ${judge.rulePrefix}-), severity levels (critical/high/medium/low/info), descriptions, and actionable recommendations. End with an overall score (0-100) and verdict (pass/warning/fail).`,
        ].join("");
        return {
            messages: [
                {
                    role: "user",
                    content: { type: "text", text },
                },
            ],
        };
    });
}
|
|
794
|
-
// Full tribunal prompt
|
|
795
|
-
// Registers the "full-tribunal" prompt. It returns a single user message that
// lists every judge's name, domain and system prompt, states the per-judge
// output format, and embeds the code plus any optional context.
server.prompt("full-tribunal", `Convene the full Judges Panel — all ${JUDGES.length} judges evaluate the code in their respective domains and produce a combined verdict.`, {
    code: z.string().describe("The source code to evaluate"),
    language: z.string().describe("The programming language"),
    context: z.string().optional().describe("Additional context about the code"),
}, async ({ code, language, context }) => {
    // One Markdown section per judge, separated by horizontal rules.
    const judgeSections = JUDGES
        .map((member) => `### ${member.name} — ${member.domain}\n${member.systemPrompt}`)
        .join("\n\n---\n\n");
    const text = [
        `You are the Judges Panel — a panel of ${JUDGES.length} expert judges who independently evaluate code for quality, security, and operational readiness.\n\n`,
        `Evaluate the following ${language} code from the perspective of ALL ${JUDGES.length} judges below. For each judge, provide:\n`,
        `1. Judge name and domain\n`,
        `2. Verdict (PASS / WARNING / FAIL)\n`,
        `3. Score (0-100)\n`,
        `4. Specific findings with rule IDs, severity, and recommendations\n\n`,
        `Then provide an OVERALL TRIBUNAL VERDICT that synthesizes all judges' input.\n\n`,
        `## The Judges\n\n${judgeSections}\n\n`,
        `## Code to Evaluate\n\n\`\`\`${language}\n${code}\n\`\`\``,
        context ? `\n\n## Additional Context\n${context}` : "",
    ].join("");
    return {
        messages: [
            {
                role: "user",
                content: { type: "text", text },
            },
        ],
    };
});
|
|
30
|
+
// ─── Register Tools & Prompts ────────────────────────────────────────────────
|
|
31
|
+
// Passes the server instance to registerTools (defined outside this view);
// presumably this attaches the tool handlers to it — confirm in its module.
registerTools(server);
|
|
32
|
+
// Passes the server instance to registerPrompts (defined outside this view);
// presumably this attaches the prompt definitions to it — confirm in its module.
registerPrompts(server);
|
|
830
33
|
// ─── Start Server ────────────────────────────────────────────────────────────
|
|
831
34
|
async function main() {
|
|
832
35
|
const transport = new StdioServerTransport();
|