@agentsid/scanner 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116) hide show
  1. package/README.md +205 -0
  2. package/action/action.yml +42 -0
  3. package/action/index.mjs +179 -0
  4. package/docs/state-of-agent-security-2026.md +377 -0
  5. package/examples/security-scan.yml +57 -0
  6. package/package.json +37 -0
  7. package/reports/aashari-mcp-server-atlassian-confluence.json +110 -0
  8. package/reports/aashari-mcp-server-atlassian-jira.json +138 -0
  9. package/reports/aashari-mcp-server-aws-sso.json +122 -0
  10. package/reports/agentdeskai-browser-tools-mcp.json +361 -0
  11. package/reports/ahmetkca-mcp-server-postgres.json +43 -0
  12. package/reports/aiondadotcom-mcp-ssh.json +166 -0
  13. package/reports/apify-actors-mcp-server.json +43 -0
  14. package/reports/azure-mcp.json +43 -0
  15. package/reports/boilerplate-mcp-tool.json +43 -0
  16. package/reports/browserstack-mcp-server.json +43 -0
  17. package/reports/canvas-mcp-server.json +43 -0
  18. package/reports/canvas-mcp-tool.json +43 -0
  19. package/reports/chrome-devtools-mcp.json +300 -0
  20. package/reports/chrome-local-mcp.json +222 -0
  21. package/reports/claude-flow-mcp.json +43 -0
  22. package/reports/cloudflare-mcp-server.json +43 -0
  23. package/reports/code-canvas-server.json +43 -0
  24. package/reports/cognitionai-metabase-mcp-server.json +43 -0
  25. package/reports/composio-mcp.json +43 -0
  26. package/reports/contentful-mcp-server.json +43 -0
  27. package/reports/dbhub.json +43 -0
  28. package/reports/desktop-commander.json +43 -0
  29. package/reports/dynatrace-oss-dynatrace-mcp-server.json +43 -0
  30. package/reports/e2b-mcp-server.json +67 -0
  31. package/reports/eslint-mcp.json +51 -0
  32. package/reports/european-parliament-mcp-server.json +1467 -0
  33. package/reports/exa-mcp-server.json +74 -0
  34. package/reports/executeautomation-playwright-mcp-server.json +418 -0
  35. package/reports/fast-kit-spec-kit.json +43 -0
  36. package/reports/felores-airtable-mcp-server.json +43 -0
  37. package/reports/figma-mcp.json +103 -0
  38. package/reports/forestadmin-mcp-server.json +43 -0
  39. package/reports/fullrun-mcp.json +43 -0
  40. package/reports/gemini-mcp-tool.json +43 -0
  41. package/reports/gitlab-mcp-agent-server.json +186 -0
  42. package/reports/grackle-ai-mcp.json +43 -0
  43. package/reports/heroku-mcp-server.json +333 -0
  44. package/reports/hisma-server-puppeteer.json +93 -0
  45. package/reports/hubspot-mcp-server.json +43 -0
  46. package/reports/hyper-mcp-shell.json +59 -0
  47. package/reports/iflow-mcp-server-github.json +327 -0
  48. package/reports/jpisnice-shadcn-ui-mcp-server.json +149 -0
  49. package/reports/jsonresume-mcp.json +43 -0
  50. package/reports/mapbox-mcp-server.json +43 -0
  51. package/reports/mcp-framework.json +43 -0
  52. package/reports/mcp-from-openapi.json +43 -0
  53. package/reports/mcp-handler.json +43 -0
  54. package/reports/mcp-proxy.json +43 -0
  55. package/reports/mcp-server-docker.json +59 -0
  56. package/reports/mcp-server-github-gist.json +108 -0
  57. package/reports/mcp-server-google-calendar.json +43 -0
  58. package/reports/mcp-server-jira-cloud.json +43 -0
  59. package/reports/mcp-server-kubernetes.json +43 -0
  60. package/reports/mcp-server-slack.json +411 -0
  61. package/reports/mcp-server-sqlite-npx.json +43 -0
  62. package/reports/mcp-server.json +43 -0
  63. package/reports/mcp-starter.json +59 -0
  64. package/reports/mcp-tool-lint.json +43 -0
  65. package/reports/mcporter.json +43 -0
  66. package/reports/mcptoolshop-mcp-tool-registry.json +43 -0
  67. package/reports/microsoft-devbox-mcp.json +43 -0
  68. package/reports/mobilenext-mobile-mcp.json +214 -0
  69. package/reports/modelcontextprotocol-server-brave-search.json +43 -0
  70. package/reports/modelcontextprotocol-server-everything.json +165 -0
  71. package/reports/modelcontextprotocol-server-fetch.json +43 -0
  72. package/reports/modelcontextprotocol-server-filesystem.json +259 -0
  73. package/reports/modelcontextprotocol-server-github.json +391 -0
  74. package/reports/modelcontextprotocol-server-memory.json +117 -0
  75. package/reports/modelcontextprotocol-server-postgres.json +43 -0
  76. package/reports/modelcontextprotocol-server-puppeteer.json +101 -0
  77. package/reports/modelcontextprotocol-server-sequential-thinking.json +67 -0
  78. package/reports/mongodb-mcp-server.json +43 -0
  79. package/reports/mseep-linear-mcp-server.json +43 -0
  80. package/reports/mseep-mcp-server-sqlite-npx.json +43 -0
  81. package/reports/n8n-mcp.json +123 -0
  82. package/reports/notepost-mcp.json +43 -0
  83. package/reports/notionhq-notion-mcp-server.json +220 -0
  84. package/reports/nx-mcp.json +59 -0
  85. package/reports/obsidian-mcp-server.json +43 -0
  86. package/reports/opengraph-io-mcp.json +130 -0
  87. package/reports/payloadcms-plugin-mcp.json +43 -0
  88. package/reports/peac-mappings-mcp.json +43 -0
  89. package/reports/playwright-mcp.json +236 -0
  90. package/reports/puppeteer-mcp-server.json +43 -0
  91. package/reports/railway-mcp-server.json +194 -0
  92. package/reports/razorpay-blade-mcp.json +182 -0
  93. package/reports/rekog-mcp-nest.json +43 -0
  94. package/reports/remotion-mcp.json +51 -0
  95. package/reports/rollbar-mcp-server.json +43 -0
  96. package/reports/sap-ux-fiori-mcp-server.json +80 -0
  97. package/reports/sentry-mcp-server.json +43 -0
  98. package/reports/server-filesystem.json +43 -0
  99. package/reports/server-memory.json +43 -0
  100. package/reports/shortcut-mcp.json +43 -0
  101. package/reports/supabase-mcp-server-supabase.json +43 -0
  102. package/reports/tavily-mcp.json +79 -0
  103. package/reports/thelord-mcp-server-docker-npx.json +43 -0
  104. package/reports/tyk-technologies-api-to-mcp.json +43 -0
  105. package/reports/tyk-technologies-tyk-dashboard-mcp.json +43 -0
  106. package/reports/ui5-mcp-server.json +157 -0
  107. package/reports/upstash-context7-mcp.json +82 -0
  108. package/reports/vantasdk-vanta-mcp-server.json +43 -0
  109. package/reports/winor30-mcp-server-datadog.json +43 -0
  110. package/reports/wonderwhy-er-desktop-commander.json +43 -0
  111. package/reports/xzxzzx-bilibili-mcp.json +58 -0
  112. package/src/grader.mjs +66 -0
  113. package/src/index.mjs +108 -0
  114. package/src/reporter.mjs +158 -0
  115. package/src/rules.mjs +363 -0
  116. package/src/scanner.mjs +208 -0
@@ -0,0 +1,158 @@
1
+ /**
2
+ * Report generator — formats scan results into human-readable output.
3
+ * Supports terminal (ANSI colors) and JSON output.
4
+ */
5
+
6
/**
 * ANSI SGR escape sequences used to style terminal output.
 * Frozen so the shared palette cannot be mutated by accident.
 */
const COLORS = Object.freeze({
  reset: "\x1b[0m",
  bold: "\x1b[1m",
  dim: "\x1b[2m",
  red: "\x1b[31m",
  green: "\x1b[32m",
  yellow: "\x1b[33m",
  blue: "\x1b[34m",
  magenta: "\x1b[35m",
  cyan: "\x1b[36m",
  white: "\x1b[37m",
  bgRed: "\x1b[41m",
  bgGreen: "\x1b[42m",
  bgYellow: "\x1b[43m",
});

/** Escape-sequence prefix applied to each finding severity label. */
const SEVERITY_COLORS = Object.freeze({
  CRITICAL: COLORS.bgRed + COLORS.white,
  HIGH: COLORS.red,
  MEDIUM: COLORS.yellow,
  LOW: COLORS.cyan,
  INFO: COLORS.dim,
});

/** Escape-sequence prefix applied to each letter grade. */
const GRADE_COLORS = Object.freeze({
  A: COLORS.green,
  B: COLORS.green,
  C: COLORS.yellow,
  D: COLORS.red,
  F: COLORS.bgRed + COLORS.white,
});
37
+
38
/**
 * Wrap a severity label in its ANSI color followed by a reset.
 *
 * @param {string} sev - Severity label, e.g. "HIGH".
 * @returns {string} The label, colored when the severity is known and
 *   uncolored otherwise; a reset sequence is always appended.
 */
function severityBadge(sev) {
  // Only accept string entries so inherited keys (e.g. "toString") cannot
  // leak a function body into the output.
  const entry = SEVERITY_COLORS[sev];
  const color = typeof entry === "string" ? entry : "";
  return `${color}${sev}${COLORS.reset}`;
}
41
+
42
/**
 * Format a letter grade and numeric score as a bold, colored badge.
 *
 * @param {string} letter - Letter grade, e.g. "A".
 * @param {number} score - Numeric score, rendered as `score/100`.
 * @returns {string} `LETTER (score/100)` wrapped in color, bold and reset.
 */
function gradeBadge(letter, score) {
  // Only accept string entries so inherited keys (e.g. "constructor") cannot
  // leak a function body into the output.
  const entry = GRADE_COLORS[letter];
  const color = typeof entry === "string" ? entry : "";
  return `${color}${COLORS.bold}${letter} (${score}/100)${COLORS.reset}`;
}
45
+
46
/**
 * Render a scan result as an ANSI-colored, human-readable terminal report.
 *
 * @param {{name?: string, version?: string}} serverInfo - Scanned server
 *   identity; missing values render as "unknown" / "?".
 * @param {Array} tools - Tools exposed by the server (only the count is shown).
 * @param {Array<object>} findings - Findings; reads `category`, `severity`,
 *   `detail` and the optional `tool` / `evidence` fields.
 * @param {object} gradeResult - Grade summary; reads `letter`, `score`,
 *   `categoryGrades`, `totalFindings` and `counts` (per-severity totals).
 *   Top-level `critical` / `high` are used only when `counts` lacks them.
 * @param {Object<string, number>} [riskProfile] - Tool count per risk class;
 *   the section is omitted when this is falsy.
 * @returns {string} The report, with lines joined by "\n".
 */
export function formatTerminalReport(serverInfo, tools, findings, gradeResult, riskProfile) {
  const { bold, dim, reset, cyan, yellow, green, red } = COLORS;
  const BOX_WIDTH = 62;
  const EVIDENCE_LIMIT = 100;
  const RISK_BAR_MAX = 30;
  const SEVERE_RISKS = ["destructive", "execution", "privilege", "financial"];

  const info = serverInfo ?? {};
  const toolCount = Array.isArray(tools) ? tools.length : 0;
  const allFindings = Array.isArray(findings) ? findings : [];

  // Read every severity total from the same place so the summary lines, the
  // recommendations and the JSON report (which emits `counts`) cannot disagree.
  const counts = gradeResult.counts ?? {};
  const critical = counts.CRITICAL ?? gradeResult.critical ?? 0;
  const high = counts.HIGH ?? gradeResult.high ?? 0;
  const medium = counts.MEDIUM ?? 0;
  const low = counts.LOW ?? 0;

  const lines = [];

  // Header — the title row is padded so it lines up with the box borders.
  const title = "  AgentsID Security Scanner — Report";
  lines.push("");
  lines.push(`${bold}╔${"═".repeat(BOX_WIDTH)}╗${reset}`);
  lines.push(`${bold}║${title.padEnd(BOX_WIDTH)}║${reset}`);
  lines.push(`${bold}╚${"═".repeat(BOX_WIDTH)}╝${reset}`);
  lines.push("");

  // Server info
  lines.push(`${bold}Server:${reset} ${info.name || "unknown"} v${info.version || "?"}`);
  lines.push(`${bold}Tools:${reset} ${toolCount}`);
  lines.push(`${bold}Scanned:${reset} ${new Date().toISOString()}`);
  lines.push("");

  // Overall grade
  lines.push(`${bold}Overall Grade: ${gradeBadge(gradeResult.letter, gradeResult.score)}${reset}`);
  lines.push("");

  // Category grades
  lines.push(`${bold}Category Grades:${reset}`);
  for (const [cat, letter] of Object.entries(gradeResult.categoryGrades ?? {})) {
    const color = GRADE_COLORS[letter] || "";
    lines.push(` ${cat.padEnd(15)} ${color}${letter}${reset}`);
  }
  lines.push("");

  // Risk profile: one bar per non-zero risk class, largest first.
  if (riskProfile) {
    lines.push(`${bold}Tool Risk Profile:${reset}`);
    const riskEntries = Object.entries(riskProfile)
      .filter(([, count]) => count > 0)
      .sort((a, b) => b[1] - a[1]);
    for (const [risk, count] of riskEntries) {
      const bar = "█".repeat(Math.min(count, RISK_BAR_MAX));
      const riskColor = SEVERE_RISKS.includes(risk) ? red : yellow;
      lines.push(` ${risk.padEnd(20)} ${riskColor}${bar}${reset} ${count}`);
    }
    if (riskEntries.length === 0) {
      lines.push(` ${dim}(no high-risk tools detected)${reset}`);
    }
    lines.push("");
  }

  // Finding summary
  lines.push(`${bold}Findings: ${gradeResult.totalFindings ?? allFindings.length}${reset}`);
  if (critical > 0) {
    lines.push(` ${SEVERITY_COLORS.CRITICAL} CRITICAL: ${critical} ${reset}`);
  }
  if (high > 0) {
    lines.push(` ${SEVERITY_COLORS.HIGH}HIGH: ${high}${reset}`);
  }
  if (medium > 0) {
    lines.push(` ${SEVERITY_COLORS.MEDIUM}MEDIUM: ${medium}${reset}`);
  }
  if (low > 0) {
    lines.push(` ${SEVERITY_COLORS.LOW}LOW: ${low}${reset}`);
  }
  lines.push("");

  // Detailed findings, grouped by category and listed in category-name order.
  if (allFindings.length > 0) {
    lines.push(`${bold}Detailed Findings:${reset}`);
    lines.push(`${"─".repeat(62)}`);

    // A Map keeps category names such as "constructor" from colliding with
    // properties inherited by a plain object.
    const grouped = new Map();
    for (const finding of allFindings) {
      const cat = String(finding.category || "other");
      if (!grouped.has(cat)) {
        grouped.set(cat, []);
      }
      grouped.get(cat).push(finding);
    }

    const categories = [...grouped.keys()].sort((a, b) => (a < b ? -1 : a > b ? 1 : 0));
    for (const cat of categories) {
      lines.push(`\n${bold}${cyan}[${cat.toUpperCase()}]${reset}`);
      for (const finding of grouped.get(cat)) {
        lines.push(` ${severityBadge(finding.severity)} ${finding.detail}`);
        if (finding.tool && finding.tool !== "*") {
          lines.push(` ${dim}Tool: ${finding.tool}${reset}`);
        }
        if (finding.evidence) {
          const evidence = String(finding.evidence);
          // Only mark the text as elided when something was actually cut off.
          const shown = evidence.length > EVIDENCE_LIMIT
            ? `${evidence.substring(0, EVIDENCE_LIMIT)}...`
            : evidence;
          lines.push(` ${dim}Evidence: ${shown}${reset}`);
        }
      }
    }
    lines.push("");
  }

  // Recommendations
  lines.push(`${bold}Recommendations:${reset}`);
  if (critical > 0 || high > 0) {
    lines.push(` ${red}1. Address CRITICAL and HIGH findings immediately${reset}`);
    lines.push(` ${yellow}2. Add per-tool permission controls (agentsid.dev/docs)${reset}`);
    lines.push(` 3. Implement input validation on all tool parameters`);
    lines.push(` 4. Add authentication to server endpoints`);
  } else if (medium > 0) {
    lines.push(` ${green}Good security posture.${reset} Address MEDIUM findings for improvement:`);
    lines.push(` 1. Tighten input validation schemas`);
    lines.push(` 2. Consider per-agent tool scoping`);
  } else {
    lines.push(` ${green}Excellent security posture.${reset} No significant issues found.`);
  }

  lines.push("");
  lines.push(`${dim}Scan powered by AgentsID — agentsid.dev/scanner${reset}`);
  lines.push(`${dim}Protect this server with per-agent permissions: npx @agentsid/guard${reset}`);
  lines.push("");

  return lines.join("\n");
}
142
+
143
/**
 * Serialize a scan result as pretty-printed (2-space) JSON.
 *
 * @param {object} serverInfo - Emitted verbatim under `server`.
 * @param {Array} tools - Only its length is emitted, as `toolCount`.
 * @param {Array<object>} findings - Emitted verbatim under `findings`.
 * @param {object} gradeResult - Reads `letter`, `score`, `categoryGrades`
 *   (under `grade`) and `counts` (under `summary`).
 * @param {object} [riskProfile] - Emitted verbatim; JSON.stringify omits the
 *   key when this is undefined.
 * @returns {string} JSON document stamped with the current time in `scannedAt`.
 */
export function formatJsonReport(serverInfo, tools, findings, gradeResult, riskProfile) {
  const report = {
    scanner: { name: "agentsid-scanner", version: "0.1.0" },
    scannedAt: new Date().toISOString(),
    server: serverInfo,
    toolCount: tools.length,
    grade: {
      overall: gradeResult.letter,
      score: gradeResult.score,
      categories: gradeResult.categoryGrades,
    },
    summary: gradeResult.counts,
    riskProfile,
    findings,
  };
  return JSON.stringify(report, null, 2);
}
package/src/rules.mjs ADDED
@@ -0,0 +1,363 @@
1
+ /**
2
+ * Security scanning rules engine.
3
+ *
4
+ * Each rule is a function that receives scan context and returns findings.
5
+ * Rules are organized by category: auth, permissions, injection, validation,
6
+ * secrets, output, hallucination. (No rule in this file currently emits transport or configuration findings.)
7
+ *
8
+ * Severity levels: CRITICAL, HIGH, MEDIUM, LOW, INFO
9
+ */
10
+
11
// ─── Tool Description Injection Patterns ───
// These patterns in tool descriptions can manipulate LLM behavior.
// Each entry: `pattern` is tested against the raw description, `name` is the
// rule identifier reported in findings, `severity` is the finding severity.

const INJECTION_PATTERNS = [
  { pattern: /ignore\s+(previous|prior|above|all)\s+(instructions?|prompts?|rules?)/i, name: "instruction_override", severity: "CRITICAL" },
  { pattern: /disregard\s+(previous|prior|above|all)/i, name: "instruction_override", severity: "CRITICAL" },
  { pattern: /you\s+are\s+now\s+a/i, name: "role_hijack", severity: "CRITICAL" },
  { pattern: /forget\s+(everything|all|previous)/i, name: "memory_wipe", severity: "CRITICAL" },
  { pattern: /instead\s+of\s+\w+,?\s*(call|use|execute|run)/i, name: "tool_redirect", severity: "HIGH" },
  { pattern: /also\s+(call|execute|run|invoke)\s+/i, name: "hidden_action", severity: "HIGH" },
  { pattern: /do\s+not\s+(tell|show|reveal|mention)\s+(the\s+)?user/i, name: "concealment", severity: "HIGH" },
  { pattern: /secretly|covertly|without\s+(the\s+)?user\s+knowing/i, name: "stealth_action", severity: "HIGH" },
  // The verbs are grouped so that ALL of them require a security-related
  // target. Ungrouped, `a|b|c\s+(x)` matches a bare "override", "bypass" or
  // "skip" anywhere in the text (e.g. "results to skip"). `\w*` admits
  // inflections ("bypasses", "skipping"); an optional the/all/any may precede
  // the target.
  { pattern: /\b(override|bypass|skip|ignore)\w*\s+(?:(?:the|all|any)\s+)?(auth|security|permission|validation)/i, name: "security_bypass", severity: "CRITICAL" },
  { pattern: /base64|atob|btoa|eval\s*\(/i, name: "encoded_payload", severity: "MEDIUM" },
  { pattern: /\{%|<%|<\?|{{.*}}/i, name: "template_injection", severity: "MEDIUM" },
  // Matches literal backslash escape sequences written out in the text.
  { pattern: /\\u[0-9a-f]{4}|\\x[0-9a-f]{2}/i, name: "unicode_obfuscation", severity: "MEDIUM" },
];
28
+
29
// ─── Dangerous Tool Name Patterns ───
// Tools with these names are high-risk without permission controls.
// Each entry: `pattern` is tested against the tool name, `risk` is the risk
// class it is counted under, `severity` is the finding severity.

const DANGEROUS_TOOL_PATTERNS = [
  // Prefix and suffix forms live in ONE rule so a name that matches both
  // (e.g. "delete" or "remove_and_purge") is classified once, not twice.
  { pattern: /^(delete|remove|drop|truncate|purge|destroy|wipe|erase)|(delete|remove|drop|truncate|purge|destroy|wipe|erase)$/i, risk: "destructive", severity: "HIGH" },
  { pattern: /^(execute|exec|run|eval|shell|cmd|command|bash|terminal)/i, risk: "execution", severity: "CRITICAL" },
  { pattern: /^(deploy|publish|release|push|ship)/i, risk: "deployment", severity: "HIGH" },
  { pattern: /^(admin|root|sudo|superuser|elevate)/i, risk: "privilege", severity: "CRITICAL" },
  { pattern: /(password|secret|key|token|credential|auth)/i, risk: "credential_access", severity: "HIGH" },
  { pattern: /^(send|email|message|notify|post|tweet|slack)/i, risk: "external_action", severity: "MEDIUM" },
  { pattern: /(payment|charge|bill|invoice|transfer|withdraw)/i, risk: "financial", severity: "CRITICAL" },
  { pattern: /^(create|insert|update|modify|set|write)/i, risk: "mutation", severity: "MEDIUM" },
  { pattern: /^(read|get|list|show|describe|fetch|query|search|find)/i, risk: "read_only", severity: "INFO" },
];
44
+
45
// ─── Input Validation Checks ───
// Schema patterns that indicate missing validation.
// Each entry: `check(schema)` returns true when the weakness is present,
// `name` is the rule identifier, `desc` is the human-readable explanation.

const SCHEMA_WEAKNESS_PATTERNS = [
  {
    check: (schema) => !schema || Object.keys(schema).length === 0,
    name: "no_schema",
    severity: "HIGH",
    desc: "Tool accepts arbitrary input with no schema validation",
  },
  {
    check: (schema) => schema?.type === "object" && (!schema.properties || Object.keys(schema.properties).length === 0),
    name: "empty_schema",
    severity: "MEDIUM",
    desc: "Schema defined but no properties specified",
  },
  {
    check: (schema) => schema?.type === "object" && !schema.required?.length,
    name: "no_required_fields",
    severity: "LOW",
    desc: "No required fields — all input is optional",
  },
  {
    check: (schema) => {
      const props = schema?.properties;
      if (props === null || typeof props !== "object") {
        return false;
      }
      // Property schemas come from the scanned server and may be null or a
      // boolean; `p?.type` keeps such entries from throwing. `maxLength: 0`
      // is a real bound, so only a missing (null/undefined) limit counts.
      return Object.values(props).some(
        (p) => p?.type === "string" && p.maxLength == null && !p.pattern && !p.enum,
      );
    },
    name: "unbounded_strings",
    severity: "MEDIUM",
    desc: "String parameters without length limits or pattern validation",
  },
];
57
+
58
+ // ─── Scan Rules ───
59
+
60
/**
 * Scan tool descriptions for prompt-injection patterns and suspicious length.
 *
 * @param {Array<{name?: string, description?: string}>} tools - Tools to scan.
 * @returns {Array<object>} "injection" findings: one per matching entry of
 *   INJECTION_PATTERNS (with up to 200 chars of the description as
 *   `evidence`), plus one per description longer than 1000 characters.
 */
export function scanToolDescriptions(tools) {
  const MAX_DESCRIPTION_LENGTH = 1000;
  const EVIDENCE_LENGTH = 200;
  const findings = [];

  for (const tool of tools) {
    // Descriptions come from the scanned server; anything that is not a
    // string is treated as absent rather than crashing the scan.
    const description = typeof tool.description === "string" ? tool.description : "";
    const name = tool.name || "";

    // Check for injection patterns in description
    for (const rule of INJECTION_PATTERNS) {
      if (!rule.pattern.test(description)) {
        continue;
      }
      findings.push({
        category: "injection",
        severity: rule.severity,
        tool: name,
        rule: rule.name,
        detail: `Tool description contains potential prompt injection pattern: "${rule.name}"`,
        evidence: description.substring(0, EVIDENCE_LENGTH),
      });
    }

    // Check for excessively long descriptions (injection hiding)
    if (description.length > MAX_DESCRIPTION_LENGTH) {
      findings.push({
        category: "injection",
        severity: "MEDIUM",
        tool: name,
        rule: "excessive_description_length",
        detail: `Tool description is ${description.length} chars — unusually long, may contain hidden instructions`,
      });
    }
  }

  return findings;
}
95
+
96
/**
 * Classify tools by name against DANGEROUS_TOOL_PATTERNS.
 *
 * @param {Array<{name?: string}>} tools - Tools to classify.
 * @returns {{findings: Array<object>, riskProfile: Object<string, number>}}
 *   `riskProfile` counts tools per risk class (classes not pre-seeded are
 *   added on first match); `findings` holds one "permissions" finding per
 *   tool and risk class, except for INFO-severity rules which are only counted.
 */
export function scanToolNames(tools) {
  const findings = [];
  const riskProfile = { read_only: 0, mutation: 0, destructive: 0, execution: 0, privilege: 0, financial: 0 };

  for (const tool of tools) {
    const name = tool.name || "";
    // Several rules can share a risk class (e.g. prefix and suffix forms of
    // the same verb list); count and report each class once per tool.
    const seenRisks = new Set();

    for (const rule of DANGEROUS_TOOL_PATTERNS) {
      if (seenRisks.has(rule.risk) || !rule.pattern.test(name)) {
        continue;
      }
      seenRisks.add(rule.risk);
      riskProfile[rule.risk] = (riskProfile[rule.risk] || 0) + 1;

      if (rule.severity !== "INFO") {
        findings.push({
          category: "permissions",
          severity: rule.severity,
          tool: name,
          rule: `dangerous_tool_${rule.risk}`,
          detail: `Tool "${name}" classified as ${rule.risk} — requires permission controls`,
        });
      }
    }
  }

  return { findings, riskProfile };
}
122
+
123
/**
 * Check each tool's `inputSchema` against SCHEMA_WEAKNESS_PATTERNS.
 *
 * @param {Array<{name?: string, inputSchema?: object}>} tools - Tools to check.
 * @returns {Array<object>} One "validation" finding per tool and matching
 *   rule, in tool order and then rule order.
 */
export function scanInputSchemas(tools) {
  const findings = [];

  for (const tool of tools) {
    const name = tool.name || "";

    for (const rule of SCHEMA_WEAKNESS_PATTERNS) {
      if (!rule.check(tool.inputSchema)) {
        continue;
      }
      findings.push({
        category: "validation",
        severity: rule.severity,
        tool: name,
        rule: rule.name,
        detail: `${rule.desc} in tool "${name}"`,
      });
    }
  }

  return findings;
}
145
+
146
/**
 * Flag servers that expose no auth-related tools or a large tool surface.
 *
 * @param {Array<{name?: string}>} tools - Tools exposed by the server.
 * @param {object} [serverInfo] - Accepted for signature compatibility; not
 *   read by this function.
 * @returns {Array<object>} Up to three server-wide findings (tool "*"):
 *   `no_auth_tools`, `large_tool_surface` (more than 20 tools) and
 *   `excessive_tool_surface` (more than 50 tools). Above 50 tools both
 *   surface findings are reported.
 */
export function scanAuthIndicators(tools, serverInfo) {
  const AUTH_NAME = /auth|login|token|credential|session/i;
  const LARGE_SURFACE = 20;
  const EXCESSIVE_SURFACE = 50;
  const findings = [];

  // Check whether any tool NAME suggests auth awareness. Nameless tools are
  // tested as an empty string so a missing name can never match.
  const hasAuthTool = tools.some((t) => AUTH_NAME.test(t.name || ""));

  if (!hasAuthTool) {
    findings.push({
      category: "auth",
      severity: "HIGH",
      tool: "*",
      rule: "no_auth_tools",
      detail: "Server exposes no authentication-related tools — may accept unauthenticated connections",
    });
  }

  // Check tool count — more tools = higher attack surface
  if (tools.length > LARGE_SURFACE) {
    findings.push({
      category: "permissions",
      severity: "MEDIUM",
      tool: "*",
      rule: "large_tool_surface",
      detail: `Server exposes ${tools.length} tools — large attack surface without per-tool permission controls`,
    });
  }

  if (tools.length > EXCESSIVE_SURFACE) {
    findings.push({
      category: "permissions",
      severity: "HIGH",
      tool: "*",
      rule: "excessive_tool_surface",
      detail: `Server exposes ${tools.length} tools — excessive attack surface, strongly recommends per-agent tool scoping`,
    });
  }

  return findings;
}
187
+
188
/**
 * Flag tools whose name or description suggests sensitive data in output.
 *
 * Both checks are substring heuristics: a name only has to CONTAIN one of
 * "file", "read", "cat" or "content" to be treated as a file-reading tool.
 *
 * @param {Array<{name?: string, description?: string}>} tools - Tools to scan.
 * @returns {Array<object>} Per tool, at most one "secrets" finding
 *   (`potential_secret_exposure`) and one "output" finding
 *   (`unfiltered_file_output`).
 */
export function scanOutputSafety(tools) {
  const findings = [];

  // Check for tools that might leak data
  for (const tool of tools) {
    // Non-string descriptions are treated as absent rather than crashing.
    const desc = typeof tool.description === "string" ? tool.description.toLowerCase() : "";
    const name = tool.name || "";

    const mentionsSecret = /secret|password|credential|key|token/i.test(desc);
    const mentionsOutput = /return|output|display|show|get/i.test(desc);
    if (mentionsSecret && mentionsOutput) {
      findings.push({
        category: "secrets",
        severity: "HIGH",
        tool: name,
        rule: "potential_secret_exposure",
        detail: `Tool "${name}" may expose secrets in its output based on description`,
      });
    }

    const looksLikeFileReader = /file|read|cat|content/i.test(name);
    const mentionsFiltering = /sanitiz|filter|redact/i.test(desc);
    if (looksLikeFileReader && !mentionsFiltering) {
      findings.push({
        category: "output",
        severity: "LOW",
        tool: name,
        rule: "unfiltered_file_output",
        detail: `File reading tool "${name}" may output sensitive file contents without filtering`,
      });
    }
  }

  return findings;
}
219
+
220
+ // ═══════════════════════════════════════════════════════════════
221
+ // HALLUCINATION-BASED VULNERABILITY SCANNING
222
+ //
223
+ // These detect cases where vague, ambiguous, or misleading tool
224
+ // definitions cause LLMs to over-privilege, misroute, or make
225
+ // unpredictable tool choices.
226
+ //
227
+ // Nobody else scans for these.
228
+ // ═══════════════════════════════════════════════════════════════
229
+
230
/** Action words that do not say which operation a tool performs. */
const VAGUE_ACTION_WORDS = Object.freeze([
  "manage", "handle", "process", "work with", "deal with",
  "interact", "operate on", "perform", "do", "run",
  "access", "use", "control", "modify", "change",
  "update", "affect", "manipulate", "transform",
]);

/** Action words that name one concrete operation. */
const SPECIFIC_ACTION_WORDS = Object.freeze([
  "read", "write", "delete", "create", "list", "get", "set",
  "search", "find", "count", "validate", "check", "verify",
  "send", "receive", "upload", "download", "export", "import",
]);

/** Resource nouns treated as sensitive when a description mentions them. */
const SENSITIVE_RESOURCES = Object.freeze([
  "file", "database", "user", "account", "payment", "credential",
  "config", "setting", "permission", "role", "secret", "key",
  "server", "cluster", "deployment", "container", "network",
  "email", "message", "notification", "webhook",
]);
249
+
250
+ export function scanHallucinationRisks(tools) {
251
+ const findings = [];
252
+
253
+ for (const tool of tools) {
254
+ const desc = (tool.description || "");
255
+ const descLower = desc.toLowerCase();
256
+ const name = (tool.name || "");
257
+
258
+ // 1. Vague description causing over-privileging
259
+ const vagueMatches = VAGUE_ACTION_WORDS.filter(w => descLower.includes(w));
260
+ const specificMatches = SPECIFIC_ACTION_WORDS.filter(w => descLower.includes(w));
261
+ const sensitiveMatches = SENSITIVE_RESOURCES.filter(w => descLower.includes(w));
262
+
263
+ if (vagueMatches.length > 0 && specificMatches.length === 0) {
264
+ findings.push({
265
+ category: "hallucination",
266
+ severity: sensitiveMatches.length > 0 ? "HIGH" : "MEDIUM",
267
+ tool: name,
268
+ rule: "vague_description_over_privilege",
269
+ detail: `Tool "${name}" uses vague action words (${vagueMatches.join(", ")}) without specific operations. LLMs will interpret this as the broadest possible action${sensitiveMatches.length > 0 ? ` on sensitive resources (${sensitiveMatches.join(", ")})` : ""}.`,
270
+ });
271
+ }
272
+
273
+ // 2. Ambiguous tool name
274
+ const ambiguousVerbs = ["manage", "handle", "process", "admin", "control", "maintain"];
275
+ const nameVerb = name.split(/[_\-\.]/)[0]?.toLowerCase();
276
+ if (ambiguousVerbs.includes(nameVerb)) {
277
+ findings.push({
278
+ category: "hallucination",
279
+ severity: "HIGH",
280
+ tool: name,
281
+ rule: "ambiguous_tool_name",
282
+ detail: `Tool name "${name}" is ambiguous — "${nameVerb}" could mean read, create, update, or delete. LLM may choose the most destructive interpretation.`,
283
+ });
284
+ }
285
+
286
+ // 3. Missing scope boundaries
287
+ if (sensitiveMatches.length > 0 && !/only|restrict|limit|within|specific|allowed|scoped|bounded/i.test(desc)) {
288
+ if (!/must|should|cannot|must not|only if|requires/i.test(desc)) {
289
+ findings.push({
290
+ category: "hallucination",
291
+ severity: "MEDIUM",
292
+ tool: name,
293
+ rule: "missing_scope_boundary",
294
+ detail: `Tool "${name}" references ${sensitiveMatches.join(", ")} without specifying scope boundaries. LLM will attempt to access the broadest possible scope.`,
295
+ });
296
+ }
297
+ }
298
+
299
+ // 4. Description too short — LLM fills in the gaps
300
+ if (desc.length > 0 && desc.length < 20) {
301
+ findings.push({
302
+ category: "hallucination",
303
+ severity: "MEDIUM",
304
+ tool: name,
305
+ rule: "description_too_short",
306
+ detail: `Tool "${name}" description is only ${desc.length} chars. LLM will hallucinate capabilities based on the name alone.`,
307
+ });
308
+ }
309
+
310
+ // 5. No description at all
311
+ if (!desc || desc.trim().length === 0) {
312
+ findings.push({
313
+ category: "hallucination",
314
+ severity: "HIGH",
315
+ tool: name,
316
+ rule: "no_description",
317
+ detail: `Tool "${name}" has no description. LLM will infer behavior entirely from the name — unpredictable tool usage.`,
318
+ });
319
+ }
320
+
321
+ // 6. Implicit authority escalation
322
+ const innocuousWords = ["helper", "utility", "tool", "assistant", "basic", "simple", "general"];
323
+ const dangerousNameParts = ["admin", "root", "sudo", "deploy", "delete", "drop", "exec", "shell", "kill"];
324
+ const descInnocuous = innocuousWords.some(w => descLower.includes(w));
325
+ const nameDangerous = dangerousNameParts.some(w => name.toLowerCase().includes(w));
326
+
327
+ if (descInnocuous && nameDangerous) {
328
+ findings.push({
329
+ category: "hallucination",
330
+ severity: "CRITICAL",
331
+ tool: name,
332
+ rule: "implicit_authority_escalation",
333
+ detail: `Tool "${name}" has dangerous capabilities but is described as a "${innocuousWords.find(w => descLower.includes(w))}". LLM will underestimate the risk and use it without caution.`,
334
+ });
335
+ }
336
+ }
337
+
338
+ // 7. Conflicting/overlapping tool descriptions
339
+ for (let i = 0; i < tools.length; i++) {
340
+ for (let j = i + 1; j < tools.length; j++) {
341
+ const descA = (tools[i].description || "").toLowerCase();
342
+ const descB = (tools[j].description || "").toLowerCase();
343
+ if (!descA || !descB) continue;
344
+
345
+ const wordsA = new Set(descA.split(/\s+/).filter(w => w.length > 4));
346
+ const wordsB = new Set(descB.split(/\s+/).filter(w => w.length > 4));
347
+ const overlap = [...wordsA].filter(w => wordsB.has(w));
348
+ const overlapRatio = overlap.length / Math.min(wordsA.size, wordsB.size);
349
+
350
+ if (overlapRatio > 0.6 && overlap.length >= 5) {
351
+ findings.push({
352
+ category: "hallucination",
353
+ severity: "MEDIUM",
354
+ tool: `${tools[i].name} + ${tools[j].name}`,
355
+ rule: "conflicting_tool_descriptions",
356
+ detail: `Tools "${tools[i].name}" and "${tools[j].name}" have ${Math.round(overlapRatio * 100)}% description overlap. LLM may choose between them unpredictably.`,
357
+ });
358
+ }
359
+ }
360
+ }
361
+
362
+ return findings;
363
+ }