@bryan-thompson/inspector-assessment 1.34.2 → 1.35.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli/build/assess-full.js +48 -6
- package/cli/build/lib/cli-parser.js +31 -1
- package/cli/build/lib/cli-parserSchemas.js +2 -2
- package/cli/build/lib/jsonl-events.js +12 -0
- package/cli/build/lib/result-output.js +147 -0
- package/cli/package.json +1 -1
- package/client/dist/assets/{OAuthCallback-D6FmzQHu.js → OAuthCallback-DC1cIXHT.js} +1 -1
- package/client/dist/assets/{OAuthDebugCallback-Biy5LlVo.js → OAuthDebugCallback-C3gqJjgQ.js} +1 -1
- package/client/dist/assets/{index-CtFU98YI.js → index-Dn2w887x.js} +5 -4
- package/client/dist/index.html +1 -1
- package/client/lib/lib/assessment/resultTypes.d.ts +42 -0
- package/client/lib/lib/assessment/resultTypes.d.ts.map +1 -1
- package/client/lib/lib/assessment/sharedSchemas.d.ts +13 -0
- package/client/lib/lib/assessment/sharedSchemas.d.ts.map +1 -1
- package/client/lib/lib/assessment/sharedSchemas.js +9 -0
- package/client/lib/lib/assessment/summarizer/AssessmentSummarizer.d.ts +112 -0
- package/client/lib/lib/assessment/summarizer/AssessmentSummarizer.d.ts.map +1 -0
- package/client/lib/lib/assessment/summarizer/AssessmentSummarizer.js +439 -0
- package/client/lib/lib/assessment/summarizer/index.d.ts +15 -0
- package/client/lib/lib/assessment/summarizer/index.d.ts.map +1 -0
- package/client/lib/lib/assessment/summarizer/index.js +15 -0
- package/client/lib/lib/assessment/summarizer/tokenEstimator.d.ts +103 -0
- package/client/lib/lib/assessment/summarizer/tokenEstimator.d.ts.map +1 -0
- package/client/lib/lib/assessment/summarizer/tokenEstimator.js +225 -0
- package/client/lib/lib/assessment/summarizer/types.d.ts +182 -0
- package/client/lib/lib/assessment/summarizer/types.d.ts.map +1 -0
- package/client/lib/lib/assessment/summarizer/types.js +19 -0
- package/client/lib/services/assessment/modules/securityTests/TestValidityAnalyzer.d.ts +28 -0
- package/client/lib/services/assessment/modules/securityTests/TestValidityAnalyzer.d.ts.map +1 -1
- package/client/lib/services/assessment/modules/securityTests/TestValidityAnalyzer.js +180 -0
- package/client/package.json +1 -1
- package/package.json +1 -1
- package/server/package.json +1 -1
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Assessment Summarizer
|
|
3
|
+
*
|
|
4
|
+
* Generates tiered output for large assessment results to fit within
|
|
5
|
+
* LLM context windows. Creates executive summaries and per-tool digests.
|
|
6
|
+
*
|
|
7
|
+
* Issue #136: Tiered output strategy for large assessments
|
|
8
|
+
*
|
|
9
|
+
* @module assessment/summarizer/AssessmentSummarizer
|
|
10
|
+
*/
|
|
11
|
+
import type { MCPDirectoryAssessment } from "../resultTypes.js";
|
|
12
|
+
import { type ExecutiveSummary, type ToolSummariesCollection, type SummarizerConfig } from "./types.js";
|
|
13
|
+
/**
 * Produces the tiered views of an assessment result so that large reports
 * can be consumed piecewise by an LLM:
 *
 * - Tier 1: one executive summary for the whole run.
 * - Tier 2: one compact summary per tool.
 * - Tier 3: the raw per-tool detail, extracted on demand.
 *
 * @example
 * ```typescript
 * const summarizer = new AssessmentSummarizer();
 * const executive = summarizer.generateExecutiveSummary(results);
 * const toolSummaries = summarizer.generateToolSummaries(results);
 * ```
 */
export declare class AssessmentSummarizer {
    /** Effective configuration (defaults overlaid with constructor overrides). */
    private config;
    /**
     * @param config - Optional overrides; omitted keys keep their default values.
     */
    constructor(config?: SummarizerConfig);
    /**
     * Build the Tier 1 executive summary.
     *
     * The summary is meant to stay around 5K tokens; that figure is a design
     * target, and the measured estimate is reported in the result.
     *
     * @param results - Full assessment results
     * @returns Executive summary
     */
    generateExecutiveSummary(results: MCPDirectoryAssessment): ExecutiveSummary;
    /**
     * Collect status and score for every module present in the results.
     */
    private extractModulesSummary;
    /**
     * Count the headline problems: security vulnerabilities, AUP violations,
     * broken tools and missing annotations.
     */
    private extractCriticalFindings;
    /**
     * Bucket tools into high / medium / low / safe using the security tests.
     */
    private calculateToolRiskDistribution;
    /**
     * Map a vulnerability count onto a risk level.
     */
    private calculateToolRiskLevel;
    /**
     * Merge top-level and per-module recommendations, dropping duplicates.
     */
    private aggregateRecommendations;
    /**
     * Derive the overall score from the core module scores.
     */
    private calculateOverallScore;
    /**
     * Build the Tier 2 per-tool summaries, ordered from highest to lowest risk.
     *
     * Each tool summary is meant to stay around 500 tokens (a design target).
     *
     * @param results - Full assessment results
     * @returns Collection of tool summaries
     */
    generateToolSummaries(results: MCPDirectoryAssessment): ToolSummariesCollection;
    /**
     * Gather the distinct tool names mentioned anywhere in the results.
     */
    private extractToolNames;
    /**
     * Build the Tier 2 summary for one tool.
     */
    private generateSingleToolSummary;
    /**
     * Select the security tests that target one tool.
     */
    private getToolTests;
    /**
     * Rank the vulnerability patterns that triggered most often.
     */
    private extractTopPatterns;
    /**
     * Percentage of tests that were not vulnerable.
     */
    private calculatePassRate;
    /**
     * Look up whether a tool carries annotations and how they align.
     */
    private getToolAnnotationInfo;
    /**
     * Produce the recommendation strings for one tool.
     */
    private generateToolRecommendations;
    /**
     * Roll the per-tool summaries up into collection-wide statistics.
     */
    private calculateAggregate;
    /**
     * Pull together everything recorded about a single tool.
     * Used when writing the Tier 3 per-tool detail files.
     *
     * @param toolName - Tool name to extract
     * @param results - Full assessment results
     * @returns Tool-specific detail data
     */
    extractToolDetail(toolName: string, results: MCPDirectoryAssessment): Record<string, unknown>;
    /**
     * List every tool name, for driving Tier 3 file generation.
     *
     * @param results - Full assessment results
     * @returns Tool names
     */
    getAllToolNames(results: MCPDirectoryAssessment): string[];
}
|
|
112
|
+
//# sourceMappingURL=AssessmentSummarizer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"AssessmentSummarizer.d.ts","sourceRoot":"","sources":["../../../../src/lib/assessment/summarizer/AssessmentSummarizer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,KAAK,EACV,sBAAsB,EAEvB,MAAM,gBAAgB,CAAC;AAIxB,OAAO,EACL,KAAK,gBAAgB,EAErB,KAAK,uBAAuB,EAE5B,KAAK,gBAAgB,EAEtB,MAAM,SAAS,CAAC;AAMjB;;;;;;;;;GASG;AACH,qBAAa,oBAAoB;IAC/B,OAAO,CAAC,MAAM,CAA6B;gBAE/B,MAAM,GAAE,gBAAqB;IAQzC;;;;;;OAMG;IACH,wBAAwB,CAAC,OAAO,EAAE,sBAAsB,GAAG,gBAAgB;IA2B3E;;OAEG;IACH,OAAO,CAAC,qBAAqB;IA8C7B;;OAEG;IACH,OAAO,CAAC,uBAAuB;IAc/B;;OAEG;IACH,OAAO,CAAC,6BAA6B;IAmDrC;;OAEG;IACH,OAAO,CAAC,sBAAsB;IAO9B;;OAEG;IACH,OAAO,CAAC,wBAAwB;IA8BhC;;OAEG;IACH,OAAO,CAAC,qBAAqB;IA2B7B;;;;;;OAMG;IACH,qBAAqB,CACnB,OAAO,EAAE,sBAAsB,GAC9B,uBAAuB;IAoC1B;;OAEG;IACH,OAAO,CAAC,gBAAgB;IA8BxB;;OAEG;IACH,OAAO,CAAC,yBAAyB;IAgCjC;;OAEG;IACH,OAAO,CAAC,YAAY;IAQpB;;OAEG;IACH,OAAO,CAAC,kBAAkB;IAgB1B;;OAEG;IACH,OAAO,CAAC,iBAAiB;IAMzB;;OAEG;IACH,OAAO,CAAC,qBAAqB;IA8B7B;;OAEG;IACH,OAAO,CAAC,2BAA2B;IAgCnC;;OAEG;IACH,OAAO,CAAC,kBAAkB;IA2B1B;;;;;;;OAOG;IACH,iBAAiB,CACf,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,sBAAsB,GAC9B,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC;IA2B1B;;OAEG;IACH,eAAe,CAAC,OAAO,EAAE,sBAAsB,GAAG,MAAM,EAAE;CAG3D"}
|
|
@@ -0,0 +1,439 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Assessment Summarizer
|
|
3
|
+
*
|
|
4
|
+
* Generates tiered output for large assessment results to fit within
|
|
5
|
+
* LLM context windows. Creates executive summaries and per-tool digests.
|
|
6
|
+
*
|
|
7
|
+
* Issue #136: Tiered output strategy for large assessments
|
|
8
|
+
*
|
|
9
|
+
* @module assessment/summarizer/AssessmentSummarizer
|
|
10
|
+
*/
|
|
11
|
+
import { calculateModuleScore } from "../../moduleScoring.js";
|
|
12
|
+
import { estimateTokens } from "./tokenEstimator.js";
|
|
13
|
+
import { DEFAULT_SUMMARIZER_CONFIG, } from "./types.js";
|
|
14
|
+
// ============================================================================
|
|
15
|
+
// Assessment Summarizer Class
|
|
16
|
+
// ============================================================================
|
|
17
|
+
/**
|
|
18
|
+
* Generates tiered summaries from assessment results.
|
|
19
|
+
*
|
|
20
|
+
* @example
|
|
21
|
+
* ```typescript
|
|
22
|
+
* const summarizer = new AssessmentSummarizer();
|
|
23
|
+
* const executive = summarizer.generateExecutiveSummary(results);
|
|
24
|
+
* const toolSummaries = summarizer.generateToolSummaries(results);
|
|
25
|
+
* ```
|
|
26
|
+
*/
|
|
27
|
+
export class AssessmentSummarizer {
|
|
28
|
+
config;
|
|
29
|
+
constructor(config = {}) {
|
|
30
|
+
this.config = { ...DEFAULT_SUMMARIZER_CONFIG, ...config };
|
|
31
|
+
}
|
|
32
|
+
// ==========================================================================
|
|
33
|
+
// Tier 1: Executive Summary
|
|
34
|
+
// ==========================================================================
|
|
35
|
+
/**
|
|
36
|
+
* Generate executive summary (Tier 1) from assessment results.
|
|
37
|
+
* Targets ~5K tokens for guaranteed LLM context fit.
|
|
38
|
+
*
|
|
39
|
+
* @param results - Full assessment results
|
|
40
|
+
* @returns Executive summary
|
|
41
|
+
*/
|
|
42
|
+
generateExecutiveSummary(results) {
|
|
43
|
+
const modulesSummary = this.extractModulesSummary(results);
|
|
44
|
+
const criticalFindings = this.extractCriticalFindings(results);
|
|
45
|
+
const toolRiskDistribution = this.calculateToolRiskDistribution(results);
|
|
46
|
+
const recommendations = this.aggregateRecommendations(results);
|
|
47
|
+
const summary = {
|
|
48
|
+
serverName: results.serverName,
|
|
49
|
+
overallStatus: results.overallStatus,
|
|
50
|
+
overallScore: this.calculateOverallScore(results),
|
|
51
|
+
toolCount: results.functionality?.totalTools ?? 0,
|
|
52
|
+
testCount: results.totalTestsRun ?? 0,
|
|
53
|
+
executionTime: results.executionTime ?? 0,
|
|
54
|
+
modulesSummary,
|
|
55
|
+
criticalFindings,
|
|
56
|
+
toolRiskDistribution,
|
|
57
|
+
recommendations: recommendations.slice(0, this.config.maxRecommendations),
|
|
58
|
+
estimatedTokens: 0, // Will be calculated after construction
|
|
59
|
+
generatedAt: new Date().toISOString(),
|
|
60
|
+
};
|
|
61
|
+
// Calculate actual token estimate
|
|
62
|
+
summary.estimatedTokens = estimateTokens(summary);
|
|
63
|
+
return summary;
|
|
64
|
+
}
|
|
65
|
+
/**
|
|
66
|
+
* Extract per-module status and scores.
|
|
67
|
+
*/
|
|
68
|
+
extractModulesSummary(results) {
|
|
69
|
+
const summary = {};
|
|
70
|
+
const moduleKeys = [
|
|
71
|
+
"functionality",
|
|
72
|
+
"security",
|
|
73
|
+
"errorHandling",
|
|
74
|
+
"aupCompliance",
|
|
75
|
+
"toolAnnotations",
|
|
76
|
+
"temporal",
|
|
77
|
+
"resources",
|
|
78
|
+
"prompts",
|
|
79
|
+
"crossCapability",
|
|
80
|
+
"protocolCompliance",
|
|
81
|
+
"developerExperience",
|
|
82
|
+
"prohibitedLibraries",
|
|
83
|
+
"manifestValidation",
|
|
84
|
+
"authentication",
|
|
85
|
+
"portability",
|
|
86
|
+
"externalAPIScanner",
|
|
87
|
+
// Legacy keys for backwards compatibility
|
|
88
|
+
"mcpSpecCompliance",
|
|
89
|
+
"documentation",
|
|
90
|
+
"usability",
|
|
91
|
+
];
|
|
92
|
+
for (const key of moduleKeys) {
|
|
93
|
+
const module = results[key];
|
|
94
|
+
if (module && module.status) {
|
|
95
|
+
const score = calculateModuleScore(module) ?? 50;
|
|
96
|
+
summary[key] = {
|
|
97
|
+
status: module.status,
|
|
98
|
+
score,
|
|
99
|
+
};
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
return summary;
|
|
103
|
+
}
|
|
104
|
+
/**
|
|
105
|
+
* Extract critical findings counts from all modules.
|
|
106
|
+
*/
|
|
107
|
+
extractCriticalFindings(results) {
|
|
108
|
+
return {
|
|
109
|
+
securityVulnerabilities: results.security?.vulnerabilities?.length ?? 0,
|
|
110
|
+
aupViolations: results.aupCompliance?.violations?.length ?? 0,
|
|
111
|
+
brokenTools: results.functionality?.brokenTools?.length ?? 0,
|
|
112
|
+
missingAnnotations: results.toolAnnotations?.missingAnnotationsCount ?? 0,
|
|
113
|
+
};
|
|
114
|
+
}
|
|
115
|
+
/**
|
|
116
|
+
* Calculate tool risk distribution from security test results.
|
|
117
|
+
*/
|
|
118
|
+
calculateToolRiskDistribution(results) {
|
|
119
|
+
const distribution = { high: 0, medium: 0, low: 0, safe: 0 };
|
|
120
|
+
const tests = results.security?.promptInjectionTests ?? [];
|
|
121
|
+
const toolVulnCounts = new Map();
|
|
122
|
+
// Count vulnerabilities per tool
|
|
123
|
+
for (const test of tests) {
|
|
124
|
+
if (test.vulnerable && test.toolName) {
|
|
125
|
+
const current = toolVulnCounts.get(test.toolName) ?? 0;
|
|
126
|
+
toolVulnCounts.set(test.toolName, current + 1);
|
|
127
|
+
}
|
|
128
|
+
}
|
|
129
|
+
// Get all tool names
|
|
130
|
+
const allTools = new Set();
|
|
131
|
+
for (const test of tests) {
|
|
132
|
+
if (test.toolName) {
|
|
133
|
+
allTools.add(test.toolName);
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
// Categorize each tool
|
|
137
|
+
for (const toolName of allTools) {
|
|
138
|
+
const vulnCount = toolVulnCounts.get(toolName) ?? 0;
|
|
139
|
+
const riskLevel = this.calculateToolRiskLevel(vulnCount);
|
|
140
|
+
switch (riskLevel) {
|
|
141
|
+
case "HIGH":
|
|
142
|
+
distribution.high++;
|
|
143
|
+
break;
|
|
144
|
+
case "MEDIUM":
|
|
145
|
+
distribution.medium++;
|
|
146
|
+
break;
|
|
147
|
+
case "LOW":
|
|
148
|
+
distribution.low++;
|
|
149
|
+
break;
|
|
150
|
+
case "SAFE":
|
|
151
|
+
distribution.safe++;
|
|
152
|
+
break;
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
return distribution;
|
|
156
|
+
}
|
|
157
|
+
/**
|
|
158
|
+
* Calculate risk level based on vulnerability count.
|
|
159
|
+
*/
|
|
160
|
+
calculateToolRiskLevel(vulnCount) {
|
|
161
|
+
if (vulnCount >= 5)
|
|
162
|
+
return "HIGH";
|
|
163
|
+
if (vulnCount >= 2)
|
|
164
|
+
return "MEDIUM";
|
|
165
|
+
if (vulnCount >= 1)
|
|
166
|
+
return "LOW";
|
|
167
|
+
return "SAFE";
|
|
168
|
+
}
|
|
169
|
+
/**
|
|
170
|
+
* Aggregate recommendations from all modules.
|
|
171
|
+
*/
|
|
172
|
+
aggregateRecommendations(results) {
|
|
173
|
+
const recommendations = [];
|
|
174
|
+
// Top-level recommendations
|
|
175
|
+
if (results.recommendations) {
|
|
176
|
+
recommendations.push(...results.recommendations);
|
|
177
|
+
}
|
|
178
|
+
// Module-specific recommendations
|
|
179
|
+
const modulesWithRecs = [
|
|
180
|
+
results.errorHandling,
|
|
181
|
+
results.aupCompliance,
|
|
182
|
+
results.toolAnnotations,
|
|
183
|
+
results.developerExperience,
|
|
184
|
+
results.documentation,
|
|
185
|
+
results.usability,
|
|
186
|
+
results.prohibitedLibraries,
|
|
187
|
+
results.portability,
|
|
188
|
+
];
|
|
189
|
+
for (const module of modulesWithRecs) {
|
|
190
|
+
if (module?.recommendations) {
|
|
191
|
+
recommendations.push(...module.recommendations);
|
|
192
|
+
}
|
|
193
|
+
}
|
|
194
|
+
// Deduplicate
|
|
195
|
+
return [...new Set(recommendations)];
|
|
196
|
+
}
|
|
197
|
+
/**
|
|
198
|
+
* Calculate overall score from module scores.
|
|
199
|
+
*/
|
|
200
|
+
calculateOverallScore(results) {
|
|
201
|
+
const scores = [];
|
|
202
|
+
const coreModules = [
|
|
203
|
+
results.functionality,
|
|
204
|
+
results.security,
|
|
205
|
+
results.errorHandling,
|
|
206
|
+
];
|
|
207
|
+
for (const module of coreModules) {
|
|
208
|
+
const score = calculateModuleScore(module);
|
|
209
|
+
if (score !== null) {
|
|
210
|
+
scores.push(score);
|
|
211
|
+
}
|
|
212
|
+
}
|
|
213
|
+
if (scores.length === 0) {
|
|
214
|
+
return results.overallStatus === "PASS" ? 100 : 0;
|
|
215
|
+
}
|
|
216
|
+
return Math.round(scores.reduce((a, b) => a + b, 0) / scores.length);
|
|
217
|
+
}
|
|
218
|
+
// ==========================================================================
|
|
219
|
+
// Tier 2: Tool Summaries
|
|
220
|
+
// ==========================================================================
|
|
221
|
+
/**
|
|
222
|
+
* Generate tool summaries (Tier 2) from assessment results.
|
|
223
|
+
* Targets ~500 tokens per tool for efficient LLM processing.
|
|
224
|
+
*
|
|
225
|
+
* @param results - Full assessment results
|
|
226
|
+
* @returns Collection of tool summaries
|
|
227
|
+
*/
|
|
228
|
+
generateToolSummaries(results) {
|
|
229
|
+
const tools = [];
|
|
230
|
+
// Get all unique tool names from security tests
|
|
231
|
+
const toolNames = this.extractToolNames(results);
|
|
232
|
+
for (const toolName of toolNames) {
|
|
233
|
+
const summary = this.generateSingleToolSummary(toolName, results);
|
|
234
|
+
tools.push(summary);
|
|
235
|
+
}
|
|
236
|
+
// Sort by risk level (highest first)
|
|
237
|
+
tools.sort((a, b) => {
|
|
238
|
+
const riskOrder = {
|
|
239
|
+
HIGH: 0,
|
|
240
|
+
MEDIUM: 1,
|
|
241
|
+
LOW: 2,
|
|
242
|
+
SAFE: 3,
|
|
243
|
+
};
|
|
244
|
+
return riskOrder[a.riskLevel] - riskOrder[b.riskLevel];
|
|
245
|
+
});
|
|
246
|
+
const aggregate = this.calculateAggregate(tools);
|
|
247
|
+
const collection = {
|
|
248
|
+
tools,
|
|
249
|
+
totalTools: tools.length,
|
|
250
|
+
aggregate,
|
|
251
|
+
estimatedTokens: 0,
|
|
252
|
+
generatedAt: new Date().toISOString(),
|
|
253
|
+
};
|
|
254
|
+
collection.estimatedTokens = estimateTokens(collection);
|
|
255
|
+
return collection;
|
|
256
|
+
}
|
|
257
|
+
/**
|
|
258
|
+
* Extract all unique tool names from assessment results.
|
|
259
|
+
*/
|
|
260
|
+
extractToolNames(results) {
|
|
261
|
+
const toolNames = new Set();
|
|
262
|
+
// From security tests
|
|
263
|
+
const tests = results.security?.promptInjectionTests ?? [];
|
|
264
|
+
for (const test of tests) {
|
|
265
|
+
if (test.toolName) {
|
|
266
|
+
toolNames.add(test.toolName);
|
|
267
|
+
}
|
|
268
|
+
}
|
|
269
|
+
// From functionality results
|
|
270
|
+
const funcResults = results.functionality?.toolResults ?? [];
|
|
271
|
+
for (const result of funcResults) {
|
|
272
|
+
if (result.toolName) {
|
|
273
|
+
toolNames.add(result.toolName);
|
|
274
|
+
}
|
|
275
|
+
}
|
|
276
|
+
// From annotation results
|
|
277
|
+
const annotationResults = results.toolAnnotations?.toolResults ?? [];
|
|
278
|
+
for (const result of annotationResults) {
|
|
279
|
+
if (result.toolName) {
|
|
280
|
+
toolNames.add(result.toolName);
|
|
281
|
+
}
|
|
282
|
+
}
|
|
283
|
+
return [...toolNames].sort();
|
|
284
|
+
}
|
|
285
|
+
/**
|
|
286
|
+
* Generate summary for a single tool.
|
|
287
|
+
*/
|
|
288
|
+
generateSingleToolSummary(toolName, results) {
|
|
289
|
+
const tests = this.getToolTests(toolName, results);
|
|
290
|
+
const vulnCount = tests.filter((t) => t.vulnerable).length;
|
|
291
|
+
const patterns = this.extractTopPatterns(tests);
|
|
292
|
+
const passRate = this.calculatePassRate(tests);
|
|
293
|
+
const annotationInfo = this.getToolAnnotationInfo(toolName, results);
|
|
294
|
+
const summary = {
|
|
295
|
+
toolName,
|
|
296
|
+
riskLevel: this.calculateToolRiskLevel(vulnCount),
|
|
297
|
+
vulnerabilityCount: vulnCount,
|
|
298
|
+
topPatterns: patterns.slice(0, this.config.maxPatternsPerTool),
|
|
299
|
+
testCount: tests.length,
|
|
300
|
+
passRate,
|
|
301
|
+
recommendations: this.generateToolRecommendations(toolName, vulnCount, annotationInfo),
|
|
302
|
+
estimatedTokens: 0,
|
|
303
|
+
hasAnnotations: annotationInfo.hasAnnotations,
|
|
304
|
+
annotationStatus: annotationInfo.status,
|
|
305
|
+
};
|
|
306
|
+
summary.estimatedTokens = estimateTokens(summary);
|
|
307
|
+
return summary;
|
|
308
|
+
}
|
|
309
|
+
/**
|
|
310
|
+
* Get all security tests for a specific tool.
|
|
311
|
+
*/
|
|
312
|
+
getToolTests(toolName, results) {
|
|
313
|
+
const tests = results.security?.promptInjectionTests ?? [];
|
|
314
|
+
return tests.filter((t) => t.toolName === toolName);
|
|
315
|
+
}
|
|
316
|
+
/**
|
|
317
|
+
* Extract top vulnerability patterns from tests.
|
|
318
|
+
*/
|
|
319
|
+
extractTopPatterns(tests) {
|
|
320
|
+
const patternCounts = new Map();
|
|
321
|
+
for (const test of tests) {
|
|
322
|
+
if (test.vulnerable && test.testName) {
|
|
323
|
+
const current = patternCounts.get(test.testName) ?? 0;
|
|
324
|
+
patternCounts.set(test.testName, current + 1);
|
|
325
|
+
}
|
|
326
|
+
}
|
|
327
|
+
// Sort by count and return names
|
|
328
|
+
return [...patternCounts.entries()]
|
|
329
|
+
.sort((a, b) => b[1] - a[1])
|
|
330
|
+
.map(([name]) => name);
|
|
331
|
+
}
|
|
332
|
+
/**
|
|
333
|
+
* Calculate pass rate for tests.
|
|
334
|
+
*/
|
|
335
|
+
calculatePassRate(tests) {
|
|
336
|
+
if (tests.length === 0)
|
|
337
|
+
return 100;
|
|
338
|
+
const passed = tests.filter((t) => !t.vulnerable).length;
|
|
339
|
+
return Math.round((passed / tests.length) * 100);
|
|
340
|
+
}
|
|
341
|
+
/**
|
|
342
|
+
* Get annotation info for a tool.
|
|
343
|
+
*/
|
|
344
|
+
getToolAnnotationInfo(toolName, results) {
|
|
345
|
+
const annotationResults = results.toolAnnotations?.toolResults ?? [];
|
|
346
|
+
const toolResult = annotationResults.find((r) => r.toolName === toolName);
|
|
347
|
+
if (!toolResult) {
|
|
348
|
+
return { hasAnnotations: false, status: "MISSING" };
|
|
349
|
+
}
|
|
350
|
+
const hasAnnotations = toolResult.annotations?.readOnlyHint !== undefined ||
|
|
351
|
+
toolResult.annotations?.destructiveHint !== undefined;
|
|
352
|
+
let status;
|
|
353
|
+
if (!hasAnnotations) {
|
|
354
|
+
status = "MISSING";
|
|
355
|
+
}
|
|
356
|
+
else if (toolResult.alignmentStatus === "ALIGNED") {
|
|
357
|
+
status = "ALIGNED";
|
|
358
|
+
}
|
|
359
|
+
else if (toolResult.alignmentStatus === "MISALIGNED") {
|
|
360
|
+
status = "MISALIGNED";
|
|
361
|
+
}
|
|
362
|
+
return { hasAnnotations, status };
|
|
363
|
+
}
|
|
364
|
+
/**
|
|
365
|
+
* Generate recommendations for a specific tool.
|
|
366
|
+
*/
|
|
367
|
+
generateToolRecommendations(toolName, vulnCount, annotationInfo) {
|
|
368
|
+
const recommendations = [];
|
|
369
|
+
if (vulnCount >= 5) {
|
|
370
|
+
recommendations.push(`Critical: ${toolName} has ${vulnCount} vulnerabilities - requires immediate security review`);
|
|
371
|
+
}
|
|
372
|
+
else if (vulnCount >= 2) {
|
|
373
|
+
recommendations.push(`${toolName} has ${vulnCount} vulnerabilities - review input validation`);
|
|
374
|
+
}
|
|
375
|
+
else if (vulnCount >= 1) {
|
|
376
|
+
recommendations.push(`${toolName} has a vulnerability - investigate and patch`);
|
|
377
|
+
}
|
|
378
|
+
if (!annotationInfo.hasAnnotations) {
|
|
379
|
+
recommendations.push(`Add readOnlyHint/destructiveHint annotations to ${toolName}`);
|
|
380
|
+
}
|
|
381
|
+
else if (annotationInfo.status === "MISALIGNED") {
|
|
382
|
+
recommendations.push(`Review annotation alignment for ${toolName}`);
|
|
383
|
+
}
|
|
384
|
+
return recommendations;
|
|
385
|
+
}
|
|
386
|
+
/**
|
|
387
|
+
* Calculate aggregate statistics across all tool summaries.
|
|
388
|
+
*/
|
|
389
|
+
calculateAggregate(tools) {
|
|
390
|
+
const totalVulns = tools.reduce((sum, t) => sum + t.vulnerabilityCount, 0);
|
|
391
|
+
const avgPassRate = tools.length > 0
|
|
392
|
+
? Math.round(tools.reduce((sum, t) => sum + t.passRate, 0) / tools.length)
|
|
393
|
+
: 100;
|
|
394
|
+
const misaligned = tools.filter((t) => t.annotationStatus === "MISALIGNED").length;
|
|
395
|
+
return {
|
|
396
|
+
totalVulnerabilities: totalVulns,
|
|
397
|
+
averagePassRate: avgPassRate,
|
|
398
|
+
misalignedAnnotations: misaligned,
|
|
399
|
+
};
|
|
400
|
+
}
|
|
401
|
+
// ==========================================================================
|
|
402
|
+
// Tier 3: Per-Tool Details (extraction helpers)
|
|
403
|
+
// ==========================================================================
|
|
404
|
+
/**
|
|
405
|
+
* Extract full detail data for a specific tool.
|
|
406
|
+
* Used when generating Tier 3 per-tool detail files.
|
|
407
|
+
*
|
|
408
|
+
* @param toolName - Tool name to extract
|
|
409
|
+
* @param results - Full assessment results
|
|
410
|
+
* @returns Tool-specific detail data
|
|
411
|
+
*/
|
|
412
|
+
extractToolDetail(toolName, results) {
|
|
413
|
+
const securityTests = this.getToolTests(toolName, results);
|
|
414
|
+
const functionalityResult = results.functionality?.toolResults?.find((r) => r.toolName === toolName);
|
|
415
|
+
const annotationResult = results.toolAnnotations?.toolResults?.find((r) => r.toolName === toolName);
|
|
416
|
+
return {
|
|
417
|
+
toolName,
|
|
418
|
+
extractedAt: new Date().toISOString(),
|
|
419
|
+
security: {
|
|
420
|
+
tests: securityTests,
|
|
421
|
+
vulnerableCount: securityTests.filter((t) => t.vulnerable).length,
|
|
422
|
+
totalTests: securityTests.length,
|
|
423
|
+
},
|
|
424
|
+
functionality: functionalityResult ?? null,
|
|
425
|
+
annotations: annotationResult ?? null,
|
|
426
|
+
estimatedTokens: estimateTokens({
|
|
427
|
+
security: { tests: securityTests },
|
|
428
|
+
functionality: functionalityResult,
|
|
429
|
+
annotations: annotationResult,
|
|
430
|
+
}),
|
|
431
|
+
};
|
|
432
|
+
}
|
|
433
|
+
/**
|
|
434
|
+
* Get all tool names for Tier 3 file generation.
|
|
435
|
+
*/
|
|
436
|
+
getAllToolNames(results) {
|
|
437
|
+
return this.extractToolNames(results);
|
|
438
|
+
}
|
|
439
|
+
}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Assessment Summarizer Module
|
|
3
|
+
*
|
|
4
|
+
* Generates tiered output for large assessment results to fit within
|
|
5
|
+
* LLM context windows.
|
|
6
|
+
*
|
|
7
|
+
* Issue #136: Tiered output strategy for large assessments
|
|
8
|
+
*
|
|
9
|
+
* @module assessment/summarizer
|
|
10
|
+
*/
|
|
11
|
+
export type { OutputFormat, ToolRiskLevel, ExecutiveSummary, ToolSummary, ToolSummariesCollection, ToolDetailReference, TieredOutput, SummarizerConfig, } from "./types.js";
|
|
12
|
+
export { DEFAULT_SUMMARIZER_CONFIG } from "./types.js";
|
|
13
|
+
export { estimateTokens, estimateJsonFileTokens, shouldAutoTier, formatTokenEstimate, estimateSectionTokens, getTopSections, } from "./tokenEstimator.js";
|
|
14
|
+
export { AssessmentSummarizer } from "./AssessmentSummarizer.js";
|
|
15
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/lib/assessment/summarizer/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAGH,YAAY,EACV,YAAY,EACZ,aAAa,EACb,gBAAgB,EAChB,WAAW,EACX,uBAAuB,EACvB,mBAAmB,EACnB,YAAY,EACZ,gBAAgB,GACjB,MAAM,SAAS,CAAC;AAEjB,OAAO,EAAE,yBAAyB,EAAE,MAAM,SAAS,CAAC;AAGpD,OAAO,EACL,cAAc,EACd,sBAAsB,EACtB,cAAc,EACd,mBAAmB,EACnB,qBAAqB,EACrB,cAAc,GACf,MAAM,kBAAkB,CAAC;AAG1B,OAAO,EAAE,oBAAoB,EAAE,MAAM,wBAAwB,CAAC"}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Assessment Summarizer Module
|
|
3
|
+
*
|
|
4
|
+
* Generates tiered output for large assessment results to fit within
|
|
5
|
+
* LLM context windows.
|
|
6
|
+
*
|
|
7
|
+
* Issue #136: Tiered output strategy for large assessments
|
|
8
|
+
*
|
|
9
|
+
* @module assessment/summarizer
|
|
10
|
+
*/
|
|
11
|
+
export { DEFAULT_SUMMARIZER_CONFIG } from "./types.js";
|
|
12
|
+
// Token estimation utilities
|
|
13
|
+
export { estimateTokens, estimateJsonFileTokens, shouldAutoTier, formatTokenEstimate, estimateSectionTokens, getTopSections, } from "./tokenEstimator.js";
|
|
14
|
+
// Main summarizer class
|
|
15
|
+
export { AssessmentSummarizer } from "./AssessmentSummarizer.js";
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Token Estimation Utilities
|
|
3
|
+
*
|
|
4
|
+
* Provides token counting and threshold detection for tiered output strategy.
|
|
5
|
+
* Uses industry-standard approximation of ~4 characters per token.
|
|
6
|
+
*
|
|
7
|
+
* Issue #136: Tiered output strategy for large assessments
|
|
8
|
+
*
|
|
9
|
+
* @module assessment/summarizer/tokenEstimator
|
|
10
|
+
*/
|
|
11
|
+
import type { MCPDirectoryAssessment } from "../resultTypes.js";
|
|
12
|
+
/**
 * Approximate how many tokens a piece of content occupies.
 *
 * The figure is based on the common rule of thumb of roughly 4 characters
 * per token. JSON content gets an additional buffer for formatting overhead.
 *
 * @param content - Content to estimate (string, object, or array)
 * @returns Estimated token count
 *
 * @example
 * ```typescript
 * // Plain string
 * estimateTokens("Hello world"); // ~3 tokens
 *
 * // Object (JSON stringified before measuring)
 * estimateTokens({ name: "test", value: 123 }); // ~10 tokens
 *
 * // Full assessment results
 * estimateTokens(assessmentResults); // ~50,000+ tokens
 * ```
 */
export declare function estimateTokens(content: unknown): number;
|
|
34
|
+
/**
 * Approximate the token count of the JSON file that would be written for
 * the given content, taking pretty-printing with an indent of 2 into account.
 *
 * @param content - Content that would be JSON.stringify'd
 * @returns Estimated token count
 */
export declare function estimateJsonFileTokens(content: unknown): number;
|
|
42
|
+
/**
 * Decide whether assessment results should be written as tiered output
 * without the caller asking for it.
 *
 * The answer is true when the estimated token count is above the threshold,
 * i.e. when the full output is unlikely to fit a typical LLM context window.
 *
 * @param results - Full assessment results
 * @param threshold - Token threshold (default: 100,000)
 * @returns true if results should be tiered
 *
 * @example
 * ```typescript
 * const results = await runAssessment(server);
 *
 * if (shouldAutoTier(results)) {
 *   // Too large: write tiered output
 *   saveTieredResults(serverName, results, options);
 * } else {
 *   // Small enough: write the standard full output
 *   saveResults(serverName, results, options);
 * }
 * ```
 */
export declare function shouldAutoTier(results: MCPDirectoryAssessment, threshold?: number): boolean;
|
|
66
|
+
/**
 * Turn a raw token count into a human-readable description.
 *
 * @param tokenCount - Number of tokens
 * @returns Object holding the formatted token count, its size category,
 *   whether it fits the context window, and a recommendation string
 *
 * @example
 * ```typescript
 * formatTokenEstimate(5000);
 * // { tokens: "5,000", category: "small", fitsContext: true, recommendation: "…" }
 *
 * formatTokenEstimate(500000);
 * // { tokens: "500,000", category: "very-large", fitsContext: false, recommendation: "…" }
 * ```
 */
export declare function formatTokenEstimate(tokenCount: number): {
    tokens: string;
    category: "small" | "medium" | "large" | "very-large" | "oversized";
    fitsContext: boolean;
    recommendation: string;
};
|
|
87
|
+
/**
 * Break the token estimate down by major section of the assessment results,
 * to show which modules contribute most to the output size.
 *
 * @param results - Assessment results to analyze
 * @returns Map of section name to estimated token count
 */
export declare function estimateSectionTokens(results: MCPDirectoryAssessment): Record<string, number>;
|
|
95
|
+
/**
 * Return the N sections with the largest estimated token counts.
 *
 * @param results - Assessment results
 * @param topN - Number of sections to return (default: 5)
 * @returns Array of [sectionName, tokenCount] sorted by size descending
 */
export declare function getTopSections(results: MCPDirectoryAssessment, topN?: number): Array<[string, number]>;
|
|
103
|
+
//# sourceMappingURL=tokenEstimator.d.ts.map
|