visus-mcp 0.3.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. package/.claude/settings.local.json +22 -0
  2. package/LINKEDIN-STRATEGY.md +367 -0
  3. package/README.md +491 -16
  4. package/ROADMAP.md +167 -30
  5. package/SECURITY-AUDIT-v1.md +277 -0
  6. package/STATUS.md +801 -42
  7. package/TROUBLESHOOT-AUTH-20260322-2019.md +291 -0
  8. package/TROUBLESHOOT-JEST-20260323-1357.md +139 -0
  9. package/TROUBLESHOOT-LAMBDA-20260322-1945.md +183 -0
  10. package/VISUS-CLAUDE-CODE-PROMPT.md +1 -1
  11. package/VISUS-PROJECT-PLAN.md +7 -0
  12. package/dist/browser/playwright-renderer.d.ts.map +1 -1
  13. package/dist/browser/playwright-renderer.js +7 -0
  14. package/dist/browser/playwright-renderer.js.map +1 -1
  15. package/dist/browser/reader.d.ts +31 -0
  16. package/dist/browser/reader.d.ts.map +1 -0
  17. package/dist/browser/reader.js +98 -0
  18. package/dist/browser/reader.js.map +1 -0
  19. package/dist/index.d.ts +1 -1
  20. package/dist/index.d.ts.map +1 -1
  21. package/dist/index.js +37 -5
  22. package/dist/index.js.map +1 -1
  23. package/dist/lambda-handler.d.ts +0 -6
  24. package/dist/lambda-handler.d.ts.map +1 -1
  25. package/dist/lambda-handler.js +97 -25
  26. package/dist/lambda-handler.js.map +1 -1
  27. package/dist/sanitizer/framework-mapper.d.ts +22 -0
  28. package/dist/sanitizer/framework-mapper.d.ts.map +1 -0
  29. package/dist/sanitizer/framework-mapper.js +296 -0
  30. package/dist/sanitizer/framework-mapper.js.map +1 -0
  31. package/dist/sanitizer/index.d.ts +2 -0
  32. package/dist/sanitizer/index.d.ts.map +1 -1
  33. package/dist/sanitizer/index.js +14 -1
  34. package/dist/sanitizer/index.js.map +1 -1
  35. package/dist/sanitizer/patterns.js +1 -1
  36. package/dist/sanitizer/patterns.js.map +1 -1
  37. package/dist/sanitizer/severity-classifier.d.ts +33 -0
  38. package/dist/sanitizer/severity-classifier.d.ts.map +1 -0
  39. package/dist/sanitizer/severity-classifier.js +113 -0
  40. package/dist/sanitizer/severity-classifier.js.map +1 -0
  41. package/dist/sanitizer/threat-reporter.d.ts +65 -0
  42. package/dist/sanitizer/threat-reporter.d.ts.map +1 -0
  43. package/dist/sanitizer/threat-reporter.js +160 -0
  44. package/dist/sanitizer/threat-reporter.js.map +1 -0
  45. package/dist/tools/fetch-structured.d.ts +5 -0
  46. package/dist/tools/fetch-structured.d.ts.map +1 -1
  47. package/dist/tools/fetch-structured.js +54 -6
  48. package/dist/tools/fetch-structured.js.map +1 -1
  49. package/dist/tools/fetch.d.ts +5 -0
  50. package/dist/tools/fetch.d.ts.map +1 -1
  51. package/dist/tools/fetch.js +42 -9
  52. package/dist/tools/fetch.js.map +1 -1
  53. package/dist/tools/read.d.ts +51 -0
  54. package/dist/tools/read.d.ts.map +1 -0
  55. package/dist/tools/read.js +127 -0
  56. package/dist/tools/read.js.map +1 -0
  57. package/dist/tools/search.d.ts +45 -0
  58. package/dist/tools/search.d.ts.map +1 -0
  59. package/dist/tools/search.js +220 -0
  60. package/dist/tools/search.js.map +1 -0
  61. package/dist/types.d.ts +64 -0
  62. package/dist/types.d.ts.map +1 -1
  63. package/dist/types.js.map +1 -1
  64. package/dist/utils/format-converter.d.ts +39 -0
  65. package/dist/utils/format-converter.d.ts.map +1 -0
  66. package/dist/utils/format-converter.js +191 -0
  67. package/dist/utils/format-converter.js.map +1 -0
  68. package/dist/utils/truncate.d.ts +26 -0
  69. package/dist/utils/truncate.d.ts.map +1 -0
  70. package/dist/utils/truncate.js +54 -0
  71. package/dist/utils/truncate.js.map +1 -0
  72. package/infrastructure/stack.ts +55 -6
  73. package/jest.config.js +3 -0
  74. package/package.json +9 -2
  75. package/src/browser/playwright-renderer.ts +8 -0
  76. package/src/browser/reader.ts +129 -0
  77. package/src/index.ts +49 -5
  78. package/src/lambda-handler.ts +131 -26
  79. package/src/sanitizer/framework-mapper.ts +347 -0
  80. package/src/sanitizer/index.ts +18 -1
  81. package/src/sanitizer/patterns.ts +1 -1
  82. package/src/sanitizer/severity-classifier.ts +132 -0
  83. package/src/sanitizer/threat-reporter.ts +261 -0
  84. package/src/tools/fetch-structured.ts +58 -6
  85. package/src/tools/fetch.ts +44 -9
  86. package/src/tools/read.ts +143 -0
  87. package/src/tools/search.ts +263 -0
  88. package/src/types.ts +69 -0
  89. package/src/utils/format-converter.ts +236 -0
  90. package/src/utils/truncate.ts +64 -0
  91. package/tests/auth-smoke.test.ts +480 -0
  92. package/tests/fetch-tool.test.ts +595 -2
  93. package/tests/reader.test.ts +353 -0
  94. package/tests/sanitizer.test.ts +52 -0
  95. package/tests/search.test.ts +456 -0
  96. package/tests/threat-reporter.test.ts +266 -0
@@ -0,0 +1,263 @@
1
+ /**
2
+ * Visus Search Tool - Safe Web Search
3
+ *
4
+ * Queries DuckDuckGo's Instant Answer API and sanitizes all results
5
+ * before returning them to the LLM.
6
+ *
7
+ * SECURITY: Every search result snippet and title passes through the
8
+ * sanitization pipeline. This prevents prompt injection via search results.
9
+ */
10
+
11
+ import { sanitize } from '../sanitizer/index.js';
12
+ import { generateThreatReport } from '../sanitizer/threat-reporter.js';
13
+ import type { VisusSearchInput, VisusSearchOutput, Result } from '../types.js';
14
+ import { Ok, Err } from '../types.js';
15
+
16
/**
 * Partial shapes of the DuckDuckGo Instant Answer JSON payload.
 *
 * Only the fields this module reads are declared, and every one of them is
 * optional because the payload comes from an external service.
 */

/** One related-topic entry: its display text and the URL it links to. */
interface DuckDuckGoRelatedTopic {
  Text?: string;
  FirstURL?: string;
}

/** Top-level response: an optional abstract plus a list of related topics. */
interface DuckDuckGoResponse {
  AbstractText?: string;
  AbstractURL?: string;
  /** Entries are either plain topics or groups that wrap topics under `Topics`. */
  RelatedTopics?: ({ Topics: DuckDuckGoRelatedTopic[] } | DuckDuckGoRelatedTopic)[];
}
29
+
30
/** Un-sanitized result extracted from the DuckDuckGo payload. */
type RawSearchResult = { title: string; url: string; snippet: string };

/** Number of results returned when the caller does not specify `max_results`. */
const DEFAULT_MAX_RESULTS = 5;

/** Hard upper bound on the number of results returned per search. */
const MAX_RESULTS_CAP = 10;

/** How long the whole HTTP exchange (headers and body) may take before aborting. */
const SEARCH_TIMEOUT_MS = 8000;

/**
 * Normalize the requested result count to an integer in [1, MAX_RESULTS_CAP].
 *
 * Without the lower bound a negative value would reach `slice(0, -n)` and
 * silently drop trailing results; NaN would produce an empty list.
 */
function resolveMaxResults(requested: number | undefined): number {
  if (typeof requested !== 'number' || Number.isNaN(requested)) {
    return DEFAULT_MAX_RESULTS;
  }
  return Math.min(Math.max(Math.floor(requested), 1), MAX_RESULTS_CAP);
}

/** Build the successful-but-empty output used for "no results" and "unavailable" cases. */
function emptySearchOutput(query: string, message: string): Result<VisusSearchOutput, Error> {
  return Ok({
    query,
    result_count: 0,
    sanitized: true,
    results: [],
    total_injections_removed: 0,
    message
  });
}

/**
 * Pull up to `maxResults` raw results out of the DuckDuckGo payload.
 *
 * The payload is untrusted, so entries that are not objects, or whose text/URL
 * are not non-empty strings, are skipped instead of being allowed to throw.
 */
function collectRawResults(data: DuckDuckGoResponse, maxResults: number): RawSearchResult[] {
  const collected: RawSearchResult[] = [];

  const addResult = (text: unknown, url: unknown): void => {
    if (typeof text !== 'string' || typeof url !== 'string') {
      return;
    }
    if (text.length === 0 || url.trim().length === 0) {
      return;
    }
    collected.push({ title: extractTitle(text), url, snippet: text });
  };

  // The abstract, when present, is always the first result.
  addResult(data.AbstractText, data.AbstractURL);

  if (Array.isArray(data.RelatedTopics)) {
    for (const topic of data.RelatedTopics) {
      // Stop once enough results have been collected.
      if (collected.length >= maxResults) {
        break;
      }
      if (typeof topic !== 'object' || topic === null) {
        continue;
      }

      if ('Topics' in topic && Array.isArray(topic.Topics)) {
        // Nested topics group
        for (const nestedTopic of topic.Topics) {
          if (typeof nestedTopic === 'object' && nestedTopic !== null) {
            addResult(nestedTopic.Text, nestedTopic.FirstURL);
          }
        }
      } else if ('Text' in topic) {
        // Direct topic
        addResult(topic.Text, topic.FirstURL);
      }
    }
  }

  // A nested group can overshoot the limit, so trim at the end.
  return collected.slice(0, maxResults);
}

/**
 * Search the web via DuckDuckGo and return sanitized results.
 *
 * Queries the DuckDuckGo Instant Answer API, extracts the abstract and related
 * topics, and passes every title and snippet through `sanitize` before
 * returning. Result URLs are returned as received.
 *
 * Failure handling: an invalid query yields `Err`. HTTP errors, timeouts and
 * network/parse failures yield `Ok` with an empty result list and an
 * explanatory `message`, so callers always get a well-formed output.
 *
 * @param input Search query and options; `max_results` is clamped to 1..10 (default 5)
 * @returns Sanitized search results with injection detection metadata
 */
export async function visusSearch(input: VisusSearchInput): Promise<Result<VisusSearchOutput, Error>> {
  // Validate input
  if (!input.query || typeof input.query !== 'string' || input.query.trim().length === 0) {
    return Err(new Error('query must be a non-empty string'));
  }

  const maxResults = resolveMaxResults(input.max_results);

  try {
    // Call DuckDuckGo Instant Answer API
    const query = encodeURIComponent(input.query.trim());
    const apiUrl = `https://api.duckduckgo.com/?q=${query}&format=json&no_redirect=1&no_html=1`;

    const controller = new AbortController();
    const timeout = setTimeout(() => controller.abort(), SEARCH_TIMEOUT_MS);

    let data: DuckDuckGoResponse;
    try {
      const response = await fetch(apiUrl, {
        signal: controller.signal,
        headers: {
          // NOTE(review): version string is hard-coded and does not track the package version.
          'User-Agent': 'visus-mcp/0.3.0 (https://github.com/lateos/visus-mcp)'
        }
      });

      if (!response.ok) {
        return emptySearchOutput(input.query, `Search unavailable (HTTP ${response.status})`);
      }

      // Read the body while the abort timer is still armed so a stalled
      // body stream cannot hang the call indefinitely.
      data = await response.json() as DuckDuckGoResponse;
    } finally {
      clearTimeout(timeout);
    }

    const validResults = collectRawResults(data, maxResults);

    // If no results found, return empty array with message
    if (validResults.length === 0) {
      return emptySearchOutput(input.query, 'No results found');
    }

    // Sanitize each result independently
    const sanitizedResults: VisusSearchOutput['results'] = [];
    const allPatternsDetected = new Set<string>();
    let totalInjectionsRemoved = 0;
    let totalPIIRedacted = 0;

    for (const result of validResults) {
      const titleSanitization = sanitize(result.title);
      const snippetSanitization = sanitize(result.snippet);

      const injectionsRemoved =
        titleSanitization.sanitization.patterns_detected.length +
        snippetSanitization.sanitization.patterns_detected.length;

      const piiRedacted =
        titleSanitization.sanitization.pii_types_redacted.length +
        snippetSanitization.sanitization.pii_types_redacted.length;

      totalInjectionsRemoved += injectionsRemoved;
      totalPIIRedacted += piiRedacted;

      // Collect the distinct patterns detected across all results
      for (const pattern of titleSanitization.sanitization.patterns_detected) {
        allPatternsDetected.add(pattern);
      }
      for (const pattern of snippetSanitization.sanitization.patterns_detected) {
        allPatternsDetected.add(pattern);
      }

      sanitizedResults.push({
        title: titleSanitization.content,
        url: result.url,
        snippet: snippetSanitization.content,
        injections_removed: injectionsRemoved,
        pii_redacted: piiRedacted
      });
    }

    // Generate aggregated threat report for all search results
    const threatReport = generateThreatReport({
      patterns_detected: Array.from(allPatternsDetected),
      pii_redacted: totalPIIRedacted,
      source_url: `DuckDuckGo Search: ${input.query}`
    });

    return Ok({
      query: input.query,
      result_count: sanitizedResults.length,
      sanitized: true,
      results: sanitizedResults,
      total_injections_removed: totalInjectionsRemoved,
      // Include threat_report only if findings exist
      ...(threatReport && { threat_report: threatReport })
    });

  } catch (error) {
    // The abort timer fired (during the request or the body read)
    if (error instanceof Error && error.name === 'AbortError') {
      return emptySearchOutput(input.query, 'Search unavailable (timeout)');
    }

    // Any other network, parsing or processing failure
    return emptySearchOutput(
      input.query,
      `Search unavailable: ${error instanceof Error ? error.message : String(error)}`
    );
  }
}
215
+
216
/**
 * Derive a short title from a block of text.
 *
 * Returns the first sentence when it is at most 80 characters long; otherwise
 * the whole (trimmed) text if that fits in 80 characters; otherwise the first
 * 77 characters followed by `...`.
 *
 * A sentence ends at `.`, `!` or `?` only when the terminator is followed by
 * whitespace or the end of the text, so dots inside tokens such as "Node.js",
 * "example.com" or "3.5" do not cut the title short.
 *
 * @param text Source text (for example a search snippet)
 * @returns Title of at most 80 characters
 */
function extractTitle(text: string): string {
  const maxLength = 80;
  const trimmed = text.trim();

  // First terminator run that follows a non-space, non-terminator character
  // and is itself followed by whitespace or end of input.
  const boundary = /[^.!?\s][.!?]+(?=\s|$)/.exec(trimmed);
  if (boundary) {
    const sentence = trimmed.slice(0, boundary.index + boundary[0].length);
    if (sentence.length <= maxLength) {
      return sentence;
    }
  }

  // Fallback: whole text if short enough
  if (trimmed.length <= maxLength) {
    return trimmed;
  }

  // Otherwise cut to leave room for the ellipsis, without splitting a
  // surrogate pair (which would leave a lone high surrogate in the title).
  let cut = maxLength - 3;
  const lastUnit = trimmed.charCodeAt(cut - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    cut -= 1;
  }

  return trimmed.substring(0, cut).trim() + '...';
}
236
+
237
/**
 * Tool definition for MCP registration.
 *
 * Describes the `visus_search` tool: its name, human-readable title and
 * description, the JSON Schema of its input, and behavior hints.
 * `max_results` declares the same 1..10 range that its description states, so
 * clients can see the bounds without reading the prose.
 */
export const visusSearchToolDefinition = {
  name: 'visus_search',
  title: 'Search the Web (Sanitized)',
  description: 'Searches the web via DuckDuckGo and returns sanitized results with prompt injection and PII removed before reaching the LLM. Use before visus_fetch or visus_read to safely discover and then read pages.',
  inputSchema: {
    type: 'object',
    properties: {
      query: {
        type: 'string',
        description: 'Search query'
      },
      max_results: {
        type: 'number',
        description: 'Maximum number of results to return (default: 5, max: 10)',
        default: 5,
        minimum: 1,
        maximum: 10
      }
    },
    required: ['query']
  },
  readOnlyHint: true,
  destructiveHint: false,
  idempotentHint: true,
  openWorldHint: true
};
package/src/types.ts CHANGED
@@ -2,6 +2,8 @@
2
2
  * Shared TypeScript interfaces for Visus MCP tool
3
3
  */
4
4
 
5
+ import type { ThreatReport } from './sanitizer/threat-reporter.js';
6
+
5
7
  /**
6
8
  * Input options for visus_fetch tool
7
9
  */
@@ -28,7 +30,12 @@ export interface VisusFetchOutput {
28
30
  fetched_at: string;
29
31
  content_length_original: number;
30
32
  content_length_sanitized: number;
33
+ format_detected?: 'html' | 'json' | 'xml' | 'rss';
34
+ content_type?: string;
35
+ truncated?: boolean;
36
+ truncated_at_chars?: number;
31
37
  };
38
+ threat_report?: ThreatReport;
32
39
  }
33
40
 
34
41
  /**
@@ -40,6 +47,14 @@ export interface VisusFetchStructuredInput {
40
47
  timeout_ms?: number;
41
48
  }
42
49
 
50
+ /**
51
+ * Input for visus_read tool
52
+ */
53
+ export interface VisusReadInput {
54
+ url: string;
55
+ timeout_ms?: number;
56
+ }
57
+
43
58
  /**
44
59
  * Output from visus_fetch_structured tool
45
60
  */
@@ -57,7 +72,60 @@ export interface VisusFetchStructuredOutput {
57
72
  fetched_at: string;
58
73
  content_length_original: number;
59
74
  content_length_sanitized: number;
75
+ format_detected?: 'html' | 'json' | 'xml' | 'rss';
76
+ content_type?: string;
77
+ truncated?: boolean;
78
+ truncated_at_chars?: number;
60
79
  };
80
+ threat_report?: ThreatReport;
81
+ }
82
+
83
+ /**
84
+ * Output from visus_read tool
85
+ */
86
+ export interface VisusReadOutput {
87
+ url: string;
88
+ content: string;
89
+ metadata: {
90
+ title: string;
91
+ author: string | null;
92
+ published: string | null;
93
+ word_count: number;
94
+ reader_mode_available: boolean;
95
+ sanitized: true;
96
+ injections_removed: number;
97
+ pii_redacted: number;
98
+ truncated: boolean;
99
+ fetched_at?: string;
100
+ };
101
+ threat_report?: ThreatReport;
102
+ }
103
+
104
+ /**
105
+ * Input for visus_search tool
106
+ */
107
+ export interface VisusSearchInput {
108
+ query: string;
109
+ max_results?: number;
110
+ }
111
+
112
+ /**
113
+ * Output from visus_search tool
114
+ */
115
+ export interface VisusSearchOutput {
116
+ query: string;
117
+ result_count: number;
118
+ sanitized: true;
119
+ results: Array<{
120
+ title: string;
121
+ url: string;
122
+ snippet: string;
123
+ injections_removed: number;
124
+ pii_redacted: number;
125
+ }>;
126
+ total_injections_removed: number;
127
+ message?: string;
128
+ threat_report?: ThreatReport;
61
129
  }
62
130
 
63
131
  /**
@@ -67,6 +135,7 @@ export interface BrowserRenderResult {
67
135
  html: string;
68
136
  title: string;
69
137
  url: string;
138
+ contentType?: string;
70
139
  text?: string;
71
140
  error?: string;
72
141
  }
@@ -0,0 +1,236 @@
1
+ /**
2
+ * Format Converter - Content-Type based format detection and conversion
3
+ *
4
+ * Handles format-appropriate conversion based on detected Content-Type.
5
+ * Supports HTML, JSON, XML, and RSS/Atom feeds.
6
+ */
7
+
8
+ import { XMLParser } from 'fast-xml-parser';
9
+
10
/**
 * Detected format type
 */
export type FormatType = 'html' | 'json' | 'xml' | 'rss';

/**
 * Detect format from Content-Type header.
 *
 * Parameters after `;` (such as `charset`) and letter case are ignored.
 * Besides the exact MIME types, structured-syntax suffixes are honored:
 * any `…+json` type is treated as JSON and any `…+xml` type as XML
 * (for example `application/ld+json`, `application/soap+xml`).
 * The feed types and `application/xhtml+xml` are matched first so the
 * suffix rules do not reclassify them.
 *
 * @param contentType - Content-Type header value (e.g., "application/json", "text/html; charset=utf-8")
 * @returns Detected format type; `'html'` for unknown, empty or missing values
 */
export function detectFormat(contentType: string): FormatType {
  // Tolerate a missing header value at runtime even though the type says string.
  const header = typeof contentType === 'string' ? contentType : '';

  // Normalize: lowercase and extract MIME type (before semicolon)
  const [rawMime = ''] = header.toLowerCase().split(';');
  const mimeType = rawMime.trim();

  switch (mimeType) {
    // HTML formats
    case 'text/html':
    case 'application/xhtml+xml':
      return 'html';

    // RSS/Atom feed formats (must come before the generic suffix rules)
    case 'application/rss+xml':
    case 'application/atom+xml':
    case 'application/feed+json':
      return 'rss';

    // JSON formats
    case 'application/json':
    case 'text/json':
      return 'json';

    // XML formats
    case 'application/xml':
    case 'text/xml':
      return 'xml';

    default:
      break;
  }

  // Structured-syntax suffixes
  if (mimeType.endsWith('+json')) {
    return 'json';
  }
  if (mimeType.endsWith('+xml')) {
    return 'xml';
  }

  // Default to HTML for unknown types
  return 'html';
}
50
+
51
/**
 * Pretty-print a JSON document for display.
 *
 * @param raw - Raw JSON string
 * @returns `JSON Response:` header followed by the document re-serialized with
 *          2-space indentation; the input unchanged when it is not valid JSON
 */
export function convertJson(raw: string): string {
  let pretty: string;

  try {
    pretty = JSON.stringify(JSON.parse(raw), null, 2);
  } catch {
    // Not parseable as JSON: hand the text back untouched
    return raw;
  }

  return 'JSON Response:\n\n' + pretty;
}
68
+
69
/**
 * Render an XML document as readable text.
 *
 * The document is parsed with `XMLParser` (attributes kept under an `@_`
 * prefix, text nodes under `#text`, declaration and processing instructions
 * ignored, namespace prefixes removed) and the resulting object is serialized
 * as indented JSON.
 *
 * @param raw - Raw XML string
 * @returns `XML Response:` header followed by the JSON rendering, or by the
 *          input with anything that looks like a tag removed when parsing throws
 */
export function convertXml(raw: string): string {
  const header = 'XML Response:\n\n';
  let body: string;

  try {
    const xmlParser = new XMLParser({
      ignoreAttributes: false,
      attributeNamePrefix: '@_',
      textNodeName: '#text',
      ignoreDeclaration: true,
      ignorePiTags: true,
      removeNSPrefix: true,
    });
    body = JSON.stringify(xmlParser.parse(raw), null, 2);
  } catch {
    // Parser rejected the input: fall back to a crude regex tag strip
    body = raw.replace(/<[^>]+>/g, '').trim();
  }

  return header + body;
}
96
+
97
/**
 * Convert RSS/Atom feed content to clean Markdown.
 *
 * Handles, in order:
 * - JSON payloads (e.g. `application/feed+json`, which `detectFormat` maps to
 *   `'rss'`): these are not XML, so they are delegated to `convertJson`
 *   instead of being fed to the XML parser.
 * - RSS 2.0 (`<rss><channel>`)
 * - Atom (`<feed>`)
 * - RSS 1.0 / RDF (`<rdf:RDF>`): because namespace prefixes are removed the
 *   root key is `RDF`, and its `<item>` elements are siblings of `<channel>`
 *   rather than children, so they are merged into the channel before formatting.
 *
 * @param raw - Raw feed document
 * @returns Formatted Markdown representation, or the `convertXml` rendering if
 *          the feed type is not recognized or parsing fails
 */
export function convertRss(raw: string): string {
  // A document whose first non-space character is "{" cannot be XML.
  if (/^\s*\{/.test(raw)) {
    return convertJson(raw);
  }

  try {
    const parser = new XMLParser({
      ignoreAttributes: false,
      attributeNamePrefix: '@_',
      textNodeName: '#text',
      removeNSPrefix: true,
    });

    const parsed = parser.parse(raw);

    // Handle RSS 2.0 format
    if (parsed.rss && parsed.rss.channel) {
      return formatRss2(parsed.rss.channel);
    }

    // Handle Atom format
    if (parsed.feed) {
      return formatAtom(parsed.feed);
    }

    // Handle RSS 1.0 (RDF) format; the lowercase key is kept for compatibility
    const rdf = parsed.RDF ?? parsed.rdf;
    if (rdf && rdf.channel) {
      const channel = typeof rdf.channel === 'object' ? rdf.channel : {};
      return formatRss2({ ...channel, item: rdf.item ?? channel.item });
    }

    // Unknown feed format, fall back to XML
    return convertXml(raw);

  } catch {
    // Parse failed, fall back to XML converter
    return convertXml(raw);
  }
}
137
+
138
/**
 * Reduce a parsed XML node to its text.
 *
 * Parsed nodes are not always strings: an element with attributes is an object
 * holding its text under `#text`, repeated elements are arrays, and the parser
 * may yield numbers or booleans. Interpolating those directly would print
 * `[object Object]` or break string operations.
 */
function rssText(value: unknown): string {
  if (typeof value === 'string') {
    return value;
  }
  if (typeof value === 'number' || typeof value === 'boolean') {
    return String(value);
  }
  if (Array.isArray(value)) {
    // Repeated element: use the first one that carries text
    for (const candidate of value) {
      const text = rssText(candidate);
      if (text) {
        return text;
      }
    }
    return '';
  }
  if (typeof value === 'object' && value !== null) {
    return rssText((value as Record<string, unknown>)['#text']);
  }
  return '';
}

/**
 * Format RSS 2.0 feed data as Markdown.
 *
 * Emits the feed title and description followed by at most 10 items, each with
 * its title, description (cut to 200 characters), link and publication date.
 *
 * @param channel Parsed `<channel>` node
 * @returns Markdown document starting with `RSS Feed:`
 */
function formatRss2(channel: any): string {
  const title = rssText(channel.title) || 'Untitled Feed';
  const description = rssText(channel.description);
  // A single <item> is parsed as an object rather than a one-element array
  const items = Array.isArray(channel.item) ? channel.item : (channel.item ? [channel.item] : []);

  let markdown = `RSS Feed:\n\n# ${title}\n`;

  if (description) {
    markdown += `${description}\n`;
  }

  markdown += '\n## Items\n\n';

  // Extract up to 10 items
  const itemsToShow = items.slice(0, 10);

  for (const item of itemsToShow) {
    // Skip anything that is not an element node (e.g. an empty <item/>)
    const node = typeof item === 'object' && item !== null ? item : {};
    const itemTitle = rssText(node.title) || 'Untitled';
    const itemLink = rssText(node.link);
    const itemDescription = rssText(node.description);
    const itemPubDate = rssText(node.pubDate);

    // Truncate description to 200 chars
    const truncatedDesc = itemDescription.length > 200
      ? itemDescription.substring(0, 200) + '...'
      : itemDescription;

    markdown += `### ${itemTitle}\n\n`;

    if (truncatedDesc) {
      markdown += `${truncatedDesc}\n\n`;
    }

    if (itemLink) {
      markdown += `Link: ${itemLink}\n`;
    }

    if (itemPubDate) {
      markdown += `Published: ${itemPubDate}\n`;
    }

    markdown += '\n---\n\n';
  }

  return markdown;
}
187
+
188
/**
 * Reduce a parsed Atom text construct to its text.
 *
 * Elements such as `<title type="html">` parse to an object with the text
 * under `#text`; repeated elements parse to arrays. Interpolating those
 * directly would print `[object Object]`.
 */
function atomText(value: unknown): string {
  if (typeof value === 'string') {
    return value;
  }
  if (typeof value === 'number' || typeof value === 'boolean') {
    return String(value);
  }
  if (Array.isArray(value)) {
    for (const candidate of value) {
      const text = atomText(candidate);
      if (text) {
        return text;
      }
    }
    return '';
  }
  if (typeof value === 'object' && value !== null) {
    return atomText((value as Record<string, unknown>)['#text']);
  }
  return '';
}

/**
 * Resolve the URL of an Atom `<link>` node.
 *
 * An entry may carry several links, which parse to an array. A link with
 * `rel="alternate"` or no `rel` is preferred; otherwise the first link that
 * has a usable `href` is taken.
 */
function atomHref(link: unknown): string {
  if (typeof link === 'string') {
    return link;
  }
  if (Array.isArray(link)) {
    const links: unknown[] = link;
    const isAlternate = (candidate: unknown): boolean => {
      if (typeof candidate !== 'object' || candidate === null) {
        return false;
      }
      const rel = (candidate as Record<string, unknown>)['@_rel'];
      return rel === undefined || rel === 'alternate';
    };
    for (const candidate of [...links.filter(isAlternate), ...links]) {
      const href = atomHref(candidate);
      if (href) {
        return href;
      }
    }
    return '';
  }
  if (typeof link === 'object' && link !== null) {
    const href = (link as Record<string, unknown>)['@_href'];
    return typeof href === 'string' ? href : atomText(link);
  }
  return '';
}

/**
 * Format Atom feed data as Markdown.
 *
 * Emits the feed title and subtitle followed by at most 10 entries, each with
 * its title, summary or content (cut to 200 characters), link and
 * published/updated date. The output uses the same `RSS Feed:` header as the
 * RSS formatter.
 *
 * @param feed Parsed `<feed>` node
 * @returns Markdown document starting with `RSS Feed:`
 */
function formatAtom(feed: any): string {
  const title = atomText(feed.title) || 'Untitled Feed';
  const subtitle = atomText(feed.subtitle);
  // A single <entry> is parsed as an object rather than a one-element array
  const entries = Array.isArray(feed.entry) ? feed.entry : (feed.entry ? [feed.entry] : []);

  let markdown = `RSS Feed:\n\n# ${title}\n`;

  if (subtitle) {
    markdown += `${subtitle}\n`;
  }

  markdown += '\n## Items\n\n';

  // Extract up to 10 entries
  const entriesToShow = entries.slice(0, 10);

  for (const entry of entriesToShow) {
    // Skip anything that is not an element node (e.g. an empty <entry/>)
    const node = typeof entry === 'object' && entry !== null ? entry : {};
    const entryTitle = atomText(node.title) || 'Untitled';
    const entryLink = atomHref(node.link);
    const entrySummary = atomText(node.summary) || atomText(node.content);
    const entryPublished = atomText(node.published) || atomText(node.updated);

    // Truncate summary to 200 chars
    const truncatedSummary = entrySummary.length > 200
      ? entrySummary.substring(0, 200) + '...'
      : entrySummary;

    markdown += `### ${entryTitle}\n\n`;

    if (truncatedSummary) {
      markdown += `${truncatedSummary}\n\n`;
    }

    if (entryLink) {
      markdown += `Link: ${entryLink}\n`;
    }

    if (entryPublished) {
      markdown += `Published: ${entryPublished}\n`;
    }

    markdown += '\n---\n\n';
  }

  return markdown;
}
@@ -0,0 +1,64 @@
1
+ /**
2
+ * Token-aware content truncation utility
3
+ *
4
+ * Anthropic MCP Directory enforces a 25,000 token response limit.
5
+ * This utility provides safe truncation with token estimation.
6
+ */
7
+
8
/**
 * Maximum tokens allowed in MCP response (Anthropic Directory limit)
 * We target 24,000 to leave headroom for metadata/JSON structure
 */
const MAX_TOKENS = 24000;

/**
 * Rough token estimation: 1 token ≈ 4 characters.
 * NOTE(review): this is a heuristic, not a tokenizer; text that tokenizes more
 * densely than 4 characters per token will be under-counted — confirm the
 * headroom above is sufficient for the content this server returns.
 */
const CHARS_PER_TOKEN = 4;

/**
 * Maximum characters based on token limit
 */
const MAX_CHARS = MAX_TOKENS * CHARS_PER_TOKEN; // 96,000 characters

/**
 * Truncate content if it exceeds the token ceiling.
 *
 * Content of at most `MAX_CHARS` characters is returned unchanged. Longer
 * content is cut at `MAX_CHARS` and a plain-text notice describing the
 * truncation is appended, so the returned string is slightly longer than the
 * cut point. The cut never lands between the two halves of a surrogate pair:
 * if it would, it moves back one code unit so no lone surrogate is emitted.
 *
 * @param content Content to potentially truncate
 * @returns The (possibly truncated) content, whether truncation happened, and
 *          the character index at which the content was cut when it did
 */
export function truncateContent(content: string): {
  content: string;
  truncated: boolean;
  truncated_at_chars?: number;
} {
  if (content.length <= MAX_CHARS) {
    // Content is within limits
    return {
      content,
      truncated: false
    };
  }

  // Do not split a surrogate pair at the boundary
  let cutAt = MAX_CHARS;
  const lastUnit = content.charCodeAt(cutAt - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    cutAt -= 1;
  }

  // Content exceeds limit - truncate with warning message
  const truncatedContent = content.substring(0, cutAt);
  const warningMessage = `\n\n--- CONTENT TRUNCATED ---\nOriginal length: ${content.length} characters (~${Math.ceil(content.length / CHARS_PER_TOKEN)} tokens)\nTruncated to: ${cutAt} characters (~${MAX_TOKENS} tokens)\nReason: Anthropic MCP Directory enforces a 25,000 token response limit\n`;

  return {
    content: truncatedContent + warningMessage,
    truncated: true,
    truncated_at_chars: cutAt
  };
}
54
+
55
/**
 * Approximate how many tokens a string occupies.
 *
 * Divides the string's length by the module's characters-per-token ratio and
 * rounds up, so any non-empty text counts as at least one token.
 *
 * @param text Text to estimate
 * @returns Estimated token count
 */
export function estimateTokens(text: string): number {
  const charCount = text.length;
  return Math.ceil(charCount / CHARS_PER_TOKEN);
}