visus-mcp 0.3.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +22 -0
- package/LINKEDIN-STRATEGY.md +367 -0
- package/README.md +491 -16
- package/ROADMAP.md +167 -30
- package/SECURITY-AUDIT-v1.md +277 -0
- package/STATUS.md +801 -42
- package/TROUBLESHOOT-AUTH-20260322-2019.md +291 -0
- package/TROUBLESHOOT-JEST-20260323-1357.md +139 -0
- package/TROUBLESHOOT-LAMBDA-20260322-1945.md +183 -0
- package/VISUS-CLAUDE-CODE-PROMPT.md +1 -1
- package/VISUS-PROJECT-PLAN.md +7 -0
- package/dist/browser/playwright-renderer.d.ts.map +1 -1
- package/dist/browser/playwright-renderer.js +7 -0
- package/dist/browser/playwright-renderer.js.map +1 -1
- package/dist/browser/reader.d.ts +31 -0
- package/dist/browser/reader.d.ts.map +1 -0
- package/dist/browser/reader.js +98 -0
- package/dist/browser/reader.js.map +1 -0
- package/dist/index.d.ts +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +37 -5
- package/dist/index.js.map +1 -1
- package/dist/lambda-handler.d.ts +0 -6
- package/dist/lambda-handler.d.ts.map +1 -1
- package/dist/lambda-handler.js +97 -25
- package/dist/lambda-handler.js.map +1 -1
- package/dist/sanitizer/framework-mapper.d.ts +22 -0
- package/dist/sanitizer/framework-mapper.d.ts.map +1 -0
- package/dist/sanitizer/framework-mapper.js +296 -0
- package/dist/sanitizer/framework-mapper.js.map +1 -0
- package/dist/sanitizer/index.d.ts +2 -0
- package/dist/sanitizer/index.d.ts.map +1 -1
- package/dist/sanitizer/index.js +14 -1
- package/dist/sanitizer/index.js.map +1 -1
- package/dist/sanitizer/patterns.js +1 -1
- package/dist/sanitizer/patterns.js.map +1 -1
- package/dist/sanitizer/severity-classifier.d.ts +33 -0
- package/dist/sanitizer/severity-classifier.d.ts.map +1 -0
- package/dist/sanitizer/severity-classifier.js +113 -0
- package/dist/sanitizer/severity-classifier.js.map +1 -0
- package/dist/sanitizer/threat-reporter.d.ts +65 -0
- package/dist/sanitizer/threat-reporter.d.ts.map +1 -0
- package/dist/sanitizer/threat-reporter.js +160 -0
- package/dist/sanitizer/threat-reporter.js.map +1 -0
- package/dist/tools/fetch-structured.d.ts +5 -0
- package/dist/tools/fetch-structured.d.ts.map +1 -1
- package/dist/tools/fetch-structured.js +54 -6
- package/dist/tools/fetch-structured.js.map +1 -1
- package/dist/tools/fetch.d.ts +5 -0
- package/dist/tools/fetch.d.ts.map +1 -1
- package/dist/tools/fetch.js +42 -9
- package/dist/tools/fetch.js.map +1 -1
- package/dist/tools/read.d.ts +51 -0
- package/dist/tools/read.d.ts.map +1 -0
- package/dist/tools/read.js +127 -0
- package/dist/tools/read.js.map +1 -0
- package/dist/tools/search.d.ts +45 -0
- package/dist/tools/search.d.ts.map +1 -0
- package/dist/tools/search.js +220 -0
- package/dist/tools/search.js.map +1 -0
- package/dist/types.d.ts +64 -0
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/dist/utils/format-converter.d.ts +39 -0
- package/dist/utils/format-converter.d.ts.map +1 -0
- package/dist/utils/format-converter.js +191 -0
- package/dist/utils/format-converter.js.map +1 -0
- package/dist/utils/truncate.d.ts +26 -0
- package/dist/utils/truncate.d.ts.map +1 -0
- package/dist/utils/truncate.js +54 -0
- package/dist/utils/truncate.js.map +1 -0
- package/infrastructure/stack.ts +55 -6
- package/jest.config.js +3 -0
- package/package.json +9 -2
- package/src/browser/playwright-renderer.ts +8 -0
- package/src/browser/reader.ts +129 -0
- package/src/index.ts +49 -5
- package/src/lambda-handler.ts +131 -26
- package/src/sanitizer/framework-mapper.ts +347 -0
- package/src/sanitizer/index.ts +18 -1
- package/src/sanitizer/patterns.ts +1 -1
- package/src/sanitizer/severity-classifier.ts +132 -0
- package/src/sanitizer/threat-reporter.ts +261 -0
- package/src/tools/fetch-structured.ts +58 -6
- package/src/tools/fetch.ts +44 -9
- package/src/tools/read.ts +143 -0
- package/src/tools/search.ts +263 -0
- package/src/types.ts +69 -0
- package/src/utils/format-converter.ts +236 -0
- package/src/utils/truncate.ts +64 -0
- package/tests/auth-smoke.test.ts +480 -0
- package/tests/fetch-tool.test.ts +595 -2
- package/tests/reader.test.ts +353 -0
- package/tests/sanitizer.test.ts +52 -0
- package/tests/search.test.ts +456 -0
- package/tests/threat-reporter.test.ts +266 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"truncate.d.ts","sourceRoot":"","sources":["../../src/utils/truncate.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAmBH;;;;;GAKG;AACH,wBAAgB,eAAe,CAAC,OAAO,EAAE,MAAM,GAAG;IAChD,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,OAAO,CAAC;IACnB,kBAAkB,CAAC,EAAE,MAAM,CAAC;CAC7B,CAkBA;AAED;;;;;;GAMG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAEnD"}
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Token-aware content truncation utility
|
|
3
|
+
*
|
|
4
|
+
* Anthropic MCP Directory enforces a 25,000 token response limit.
|
|
5
|
+
* This utility provides safe truncation with token estimation.
|
|
6
|
+
*/
|
|
7
|
+
/**
 * Maximum tokens allowed in MCP response (Anthropic Directory limit)
 * We target 24,000 to leave headroom for metadata/JSON structure
 */
const MAX_TOKENS = 24000;
/**
 * Token estimation heuristic: 1 token ≈ 4 characters.
 * This is an approximation based on string length, not a tokenizer count.
 */
const CHARS_PER_TOKEN = 4;
/**
 * Maximum characters based on token limit
 */
const MAX_CHARS = MAX_TOKENS * CHARS_PER_TOKEN; // 96,000 characters
/**
 * Truncate content if it exceeds the token ceiling.
 *
 * Content of at most MAX_CHARS UTF-16 code units is returned untouched.
 * Longer content is cut at MAX_CHARS and a human-readable truncation notice
 * is appended after the kept text. If the cut would fall between the two
 * halves of a surrogate pair, it is moved back by one code unit so the kept
 * text never ends in a lone high surrogate.
 *
 * @param content Content to potentially truncate
 * @returns `{ content, truncated: false }` when nothing was cut; otherwise
 *          `{ content, truncated: true, truncated_at_chars }`, where
 *          `truncated_at_chars` is the number of code units kept from the
 *          original (MAX_CHARS, or MAX_CHARS - 1 when a surrogate pair
 *          straddled the limit)
 */
export function truncateContent(content) {
    if (content.length <= MAX_CHARS) {
        // Content is within limits
        return {
            content,
            truncated: false
        };
    }
    // Content exceeds limit - pick the cut position.
    let cutoff = MAX_CHARS;
    const lastKeptUnit = content.charCodeAt(cutoff - 1);
    if (lastKeptUnit >= 0xd800 && lastKeptUnit <= 0xdbff) {
        // The last kept unit is a high surrogate whose low half lies past the
        // limit; drop it rather than emit half of a character.
        cutoff -= 1;
    }
    const truncatedContent = content.substring(0, cutoff);
    const warningMessage = `\n\n--- CONTENT TRUNCATED ---\nOriginal length: ${content.length} characters (~${Math.ceil(content.length / CHARS_PER_TOKEN)} tokens)\nTruncated to: ${cutoff} characters (~${MAX_TOKENS} tokens)\nReason: Anthropic MCP Directory enforces a 25,000 token response limit\n`;
    return {
        content: truncatedContent + warningMessage,
        truncated: true,
        truncated_at_chars: cutoff
    };
}
|
|
44
|
+
/**
 * Approximate the number of tokens a string occupies.
 * Divides the string length by CHARS_PER_TOKEN and rounds up.
 *
 * @param text Text to estimate
 * @returns Estimated token count
 */
export function estimateTokens(text) {
    const characterCount = text.length;
    const estimated = Math.ceil(characterCount / CHARS_PER_TOKEN);
    return estimated;
}
|
|
54
|
+
//# sourceMappingURL=truncate.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"truncate.js","sourceRoot":"","sources":["../../src/utils/truncate.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH;;;GAGG;AACH,MAAM,UAAU,GAAG,KAAK,CAAC;AAEzB;;;GAGG;AACH,MAAM,eAAe,GAAG,CAAC,CAAC;AAE1B;;GAEG;AACH,MAAM,SAAS,GAAG,UAAU,GAAG,eAAe,CAAC,CAAC,oBAAoB;AAEpE;;;;;GAKG;AACH,MAAM,UAAU,eAAe,CAAC,OAAe;IAK7C,IAAI,OAAO,CAAC,MAAM,IAAI,SAAS,EAAE,CAAC;QAChC,2BAA2B;QAC3B,OAAO;YACL,OAAO;YACP,SAAS,EAAE,KAAK;SACjB,CAAC;IACJ,CAAC;IAED,wDAAwD;IACxD,MAAM,gBAAgB,GAAG,OAAO,CAAC,SAAS,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC;IACzD,MAAM,cAAc,GAAG,mDAAmD,OAAO,CAAC,MAAM,iBAAiB,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,GAAG,eAAe,CAAC,2BAA2B,SAAS,iBAAiB,UAAU,oFAAoF,CAAC;IAExS,OAAO;QACL,OAAO,EAAE,gBAAgB,GAAG,cAAc;QAC1C,SAAS,EAAE,IAAI;QACf,kBAAkB,EAAE,SAAS;KAC9B,CAAC;AACJ,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,eAAe,CAAC,CAAC;AAClD,CAAC"}
|
package/infrastructure/stack.ts
CHANGED
|
@@ -19,6 +19,7 @@
|
|
|
19
19
|
|
|
20
20
|
import * as cdk from 'aws-cdk-lib';
|
|
21
21
|
import * as lambda from 'aws-cdk-lib/aws-lambda';
|
|
22
|
+
import * as lambdaNodejs from 'aws-cdk-lib/aws-lambda-nodejs';
|
|
22
23
|
import * as apigateway from 'aws-cdk-lib/aws-apigateway';
|
|
23
24
|
import * as cognito from 'aws-cdk-lib/aws-cognito';
|
|
24
25
|
import * as dynamodb from 'aws-cdk-lib/aws-dynamodb';
|
|
@@ -69,6 +70,7 @@ export class VisusStack extends cdk.Stack {
|
|
|
69
70
|
removalPolicy: environment === 'prod'
|
|
70
71
|
? cdk.RemovalPolicy.RETAIN
|
|
71
72
|
: cdk.RemovalPolicy.DESTROY,
|
|
73
|
+
timeToLiveAttribute: 'ttl', // Auto-delete audit logs after 30 days
|
|
72
74
|
});
|
|
73
75
|
|
|
74
76
|
// Global Secondary Index for querying by request_id
|
|
@@ -142,12 +144,12 @@ export class VisusStack extends cdk.Stack {
|
|
|
142
144
|
// Grant KMS decrypt access (for reading encrypted DynamoDB data if needed)
|
|
143
145
|
kmsKey.grantEncryptDecrypt(lambdaRole);
|
|
144
146
|
|
|
145
|
-
// Lambda function
|
|
146
|
-
const visusFn = new
|
|
147
|
+
// Lambda function (NodejsFunction with automatic bundling)
|
|
148
|
+
const visusFn = new lambdaNodejs.NodejsFunction(this, 'VisusFunction', {
|
|
147
149
|
functionName: `visus-mcp-${environment}`,
|
|
148
150
|
runtime: lambda.Runtime.NODEJS_20_X,
|
|
149
|
-
|
|
150
|
-
|
|
151
|
+
entry: 'src/lambda-handler.ts', // Entry point for bundler
|
|
152
|
+
handler: 'handler', // Export name in the entry file
|
|
151
153
|
timeout: cdk.Duration.seconds(30), // Playwright page loads can take time
|
|
152
154
|
memorySize: 1024, // Chromium requires significant memory
|
|
153
155
|
reservedConcurrentExecutions: environment === 'prod' ? 100 : 10, // RULE 7: Cost protection
|
|
@@ -155,12 +157,21 @@ export class VisusStack extends cdk.Stack {
|
|
|
155
157
|
environment: {
|
|
156
158
|
AUDIT_TABLE_NAME: auditTable.tableName,
|
|
157
159
|
ENVIRONMENT: environment,
|
|
160
|
+
ALLOWED_ORIGINS: 'https://claude.ai,https://app.claude.ai,http://localhost:3000',
|
|
158
161
|
NODE_OPTIONS: '--enable-source-maps', // For debugging
|
|
159
162
|
},
|
|
160
163
|
logRetention: environment === 'prod'
|
|
161
164
|
? logs.RetentionDays.ONE_MONTH
|
|
162
165
|
: logs.RetentionDays.ONE_WEEK,
|
|
163
166
|
description: `Visus MCP sanitization service (${environment})`,
|
|
167
|
+
bundling: {
|
|
168
|
+
minify: false, // Keep readable for debugging
|
|
169
|
+
sourceMap: true,
|
|
170
|
+
externalModules: [
|
|
171
|
+
'playwright-core', // Playwright is huge, will be added via layer
|
|
172
|
+
'@sparticuz/chromium', // Chromium binary
|
|
173
|
+
],
|
|
174
|
+
},
|
|
164
175
|
});
|
|
165
176
|
|
|
166
177
|
// ========================================
|
|
@@ -178,12 +189,44 @@ export class VisusStack extends cdk.Stack {
|
|
|
178
189
|
metricsEnabled: true,
|
|
179
190
|
},
|
|
180
191
|
defaultCorsPreflightOptions: {
|
|
181
|
-
allowOrigins:
|
|
182
|
-
|
|
192
|
+
allowOrigins: [
|
|
193
|
+
'https://claude.ai',
|
|
194
|
+
'https://app.claude.ai',
|
|
195
|
+
'http://localhost:3000', // local dev only
|
|
196
|
+
],
|
|
197
|
+
allowMethods: ['POST', 'OPTIONS'],
|
|
183
198
|
allowHeaders: ['Content-Type', 'Authorization'],
|
|
184
199
|
},
|
|
185
200
|
});
|
|
186
201
|
|
|
202
|
+
// Usage plan with rate limiting and quota
|
|
203
|
+
const usagePlan = api.addUsagePlan('VisusUsagePlan', {
|
|
204
|
+
name: `visus-usage-plan-${environment}`,
|
|
205
|
+
description: 'Rate limiting and quota management for Visus API',
|
|
206
|
+
throttle: {
|
|
207
|
+
rateLimit: 10, // 10 requests per second
|
|
208
|
+
burstLimit: 20, // 20 request burst
|
|
209
|
+
},
|
|
210
|
+
quota: {
|
|
211
|
+
limit: 1000, // 1000 requests per day
|
|
212
|
+
period: apigateway.Period.DAY,
|
|
213
|
+
},
|
|
214
|
+
});
|
|
215
|
+
|
|
216
|
+
// Add deployment stage to usage plan
|
|
217
|
+
usagePlan.addApiStage({
|
|
218
|
+
stage: api.deploymentStage,
|
|
219
|
+
});
|
|
220
|
+
|
|
221
|
+
// Create API key for the usage plan
|
|
222
|
+
const apiKey = api.addApiKey('VisusApiKey', {
|
|
223
|
+
apiKeyName: `visus-api-key-${environment}`,
|
|
224
|
+
description: `API key for Visus ${environment} environment`,
|
|
225
|
+
});
|
|
226
|
+
|
|
227
|
+
// Associate API key with usage plan
|
|
228
|
+
usagePlan.addApiKey(apiKey);
|
|
229
|
+
|
|
187
230
|
// Cognito authorizer
|
|
188
231
|
const authorizer = new apigateway.CognitoUserPoolsAuthorizer(this, 'VisusAuthorizer', {
|
|
189
232
|
cognitoUserPools: [userPool],
|
|
@@ -244,5 +287,11 @@ export class VisusStack extends cdk.Stack {
|
|
|
244
287
|
description: 'Lambda function ARN',
|
|
245
288
|
exportName: `visus-lambda-arn-${environment}`,
|
|
246
289
|
});
|
|
290
|
+
|
|
291
|
+
new cdk.CfnOutput(this, 'ApiKeyId', {
|
|
292
|
+
value: apiKey.keyId,
|
|
293
|
+
description: 'API Gateway API Key ID (use aws apigateway get-api-key to retrieve value)',
|
|
294
|
+
exportName: `visus-api-key-id-${environment}`,
|
|
295
|
+
});
|
|
247
296
|
}
|
|
248
297
|
}
|
package/jest.config.js
CHANGED
|
@@ -15,6 +15,9 @@ export default {
|
|
|
15
15
|
},
|
|
16
16
|
testMatch: ['**/tests/**/*.test.ts'],
|
|
17
17
|
testPathIgnorePatterns: ['/node_modules/', '/dist/'],
|
|
18
|
+
transformIgnorePatterns: [
|
|
19
|
+
'node_modules/(?!(@mozilla/readability|jsdom|@exodus/bytes|html-encoding-sniffer|@toon-format)/)',
|
|
20
|
+
],
|
|
18
21
|
testTimeout: 15000,
|
|
19
22
|
forceExit: true,
|
|
20
23
|
detectOpenHandles: false,
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "visus-mcp",
|
|
3
|
-
"version": "0.
|
|
4
|
-
"description": "Secure web access for Claude
|
|
3
|
+
"version": "0.6.0",
|
|
4
|
+
"description": "Secure, sanitized web access for Claude. Detects prompt injection (43 patterns), redacts PII, renders JavaScript pages, generates NIST AI 600-1 / OWASP / MITRE ATLAS threat reports, and auto-detects JSON/XML/RSS content types.",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"bin": {
|
|
7
7
|
"visus-mcp": "dist/index.js"
|
|
@@ -39,15 +39,22 @@
|
|
|
39
39
|
"node": ">=18"
|
|
40
40
|
},
|
|
41
41
|
"dependencies": {
|
|
42
|
+
"@aws-sdk/client-dynamodb": "^3.1014.0",
|
|
43
|
+
"@aws-sdk/lib-dynamodb": "^3.1014.0",
|
|
42
44
|
"@modelcontextprotocol/sdk": "^1.0.4",
|
|
45
|
+
"@mozilla/readability": "^0.6.0",
|
|
43
46
|
"@playwright/test": "^1.58.2",
|
|
47
|
+
"@toon-format/toon": "^2.1.0",
|
|
44
48
|
"cheerio": "^1.2.0",
|
|
49
|
+
"fast-xml-parser": "^5.5.8",
|
|
50
|
+
"jsdom": "^29.0.1",
|
|
45
51
|
"playwright": "^1.58.2",
|
|
46
52
|
"undici": "^7.24.5"
|
|
47
53
|
},
|
|
48
54
|
"devDependencies": {
|
|
49
55
|
"@types/aws-lambda": "^8.10.161",
|
|
50
56
|
"@types/jest": "^29.5.14",
|
|
57
|
+
"@types/jsdom": "^28.0.1",
|
|
51
58
|
"@types/node": "^20.19.37",
|
|
52
59
|
"aws-cdk": "^2.1112.0",
|
|
53
60
|
"aws-cdk-lib": "^2.244.0",
|
|
@@ -126,6 +126,7 @@ async function renderWithLambda(
|
|
|
126
126
|
html: body.html,
|
|
127
127
|
title: body.title,
|
|
128
128
|
url,
|
|
129
|
+
contentType: 'text/html', // Lambda renderer defaults to HTML
|
|
129
130
|
text: undefined, // Lambda renderer doesn't extract text
|
|
130
131
|
});
|
|
131
132
|
|
|
@@ -164,6 +165,12 @@ async function renderWithFetch(
|
|
|
164
165
|
|
|
165
166
|
const html = await response.body.text();
|
|
166
167
|
|
|
168
|
+
// Capture Content-Type header
|
|
169
|
+
const contentTypeHeader = response.headers['content-type'];
|
|
170
|
+
const contentType = typeof contentTypeHeader === 'string'
|
|
171
|
+
? contentTypeHeader.split(';')[0].trim() // Remove charset and other params
|
|
172
|
+
: 'text/html'; // Default to HTML if missing
|
|
173
|
+
|
|
167
174
|
// Extract title using regex (simple fallback)
|
|
168
175
|
const titleMatch = html.match(/<title[^>]*>(.*?)<\/title>/i);
|
|
169
176
|
const title = titleMatch ? titleMatch[1].trim() : '';
|
|
@@ -172,6 +179,7 @@ async function renderWithFetch(
|
|
|
172
179
|
html,
|
|
173
180
|
title,
|
|
174
181
|
url,
|
|
182
|
+
contentType,
|
|
175
183
|
text: undefined,
|
|
176
184
|
});
|
|
177
185
|
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Reader Mode - Mozilla Readability Integration
|
|
3
|
+
*
|
|
4
|
+
* Extracts clean article content from web pages using Mozilla's Readability.js.
|
|
5
|
+
* This module strips navigation, ads, and boilerplate to return main article content.
|
|
6
|
+
*
|
|
7
|
+
* CRITICAL: Content extraction happens BEFORE sanitization. The pipeline is:
|
|
8
|
+
* Playwright renders → Readability extracts → Sanitizer runs → Token ceiling applied
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import { Readability } from '@mozilla/readability';
|
|
12
|
+
import { JSDOM } from 'jsdom';
|
|
13
|
+
import type { Result } from '../types.js';
|
|
14
|
+
import { Ok, Err } from '../types.js';
|
|
15
|
+
|
|
16
|
+
/**
 * Shape of the value produced by reader-mode extraction.
 */
export interface ReaderResult {
  /** Title of the article or page. */
  title: string;
  /** Author, or null when none is available. */
  byline: string | null;
  /** ISO timestamp or null. */
  publishedTime: string | null;
  /** Extracted text content. */
  content: string;
  /** Short summary, or null when none is available. */
  excerpt: string | null;
  /** Estimated word count. */
  wordCount: number;
  /** True if Readability succeeded. */
  readerModeAvailable: boolean;
}
|
|
28
|
+
|
|
29
|
+
/**
 * Extract clean article content using Mozilla Readability
 *
 * The HTML is parsed with jsdom and Readability runs on a deep clone of the
 * parsed document. Readability's parse() rewrites the document it is handed,
 * so working on a clone keeps the original intact for the fallback path.
 * The jsdom window is closed before returning, whatever the outcome.
 *
 * @param html - Rendered HTML from Playwright
 * @param url - Original URL (required for relative link resolution)
 * @returns Ok with `readerModeAvailable: true` when Readability yields text
 *          content; Ok with `readerModeAvailable: false` and the page's body
 *          text when it does not; Err when parsing or extraction throws
 */
export function extractArticle(
  html: string,
  url: string
): Result<ReaderResult, Error> {
  let dom: JSDOM | undefined;

  try {
    // Parse HTML with jsdom
    dom = new JSDOM(html, { url });
    const document = dom.window.document;

    // Attempt extraction with Readability on a clone, because parse()
    // mutates its input and the fallback below needs the untouched DOM.
    const readabilityInput = document.cloneNode(true) as Document;
    const reader = new Readability(readabilityInput);
    const article = reader.parse();

    // If Readability succeeds, return extracted content
    if (article && article.textContent) {
      const wordCount = estimateWordCount(article.textContent);

      return Ok({
        title: article.title || 'Untitled',
        byline: article.byline || null,
        publishedTime: article.publishedTime || null,
        content: article.textContent,
        excerpt: article.excerpt || null,
        wordCount,
        readerModeAvailable: true
      });
    }

    // Readability failed - fallback to raw text extraction
    const fallbackText = extractFallbackText(document);
    const wordCount = estimateWordCount(fallbackText);

    // Extract title from <title> tag as fallback
    const titleElement = document.querySelector('title');
    const fallbackTitle = titleElement?.textContent?.trim() || 'Untitled';

    return Ok({
      title: fallbackTitle,
      byline: null,
      publishedTime: null,
      content: fallbackText,
      excerpt: null,
      wordCount,
      readerModeAvailable: false
    });

  } catch (error) {
    return Err(
      error instanceof Error
        ? error
        : new Error(`Reader extraction failed: ${String(error)}`)
    );
  } finally {
    // Release the jsdom window; every value returned above is a plain
    // string or number already copied out of the DOM.
    dom?.window.close();
  }
}
|
|
90
|
+
|
|
91
|
+
/**
 * Count the whitespace-separated words in a piece of text.
 *
 * @param text - Text content to count
 * @returns Number of non-empty whitespace-delimited tokens; 0 for empty or
 *          whitespace-only input
 */
function estimateWordCount(text: string): number {
  const trimmed = text ? text.trim() : '';
  if (trimmed.length === 0) {
    return 0;
  }

  // Walk the whitespace-delimited pieces and tally the non-empty ones
  let total = 0;
  for (const piece of trimmed.split(/\s+/)) {
    if (piece.length > 0) {
      total += 1;
    }
  }
  return total;
}
|
|
106
|
+
|
|
107
|
+
/**
 * Plain-text fallback used when Readability produces no article.
 *
 * Removes every script, style and noscript element from the given document
 * (the document is modified in place), then returns the body's text with each
 * line trimmed and blank lines dropped.
 *
 * @param document - JSDOM document
 * @returns Body text joined with single newlines; empty string when the
 *          document has no body or no text
 */
function extractFallbackText(document: Document): string {
  // Strip elements whose text is not page content
  const nonContentNodes = Array.from(
    document.querySelectorAll('script, style, noscript')
  );
  for (const node of nonContentNodes) {
    node.remove();
  }

  // Whatever text remains under <body>
  const rawText = document.body?.textContent || '';

  // Keep only lines that still have content after trimming
  const keptLines: string[] = [];
  for (const rawLine of rawText.split('\n')) {
    const line = rawLine.trim();
    if (line.length > 0) {
      keptLines.push(line);
    }
  }
  return keptLines.join('\n');
}
|
package/src/index.ts
CHANGED
|
@@ -27,6 +27,8 @@ import {
|
|
|
27
27
|
|
|
28
28
|
import { visusFetch, visusFetchToolDefinition } from './tools/fetch.js';
|
|
29
29
|
import { visusFetchStructured, visusFetchStructuredToolDefinition } from './tools/fetch-structured.js';
|
|
30
|
+
import { visusRead, visusReadToolDefinition } from './tools/read.js';
|
|
31
|
+
import { visusSearch, visusSearchToolDefinition } from './tools/search.js';
|
|
30
32
|
import { closeBrowser } from './browser/playwright-renderer.js';
|
|
31
33
|
import { detectRuntime, logRuntimeConfig, validateRuntime } from './runtime.js';
|
|
32
34
|
|
|
@@ -52,7 +54,9 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
|
|
|
52
54
|
return {
|
|
53
55
|
tools: [
|
|
54
56
|
visusFetchToolDefinition,
|
|
55
|
-
visusFetchStructuredToolDefinition
|
|
57
|
+
visusFetchStructuredToolDefinition,
|
|
58
|
+
visusReadToolDefinition,
|
|
59
|
+
visusSearchToolDefinition
|
|
56
60
|
]
|
|
57
61
|
};
|
|
58
62
|
});
|
|
@@ -105,6 +109,46 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
105
109
|
};
|
|
106
110
|
}
|
|
107
111
|
|
|
112
|
+
case 'visus_read': {
|
|
113
|
+
const result = await visusRead(args as any);
|
|
114
|
+
|
|
115
|
+
if (!result.ok) {
|
|
116
|
+
throw new McpError(
|
|
117
|
+
ErrorCode.InternalError,
|
|
118
|
+
`visus_read failed: ${result.error.message}`
|
|
119
|
+
);
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
return {
|
|
123
|
+
content: [
|
|
124
|
+
{
|
|
125
|
+
type: 'text',
|
|
126
|
+
text: JSON.stringify(result.value, null, 2)
|
|
127
|
+
}
|
|
128
|
+
]
|
|
129
|
+
};
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
case 'visus_search': {
|
|
133
|
+
const result = await visusSearch(args as any);
|
|
134
|
+
|
|
135
|
+
if (!result.ok) {
|
|
136
|
+
throw new McpError(
|
|
137
|
+
ErrorCode.InternalError,
|
|
138
|
+
`visus_search failed: ${result.error.message}`
|
|
139
|
+
);
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
return {
|
|
143
|
+
content: [
|
|
144
|
+
{
|
|
145
|
+
type: 'text',
|
|
146
|
+
text: JSON.stringify(result.value, null, 2)
|
|
147
|
+
}
|
|
148
|
+
]
|
|
149
|
+
};
|
|
150
|
+
}
|
|
151
|
+
|
|
108
152
|
default:
|
|
109
153
|
throw new McpError(
|
|
110
154
|
ErrorCode.MethodNotFound,
|
|
@@ -138,7 +182,7 @@ async function startMcpServer() {
|
|
|
138
182
|
event: 'mcp_server_started',
|
|
139
183
|
name: 'visus-mcp',
|
|
140
184
|
version: '0.2.0',
|
|
141
|
-
tools: ['visus_fetch', 'visus_fetch_structured']
|
|
185
|
+
tools: ['visus_fetch', 'visus_fetch_structured', 'visus_read', 'visus_search']
|
|
142
186
|
}));
|
|
143
187
|
|
|
144
188
|
// Graceful shutdown
|
|
@@ -188,9 +232,9 @@ async function main() {
|
|
|
188
232
|
}
|
|
189
233
|
}
|
|
190
234
|
|
|
191
|
-
// Export Lambda
|
|
192
|
-
//
|
|
193
|
-
export { handler
|
|
235
|
+
// Export Lambda handler (for AWS deployment)
|
|
236
|
+
// This is only used when the file is imported as a module by Lambda runtime
|
|
237
|
+
export { handler } from './lambda-handler.js';
|
|
194
238
|
|
|
195
239
|
// Run stdio MCP server when executed directly (not in Lambda)
|
|
196
240
|
if (!process.env.AWS_LAMBDA_FUNCTION_NAME) {
|
package/src/lambda-handler.ts
CHANGED
|
@@ -14,10 +14,16 @@
|
|
|
14
14
|
*/
|
|
15
15
|
|
|
16
16
|
import type { APIGatewayProxyEvent, APIGatewayProxyResult, Context } from 'aws-lambda';
|
|
17
|
+
import { DynamoDBClient } from '@aws-sdk/client-dynamodb';
|
|
18
|
+
import { DynamoDBDocumentClient, PutCommand } from '@aws-sdk/lib-dynamodb';
|
|
17
19
|
import { visusFetch } from './tools/fetch.js';
|
|
18
20
|
import { visusFetchStructured } from './tools/fetch-structured.js';
|
|
19
21
|
import { closeBrowser } from './browser/playwright-renderer.js';
|
|
20
22
|
|
|
23
|
+
// Initialize DynamoDB client
|
|
24
|
+
const ddbClient = new DynamoDBClient({});
|
|
25
|
+
const docClient = DynamoDBDocumentClient.from(ddbClient);
|
|
26
|
+
|
|
21
27
|
/**
|
|
22
28
|
* API request body for visus_fetch
|
|
23
29
|
*/
|
|
@@ -36,6 +42,63 @@ interface FetchStructuredRequest {
|
|
|
36
42
|
timeout_ms?: number;
|
|
37
43
|
}
|
|
38
44
|
|
|
45
|
+
/**
 * Write one audit record to DynamoDB without waiting for the result.
 *
 * The write is started and its promise is deliberately not awaited; a failed
 * write is reported through console.error and never propagates to the caller.
 * When AUDIT_TABLE_NAME is unset the function logs that fact and does nothing.
 * Each record carries a `ttl` attribute set 30 days ahead (epoch seconds).
 *
 * NOTE(review): because the promise is not awaited, the write may still be
 * in flight when the Lambda handler returns — confirm records are reliably
 * persisted, or have the caller await the write.
 *
 * @param userId User ID from Cognito JWT
 * @param requestId AWS request ID
 * @param url URL being fetched
 * @param endpoint API endpoint (/fetch or /fetch-structured)
 * @param patternsDetected Sanitization patterns detected
 * @param piiRedacted PII types redacted
 */
function logAuditEvent(
  userId: string,
  requestId: string,
  url: string,
  endpoint: string,
  patternsDetected: string[],
  piiRedacted: string[]
): void {
  const auditTable = process.env.AUDIT_TABLE_NAME;
  if (!auditTable) {
    console.error('AUDIT_TABLE_NAME not set - skipping audit logging');
    return;
  }

  // Record creation time and the matching expiry, 30 days later
  const retentionSeconds = 30 * 24 * 60 * 60;
  const createdAt = new Date();
  const createdAtIso = createdAt.toISOString();
  const expiresAtEpochSeconds =
    Math.floor(createdAt.getTime() / 1000) + retentionSeconds;

  const putAuditRecord = new PutCommand({
    TableName: auditTable,
    Item: {
      user_id: userId,
      timestamp: createdAtIso,
      request_id: requestId,
      url,
      endpoint,
      patterns_detected: patternsDetected,
      pii_redacted: piiRedacted,
      ttl: expiresAtEpochSeconds, // Auto-delete after 30 days
    },
  });

  // Failure reporter: log only, never rethrow
  const reportFailure = (error: unknown): void => {
    console.error(JSON.stringify({
      timestamp: createdAtIso,
      event: 'audit_logging_failed',
      error: error instanceof Error ? error.message : String(error),
      request_id: requestId,
    }));
  };

  // Intentionally not awaited
  docClient.send(putAuditRecord).catch(reportFailure);
}
|
|
101
|
+
|
|
39
102
|
/**
|
|
40
103
|
* Lambda handler for Visus API
|
|
41
104
|
*
|
|
@@ -65,10 +128,14 @@ export async function handler(
|
|
|
65
128
|
}));
|
|
66
129
|
|
|
67
130
|
try {
|
|
68
|
-
// CORS headers for all responses
|
|
131
|
+
// CORS headers for all responses (environment-variable-driven allowlist)
|
|
132
|
+
const allowedOrigins = (process.env.ALLOWED_ORIGINS || '*').split(',');
|
|
133
|
+
const origin = event.headers.origin || event.headers.Origin || '';
|
|
134
|
+
const allowOrigin = allowedOrigins.includes(origin) ? origin : allowedOrigins[0] || '*';
|
|
135
|
+
|
|
69
136
|
const corsHeaders = {
|
|
70
|
-
'Access-Control-Allow-Origin':
|
|
71
|
-
'Access-Control-Allow-Methods': 'POST, OPTIONS',
|
|
137
|
+
'Access-Control-Allow-Origin': allowOrigin,
|
|
138
|
+
'Access-Control-Allow-Methods': 'GET, POST, OPTIONS',
|
|
72
139
|
'Access-Control-Allow-Headers': 'Content-Type, Authorization',
|
|
73
140
|
'Content-Type': 'application/json',
|
|
74
141
|
};
|
|
@@ -82,7 +149,22 @@ export async function handler(
|
|
|
82
149
|
};
|
|
83
150
|
}
|
|
84
151
|
|
|
85
|
-
//
|
|
152
|
+
// Health check endpoint (no auth required, allows GET and POST)
|
|
153
|
+
// SECURITY FIX (FINDING 2): Moved before POST-only validation to support standard GET health checks
|
|
154
|
+
if (event.path === '/health' || event.path === '/dev/health' || event.path === '/prod/health') {
|
|
155
|
+
return {
|
|
156
|
+
statusCode: 200,
|
|
157
|
+
headers: corsHeaders,
|
|
158
|
+
body: JSON.stringify({
|
|
159
|
+
status: 'healthy',
|
|
160
|
+
service: 'visus-mcp',
|
|
161
|
+
version: '0.3.1',
|
|
162
|
+
timestamp: new Date().toISOString(),
|
|
163
|
+
}),
|
|
164
|
+
};
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
// Only allow POST requests for protected endpoints
|
|
86
168
|
if (event.httpMethod !== 'POST') {
|
|
87
169
|
return {
|
|
88
170
|
statusCode: 405,
|
|
@@ -103,8 +185,31 @@ export async function handler(
|
|
|
103
185
|
};
|
|
104
186
|
}
|
|
105
187
|
|
|
188
|
+
// SECURITY FIX (FINDING 1): Application-level authentication enforcement
|
|
189
|
+
// Extract user ID from Cognito authorizer
|
|
190
|
+
const userId = event.requestContext.authorizer?.claims?.sub;
|
|
191
|
+
|
|
192
|
+
// Require authentication for all protected endpoints (not already handled above)
|
|
193
|
+
if (!userId) {
|
|
194
|
+
console.error(JSON.stringify({
|
|
195
|
+
timestamp: new Date().toISOString(),
|
|
196
|
+
event: 'auth_required',
|
|
197
|
+
request_id: requestId,
|
|
198
|
+
path: event.path,
|
|
199
|
+
reason: 'Missing Cognito authorizer context - Lambda must be invoked via API Gateway',
|
|
200
|
+
}));
|
|
201
|
+
|
|
202
|
+
return {
|
|
203
|
+
statusCode: 401,
|
|
204
|
+
headers: corsHeaders,
|
|
205
|
+
body: JSON.stringify({
|
|
206
|
+
error: 'Unauthorized: Authentication required. This Lambda must be invoked via API Gateway with Cognito authorizer.',
|
|
207
|
+
}),
|
|
208
|
+
};
|
|
209
|
+
}
|
|
210
|
+
|
|
106
211
|
// Route based on path
|
|
107
|
-
if (event.path === '/fetch' || event.path === '/prod/fetch') {
|
|
212
|
+
if (event.path === '/fetch' || event.path === '/dev/fetch' || event.path === '/prod/fetch') {
|
|
108
213
|
const fetchReq = body as FetchRequest;
|
|
109
214
|
|
|
110
215
|
// Validate request
|
|
@@ -127,6 +232,16 @@ export async function handler(
|
|
|
127
232
|
};
|
|
128
233
|
}
|
|
129
234
|
|
|
235
|
+
// Fire-and-forget audit logging
|
|
236
|
+
logAuditEvent(
|
|
237
|
+
userId,
|
|
238
|
+
requestId,
|
|
239
|
+
fetchReq.url,
|
|
240
|
+
'/fetch',
|
|
241
|
+
result.value.sanitization.patterns_detected,
|
|
242
|
+
result.value.sanitization.pii_types_redacted
|
|
243
|
+
);
|
|
244
|
+
|
|
130
245
|
return {
|
|
131
246
|
statusCode: 200,
|
|
132
247
|
headers: corsHeaders,
|
|
@@ -134,7 +249,7 @@ export async function handler(
|
|
|
134
249
|
};
|
|
135
250
|
}
|
|
136
251
|
|
|
137
|
-
if (event.path === '/fetch-structured' || event.path === '/prod/fetch-structured') {
|
|
252
|
+
if (event.path === '/fetch-structured' || event.path === '/dev/fetch-structured' || event.path === '/prod/fetch-structured') {
|
|
138
253
|
const fetchReq = body as FetchStructuredRequest;
|
|
139
254
|
|
|
140
255
|
// Validate request
|
|
@@ -165,6 +280,16 @@ export async function handler(
|
|
|
165
280
|
};
|
|
166
281
|
}
|
|
167
282
|
|
|
283
|
+
// Fire-and-forget audit logging
|
|
284
|
+
logAuditEvent(
|
|
285
|
+
userId,
|
|
286
|
+
requestId,
|
|
287
|
+
fetchReq.url,
|
|
288
|
+
'/fetch-structured',
|
|
289
|
+
result.value.sanitization.patterns_detected,
|
|
290
|
+
result.value.sanitization.pii_types_redacted
|
|
291
|
+
);
|
|
292
|
+
|
|
168
293
|
return {
|
|
169
294
|
statusCode: 200,
|
|
170
295
|
headers: corsHeaders,
|
|
@@ -203,23 +328,3 @@ export async function handler(
|
|
|
203
328
|
await closeBrowser();
|
|
204
329
|
}
|
|
205
330
|
}
|
|
206
|
-
|
|
207
|
-
/**
|
|
208
|
-
* Health check handler
|
|
209
|
-
*
|
|
210
|
-
* @returns API Gateway response
|
|
211
|
-
*/
|
|
212
|
-
export async function healthCheck(): Promise<APIGatewayProxyResult> {
|
|
213
|
-
return {
|
|
214
|
-
statusCode: 200,
|
|
215
|
-
headers: {
|
|
216
|
-
'Content-Type': 'application/json',
|
|
217
|
-
},
|
|
218
|
-
body: JSON.stringify({
|
|
219
|
-
status: 'healthy',
|
|
220
|
-
service: 'visus-mcp',
|
|
221
|
-
version: '0.2.0',
|
|
222
|
-
timestamp: new Date().toISOString(),
|
|
223
|
-
}),
|
|
224
|
-
};
|
|
225
|
-
}
|