@j0hanz/superfetch 1.2.2 → 1.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +60 -45
- package/dist/config/formatting.d.ts +1 -1
- package/dist/config/types/content.d.ts +3 -3
- package/dist/config/types/runtime.d.ts +1 -1
- package/dist/config/types/tools.d.ts +12 -12
- package/dist/http/cors.js +23 -23
- package/dist/http/download-routes.js +9 -4
- package/dist/http/mcp-routes.js +2 -13
- package/dist/http/mcp-validation.js +1 -1
- package/dist/http/server-middleware.js +2 -1
- package/dist/http/server.js +2 -0
- package/dist/index.js +5 -0
- package/dist/middleware/error-handler.js +1 -1
- package/dist/resources/cached-content.js +8 -4
- package/dist/server.js +2 -0
- package/dist/services/cache.d.ts +1 -1
- package/dist/services/cache.js +20 -7
- package/dist/services/context.d.ts +2 -4
- package/dist/services/context.js +1 -1
- package/dist/services/extractor.js +26 -21
- package/dist/services/fetcher/interceptors.d.ts +22 -0
- package/dist/services/fetcher/interceptors.js +18 -8
- package/dist/services/fetcher/response.js +32 -24
- package/dist/services/fetcher.d.ts +0 -1
- package/dist/services/fetcher.js +5 -7
- package/dist/services/metadata-collector.d.ts +10 -0
- package/dist/services/metadata-collector.js +11 -0
- package/dist/services/parser.js +26 -25
- package/dist/services/transform-worker-pool.d.ts +14 -0
- package/dist/services/transform-worker-pool.js +167 -0
- package/dist/tools/handlers/fetch-markdown.tool.d.ts +9 -1
- package/dist/tools/handlers/fetch-markdown.tool.js +58 -30
- package/dist/tools/handlers/fetch-single.shared.d.ts +8 -3
- package/dist/tools/handlers/fetch-single.shared.js +42 -17
- package/dist/tools/handlers/fetch-url.tool.js +46 -16
- package/dist/tools/index.js +13 -0
- package/dist/tools/schemas.d.ts +33 -30
- package/dist/tools/schemas.js +4 -0
- package/dist/tools/utils/common.js +20 -16
- package/dist/tools/utils/content-transform-async.d.ts +6 -0
- package/dist/tools/utils/content-transform-async.js +33 -0
- package/dist/tools/utils/content-transform.d.ts +4 -1
- package/dist/tools/utils/content-transform.js +7 -2
- package/dist/tools/utils/fetch-pipeline.js +18 -10
- package/dist/utils/content-cleaner.d.ts +1 -1
- package/dist/utils/download-url.d.ts +9 -1
- package/dist/utils/download-url.js +9 -6
- package/dist/utils/tool-error-handler.d.ts +2 -2
- package/dist/utils/tool-error-handler.js +7 -7
- package/dist/utils/url-validator.js +38 -0
- package/dist/workers/transform-worker.d.ts +1 -0
- package/dist/workers/transform-worker.js +50 -0
- package/package.json +4 -6
package/README.md
CHANGED
|
@@ -291,13 +291,14 @@ Sessions are managed via the `mcp-session-id` header (see [HTTP Mode Details](#h
|
|
|
291
291
|
|
|
292
292
|
Both tools return:
|
|
293
293
|
|
|
294
|
-
- `structuredContent` for machine-readable fields (includes `contentSize`, `cached`, and optional `resourceUri`/`resourceMimeType`/`truncated`)
|
|
294
|
+
- `structuredContent` for machine-readable fields (includes `contentSize`, `cached`, and optional `resourceUri`/`resourceMimeType`/`truncated`; Markdown responses may also include `file`)
|
|
295
295
|
- `content` blocks that include:
|
|
296
296
|
- a `text` block containing JSON of `structuredContent`
|
|
297
|
-
- in stdio mode, a `resource` block with a `file:///...` URI
|
|
298
|
-
- in HTTP mode, a `resource` block when inline content is available
|
|
297
|
+
- in stdio mode, a `resource` block with a `file:///...` URI embedding the full content
|
|
298
|
+
- in HTTP mode, a `resource` block when inline content is available
|
|
299
|
+
- when content exceeds `MAX_INLINE_CONTENT_CHARS` and cache is enabled, a `resource_link` block points to `superfetch://cache/...` and `structuredContent.resourceUri` is set
|
|
299
300
|
|
|
300
|
-
If content exceeds `MAX_INLINE_CONTENT_CHARS` and cache is disabled, the server truncates output
|
|
301
|
+
If content exceeds `MAX_INLINE_CONTENT_CHARS` and cache is disabled, the server truncates output, appends `...[truncated]`, and sets `truncated: true`.
|
|
301
302
|
|
|
302
303
|
---
|
|
303
304
|
|
|
@@ -305,16 +306,19 @@ If content exceeds `MAX_INLINE_CONTENT_CHARS` and cache is disabled, the server
|
|
|
305
306
|
|
|
306
307
|
Fetches a webpage and converts it to AI-readable JSONL format with semantic content blocks. You can also request Markdown with `format: "markdown"`.
|
|
307
308
|
|
|
308
|
-
| Parameter
|
|
309
|
-
|
|
|
310
|
-
| `url`
|
|
311
|
-
| `format`
|
|
312
|
-
| `
|
|
313
|
-
| `
|
|
314
|
-
| `
|
|
315
|
-
| `
|
|
316
|
-
| `
|
|
317
|
-
| `
|
|
309
|
+
| Parameter | Type | Default | Description |
|
|
310
|
+
| ---------------------- | --------------------- | ---------------------------------- | ------------------------------------------------------ |
|
|
311
|
+
| `url` | string | required | URL to fetch |
|
|
312
|
+
| `format` | "jsonl" \| "markdown" | `"jsonl"` | Output format |
|
|
313
|
+
| `includeContentBlocks` | boolean | `true` (jsonl), `false` (markdown) | Include content block counts when `format: "markdown"` |
|
|
314
|
+
| `extractMainContent` | boolean | `true` | Use Readability to extract main content |
|
|
315
|
+
| `includeMetadata` | boolean | `true` | Include page metadata |
|
|
316
|
+
| `maxContentLength` | number | - | Maximum content length in characters (max 5,242,880) |
|
|
317
|
+
| `customHeaders` | object | - | Custom HTTP headers (sanitized) |
|
|
318
|
+
| `timeout` | number | `30000` | Request timeout in milliseconds (1000-120000) |
|
|
319
|
+
| `retries` | number | `3` | Number of retry attempts (1-10) |
|
|
320
|
+
|
|
321
|
+
When `format: "markdown"` and `includeContentBlocks` is `false`, `contentBlocks` will be `0`.
|
|
318
322
|
|
|
319
323
|
**Example `structuredContent`:**
|
|
320
324
|
|
|
@@ -337,15 +341,15 @@ Fetches a webpage and converts it to AI-readable JSONL format with semantic cont
|
|
|
337
341
|
|
|
338
342
|
Fetches a webpage and converts it to clean Markdown with optional frontmatter.
|
|
339
343
|
|
|
340
|
-
| Parameter | Type | Default | Description
|
|
341
|
-
| -------------------- | ------- | -------- |
|
|
342
|
-
| `url` | string | required | URL to fetch
|
|
343
|
-
| `extractMainContent` | boolean | `true` | Extract main content only
|
|
344
|
-
| `includeMetadata` | boolean | `true` | Include YAML frontmatter
|
|
345
|
-
| `maxContentLength` | number | - | Maximum content length in characters
|
|
346
|
-
| `customHeaders` | object | - | Custom HTTP headers (sanitized)
|
|
347
|
-
| `timeout` | number | `30000` | Request timeout in milliseconds (1000-120000)
|
|
348
|
-
| `retries` | number | `3` | Number of retry attempts (1-10)
|
|
344
|
+
| Parameter | Type | Default | Description |
|
|
345
|
+
| -------------------- | ------- | -------- | ---------------------------------------------------- |
|
|
346
|
+
| `url` | string | required | URL to fetch |
|
|
347
|
+
| `extractMainContent` | boolean | `true` | Extract main content only |
|
|
348
|
+
| `includeMetadata` | boolean | `true` | Include YAML frontmatter |
|
|
349
|
+
| `maxContentLength` | number | - | Maximum content length in characters (max 5,242,880) |
|
|
350
|
+
| `customHeaders` | object | - | Custom HTTP headers (sanitized) |
|
|
351
|
+
| `timeout` | number | `30000` | Request timeout in milliseconds (1000-120000) |
|
|
352
|
+
| `retries` | number | `3` | Number of retry attempts (1-10) |
|
|
349
353
|
|
|
350
354
|
**Example `structuredContent`:**
|
|
351
355
|
|
|
@@ -373,9 +377,9 @@ Fetches a webpage and converts it to clean Markdown with optional frontmatter.
|
|
|
373
377
|
### Large Content Handling
|
|
374
378
|
|
|
375
379
|
- Inline limit is configurable via `MAX_INLINE_CONTENT_CHARS` (see `CONFIGURATION.md`).
|
|
376
|
-
- If content exceeds the limit and cache is enabled, responses include `resourceUri` and a `resource_link` block.
|
|
377
|
-
- If cache is disabled, content is truncated with `...[truncated]`.
|
|
378
|
-
- Use `maxContentLength` per request to enforce a lower limit.
|
|
380
|
+
- If content exceeds the limit and cache is enabled, responses include `resourceUri`/`resourceMimeType` and a `resource_link` block.
|
|
381
|
+
- If cache is disabled, content is truncated with `...[truncated]` and `truncated: true`.
|
|
382
|
+
- Use `maxContentLength` per request to enforce a lower limit (hard cap: 5,242,880 characters).
|
|
379
383
|
- Upstream fetch size is capped at 10 MB of HTML; larger responses fail.
|
|
380
384
|
|
|
381
385
|
---
|
|
@@ -446,8 +450,12 @@ HTTP mode uses the MCP Streamable HTTP transport. The workflow is:
|
|
|
446
450
|
2. The server returns `mcp-session-id` in the response headers.
|
|
447
451
|
3. Use that header for subsequent `POST /mcp`, `GET /mcp`, and `DELETE /mcp` requests.
|
|
448
452
|
|
|
453
|
+
`GET /mcp` and `DELETE /mcp` require `mcp-session-id`. `POST /mcp` without an `initialize` request will return 400.
|
|
454
|
+
|
|
449
455
|
If `MAX_SESSIONS` is reached, the server evicts the oldest session when possible, otherwise returns a 503.
|
|
450
456
|
|
|
457
|
+
Host header validation is always enforced in HTTP mode. When binding to `0.0.0.0` or `::`, set `ALLOWED_HOSTS` to the hostnames clients will send. If an `Origin` header is present, it must be allowed by `ALLOWED_ORIGINS` or `CORS_ALLOW_ALL`.
|
|
458
|
+
|
|
451
459
|
---
|
|
452
460
|
|
|
453
461
|
## Content Block Types
|
|
@@ -473,17 +481,22 @@ JSONL output includes semantic content blocks:
|
|
|
473
481
|
|
|
474
482
|
Blocked destinations include:
|
|
475
483
|
|
|
476
|
-
-
|
|
477
|
-
- Private
|
|
478
|
-
-
|
|
479
|
-
-
|
|
484
|
+
- Loopback and unspecified addresses (`127.0.0.0/8`, `::1`, `0.0.0.0`, `::`)
|
|
485
|
+
- Private/ULA ranges (`10.0.0.0/8`, `172.16.0.0/12`, `192.168.0.0/16`, `fc00::/7`)
|
|
486
|
+
- Link-local and shared address space (`169.254.0.0/16`, `100.64.0.0/10`, `fe80::/10`)
|
|
487
|
+
- Multicast/reserved ranges (`224.0.0.0/4`, `240.0.0.0/4`, `ff00::/8`)
|
|
488
|
+
- IPv6 transition ranges (`64:ff9b::/96`, `64:ff9b:1::/48`, `2001::/32`, `2002::/16`)
|
|
489
|
+
- Cloud metadata endpoints (AWS/GCP/Azure/Alibaba) like `169.254.169.254`, `metadata.google.internal`, `metadata.azure.com`, `100.100.100.200`, `instance-data`
|
|
480
490
|
- Internal suffixes such as `.local` and `.internal`
|
|
481
491
|
|
|
492
|
+
DNS resolution is performed and blocked if any resolved IP matches a blocked range.
|
|
493
|
+
|
|
482
494
|
### URL Validation
|
|
483
495
|
|
|
484
496
|
- Only `http` and `https` URLs
|
|
485
497
|
- No embedded credentials in URLs
|
|
486
498
|
- Max URL length: 2048 characters
|
|
499
|
+
- Hostnames ending in `.local` or `.internal` are rejected
|
|
487
500
|
|
|
488
501
|
### Header Sanitization
|
|
489
502
|
|
|
@@ -491,7 +504,7 @@ Blocked headers: `host`, `authorization`, `cookie`, `x-forwarded-for`, `x-real-i
|
|
|
491
504
|
|
|
492
505
|
### Rate Limiting
|
|
493
506
|
|
|
494
|
-
Rate limiting
|
|
507
|
+
Rate limiting applies to `/mcp` and `/mcp/downloads` and is configurable via `RATE_LIMIT_ENABLED`, `RATE_LIMIT_MAX`, `RATE_LIMIT_WINDOW_MS`, and `RATE_LIMIT_CLEANUP_MS` (see `CONFIGURATION.md`).
|
|
495
508
|
|
|
496
509
|
---
|
|
497
510
|
|
|
@@ -499,20 +512,22 @@ Rate limiting thresholds are configurable via `RATE_LIMIT_MAX` and `RATE_LIMIT_W
|
|
|
499
512
|
|
|
500
513
|
### Scripts
|
|
501
514
|
|
|
502
|
-
| Command | Description
|
|
503
|
-
| ----------------------- |
|
|
504
|
-
| `npm run dev` | Development server with hot reload
|
|
505
|
-
| `npm run build` | Compile TypeScript
|
|
506
|
-
| `npm start` | Production server
|
|
507
|
-
| `npm run lint` | Run ESLint
|
|
508
|
-
| `npm run type-check` | TypeScript type checking
|
|
509
|
-
| `npm run format` | Format with Prettier
|
|
510
|
-
| `npm test` | Run
|
|
511
|
-
| `npm run test:coverage` | Run tests with coverage
|
|
512
|
-
| `npm run bench` | Run minimal performance benchmark
|
|
513
|
-
| `npm run release` | Create new release
|
|
514
|
-
| `npm run knip` | Find unused exports/dependencies
|
|
515
|
-
| `npm run knip:fix` | Auto-fix unused code
|
|
515
|
+
| Command | Description |
|
|
516
|
+
| ----------------------- | ------------------------------------ |
|
|
517
|
+
| `npm run dev` | Development server with hot reload |
|
|
518
|
+
| `npm run build` | Compile TypeScript |
|
|
519
|
+
| `npm start` | Production server |
|
|
520
|
+
| `npm run lint` | Run ESLint |
|
|
521
|
+
| `npm run type-check` | TypeScript type checking |
|
|
522
|
+
| `npm run format` | Format with Prettier |
|
|
523
|
+
| `npm test` | Run Node test runner (builds dist) |
|
|
524
|
+
| `npm run test:coverage` | Run tests with experimental coverage |
|
|
525
|
+
| `npm run bench` | Run minimal performance benchmark |
|
|
526
|
+
| `npm run release` | Create new release |
|
|
527
|
+
| `npm run knip` | Find unused exports/dependencies |
|
|
528
|
+
| `npm run knip:fix` | Auto-fix unused code |
|
|
529
|
+
|
|
530
|
+
> **Note:** Tests run via `node --test` with `--experimental-transform-types` to execute `.ts` test files. Node will emit an experimental warning.
|
|
516
531
|
|
|
517
532
|
### Tech Stack
|
|
518
533
|
|
|
package/dist/config/formatting.d.ts
CHANGED
|
@@ -4,4 +4,4 @@ export declare const CODE_BLOCK: {
|
|
|
4
4
|
readonly format: (code: string, language?: string) => string;
|
|
5
5
|
};
|
|
6
6
|
export declare const FRONTMATTER_DELIMITER = "---";
|
|
7
|
-
export declare const joinLines: (lines: string[]) => string;
|
|
7
|
+
export declare const joinLines: (lines: readonly string[]) => string;
|
|
package/dist/config/types/content.d.ts
CHANGED
|
@@ -22,7 +22,7 @@ export interface ParagraphBlock extends ContentBlock {
|
|
|
22
22
|
export interface ListBlock extends ContentBlock {
|
|
23
23
|
type: 'list';
|
|
24
24
|
ordered: boolean;
|
|
25
|
-
items: string[];
|
|
25
|
+
readonly items: readonly string[];
|
|
26
26
|
}
|
|
27
27
|
export interface CodeBlock extends ContentBlock {
|
|
28
28
|
type: 'code';
|
|
@@ -31,8 +31,8 @@ export interface CodeBlock extends ContentBlock {
|
|
|
31
31
|
}
|
|
32
32
|
export interface TableBlock extends ContentBlock {
|
|
33
33
|
type: 'table';
|
|
34
|
-
headers?: string[];
|
|
35
|
-
rows: string[][];
|
|
34
|
+
readonly headers?: readonly string[];
|
|
35
|
+
readonly rows: readonly (readonly string[])[];
|
|
36
36
|
}
|
|
37
37
|
export interface ImageBlock extends ContentBlock {
|
|
38
38
|
type: 'image';
|
|
package/dist/config/types/runtime.d.ts
CHANGED
|
@@ -65,7 +65,7 @@ export interface FetchPipelineOptions<T> {
|
|
|
65
65
|
/** Optional: cache variation input for headers/flags */
|
|
66
66
|
cacheVary?: Record<string, unknown> | string;
|
|
67
67
|
/** Transform function to process HTML into desired format */
|
|
68
|
-
transform: (html: string, url: string) => T;
|
|
68
|
+
transform: (html: string, url: string) => T | Promise<T>;
|
|
69
69
|
/** Optional: serialize result for caching (defaults to JSON.stringify) */
|
|
70
70
|
serialize?: (result: T) => string;
|
|
71
71
|
/** Optional: deserialize cached content */
|
|
package/dist/config/types/tools.d.ts
CHANGED
|
@@ -1,24 +1,25 @@
|
|
|
1
1
|
import type { ToolContentBlock } from './runtime.js';
|
|
2
2
|
interface RequestOptions {
|
|
3
3
|
/** Custom HTTP headers for the request */
|
|
4
|
-
customHeaders?: Record<string, string>;
|
|
4
|
+
customHeaders?: Record<string, string> | undefined;
|
|
5
5
|
/** Request timeout in milliseconds (1000-120000) */
|
|
6
|
-
timeout?: number;
|
|
6
|
+
timeout?: number | undefined;
|
|
7
7
|
/** Number of retry attempts (1-10) */
|
|
8
|
-
retries?: number;
|
|
8
|
+
retries?: number | undefined;
|
|
9
9
|
}
|
|
10
10
|
export interface FetchUrlInput extends RequestOptions {
|
|
11
11
|
url: string;
|
|
12
|
-
extractMainContent?: boolean;
|
|
13
|
-
includeMetadata?: boolean;
|
|
14
|
-
maxContentLength?: number;
|
|
15
|
-
format?: 'jsonl' | 'markdown';
|
|
12
|
+
extractMainContent?: boolean | undefined;
|
|
13
|
+
includeMetadata?: boolean | undefined;
|
|
14
|
+
maxContentLength?: number | undefined;
|
|
15
|
+
format?: 'jsonl' | 'markdown' | undefined;
|
|
16
|
+
includeContentBlocks?: boolean | undefined;
|
|
16
17
|
}
|
|
17
18
|
export interface FetchMarkdownInput extends RequestOptions {
|
|
18
19
|
url: string;
|
|
19
|
-
extractMainContent?: boolean;
|
|
20
|
-
includeMetadata?: boolean;
|
|
21
|
-
maxContentLength?: number;
|
|
20
|
+
extractMainContent?: boolean | undefined;
|
|
21
|
+
includeMetadata?: boolean | undefined;
|
|
22
|
+
maxContentLength?: number | undefined;
|
|
22
23
|
}
|
|
23
24
|
export interface FileDownloadInfo {
|
|
24
25
|
downloadUrl: string;
|
|
@@ -38,11 +39,10 @@ export interface ToolErrorResponse {
|
|
|
38
39
|
[x: string]: unknown;
|
|
39
40
|
content: ToolContentBlock[];
|
|
40
41
|
structuredContent: {
|
|
41
|
-
[x: string]: unknown;
|
|
42
42
|
error: string;
|
|
43
43
|
url: string;
|
|
44
44
|
errorCode: string;
|
|
45
|
-
}
|
|
45
|
+
} & Record<string, unknown>;
|
|
46
46
|
isError: true;
|
|
47
47
|
}
|
|
48
48
|
export interface ToolResponseBase {
|
package/dist/http/cors.js
CHANGED
|
@@ -13,13 +13,22 @@ function isValidOrigin(origin) {
|
|
|
13
13
|
export function createCorsMiddleware(options) {
|
|
14
14
|
return (req, res, next) => {
|
|
15
15
|
const origin = resolveOrigin(req);
|
|
16
|
-
if (
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
16
|
+
if (origin) {
|
|
17
|
+
if (!isValidOrigin(origin)) {
|
|
18
|
+
res.status(403).json({
|
|
19
|
+
error: 'Origin not allowed',
|
|
20
|
+
code: 'ORIGIN_NOT_ALLOWED',
|
|
21
|
+
});
|
|
22
|
+
return;
|
|
23
|
+
}
|
|
24
|
+
if (!isOriginAllowed(origin, options)) {
|
|
25
|
+
res.status(403).json({
|
|
26
|
+
error: 'Origin not allowed',
|
|
27
|
+
code: 'ORIGIN_NOT_ALLOWED',
|
|
28
|
+
});
|
|
29
|
+
return;
|
|
30
|
+
}
|
|
31
|
+
applyCorsHeaders(res, origin);
|
|
23
32
|
}
|
|
24
33
|
if (req.method === 'OPTIONS') {
|
|
25
34
|
res.sendStatus(200);
|
|
@@ -31,20 +40,11 @@ export function createCorsMiddleware(options) {
|
|
|
31
40
|
function resolveOrigin(req) {
|
|
32
41
|
return req.headers.origin;
|
|
33
42
|
}
|
|
34
|
-
function
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
}
|
|
42
|
-
res.header('Access-Control-Allow-Origin', origin ?? '*');
|
|
43
|
-
res.header('Access-Control-Allow-Methods', 'GET, POST, DELETE, OPTIONS');
|
|
44
|
-
res.header('Access-Control-Allow-Headers', 'Content-Type, mcp-session-id, Authorization, X-API-Key');
|
|
45
|
-
res.header('Access-Control-Expose-Headers', 'mcp-session-id');
|
|
46
|
-
res.header('Access-Control-Max-Age', '86400');
|
|
47
|
-
return true;
|
|
48
|
-
}
|
|
49
|
-
return options.allowedOrigins.length === 0;
|
|
43
|
+
function applyCorsHeaders(res, origin) {
|
|
44
|
+
res.vary('Origin');
|
|
45
|
+
res.header('Access-Control-Allow-Origin', origin);
|
|
46
|
+
res.header('Access-Control-Allow-Methods', 'GET, POST, DELETE, OPTIONS');
|
|
47
|
+
res.header('Access-Control-Allow-Headers', 'Content-Type, mcp-session-id, Authorization, X-API-Key');
|
|
48
|
+
res.header('Access-Control-Expose-Headers', 'mcp-session-id');
|
|
49
|
+
res.header('Access-Control-Max-Age', '86400');
|
|
50
50
|
}
|
|
package/dist/http/download-routes.js
CHANGED
|
@@ -52,15 +52,20 @@ function resolveExtension(namespace) {
|
|
|
52
52
|
function parseCachedPayload(raw) {
|
|
53
53
|
try {
|
|
54
54
|
const parsed = JSON.parse(raw);
|
|
55
|
-
|
|
56
|
-
return parsed;
|
|
57
|
-
}
|
|
58
|
-
return null;
|
|
55
|
+
return isCachedPayload(parsed) ? parsed : null;
|
|
59
56
|
}
|
|
60
57
|
catch {
|
|
61
58
|
return null;
|
|
62
59
|
}
|
|
63
60
|
}
|
|
61
|
+
function isCachedPayload(value) {
|
|
62
|
+
if (!value || typeof value !== 'object')
|
|
63
|
+
return false;
|
|
64
|
+
const record = value;
|
|
65
|
+
return ((record.content === undefined || typeof record.content === 'string') &&
|
|
66
|
+
(record.markdown === undefined || typeof record.markdown === 'string') &&
|
|
67
|
+
(record.title === undefined || typeof record.title === 'string'));
|
|
68
|
+
}
|
|
64
69
|
function resolvePayloadContent(payload, namespace) {
|
|
65
70
|
if (namespace === 'markdown') {
|
|
66
71
|
if (typeof payload.markdown === 'string') {
|
package/dist/http/mcp-routes.js
CHANGED
|
@@ -62,15 +62,6 @@ function resolveSessionTransport(sessionId, options, res) {
|
|
|
62
62
|
options.sessionStore.touch(sessionId);
|
|
63
63
|
return session.transport;
|
|
64
64
|
}
|
|
65
|
-
function resolveSessionTransportForDelete(sessionId, options) {
|
|
66
|
-
if (!sessionId)
|
|
67
|
-
return null;
|
|
68
|
-
const session = options.sessionStore.get(sessionId);
|
|
69
|
-
if (!session)
|
|
70
|
-
return null;
|
|
71
|
-
options.sessionStore.touch(sessionId);
|
|
72
|
-
return session.transport;
|
|
73
|
-
}
|
|
74
65
|
async function handlePost(req, res, options) {
|
|
75
66
|
const sessionId = getSessionId(req);
|
|
76
67
|
const { body } = req;
|
|
@@ -91,11 +82,9 @@ async function handleGet(req, res, options) {
|
|
|
91
82
|
await handleTransportRequest(transport, req, res);
|
|
92
83
|
}
|
|
93
84
|
async function handleDelete(req, res, options) {
|
|
94
|
-
const transport = resolveSessionTransportForDelete(getSessionId(req), options);
|
|
95
|
-
if (!transport) {
|
|
96
|
-
res.status(204).end();
|
|
85
|
+
const transport = resolveSessionTransport(getSessionId(req), options, res);
|
|
86
|
+
if (!transport)
|
|
97
87
|
return;
|
|
98
|
-
}
|
|
99
88
|
await handleTransportRequest(transport, req, res);
|
|
100
89
|
}
|
|
101
90
|
export function registerMcpRoutes(app, options) {
|
|
package/dist/http/mcp-validation.js
CHANGED
|
@@ -2,7 +2,7 @@ function isRecord(value) {
|
|
|
2
2
|
return value !== null && typeof value === 'object';
|
|
3
3
|
}
|
|
4
4
|
export function isMcpRequestBody(body) {
|
|
5
|
-
if (!isRecord(body))
|
|
5
|
+
if (!isRecord(body) || Array.isArray(body))
|
|
6
6
|
return false;
|
|
7
7
|
const { method, id, jsonrpc, params } = body;
|
|
8
8
|
const methodValid = method === undefined || typeof method === 'string';
|
|
package/dist/http/server-middleware.js
CHANGED
|
@@ -84,7 +84,8 @@ export function createContextMiddleware() {
|
|
|
84
84
|
return (req, _res, next) => {
|
|
85
85
|
const requestId = randomUUID();
|
|
86
86
|
const sessionId = getSessionId(req);
|
|
87
|
-
|
|
87
|
+
const context = sessionId === undefined ? { requestId } : { requestId, sessionId };
|
|
88
|
+
runWithRequestContext(context, () => {
|
|
88
89
|
const boundNext = bindToRequestContext(next);
|
|
89
90
|
boundNext();
|
|
90
91
|
});
|
package/dist/http/server.js
CHANGED
|
@@ -2,6 +2,7 @@ import { styleText } from 'node:util';
|
|
|
2
2
|
import { config, enableHttpMode } from '../config/index.js';
|
|
3
3
|
import { destroyAgents } from '../services/fetcher.js';
|
|
4
4
|
import { logError, logInfo, logWarn } from '../services/logger.js';
|
|
5
|
+
import { destroyTransformWorkers } from '../services/transform-worker-pool.js';
|
|
5
6
|
import { errorHandler } from '../middleware/error-handler.js';
|
|
6
7
|
import { getErrorMessage } from '../utils/error-utils.js';
|
|
7
8
|
import { createAuthMiddleware } from './auth.js';
|
|
@@ -54,6 +55,7 @@ function createShutdownHandler(server, sessionStore, sessionCleanupController, s
|
|
|
54
55
|
});
|
|
55
56
|
})));
|
|
56
57
|
destroyAgents();
|
|
58
|
+
destroyTransformWorkers();
|
|
57
59
|
server.close(() => {
|
|
58
60
|
logInfo('HTTP server closed');
|
|
59
61
|
process.exit(0);
|
package/dist/index.js
CHANGED
|
@@ -34,6 +34,11 @@ process.on('unhandledRejection', (reason) => {
|
|
|
34
34
|
const error = reason instanceof Error ? reason : new Error(String(reason));
|
|
35
35
|
logError('Unhandled rejection', error);
|
|
36
36
|
process.stderr.write(`Unhandled rejection: ${error.message}\n`);
|
|
37
|
+
if (shouldAttemptShutdown()) {
|
|
38
|
+
attemptShutdown('UNHANDLED_REJECTION');
|
|
39
|
+
return;
|
|
40
|
+
}
|
|
41
|
+
process.exit(1);
|
|
37
42
|
});
|
|
38
43
|
try {
|
|
39
44
|
if (isStdioMode) {
|
|
package/dist/resources/cached-content.js
CHANGED
|
@@ -163,15 +163,19 @@ function resolveStringParam(value) {
|
|
|
163
163
|
function parseCachedPayload(raw) {
|
|
164
164
|
try {
|
|
165
165
|
const parsed = JSON.parse(raw);
|
|
166
|
-
|
|
167
|
-
return parsed;
|
|
168
|
-
}
|
|
169
|
-
return null;
|
|
166
|
+
return isCachedPayload(parsed) ? parsed : null;
|
|
170
167
|
}
|
|
171
168
|
catch {
|
|
172
169
|
return null;
|
|
173
170
|
}
|
|
174
171
|
}
|
|
172
|
+
function isCachedPayload(value) {
|
|
173
|
+
if (!value || typeof value !== 'object')
|
|
174
|
+
return false;
|
|
175
|
+
const record = value;
|
|
176
|
+
return ((record.content === undefined || typeof record.content === 'string') &&
|
|
177
|
+
(record.markdown === undefined || typeof record.markdown === 'string'));
|
|
178
|
+
}
|
|
175
179
|
function resolvePayloadContent(payload, namespace) {
|
|
176
180
|
if (namespace === 'markdown') {
|
|
177
181
|
if (typeof payload.markdown === 'string') {
|
package/dist/server.js
CHANGED
|
@@ -3,6 +3,7 @@ import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'
|
|
|
3
3
|
import { config } from './config/index.js';
|
|
4
4
|
import { destroyAgents } from './services/fetcher.js';
|
|
5
5
|
import { logError, logInfo } from './services/logger.js';
|
|
6
|
+
import { destroyTransformWorkers } from './services/transform-worker-pool.js';
|
|
6
7
|
import { registerTools } from './tools/index.js';
|
|
7
8
|
import { registerResources } from './resources/index.js';
|
|
8
9
|
export function createMcpServer() {
|
|
@@ -30,6 +31,7 @@ export async function startStdioServer() {
|
|
|
30
31
|
const handleShutdown = (signal) => {
|
|
31
32
|
process.stderr.write(`\n${signal} received, shutting down superFetch MCP server...\n`);
|
|
32
33
|
destroyAgents();
|
|
34
|
+
destroyTransformWorkers();
|
|
33
35
|
server
|
|
34
36
|
.close()
|
|
35
37
|
.catch((err) => {
|
package/dist/services/cache.d.ts
CHANGED
|
@@ -17,6 +17,6 @@ export declare function toResourceUri(cacheKey: string): string | null;
|
|
|
17
17
|
export declare function onCacheUpdate(listener: CacheUpdateListener): () => void;
|
|
18
18
|
export declare function get(cacheKey: string | null): CacheEntry | undefined;
|
|
19
19
|
export declare function set(cacheKey: string | null, content: string, metadata: CacheEntryMetadata): void;
|
|
20
|
-
export declare function keys(): string[];
|
|
20
|
+
export declare function keys(): readonly string[];
|
|
21
21
|
export declare function isEnabled(): boolean;
|
|
22
22
|
export {};
|
package/dist/services/cache.js
CHANGED
|
@@ -22,19 +22,16 @@ async function runCleanupLoop(signal) {
|
|
|
22
22
|
signal,
|
|
23
23
|
ref: false,
|
|
24
24
|
})) {
|
|
25
|
-
|
|
25
|
+
evictEntries();
|
|
26
26
|
}
|
|
27
27
|
}
|
|
28
|
-
function
|
|
28
|
+
function evictEntries() {
|
|
29
29
|
const now = Date.now();
|
|
30
30
|
for (const [key, item] of contentCache.entries()) {
|
|
31
31
|
if (now > item.expiresAt) {
|
|
32
32
|
contentCache.delete(key);
|
|
33
33
|
}
|
|
34
34
|
}
|
|
35
|
-
enforceMaxKeys();
|
|
36
|
-
}
|
|
37
|
-
function enforceMaxKeys() {
|
|
38
35
|
if (contentCache.size <= config.cache.maxKeys)
|
|
39
36
|
return;
|
|
40
37
|
const keysToRemove = contentCache.size - config.cache.maxKeys;
|
|
@@ -179,16 +176,32 @@ export function isEnabled() {
|
|
|
179
176
|
return config.cache.enabled;
|
|
180
177
|
}
|
|
181
178
|
function buildCacheEntry(cacheKey, content, metadata) {
|
|
182
|
-
|
|
179
|
+
const entry = {
|
|
183
180
|
url: metadata.url,
|
|
184
|
-
title: metadata.title,
|
|
185
181
|
content,
|
|
186
182
|
fetchedAt: new Date().toISOString(),
|
|
187
183
|
expiresAt: new Date(Date.now() + config.cache.ttl * 1000).toISOString(),
|
|
188
184
|
};
|
|
185
|
+
if (metadata.title !== undefined) {
|
|
186
|
+
entry.title = metadata.title;
|
|
187
|
+
}
|
|
188
|
+
return entry;
|
|
189
189
|
}
|
|
190
190
|
function persistCacheEntry(cacheKey, entry) {
|
|
191
191
|
const expiresAt = Date.now() + config.cache.ttl * 1000;
|
|
192
192
|
contentCache.set(cacheKey, { entry, expiresAt });
|
|
193
|
+
enforceMaxKeysLimit();
|
|
193
194
|
emitCacheUpdate(cacheKey);
|
|
194
195
|
}
|
|
196
|
+
function enforceMaxKeysLimit() {
|
|
197
|
+
if (contentCache.size <= config.cache.maxKeys)
|
|
198
|
+
return;
|
|
199
|
+
const keysToRemove = contentCache.size - config.cache.maxKeys;
|
|
200
|
+
const iterator = contentCache.keys();
|
|
201
|
+
for (let i = 0; i < keysToRemove; i++) {
|
|
202
|
+
const { value, done } = iterator.next();
|
|
203
|
+
if (done)
|
|
204
|
+
break;
|
|
205
|
+
contentCache.delete(value);
|
|
206
|
+
}
|
|
207
|
+
}
|
|
package/dist/services/context.d.ts
CHANGED
|
@@ -1,9 +1,7 @@
|
|
|
1
|
-
import { AsyncLocalStorage } from 'node:async_hooks';
|
|
2
1
|
interface RequestContext {
|
|
3
|
-
requestId: string;
|
|
4
|
-
sessionId?: string;
|
|
2
|
+
readonly requestId: string;
|
|
3
|
+
readonly sessionId?: string;
|
|
5
4
|
}
|
|
6
|
-
export declare const requestContext: AsyncLocalStorage<RequestContext>;
|
|
7
5
|
export declare function runWithRequestContext<T>(context: RequestContext, fn: () => T): T;
|
|
8
6
|
export declare function bindToRequestContext<T extends (...args: unknown[]) => unknown>(fn: T): T;
|
|
9
7
|
export declare function getRequestId(): string | undefined;
|
package/dist/services/context.js
CHANGED
|
package/dist/services/extractor.js
CHANGED
|
@@ -3,17 +3,7 @@ import { Readability } from '@mozilla/readability';
|
|
|
3
3
|
import { getErrorMessage } from '../utils/error-utils.js';
|
|
4
4
|
import { truncateHtml } from '../utils/html-truncator.js';
|
|
5
5
|
import { logError, logInfo, logWarn } from './logger.js';
|
|
6
|
-
|
|
7
|
-
const sources = state[field];
|
|
8
|
-
return sources.og ?? sources.twitter ?? sources.standard;
|
|
9
|
-
}
|
|
10
|
-
function createMetaCollectorState() {
|
|
11
|
-
return {
|
|
12
|
-
title: {},
|
|
13
|
-
description: {},
|
|
14
|
-
author: {},
|
|
15
|
-
};
|
|
16
|
-
}
|
|
6
|
+
import { createMetaCollectorState, resolveMetaField, } from './metadata-collector.js';
|
|
17
7
|
function collectMetaTag(state, tag) {
|
|
18
8
|
const content = getMetaContent(tag);
|
|
19
9
|
if (!content)
|
|
@@ -76,11 +66,17 @@ function extractMetadata(document) {
|
|
|
76
66
|
const state = createMetaCollectorState();
|
|
77
67
|
scanMetaTags(document, state);
|
|
78
68
|
ensureTitleFallback(document, state);
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
69
|
+
const metadata = {};
|
|
70
|
+
const title = resolveMetaField(state, 'title');
|
|
71
|
+
const description = resolveMetaField(state, 'description');
|
|
72
|
+
const author = resolveMetaField(state, 'author');
|
|
73
|
+
if (title !== undefined)
|
|
74
|
+
metadata.title = title;
|
|
75
|
+
if (description !== undefined)
|
|
76
|
+
metadata.description = description;
|
|
77
|
+
if (author !== undefined)
|
|
78
|
+
metadata.author = author;
|
|
79
|
+
return metadata;
|
|
84
80
|
}
|
|
85
81
|
function isReadabilityCompatible(doc) {
|
|
86
82
|
if (!doc || typeof doc !== 'object')
|
|
@@ -113,14 +109,23 @@ function parseReadabilityArticle(document) {
|
|
|
113
109
|
}
|
|
114
110
|
}
|
|
115
111
|
function mapReadabilityResult(parsed) {
|
|
116
|
-
|
|
117
|
-
title: toOptional(parsed.title),
|
|
118
|
-
byline: toOptional(parsed.byline),
|
|
112
|
+
const article = {
|
|
119
113
|
content: parsed.content ?? '',
|
|
120
114
|
textContent: parsed.textContent ?? '',
|
|
121
|
-
excerpt: toOptional(parsed.excerpt),
|
|
122
|
-
siteName: toOptional(parsed.siteName),
|
|
123
115
|
};
|
|
116
|
+
const title = toOptional(parsed.title);
|
|
117
|
+
if (title !== undefined)
|
|
118
|
+
article.title = title;
|
|
119
|
+
const byline = toOptional(parsed.byline);
|
|
120
|
+
if (byline !== undefined)
|
|
121
|
+
article.byline = byline;
|
|
122
|
+
const excerpt = toOptional(parsed.excerpt);
|
|
123
|
+
if (excerpt !== undefined)
|
|
124
|
+
article.excerpt = excerpt;
|
|
125
|
+
const siteName = toOptional(parsed.siteName);
|
|
126
|
+
if (siteName !== undefined)
|
|
127
|
+
article.siteName = siteName;
|
|
128
|
+
return article;
|
|
124
129
|
}
|
|
125
130
|
function toOptional(value) {
|
|
126
131
|
return value ?? undefined;
|