@j0hanz/superfetch 1.2.2 → 1.2.4

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. package/README.md +61 -46
  2. package/dist/config/formatting.d.ts +1 -1
  3. package/dist/config/types/content.d.ts +3 -3
  4. package/dist/config/types/runtime.d.ts +1 -1
  5. package/dist/config/types/tools.d.ts +12 -12
  6. package/dist/http/cors.js +23 -23
  7. package/dist/http/download-routes.js +9 -4
  8. package/dist/http/mcp-routes.js +2 -13
  9. package/dist/http/mcp-validation.js +1 -1
  10. package/dist/http/server-middleware.js +2 -1
  11. package/dist/http/server.js +2 -0
  12. package/dist/index.js +5 -0
  13. package/dist/middleware/error-handler.js +1 -1
  14. package/dist/resources/cached-content.js +8 -4
  15. package/dist/server.js +2 -0
  16. package/dist/services/cache.d.ts +1 -1
  17. package/dist/services/cache.js +20 -7
  18. package/dist/services/context.d.ts +2 -4
  19. package/dist/services/context.js +1 -1
  20. package/dist/services/extractor.js +26 -21
  21. package/dist/services/fetcher/interceptors.d.ts +22 -0
  22. package/dist/services/fetcher/interceptors.js +18 -8
  23. package/dist/services/fetcher/response.js +32 -24
  24. package/dist/services/fetcher.d.ts +0 -1
  25. package/dist/services/fetcher.js +5 -7
  26. package/dist/services/metadata-collector.d.ts +10 -0
  27. package/dist/services/metadata-collector.js +11 -0
  28. package/dist/services/parser.js +26 -25
  29. package/dist/services/transform-worker-pool.d.ts +14 -0
  30. package/dist/services/transform-worker-pool.js +167 -0
  31. package/dist/tools/handlers/fetch-markdown.tool.d.ts +9 -1
  32. package/dist/tools/handlers/fetch-markdown.tool.js +58 -30
  33. package/dist/tools/handlers/fetch-single.shared.d.ts +8 -3
  34. package/dist/tools/handlers/fetch-single.shared.js +42 -17
  35. package/dist/tools/handlers/fetch-url.tool.js +46 -16
  36. package/dist/tools/index.js +13 -0
  37. package/dist/tools/schemas.d.ts +29 -133
  38. package/dist/tools/schemas.js +22 -32
  39. package/dist/tools/utils/common.js +20 -16
  40. package/dist/tools/utils/content-transform-async.d.ts +6 -0
  41. package/dist/tools/utils/content-transform-async.js +33 -0
  42. package/dist/tools/utils/content-transform.d.ts +4 -1
  43. package/dist/tools/utils/content-transform.js +7 -2
  44. package/dist/tools/utils/fetch-pipeline.js +18 -10
  45. package/dist/utils/content-cleaner.d.ts +1 -1
  46. package/dist/utils/download-url.d.ts +9 -1
  47. package/dist/utils/download-url.js +9 -6
  48. package/dist/utils/tool-error-handler.d.ts +2 -2
  49. package/dist/utils/tool-error-handler.js +7 -7
  50. package/dist/utils/url-validator.js +38 -0
  51. package/dist/workers/transform-worker.d.ts +1 -0
  52. package/dist/workers/transform-worker.js +50 -0
  53. package/package.json +5 -7
package/README.md CHANGED
@@ -291,13 +291,14 @@ Sessions are managed via the `mcp-session-id` header (see [HTTP Mode Details](#h
291
291
 
292
292
  Both tools return:
293
293
 
294
- - `structuredContent` for machine-readable fields (includes `contentSize`, `cached`, and optional `resourceUri`/`resourceMimeType`/`truncated`)
294
+ - `structuredContent` for machine-readable fields (includes `contentSize`, `cached`, and optional `resourceUri`/`resourceMimeType`/`truncated`; Markdown responses may also include `file`)
295
295
  - `content` blocks that include:
296
296
  - a `text` block containing JSON of `structuredContent`
297
- - in stdio mode, a `resource` block with a `file:///...` URI containing the full content
298
- - in HTTP mode, a `resource` block when inline content is available; large payloads include a `resource_link` block when cache is enabled
297
+ - in stdio mode, a `resource` block with a `file:///...` URI embedding the full content
298
+ - in HTTP mode, a `resource` block when inline content is available
299
+ - when content exceeds `MAX_INLINE_CONTENT_CHARS` and cache is enabled, a `resource_link` block points to `superfetch://cache/...` and `structuredContent.resourceUri` is set
299
300
 
300
- If content exceeds `MAX_INLINE_CONTENT_CHARS` and cache is disabled, the server truncates output and appends `...[truncated]`.
301
+ If content exceeds `MAX_INLINE_CONTENT_CHARS` and cache is disabled, the server truncates output, appends `...[truncated]`, and sets `truncated: true`.
301
302
 
302
303
  ---
303
304
 
@@ -305,16 +306,19 @@ If content exceeds `MAX_INLINE_CONTENT_CHARS` and cache is disabled, the server
305
306
 
306
307
  Fetches a webpage and converts it to AI-readable JSONL format with semantic content blocks. You can also request Markdown with `format: "markdown"`.
307
308
 
308
- | Parameter | Type | Default | Description |
309
- | -------------------- | --------------------- | --------- | --------------------------------------------- |
310
- | `url` | string | required | URL to fetch |
311
- | `format` | "jsonl" \| "markdown" | `"jsonl"` | Output format |
312
- | `extractMainContent` | boolean | `true` | Use Readability to extract main content |
313
- | `includeMetadata` | boolean | `true` | Include page metadata |
314
- | `maxContentLength` | number | - | Maximum content length in characters |
315
- | `customHeaders` | object | - | Custom HTTP headers (sanitized) |
316
- | `timeout` | number | `30000` | Request timeout in milliseconds (1000-120000) |
317
- | `retries` | number | `3` | Number of retry attempts (1-10) |
309
+ | Parameter | Type | Default | Description |
310
+ | ---------------------- | --------------------- | ---------------------------------- | ------------------------------------------------------ |
311
+ | `url` | string | required | URL to fetch |
312
+ | `format` | "jsonl" \| "markdown" | `"jsonl"` | Output format |
313
+ | `includeContentBlocks` | boolean | `true` (jsonl), `false` (markdown) | Include content block counts when `format: "markdown"` |
314
+ | `extractMainContent` | boolean | `true` | Use Readability to extract main content |
315
+ | `includeMetadata` | boolean | `true` | Include page metadata |
316
+ | `maxContentLength` | number | - | Maximum content length in characters (max 5,242,880) |
317
+ | `customHeaders` | object | - | Custom HTTP headers (sanitized) |
318
+ | `timeout` | number | `30000` | Request timeout in milliseconds (1000-120000) |
319
+ | `retries` | number | `3` | Number of retry attempts (1-10) |
320
+
321
+ When `format: "markdown"` and `includeContentBlocks` is `false`, `contentBlocks` will be `0`.
318
322
 
319
323
  **Example `structuredContent`:**
320
324
 
@@ -337,15 +341,15 @@ Fetches a webpage and converts it to AI-readable JSONL format with semantic cont
337
341
 
338
342
  Fetches a webpage and converts it to clean Markdown with optional frontmatter.
339
343
 
340
- | Parameter | Type | Default | Description |
341
- | -------------------- | ------- | -------- | --------------------------------------------- |
342
- | `url` | string | required | URL to fetch |
343
- | `extractMainContent` | boolean | `true` | Extract main content only |
344
- | `includeMetadata` | boolean | `true` | Include YAML frontmatter |
345
- | `maxContentLength` | number | - | Maximum content length in characters |
346
- | `customHeaders` | object | - | Custom HTTP headers (sanitized) |
347
- | `timeout` | number | `30000` | Request timeout in milliseconds (1000-120000) |
348
- | `retries` | number | `3` | Number of retry attempts (1-10) |
344
+ | Parameter | Type | Default | Description |
345
+ | -------------------- | ------- | -------- | ---------------------------------------------------- |
346
+ | `url` | string | required | URL to fetch |
347
+ | `extractMainContent` | boolean | `true` | Extract main content only |
348
+ | `includeMetadata` | boolean | `true` | Include YAML frontmatter |
349
+ | `maxContentLength` | number | - | Maximum content length in characters (max 5,242,880) |
350
+ | `customHeaders` | object | - | Custom HTTP headers (sanitized) |
351
+ | `timeout` | number | `30000` | Request timeout in milliseconds (1000-120000) |
352
+ | `retries` | number | `3` | Number of retry attempts (1-10) |
349
353
 
350
354
  **Example `structuredContent`:**
351
355
 
@@ -373,9 +377,9 @@ Fetches a webpage and converts it to clean Markdown with optional frontmatter.
373
377
  ### Large Content Handling
374
378
 
375
379
  - Inline limit is configurable via `MAX_INLINE_CONTENT_CHARS` (see `CONFIGURATION.md`).
376
- - If content exceeds the limit and cache is enabled, responses include `resourceUri` and a `resource_link` block.
377
- - If cache is disabled, content is truncated with `...[truncated]`.
378
- - Use `maxContentLength` per request to enforce a lower limit.
380
+ - If content exceeds the limit and cache is enabled, responses include `resourceUri`/`resourceMimeType` and a `resource_link` block.
381
+ - If cache is disabled, content is truncated with `...[truncated]` and `truncated: true`.
382
+ - Use `maxContentLength` per request to enforce a lower limit (hard cap: 5,242,880 characters).
379
383
  - Upstream fetch size is capped at 10 MB of HTML; larger responses fail.
380
384
 
381
385
  ---
@@ -446,8 +450,12 @@ HTTP mode uses the MCP Streamable HTTP transport. The workflow is:
446
450
  2. The server returns `mcp-session-id` in the response headers.
447
451
  3. Use that header for subsequent `POST /mcp`, `GET /mcp`, and `DELETE /mcp` requests.
448
452
 
453
+ `GET /mcp` and `DELETE /mcp` require `mcp-session-id`. `POST /mcp` without an `initialize` request will return 400.
454
+
449
455
  If `MAX_SESSIONS` is reached, the server evicts the oldest session when possible, otherwise returns a 503.
450
456
 
457
+ Host header validation is always enforced in HTTP mode. When binding to `0.0.0.0` or `::`, set `ALLOWED_HOSTS` to the hostnames clients will send. If an `Origin` header is present, it must be allowed by `ALLOWED_ORIGINS` or `CORS_ALLOW_ALL`.
458
+
451
459
  ---
452
460
 
453
461
  ## Content Block Types
@@ -473,17 +481,22 @@ JSONL output includes semantic content blocks:
473
481
 
474
482
  Blocked destinations include:
475
483
 
476
- - Localhost and loopback addresses
477
- - Private IP ranges (`10.x.x.x`, `172.16-31.x.x`, `192.168.x.x`)
478
- - Cloud metadata endpoints (AWS, GCP, Azure)
479
- - IPv6 link-local and unique local addresses
484
+ - Loopback and unspecified addresses (`127.0.0.0/8`, `::1`, `0.0.0.0`, `::`)
485
+ - Private/ULA ranges (`10.0.0.0/8`, `172.16.0.0/12`, `192.168.0.0/16`, `fc00::/7`)
486
+ - Link-local and shared address space (`169.254.0.0/16`, `100.64.0.0/10`, `fe80::/10`)
487
+ - Multicast/reserved ranges (`224.0.0.0/4`, `240.0.0.0/4`, `ff00::/8`)
488
+ - IPv6 transition ranges (`64:ff9b::/96`, `64:ff9b:1::/48`, `2001::/32`, `2002::/16`)
489
+ - Cloud metadata endpoints (AWS/GCP/Azure/Alibaba) like `169.254.169.254`, `metadata.google.internal`, `metadata.azure.com`, `100.100.100.200`, `instance-data`
480
490
  - Internal suffixes such as `.local` and `.internal`
481
491
 
492
+ DNS resolution is performed and blocked if any resolved IP matches a blocked range.
493
+
482
494
  ### URL Validation
483
495
 
484
496
  - Only `http` and `https` URLs
485
497
  - No embedded credentials in URLs
486
498
  - Max URL length: 2048 characters
499
+ - Hostnames ending in `.local` or `.internal` are rejected
487
500
 
488
501
  ### Header Sanitization
489
502
 
@@ -491,7 +504,7 @@ Blocked headers: `host`, `authorization`, `cookie`, `x-forwarded-for`, `x-real-i
491
504
 
492
505
  ### Rate Limiting
493
506
 
494
- Rate limiting thresholds are configurable via `RATE_LIMIT_MAX` and `RATE_LIMIT_WINDOW_MS` (see `CONFIGURATION.md`).
507
+ Rate limiting applies to `/mcp` and `/mcp/downloads` and is configurable via `RATE_LIMIT_ENABLED`, `RATE_LIMIT_MAX`, `RATE_LIMIT_WINDOW_MS`, and `RATE_LIMIT_CLEANUP_MS` (see `CONFIGURATION.md`).
495
508
 
496
509
  ---
497
510
 
@@ -499,20 +512,22 @@ Rate limiting thresholds are configurable via `RATE_LIMIT_MAX` and `RATE_LIMIT_W
499
512
 
500
513
  ### Scripts
501
514
 
502
- | Command | Description |
503
- | ----------------------- | ---------------------------------- |
504
- | `npm run dev` | Development server with hot reload |
505
- | `npm run build` | Compile TypeScript |
506
- | `npm start` | Production server |
507
- | `npm run lint` | Run ESLint |
508
- | `npm run type-check` | TypeScript type checking |
509
- | `npm run format` | Format with Prettier |
510
- | `npm test` | Run Vitest tests |
511
- | `npm run test:coverage` | Run tests with coverage |
512
- | `npm run bench` | Run minimal performance benchmark |
513
- | `npm run release` | Create new release |
514
- | `npm run knip` | Find unused exports/dependencies |
515
- | `npm run knip:fix` | Auto-fix unused code |
515
+ | Command | Description |
516
+ | ----------------------- | ------------------------------------ |
517
+ | `npm run dev` | Development server with hot reload |
518
+ | `npm run build` | Compile TypeScript |
519
+ | `npm start` | Production server |
520
+ | `npm run lint` | Run ESLint |
521
+ | `npm run type-check` | TypeScript type checking |
522
+ | `npm run format` | Format with Prettier |
523
+ | `npm test` | Run Node test runner (builds dist) |
524
+ | `npm run test:coverage` | Run tests with experimental coverage |
525
+ | `npm run bench` | Run minimal performance benchmark |
526
+ | `npm run release` | Create new release |
527
+ | `npm run knip` | Find unused exports/dependencies |
528
+ | `npm run knip:fix` | Auto-fix unused code |
529
+
530
+ > **Note:** Tests run via `node --test` with `--experimental-transform-types` to execute `.ts` test files. Node will emit an experimental warning.
516
531
 
517
532
  ### Tech Stack
518
533
 
@@ -525,7 +540,7 @@ Rate limiting thresholds are configurable via `RATE_LIMIT_MAX` and `RATE_LIMIT_W
525
540
  | HTML Parsing | Cheerio ^1.1.2, LinkeDOM ^0.18.12 |
526
541
  | Markdown | Turndown ^7.2.2 |
527
542
  | HTTP | Express ^5.2.1, undici ^6.22.0 |
528
- | Validation | Zod ^3.24.1 |
543
+ | Validation | Zod ^4.3.4 |
529
544
 
530
545
  ---
531
546
 
@@ -4,4 +4,4 @@ export declare const CODE_BLOCK: {
4
4
  readonly format: (code: string, language?: string) => string;
5
5
  };
6
6
  export declare const FRONTMATTER_DELIMITER = "---";
7
- export declare const joinLines: (lines: string[]) => string;
7
+ export declare const joinLines: (lines: readonly string[]) => string;
@@ -22,7 +22,7 @@ export interface ParagraphBlock extends ContentBlock {
22
22
  export interface ListBlock extends ContentBlock {
23
23
  type: 'list';
24
24
  ordered: boolean;
25
- items: string[];
25
+ readonly items: readonly string[];
26
26
  }
27
27
  export interface CodeBlock extends ContentBlock {
28
28
  type: 'code';
@@ -31,8 +31,8 @@ export interface CodeBlock extends ContentBlock {
31
31
  }
32
32
  export interface TableBlock extends ContentBlock {
33
33
  type: 'table';
34
- headers?: string[];
35
- rows: string[][];
34
+ readonly headers?: readonly string[];
35
+ readonly rows: readonly (readonly string[])[];
36
36
  }
37
37
  export interface ImageBlock extends ContentBlock {
38
38
  type: 'image';
@@ -65,7 +65,7 @@ export interface FetchPipelineOptions<T> {
65
65
  /** Optional: cache variation input for headers/flags */
66
66
  cacheVary?: Record<string, unknown> | string;
67
67
  /** Transform function to process HTML into desired format */
68
- transform: (html: string, url: string) => T;
68
+ transform: (html: string, url: string) => T | Promise<T>;
69
69
  /** Optional: serialize result for caching (defaults to JSON.stringify) */
70
70
  serialize?: (result: T) => string;
71
71
  /** Optional: deserialize cached content */
@@ -1,24 +1,25 @@
1
1
  import type { ToolContentBlock } from './runtime.js';
2
2
  interface RequestOptions {
3
3
  /** Custom HTTP headers for the request */
4
- customHeaders?: Record<string, string>;
4
+ customHeaders?: Record<string, string> | undefined;
5
5
  /** Request timeout in milliseconds (1000-120000) */
6
- timeout?: number;
6
+ timeout?: number | undefined;
7
7
  /** Number of retry attempts (1-10) */
8
- retries?: number;
8
+ retries?: number | undefined;
9
9
  }
10
10
  export interface FetchUrlInput extends RequestOptions {
11
11
  url: string;
12
- extractMainContent?: boolean;
13
- includeMetadata?: boolean;
14
- maxContentLength?: number;
15
- format?: 'jsonl' | 'markdown';
12
+ extractMainContent?: boolean | undefined;
13
+ includeMetadata?: boolean | undefined;
14
+ maxContentLength?: number | undefined;
15
+ format?: 'jsonl' | 'markdown' | undefined;
16
+ includeContentBlocks?: boolean | undefined;
16
17
  }
17
18
  export interface FetchMarkdownInput extends RequestOptions {
18
19
  url: string;
19
- extractMainContent?: boolean;
20
- includeMetadata?: boolean;
21
- maxContentLength?: number;
20
+ extractMainContent?: boolean | undefined;
21
+ includeMetadata?: boolean | undefined;
22
+ maxContentLength?: number | undefined;
22
23
  }
23
24
  export interface FileDownloadInfo {
24
25
  downloadUrl: string;
@@ -38,11 +39,10 @@ export interface ToolErrorResponse {
38
39
  [x: string]: unknown;
39
40
  content: ToolContentBlock[];
40
41
  structuredContent: {
41
- [x: string]: unknown;
42
42
  error: string;
43
43
  url: string;
44
44
  errorCode: string;
45
- };
45
+ } & Record<string, unknown>;
46
46
  isError: true;
47
47
  }
48
48
  export interface ToolResponseBase {
package/dist/http/cors.js CHANGED
@@ -13,13 +13,22 @@ function isValidOrigin(origin) {
13
13
  export function createCorsMiddleware(options) {
14
14
  return (req, res, next) => {
15
15
  const origin = resolveOrigin(req);
16
- if (shouldSkipInvalidOrigin(origin)) {
17
- next();
18
- return;
19
- }
20
- if (!applyCorsHeaders(res, origin, options)) {
21
- next();
22
- return;
16
+ if (origin) {
17
+ if (!isValidOrigin(origin)) {
18
+ res.status(403).json({
19
+ error: 'Origin not allowed',
20
+ code: 'ORIGIN_NOT_ALLOWED',
21
+ });
22
+ return;
23
+ }
24
+ if (!isOriginAllowed(origin, options)) {
25
+ res.status(403).json({
26
+ error: 'Origin not allowed',
27
+ code: 'ORIGIN_NOT_ALLOWED',
28
+ });
29
+ return;
30
+ }
31
+ applyCorsHeaders(res, origin);
23
32
  }
24
33
  if (req.method === 'OPTIONS') {
25
34
  res.sendStatus(200);
@@ -31,20 +40,11 @@ export function createCorsMiddleware(options) {
31
40
  function resolveOrigin(req) {
32
41
  return req.headers.origin;
33
42
  }
34
- function shouldSkipInvalidOrigin(origin) {
35
- return Boolean(origin && !isValidOrigin(origin));
36
- }
37
- function applyCorsHeaders(res, origin, options) {
38
- if (isOriginAllowed(origin, options)) {
39
- if (origin) {
40
- res.vary('Origin');
41
- }
42
- res.header('Access-Control-Allow-Origin', origin ?? '*');
43
- res.header('Access-Control-Allow-Methods', 'GET, POST, DELETE, OPTIONS');
44
- res.header('Access-Control-Allow-Headers', 'Content-Type, mcp-session-id, Authorization, X-API-Key');
45
- res.header('Access-Control-Expose-Headers', 'mcp-session-id');
46
- res.header('Access-Control-Max-Age', '86400');
47
- return true;
48
- }
49
- return options.allowedOrigins.length === 0;
43
+ function applyCorsHeaders(res, origin) {
44
+ res.vary('Origin');
45
+ res.header('Access-Control-Allow-Origin', origin);
46
+ res.header('Access-Control-Allow-Methods', 'GET, POST, DELETE, OPTIONS');
47
+ res.header('Access-Control-Allow-Headers', 'Content-Type, mcp-session-id, Authorization, X-API-Key');
48
+ res.header('Access-Control-Expose-Headers', 'mcp-session-id');
49
+ res.header('Access-Control-Max-Age', '86400');
50
50
  }
@@ -52,15 +52,20 @@ function resolveExtension(namespace) {
52
52
  function parseCachedPayload(raw) {
53
53
  try {
54
54
  const parsed = JSON.parse(raw);
55
- if (parsed && typeof parsed === 'object') {
56
- return parsed;
57
- }
58
- return null;
55
+ return isCachedPayload(parsed) ? parsed : null;
59
56
  }
60
57
  catch {
61
58
  return null;
62
59
  }
63
60
  }
61
+ function isCachedPayload(value) {
62
+ if (!value || typeof value !== 'object')
63
+ return false;
64
+ const record = value;
65
+ return ((record.content === undefined || typeof record.content === 'string') &&
66
+ (record.markdown === undefined || typeof record.markdown === 'string') &&
67
+ (record.title === undefined || typeof record.title === 'string'));
68
+ }
64
69
  function resolvePayloadContent(payload, namespace) {
65
70
  if (namespace === 'markdown') {
66
71
  if (typeof payload.markdown === 'string') {
@@ -62,15 +62,6 @@ function resolveSessionTransport(sessionId, options, res) {
62
62
  options.sessionStore.touch(sessionId);
63
63
  return session.transport;
64
64
  }
65
- function resolveSessionTransportForDelete(sessionId, options) {
66
- if (!sessionId)
67
- return null;
68
- const session = options.sessionStore.get(sessionId);
69
- if (!session)
70
- return null;
71
- options.sessionStore.touch(sessionId);
72
- return session.transport;
73
- }
74
65
  async function handlePost(req, res, options) {
75
66
  const sessionId = getSessionId(req);
76
67
  const { body } = req;
@@ -91,11 +82,9 @@ async function handleGet(req, res, options) {
91
82
  await handleTransportRequest(transport, req, res);
92
83
  }
93
84
  async function handleDelete(req, res, options) {
94
- const transport = resolveSessionTransportForDelete(getSessionId(req), options);
95
- if (!transport) {
96
- res.status(204).end();
85
+ const transport = resolveSessionTransport(getSessionId(req), options, res);
86
+ if (!transport)
97
87
  return;
98
- }
99
88
  await handleTransportRequest(transport, req, res);
100
89
  }
101
90
  export function registerMcpRoutes(app, options) {
@@ -2,7 +2,7 @@ function isRecord(value) {
2
2
  return value !== null && typeof value === 'object';
3
3
  }
4
4
  export function isMcpRequestBody(body) {
5
- if (!isRecord(body))
5
+ if (!isRecord(body) || Array.isArray(body))
6
6
  return false;
7
7
  const { method, id, jsonrpc, params } = body;
8
8
  const methodValid = method === undefined || typeof method === 'string';
@@ -84,7 +84,8 @@ export function createContextMiddleware() {
84
84
  return (req, _res, next) => {
85
85
  const requestId = randomUUID();
86
86
  const sessionId = getSessionId(req);
87
- runWithRequestContext({ requestId, sessionId }, () => {
87
+ const context = sessionId === undefined ? { requestId } : { requestId, sessionId };
88
+ runWithRequestContext(context, () => {
88
89
  const boundNext = bindToRequestContext(next);
89
90
  boundNext();
90
91
  });
@@ -2,6 +2,7 @@ import { styleText } from 'node:util';
2
2
  import { config, enableHttpMode } from '../config/index.js';
3
3
  import { destroyAgents } from '../services/fetcher.js';
4
4
  import { logError, logInfo, logWarn } from '../services/logger.js';
5
+ import { destroyTransformWorkers } from '../services/transform-worker-pool.js';
5
6
  import { errorHandler } from '../middleware/error-handler.js';
6
7
  import { getErrorMessage } from '../utils/error-utils.js';
7
8
  import { createAuthMiddleware } from './auth.js';
@@ -54,6 +55,7 @@ function createShutdownHandler(server, sessionStore, sessionCleanupController, s
54
55
  });
55
56
  })));
56
57
  destroyAgents();
58
+ destroyTransformWorkers();
57
59
  server.close(() => {
58
60
  logInfo('HTTP server closed');
59
61
  process.exit(0);
package/dist/index.js CHANGED
@@ -34,6 +34,11 @@ process.on('unhandledRejection', (reason) => {
34
34
  const error = reason instanceof Error ? reason : new Error(String(reason));
35
35
  logError('Unhandled rejection', error);
36
36
  process.stderr.write(`Unhandled rejection: ${error.message}\n`);
37
+ if (shouldAttemptShutdown()) {
38
+ attemptShutdown('UNHANDLED_REJECTION');
39
+ return;
40
+ }
41
+ process.exit(1);
37
42
  });
38
43
  try {
39
44
  if (isStdioMode) {
@@ -31,7 +31,7 @@ function buildErrorResponse(err) {
31
31
  ...(details && { details }),
32
32
  },
33
33
  };
34
- if (process.env.NODE_ENV === 'development') {
34
+ if (process.env.NODE_ENV === 'development' && err.stack) {
35
35
  response.error.stack = err.stack;
36
36
  }
37
37
  return response;
@@ -163,15 +163,19 @@ function resolveStringParam(value) {
163
163
  function parseCachedPayload(raw) {
164
164
  try {
165
165
  const parsed = JSON.parse(raw);
166
- if (parsed && typeof parsed === 'object') {
167
- return parsed;
168
- }
169
- return null;
166
+ return isCachedPayload(parsed) ? parsed : null;
170
167
  }
171
168
  catch {
172
169
  return null;
173
170
  }
174
171
  }
172
+ function isCachedPayload(value) {
173
+ if (!value || typeof value !== 'object')
174
+ return false;
175
+ const record = value;
176
+ return ((record.content === undefined || typeof record.content === 'string') &&
177
+ (record.markdown === undefined || typeof record.markdown === 'string'));
178
+ }
175
179
  function resolvePayloadContent(payload, namespace) {
176
180
  if (namespace === 'markdown') {
177
181
  if (typeof payload.markdown === 'string') {
package/dist/server.js CHANGED
@@ -3,6 +3,7 @@ import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'
3
3
  import { config } from './config/index.js';
4
4
  import { destroyAgents } from './services/fetcher.js';
5
5
  import { logError, logInfo } from './services/logger.js';
6
+ import { destroyTransformWorkers } from './services/transform-worker-pool.js';
6
7
  import { registerTools } from './tools/index.js';
7
8
  import { registerResources } from './resources/index.js';
8
9
  export function createMcpServer() {
@@ -30,6 +31,7 @@ export async function startStdioServer() {
30
31
  const handleShutdown = (signal) => {
31
32
  process.stderr.write(`\n${signal} received, shutting down superFetch MCP server...\n`);
32
33
  destroyAgents();
34
+ destroyTransformWorkers();
33
35
  server
34
36
  .close()
35
37
  .catch((err) => {
@@ -17,6 +17,6 @@ export declare function toResourceUri(cacheKey: string): string | null;
17
17
  export declare function onCacheUpdate(listener: CacheUpdateListener): () => void;
18
18
  export declare function get(cacheKey: string | null): CacheEntry | undefined;
19
19
  export declare function set(cacheKey: string | null, content: string, metadata: CacheEntryMetadata): void;
20
- export declare function keys(): string[];
20
+ export declare function keys(): readonly string[];
21
21
  export declare function isEnabled(): boolean;
22
22
  export {};
@@ -22,19 +22,16 @@ async function runCleanupLoop(signal) {
22
22
  signal,
23
23
  ref: false,
24
24
  })) {
25
- evictExpiredEntries();
25
+ evictEntries();
26
26
  }
27
27
  }
28
- function evictExpiredEntries() {
28
+ function evictEntries() {
29
29
  const now = Date.now();
30
30
  for (const [key, item] of contentCache.entries()) {
31
31
  if (now > item.expiresAt) {
32
32
  contentCache.delete(key);
33
33
  }
34
34
  }
35
- enforceMaxKeys();
36
- }
37
- function enforceMaxKeys() {
38
35
  if (contentCache.size <= config.cache.maxKeys)
39
36
  return;
40
37
  const keysToRemove = contentCache.size - config.cache.maxKeys;
@@ -179,16 +176,32 @@ export function isEnabled() {
179
176
  return config.cache.enabled;
180
177
  }
181
178
  function buildCacheEntry(cacheKey, content, metadata) {
182
- return {
179
+ const entry = {
183
180
  url: metadata.url,
184
- title: metadata.title,
185
181
  content,
186
182
  fetchedAt: new Date().toISOString(),
187
183
  expiresAt: new Date(Date.now() + config.cache.ttl * 1000).toISOString(),
188
184
  };
185
+ if (metadata.title !== undefined) {
186
+ entry.title = metadata.title;
187
+ }
188
+ return entry;
189
189
  }
190
190
  function persistCacheEntry(cacheKey, entry) {
191
191
  const expiresAt = Date.now() + config.cache.ttl * 1000;
192
192
  contentCache.set(cacheKey, { entry, expiresAt });
193
+ enforceMaxKeysLimit();
193
194
  emitCacheUpdate(cacheKey);
194
195
  }
196
+ function enforceMaxKeysLimit() {
197
+ if (contentCache.size <= config.cache.maxKeys)
198
+ return;
199
+ const keysToRemove = contentCache.size - config.cache.maxKeys;
200
+ const iterator = contentCache.keys();
201
+ for (let i = 0; i < keysToRemove; i++) {
202
+ const { value, done } = iterator.next();
203
+ if (done)
204
+ break;
205
+ contentCache.delete(value);
206
+ }
207
+ }
@@ -1,9 +1,7 @@
1
- import { AsyncLocalStorage } from 'node:async_hooks';
2
1
  interface RequestContext {
3
- requestId: string;
4
- sessionId?: string;
2
+ readonly requestId: string;
3
+ readonly sessionId?: string;
5
4
  }
6
- export declare const requestContext: AsyncLocalStorage<RequestContext>;
7
5
  export declare function runWithRequestContext<T>(context: RequestContext, fn: () => T): T;
8
6
  export declare function bindToRequestContext<T extends (...args: unknown[]) => unknown>(fn: T): T;
9
7
  export declare function getRequestId(): string | undefined;
@@ -1,5 +1,5 @@
1
1
  import { AsyncLocalStorage } from 'node:async_hooks';
2
- export const requestContext = new AsyncLocalStorage();
2
+ const requestContext = new AsyncLocalStorage();
3
3
  export function runWithRequestContext(context, fn) {
4
4
  return requestContext.run(context, fn);
5
5
  }
@@ -3,17 +3,7 @@ import { Readability } from '@mozilla/readability';
3
3
  import { getErrorMessage } from '../utils/error-utils.js';
4
4
  import { truncateHtml } from '../utils/html-truncator.js';
5
5
  import { logError, logInfo, logWarn } from './logger.js';
6
- function resolveMetaField(state, field) {
7
- const sources = state[field];
8
- return sources.og ?? sources.twitter ?? sources.standard;
9
- }
10
- function createMetaCollectorState() {
11
- return {
12
- title: {},
13
- description: {},
14
- author: {},
15
- };
16
- }
6
+ import { createMetaCollectorState, resolveMetaField, } from './metadata-collector.js';
17
7
  function collectMetaTag(state, tag) {
18
8
  const content = getMetaContent(tag);
19
9
  if (!content)
@@ -76,11 +66,17 @@ function extractMetadata(document) {
76
66
  const state = createMetaCollectorState();
77
67
  scanMetaTags(document, state);
78
68
  ensureTitleFallback(document, state);
79
- return {
80
- title: resolveMetaField(state, 'title'),
81
- description: resolveMetaField(state, 'description'),
82
- author: resolveMetaField(state, 'author'),
83
- };
69
+ const metadata = {};
70
+ const title = resolveMetaField(state, 'title');
71
+ const description = resolveMetaField(state, 'description');
72
+ const author = resolveMetaField(state, 'author');
73
+ if (title !== undefined)
74
+ metadata.title = title;
75
+ if (description !== undefined)
76
+ metadata.description = description;
77
+ if (author !== undefined)
78
+ metadata.author = author;
79
+ return metadata;
84
80
  }
85
81
  function isReadabilityCompatible(doc) {
86
82
  if (!doc || typeof doc !== 'object')
@@ -113,14 +109,23 @@ function parseReadabilityArticle(document) {
113
109
  }
114
110
  }
115
111
  function mapReadabilityResult(parsed) {
116
- return {
117
- title: toOptional(parsed.title),
118
- byline: toOptional(parsed.byline),
112
+ const article = {
119
113
  content: parsed.content ?? '',
120
114
  textContent: parsed.textContent ?? '',
121
- excerpt: toOptional(parsed.excerpt),
122
- siteName: toOptional(parsed.siteName),
123
115
  };
116
+ const title = toOptional(parsed.title);
117
+ if (title !== undefined)
118
+ article.title = title;
119
+ const byline = toOptional(parsed.byline);
120
+ if (byline !== undefined)
121
+ article.byline = byline;
122
+ const excerpt = toOptional(parsed.excerpt);
123
+ if (excerpt !== undefined)
124
+ article.excerpt = excerpt;
125
+ const siteName = toOptional(parsed.siteName);
126
+ if (siteName !== undefined)
127
+ article.siteName = siteName;
128
+ return article;
124
129
  }
125
130
  function toOptional(value) {
126
131
  return value ?? undefined;