@j0hanz/superfetch 2.0.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95)
  1. package/README.md +28 -17
  2. package/dist/config/index.js +11 -6
  3. package/dist/http/auth.js +161 -2
  4. package/dist/http/host-allowlist.d.ts +3 -0
  5. package/dist/http/host-allowlist.js +117 -0
  6. package/dist/http/mcp-routes.d.ts +8 -2
  7. package/dist/http/mcp-routes.js +101 -8
  8. package/dist/http/mcp-session-eviction.d.ts +3 -0
  9. package/dist/http/mcp-session-eviction.js +24 -0
  10. package/dist/http/mcp-session-init.d.ts +7 -0
  11. package/dist/http/mcp-session-init.js +94 -0
  12. package/dist/http/mcp-session-slots.d.ts +17 -0
  13. package/dist/http/mcp-session-slots.js +55 -0
  14. package/dist/http/mcp-session-transport-init.d.ts +7 -0
  15. package/dist/http/mcp-session-transport-init.js +41 -0
  16. package/dist/http/mcp-session-types.d.ts +5 -0
  17. package/dist/http/mcp-session-types.js +1 -0
  18. package/dist/http/mcp-session.d.ts +9 -9
  19. package/dist/http/mcp-session.js +5 -114
  20. package/dist/http/mcp-sessions.d.ts +43 -0
  21. package/dist/http/mcp-sessions.js +392 -0
  22. package/dist/http/rate-limit.js +2 -2
  23. package/dist/http/server-middleware.d.ts +6 -1
  24. package/dist/http/server-middleware.js +3 -117
  25. package/dist/http/server-shutdown.js +1 -1
  26. package/dist/http/server.d.ts +10 -0
  27. package/dist/http/server.js +508 -11
  28. package/dist/http/session-cleanup.js +8 -5
  29. package/dist/middleware/error-handler.d.ts +1 -1
  30. package/dist/middleware/error-handler.js +31 -30
  31. package/dist/resources/cached-content-params.d.ts +5 -0
  32. package/dist/resources/cached-content-params.js +36 -0
  33. package/dist/resources/cached-content.js +33 -33
  34. package/dist/server.js +1 -1
  35. package/dist/services/cache-events.d.ts +8 -0
  36. package/dist/services/cache-events.js +19 -0
  37. package/dist/services/cache.d.ts +5 -4
  38. package/dist/services/cache.js +49 -45
  39. package/dist/services/extractor.js +49 -38
  40. package/dist/services/fetcher/agents.js +1 -1
  41. package/dist/services/fetcher/dns-selection.js +1 -1
  42. package/dist/services/fetcher/interceptors.js +29 -60
  43. package/dist/services/fetcher/redirects.js +12 -4
  44. package/dist/services/fetcher/response.js +18 -8
  45. package/dist/services/fetcher.d.ts +21 -0
  46. package/dist/services/fetcher.js +532 -13
  47. package/dist/tools/handlers/fetch-single.shared.d.ts +11 -3
  48. package/dist/tools/handlers/fetch-single.shared.js +131 -2
  49. package/dist/tools/handlers/fetch-url.tool.d.ts +6 -0
  50. package/dist/tools/handlers/fetch-url.tool.js +48 -6
  51. package/dist/tools/utils/content-shaping.js +19 -4
  52. package/dist/tools/utils/content-transform.d.ts +4 -1
  53. package/dist/tools/utils/content-transform.js +110 -96
  54. package/dist/tools/utils/fetch-pipeline.js +47 -56
  55. package/dist/tools/utils/frontmatter.d.ts +3 -0
  56. package/dist/tools/utils/frontmatter.js +73 -0
  57. package/dist/tools/utils/markdown-heuristics.d.ts +1 -0
  58. package/dist/tools/utils/markdown-heuristics.js +19 -0
  59. package/dist/tools/utils/markdown-signals.d.ts +1 -0
  60. package/dist/tools/utils/markdown-signals.js +19 -0
  61. package/dist/tools/utils/raw-markdown-frontmatter.d.ts +3 -0
  62. package/dist/tools/utils/raw-markdown-frontmatter.js +73 -0
  63. package/dist/tools/utils/raw-markdown.d.ts +6 -0
  64. package/dist/tools/utils/raw-markdown.js +135 -0
  65. package/dist/transformers/markdown/fenced-code-rule.d.ts +2 -0
  66. package/dist/transformers/markdown/fenced-code-rule.js +38 -0
  67. package/dist/transformers/markdown/frontmatter.d.ts +2 -0
  68. package/dist/transformers/markdown/frontmatter.js +45 -0
  69. package/dist/transformers/markdown/noise-rule.d.ts +2 -0
  70. package/dist/transformers/markdown/noise-rule.js +80 -0
  71. package/dist/transformers/markdown/turndown-instance.d.ts +2 -0
  72. package/dist/transformers/markdown/turndown-instance.js +19 -0
  73. package/dist/transformers/markdown.d.ts +2 -0
  74. package/dist/transformers/markdown.js +185 -0
  75. package/dist/transformers/markdown.transformer.js +2 -189
  76. package/dist/utils/code-language-bash.d.ts +1 -0
  77. package/dist/utils/code-language-bash.js +48 -0
  78. package/dist/utils/code-language-core.d.ts +2 -0
  79. package/dist/utils/code-language-core.js +13 -0
  80. package/dist/utils/code-language-detectors.d.ts +5 -0
  81. package/dist/utils/code-language-detectors.js +142 -0
  82. package/dist/utils/code-language-helpers.d.ts +5 -0
  83. package/dist/utils/code-language-helpers.js +62 -0
  84. package/dist/utils/code-language-parsing.d.ts +5 -0
  85. package/dist/utils/code-language-parsing.js +62 -0
  86. package/dist/utils/code-language.d.ts +9 -0
  87. package/dist/utils/code-language.js +250 -46
  88. package/dist/utils/error-details.d.ts +3 -0
  89. package/dist/utils/error-details.js +12 -0
  90. package/dist/utils/filename-generator.js +14 -3
  91. package/dist/utils/ip-address.d.ts +4 -0
  92. package/dist/utils/ip-address.js +6 -0
  93. package/dist/utils/tool-error-handler.js +12 -17
  94. package/dist/utils/url-validator.js +33 -21
  95. package/package.json +7 -5
@@ -0,0 +1,36 @@
1
+ import { ErrorCode, McpError } from '@modelcontextprotocol/sdk/types.js';
2
+ import { isRecord } from '../utils/guards.js';
3
+ export const CACHE_NAMESPACE = 'markdown';
4
+ const HASH_PATTERN = /^[a-f0-9.]+$/i;
5
+ export function resolveCacheParams(params) {
6
+ const parsed = requireRecordParams(params);
7
+ const namespace = requireParamString(parsed, 'namespace');
8
+ const urlHash = requireParamString(parsed, 'urlHash');
9
+ if (!isValidNamespace(namespace) || !isValidHash(urlHash)) {
10
+ throw new McpError(ErrorCode.InvalidParams, 'Invalid cache resource parameters');
11
+ }
12
+ return { namespace, urlHash };
13
+ }
14
+ function requireRecordParams(value) {
15
+ if (!isRecord(value)) {
16
+ throw new McpError(ErrorCode.InvalidParams, 'Invalid cache resource parameters');
17
+ }
18
+ return value;
19
+ }
20
+ function requireParamString(params, key) {
21
+ const raw = params[key];
22
+ const resolved = resolveStringParam(raw);
23
+ if (!resolved) {
24
+ throw new McpError(ErrorCode.InvalidParams, 'Both namespace and urlHash parameters are required');
25
+ }
26
+ return resolved;
27
+ }
28
+ function isValidNamespace(namespace) {
29
+ return namespace === CACHE_NAMESPACE;
30
+ }
31
+ function isValidHash(hash) {
32
+ return HASH_PATTERN.test(hash) && hash.length >= 8 && hash.length <= 64;
33
+ }
34
+ function resolveStringParam(value) {
35
+ return typeof value === 'string' ? value : null;
36
+ }
@@ -4,10 +4,42 @@ import * as cache from '../services/cache.js';
4
4
  import { parseCacheKey, toResourceUri } from '../services/cache-keys.js';
5
5
  import { logWarn } from '../services/logger.js';
6
6
  import { parseCachedPayload, resolveCachedPayloadContent, } from '../utils/cached-payload.js';
7
- import { getErrorMessage } from '../utils/error-utils.js';
7
+ import { getErrorMessage } from '../utils/error-details.js';
8
8
  import { isRecord } from '../utils/guards.js';
9
9
  const CACHE_NAMESPACE = 'markdown';
10
10
  const HASH_PATTERN = /^[a-f0-9.]+$/i;
11
+ function resolveCacheParams(params) {
12
+ const parsed = requireRecordParams(params);
13
+ const namespace = requireParamString(parsed, 'namespace');
14
+ const urlHash = requireParamString(parsed, 'urlHash');
15
+ if (!isValidNamespace(namespace) || !isValidHash(urlHash)) {
16
+ throw new McpError(ErrorCode.InvalidParams, 'Invalid cache resource parameters');
17
+ }
18
+ return { namespace, urlHash };
19
+ }
20
+ function requireRecordParams(value) {
21
+ if (!isRecord(value)) {
22
+ throw new McpError(ErrorCode.InvalidParams, 'Invalid cache resource parameters');
23
+ }
24
+ return value;
25
+ }
26
+ function requireParamString(params, key) {
27
+ const raw = params[key];
28
+ const resolved = resolveStringParam(raw);
29
+ if (!resolved) {
30
+ throw new McpError(ErrorCode.InvalidParams, 'Both namespace and urlHash parameters are required');
31
+ }
32
+ return resolved;
33
+ }
34
+ function isValidNamespace(namespace) {
35
+ return namespace === CACHE_NAMESPACE;
36
+ }
37
+ function isValidHash(hash) {
38
+ return HASH_PATTERN.test(hash) && hash.length >= 8 && hash.length <= 64;
39
+ }
40
+ function resolveStringParam(value) {
41
+ return typeof value === 'string' ? value : null;
42
+ }
11
43
  function buildResourceEntry(namespace, urlHash) {
12
44
  return {
13
45
  name: `${namespace}:${urlHash}`,
@@ -42,29 +74,6 @@ export function registerCachedContentResource(server) {
42
74
  registerCacheContentResource(server);
43
75
  registerCacheUpdateSubscription(server);
44
76
  }
45
- function resolveCacheParams(params) {
46
- const parsed = requireRecordParams(params);
47
- const namespace = requireParamString(parsed, 'namespace');
48
- const urlHash = requireParamString(parsed, 'urlHash');
49
- if (!isValidNamespace(namespace) || !isValidHash(urlHash)) {
50
- throw new McpError(ErrorCode.InvalidParams, 'Invalid cache resource parameters');
51
- }
52
- return { namespace, urlHash };
53
- }
54
- function requireRecordParams(value) {
55
- if (!isRecord(value)) {
56
- throw new McpError(ErrorCode.InvalidParams, 'Invalid cache resource parameters');
57
- }
58
- return value;
59
- }
60
- function requireParamString(params, key) {
61
- const raw = params[key];
62
- const resolved = resolveStringParam(raw);
63
- if (!resolved) {
64
- throw new McpError(ErrorCode.InvalidParams, 'Both namespace and urlHash parameters are required');
65
- }
66
- return resolved;
67
- }
68
77
  function buildCachedContentResponse(uri, cacheKey) {
69
78
  const cached = requireCacheEntry(cacheKey);
70
79
  return buildMarkdownContentResponse(uri, cached.content);
@@ -98,15 +107,6 @@ function registerCacheUpdateSubscription(server) {
98
107
  unsubscribe();
99
108
  };
100
109
  }
101
- function isValidNamespace(namespace) {
102
- return namespace === CACHE_NAMESPACE;
103
- }
104
- function isValidHash(hash) {
105
- return HASH_PATTERN.test(hash) && hash.length >= 8 && hash.length <= 64;
106
- }
107
- function resolveStringParam(value) {
108
- return typeof value === 'string' ? value : null;
109
- }
110
110
  function requireCacheEntry(cacheKey) {
111
111
  const cached = cache.get(cacheKey);
112
112
  if (!cached) {
package/dist/server.js CHANGED
@@ -1,7 +1,7 @@
1
1
  import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
2
2
  import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
3
3
  import { config } from './config/index.js';
4
- import { destroyAgents } from './services/fetcher/agents.js';
4
+ import { destroyAgents } from './services/fetcher.js';
5
5
  import { logError, logInfo } from './services/logger.js';
6
6
  import { registerTools } from './tools/index.js';
7
7
  import { registerCachedContentResource } from './resources/cached-content.js';
@@ -0,0 +1,8 @@
1
+ import type { CacheKeyParts } from './cache-keys.js';
2
+ export interface CacheUpdateEvent extends CacheKeyParts {
3
+ cacheKey: string;
4
+ }
5
+ type CacheUpdateListener = (event: CacheUpdateEvent) => void;
6
+ export declare function onCacheUpdate(listener: CacheUpdateListener): () => void;
7
+ export declare function notifyCacheUpdate(cacheKey: string): void;
8
+ export {};
@@ -0,0 +1,19 @@
1
+ import { parseCacheKey } from './cache-keys.js';
2
+ const updateListeners = new Set();
3
+ export function onCacheUpdate(listener) {
4
+ updateListeners.add(listener);
5
+ return () => {
6
+ updateListeners.delete(listener);
7
+ };
8
+ }
9
+ export function notifyCacheUpdate(cacheKey) {
10
+ if (updateListeners.size === 0)
11
+ return;
12
+ const parts = parseCacheKey(cacheKey);
13
+ if (!parts)
14
+ return;
15
+ const event = { cacheKey, ...parts };
16
+ for (const listener of updateListeners) {
17
+ listener(event);
18
+ }
19
+ }
@@ -1,14 +1,15 @@
1
1
  import type { CacheEntry } from '../config/types/content.js';
2
- import type { CacheKeyParts } from './cache-keys.js';
3
- interface CacheUpdateEvent extends CacheKeyParts {
2
+ export interface CacheUpdateEvent {
4
3
  cacheKey: string;
4
+ namespace: string;
5
+ urlHash: string;
5
6
  }
7
+ type CacheUpdateListener = (event: CacheUpdateEvent) => void;
8
+ export declare function onCacheUpdate(listener: CacheUpdateListener): () => void;
6
9
  interface CacheEntryMetadata {
7
10
  url: string;
8
11
  title?: string;
9
12
  }
10
- type CacheUpdateListener = (event: CacheUpdateEvent) => void;
11
- export declare function onCacheUpdate(listener: CacheUpdateListener): () => void;
12
13
  export declare function get(cacheKey: string | null): CacheEntry | undefined;
13
14
  export declare function set(cacheKey: string | null, content: string, metadata: CacheEntryMetadata): void;
14
15
  export declare function keys(): readonly string[];
@@ -1,10 +1,28 @@
1
1
  import { setInterval as setIntervalPromise } from 'node:timers/promises';
2
2
  import { config } from '../config/index.js';
3
- import { getErrorMessage } from '../utils/error-utils.js';
3
+ import { getErrorMessage } from '../utils/error-details.js';
4
4
  import { parseCacheKey } from './cache-keys.js';
5
5
  import { logWarn } from './logger.js';
6
6
  const contentCache = new Map();
7
7
  let cleanupController = null;
8
+ const updateListeners = new Set();
9
+ export function onCacheUpdate(listener) {
10
+ updateListeners.add(listener);
11
+ return () => {
12
+ updateListeners.delete(listener);
13
+ };
14
+ }
15
+ function notifyCacheUpdate(cacheKey) {
16
+ if (updateListeners.size === 0)
17
+ return;
18
+ const parts = parseCacheKey(cacheKey);
19
+ if (!parts)
20
+ return;
21
+ const event = { cacheKey, ...parts };
22
+ for (const listener of updateListeners) {
23
+ listener(event);
24
+ }
25
+ }
8
26
  function startCleanupLoop() {
9
27
  if (cleanupController)
10
28
  return;
@@ -17,15 +35,14 @@ function startCleanupLoop() {
17
35
  }
18
36
  async function runCleanupLoop(signal) {
19
37
  const intervalMs = Math.floor(config.cache.ttl * 1000);
20
- for await (const _ of setIntervalPromise(intervalMs, undefined, {
38
+ for await (const getNow of setIntervalPromise(intervalMs, Date.now, {
21
39
  signal,
22
40
  ref: false,
23
41
  })) {
24
- enforceCacheLimits();
42
+ enforceCacheLimits(getNow());
25
43
  }
26
44
  }
27
- function enforceCacheLimits() {
28
- const now = Date.now();
45
+ function enforceCacheLimits(now) {
29
46
  for (const [key, item] of contentCache.entries()) {
30
47
  if (now > item.expiresAt) {
31
48
  contentCache.delete(key);
@@ -33,21 +50,6 @@ function enforceCacheLimits() {
33
50
  }
34
51
  trimCacheToMaxKeys();
35
52
  }
36
- const updateListeners = new Set();
37
- export function onCacheUpdate(listener) {
38
- updateListeners.add(listener);
39
- return () => {
40
- updateListeners.delete(listener);
41
- };
42
- }
43
- function emitCacheUpdate(cacheKey) {
44
- const parts = parseCacheKey(cacheKey);
45
- if (!parts)
46
- return;
47
- for (const listener of updateListeners) {
48
- listener({ cacheKey, ...parts });
49
- }
50
- }
51
53
  export function get(cacheKey) {
52
54
  if (!isCacheReadable(cacheKey))
53
55
  return undefined;
@@ -69,16 +71,17 @@ function runCacheOperation(cacheKey, message, operation) {
69
71
  }
70
72
  }
71
73
  function readCacheEntry(cacheKey) {
72
- return readCacheItem(cacheKey)?.entry;
74
+ const now = Date.now();
75
+ return readCacheItem(cacheKey, now)?.entry;
73
76
  }
74
- function isExpired(item) {
75
- return Date.now() > item.expiresAt;
77
+ function isExpired(item, now) {
78
+ return now > item.expiresAt;
76
79
  }
77
- function readCacheItem(cacheKey) {
80
+ function readCacheItem(cacheKey, now) {
78
81
  const item = contentCache.get(cacheKey);
79
82
  if (!item)
80
83
  return undefined;
81
- if (isExpired(item)) {
84
+ if (isExpired(item, now)) {
82
85
  contentCache.delete(cacheKey);
83
86
  return undefined;
84
87
  }
@@ -89,8 +92,15 @@ export function set(cacheKey, content, metadata) {
89
92
  return;
90
93
  runCacheOperation(cacheKey, 'Cache set error', () => {
91
94
  startCleanupLoop();
92
- const entry = buildCacheEntry(content, metadata);
93
- persistCacheEntry(cacheKey, entry);
95
+ const now = Date.now();
96
+ const expiresAtMs = now + config.cache.ttl * 1000;
97
+ const entry = buildCacheEntry({
98
+ content,
99
+ metadata,
100
+ fetchedAtMs: now,
101
+ expiresAtMs,
102
+ });
103
+ persistCacheEntry(cacheKey, entry, expiresAtMs);
94
104
  });
95
105
  }
96
106
  export function keys() {
@@ -99,20 +109,19 @@ export function keys() {
99
109
  export function isEnabled() {
100
110
  return config.cache.enabled;
101
111
  }
102
- function buildCacheEntry(content, metadata) {
112
+ function buildCacheEntry({ content, metadata, fetchedAtMs, expiresAtMs, }) {
103
113
  return {
104
114
  url: metadata.url,
105
115
  content,
106
- fetchedAt: new Date().toISOString(),
107
- expiresAt: new Date(resolveExpiryTimestamp()).toISOString(),
116
+ fetchedAt: new Date(fetchedAtMs).toISOString(),
117
+ expiresAt: new Date(expiresAtMs).toISOString(),
108
118
  ...(metadata.title === undefined ? {} : { title: metadata.title }),
109
119
  };
110
120
  }
111
- function persistCacheEntry(cacheKey, entry) {
112
- const expiresAt = resolveExpiryTimestamp();
113
- contentCache.set(cacheKey, { entry, expiresAt });
121
+ function persistCacheEntry(cacheKey, entry, expiresAtMs) {
122
+ contentCache.set(cacheKey, { entry, expiresAt: expiresAtMs });
114
123
  trimCacheToMaxKeys();
115
- emitCacheUpdate(cacheKey);
124
+ notifyCacheUpdate(cacheKey);
116
125
  }
117
126
  function trimCacheToMaxKeys() {
118
127
  if (contentCache.size <= config.cache.maxKeys)
@@ -120,19 +129,14 @@ function trimCacheToMaxKeys() {
120
129
  removeOldestEntries(contentCache.size - config.cache.maxKeys);
121
130
  }
122
131
  function removeOldestEntries(count) {
123
- if (count <= 0)
124
- return;
125
- let removed = 0;
126
- for (const key of contentCache.keys()) {
127
- contentCache.delete(key);
128
- removed += 1;
129
- if (removed >= count)
130
- return;
132
+ const iterator = contentCache.keys();
133
+ for (let removed = 0; removed < count; removed += 1) {
134
+ const next = iterator.next();
135
+ if (next.done)
136
+ break;
137
+ contentCache.delete(next.value);
131
138
  }
132
139
  }
133
- function resolveExpiryTimestamp() {
134
- return Date.now() + config.cache.ttl * 1000;
135
- }
136
140
  function logCacheError(message, cacheKey, error) {
137
141
  logWarn(message, {
138
142
  key: cacheKey.length > 100 ? cacheKey.slice(0, 100) : cacheKey,
@@ -1,6 +1,6 @@
1
1
  import { parseHTML } from 'linkedom';
2
2
  import { Readability } from '@mozilla/readability';
3
- import { getErrorMessage } from '../utils/error-utils.js';
3
+ import { getErrorMessage } from '../utils/error-details.js';
4
4
  import { isRecord } from '../utils/guards.js';
5
5
  import { truncateHtml } from '../utils/html-truncator.js';
6
6
  import { logError, logInfo, logWarn } from './logger.js';
@@ -8,21 +8,21 @@ import { extractMetadata } from './metadata-collector.js';
8
8
  function isReadabilityCompatible(doc) {
9
9
  if (!isRecord(doc))
10
10
  return false;
11
- if (!('documentElement' in doc))
12
- return false;
13
- if (typeof doc.querySelectorAll !== 'function')
14
- return false;
15
- if (typeof doc.querySelector !== 'function')
16
- return false;
17
- return true;
11
+ return hasDocumentElement(doc) && hasQuerySelectors(doc);
12
+ }
13
+ function hasDocumentElement(record) {
14
+ return 'documentElement' in record;
15
+ }
16
+ function hasQuerySelectors(record) {
17
+ return (typeof record.querySelectorAll === 'function' &&
18
+ typeof record.querySelector === 'function');
18
19
  }
19
20
  function extractArticle(document) {
20
21
  if (!isReadabilityCompatible(document)) {
21
22
  logWarn('Document not compatible with Readability');
22
23
  return null;
23
24
  }
24
- const parsed = parseReadabilityArticle(document);
25
- return parsed ? mapReadabilityResult(parsed) : null;
25
+ return mapParsedArticle(parseReadabilityArticle(document));
26
26
  }
27
27
  function parseReadabilityArticle(document) {
28
28
  try {
@@ -31,31 +31,38 @@ function parseReadabilityArticle(document) {
31
31
  return reader.parse();
32
32
  }
33
33
  catch (error) {
34
- logError('Failed to extract article with Readability', error instanceof Error ? error : undefined);
34
+ logError('Failed to extract article with Readability', asError(error));
35
35
  return null;
36
36
  }
37
37
  }
38
+ function asError(error) {
39
+ if (error instanceof Error) {
40
+ return error;
41
+ }
42
+ return undefined;
43
+ }
44
+ function mapParsedArticle(parsed) {
45
+ return parsed ? mapReadabilityResult(parsed) : null;
46
+ }
38
47
  function mapReadabilityResult(parsed) {
39
- const article = {
48
+ return {
40
49
  content: parsed.content ?? '',
41
50
  textContent: parsed.textContent ?? '',
51
+ ...buildOptionalArticleFields(parsed),
42
52
  };
43
- const title = toOptional(parsed.title);
44
- if (title !== undefined)
45
- article.title = title;
46
- const byline = toOptional(parsed.byline);
47
- if (byline !== undefined)
48
- article.byline = byline;
49
- const excerpt = toOptional(parsed.excerpt);
50
- if (excerpt !== undefined)
51
- article.excerpt = excerpt;
52
- const siteName = toOptional(parsed.siteName);
53
- if (siteName !== undefined)
54
- article.siteName = siteName;
55
- return article;
56
- }
57
- function toOptional(value) {
58
- return value ?? undefined;
53
+ }
54
+ function buildOptionalArticleFields(parsed) {
55
+ const optional = {};
56
+ addOptionalField(optional, 'title', parsed.title);
57
+ addOptionalField(optional, 'byline', parsed.byline);
58
+ addOptionalField(optional, 'excerpt', parsed.excerpt);
59
+ addOptionalField(optional, 'siteName', parsed.siteName);
60
+ return optional;
61
+ }
62
+ function addOptionalField(target, key, value) {
63
+ if (value == null)
64
+ return;
65
+ target[key] = value;
59
66
  }
60
67
  export function extractContent(html, url, options = { extractArticle: true }) {
61
68
  if (!isValidInput(html, url)) {
@@ -69,7 +76,7 @@ function tryExtractContent(html, url, options) {
69
76
  applyBaseUri(document, url);
70
77
  const metadata = extractMetadata(document);
71
78
  return {
72
- article: options.extractArticle ? extractArticle(document) : null,
79
+ article: resolveArticleExtraction(document, options.extractArticle),
73
80
  metadata,
74
81
  };
75
82
  }
@@ -79,15 +86,19 @@ function tryExtractContent(html, url, options) {
79
86
  }
80
87
  }
81
88
  function isValidInput(html, url) {
82
- if (!html || typeof html !== 'string') {
83
- logWarn('extractContent called with invalid HTML input');
84
- return false;
85
- }
86
- if (!url || typeof url !== 'string') {
87
- logWarn('extractContent called with invalid URL');
88
- return false;
89
- }
90
- return true;
89
+ return (validateRequiredString(html, 'extractContent called with invalid HTML input') && validateRequiredString(url, 'extractContent called with invalid URL'));
90
+ }
91
+ function validateRequiredString(value, message) {
92
+ if (isNonEmptyString(value))
93
+ return true;
94
+ logWarn(message);
95
+ return false;
96
+ }
97
+ function isNonEmptyString(value) {
98
+ return typeof value === 'string' && value.length > 0;
99
+ }
100
+ function resolveArticleExtraction(document, shouldExtract) {
101
+ return shouldExtract ? extractArticle(document) : null;
91
102
  }
92
103
  function applyBaseUri(document, url) {
93
104
  try {
@@ -1,7 +1,7 @@
1
1
  import dns from 'node:dns';
2
2
  import os from 'node:os';
3
3
  import { Agent } from 'undici';
4
- import { createErrorWithCode } from '../../utils/error-utils.js';
4
+ import { createErrorWithCode } from '../../utils/error-details.js';
5
5
  import { isRecord } from '../../utils/guards.js';
6
6
  import { handleLookupResult } from './dns-selection.js';
7
7
  const DNS_LOOKUP_TIMEOUT_MS = 5000;
@@ -1,4 +1,4 @@
1
- import { createErrorWithCode } from '../../utils/error-utils.js';
1
+ import { createErrorWithCode } from '../../utils/error-details.js';
2
2
  import { isBlockedIp } from '../../utils/url-validator.js';
3
3
  function normalizeLookupResults(addresses, family) {
4
4
  if (Array.isArray(addresses)) {
@@ -1,7 +1,7 @@
1
1
  import { randomUUID } from 'node:crypto';
2
2
  import diagnosticsChannel from 'node:diagnostics_channel';
3
3
  import { performance } from 'node:perf_hooks';
4
- import { isSystemError } from '../../utils/error-utils.js';
4
+ import { isSystemError } from '../../utils/error-details.js';
5
5
  import { logDebug, logError, logWarn } from '../logger.js';
6
6
  const fetchChannel = diagnosticsChannel.channel('superfetch.fetch');
7
7
  function redactUrl(rawUrl) {
@@ -27,7 +27,14 @@ function publishFetchEvent(event) {
27
27
  // Avoid crashing the publisher if a subscriber throws.
28
28
  }
29
29
  }
30
- function publishAndLogFetchStart(context) {
30
+ export function startFetchTelemetry(url, method) {
31
+ const safeUrl = redactUrl(url);
32
+ const context = {
33
+ requestId: randomUUID(),
34
+ startTime: performance.now(),
35
+ url: safeUrl,
36
+ method: method.toUpperCase(),
37
+ };
31
38
  publishFetchEvent({
32
39
  v: 1,
33
40
  type: 'start',
@@ -40,65 +47,40 @@ function publishAndLogFetchStart(context) {
40
47
  method: context.method,
41
48
  url: context.url,
42
49
  });
43
- }
44
- export function startFetchTelemetry(url, method) {
45
- const safeUrl = redactUrl(url);
46
- const context = {
47
- requestId: randomUUID(),
48
- startTime: performance.now(),
49
- url: safeUrl,
50
- method: method.toUpperCase(),
51
- };
52
- publishAndLogFetchStart(context);
53
50
  return context;
54
51
  }
55
52
  export function recordFetchResponse(context, response, contentSize) {
56
53
  const duration = performance.now() - context.startTime;
57
- publishFetchEnd(context, response.status, duration);
58
- logDebug('HTTP Response', {
59
- requestId: context.requestId,
60
- status: response.status,
61
- url: context.url,
62
- ...buildResponseMeta(response, contentSize, duration),
63
- });
64
- logSlowRequestIfNeeded(context, duration);
65
- }
66
- function publishFetchEnd(context, status, duration) {
54
+ const durationLabel = `${Math.round(duration)}ms`;
67
55
  publishFetchEvent({
68
56
  v: 1,
69
57
  type: 'end',
70
58
  requestId: context.requestId,
71
- status,
59
+ status: response.status,
72
60
  duration,
73
61
  });
74
- }
75
- function buildResponseMeta(response, contentSize, duration) {
76
- const contentLength = response.headers.get('content-length') ?? contentSize?.toString();
77
- const meta = {
78
- duration: `${Math.round(duration)}ms`,
79
- };
80
62
  const contentType = response.headers.get('content-type');
81
- if (contentType !== null) {
82
- meta.contentType = contentType;
83
- }
84
- if (contentLength !== undefined) {
85
- meta.size = contentLength;
86
- }
87
- return meta;
88
- }
89
- function logSlowRequestIfNeeded(context, duration) {
90
- if (duration <= 5000)
91
- return;
92
- logWarn('Slow HTTP request detected', {
63
+ const contentLength = response.headers.get('content-length') ??
64
+ (contentSize === undefined ? undefined : String(contentSize));
65
+ logDebug('HTTP Response', {
93
66
  requestId: context.requestId,
67
+ status: response.status,
94
68
  url: context.url,
95
- duration: `${Math.round(duration)}ms`,
69
+ duration: durationLabel,
70
+ ...(contentType ? { contentType } : {}),
71
+ ...(contentLength ? { size: contentLength } : {}),
96
72
  });
73
+ if (duration > 5000) {
74
+ logWarn('Slow HTTP request detected', {
75
+ requestId: context.requestId,
76
+ url: context.url,
77
+ duration: durationLabel,
78
+ });
79
+ }
97
80
  }
98
- function normalizeError(error) {
99
- return error instanceof Error ? error : new Error(String(error));
100
- }
101
- function buildFetchErrorEvent(context, err, duration, status) {
81
+ export function recordFetchError(context, error, status) {
82
+ const duration = performance.now() - context.startTime;
83
+ const err = error instanceof Error ? error : new Error(String(error));
102
84
  const event = {
103
85
  v: 1,
104
86
  type: 'error',
@@ -107,10 +89,6 @@ function buildFetchErrorEvent(context, err, duration, status) {
107
89
  error: err.message,
108
90
  duration,
109
91
  };
110
- addOptionalErrorFields(event, err, status);
111
- return event;
112
- }
113
- function addOptionalErrorFields(event, err, status) {
114
92
  const code = isSystemError(err) ? err.code : undefined;
115
93
  if (code !== undefined) {
116
94
  event.code = code;
@@ -118,17 +96,8 @@ function addOptionalErrorFields(event, err, status) {
118
96
  if (status !== undefined) {
119
97
  event.status = status;
120
98
  }
121
- }
122
- function selectErrorLogger(status) {
123
- return status === 429 ? logWarn : logError;
124
- }
125
- export function recordFetchError(context, error, status) {
126
- const duration = performance.now() - context.startTime;
127
- const err = normalizeError(error);
128
- const event = buildFetchErrorEvent(context, err, duration, status);
129
99
  publishFetchEvent(event);
130
- const log = selectErrorLogger(status);
131
- const code = isSystemError(err) ? err.code : undefined;
100
+ const log = status === 429 ? logWarn : logError;
132
101
  log('HTTP Request Error', {
133
102
  requestId: context.requestId,
134
103
  url: context.url,