@j0hanz/superfetch 2.0.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +28 -17
- package/dist/config/index.js +11 -6
- package/dist/http/auth.js +161 -2
- package/dist/http/host-allowlist.d.ts +3 -0
- package/dist/http/host-allowlist.js +117 -0
- package/dist/http/mcp-routes.d.ts +8 -2
- package/dist/http/mcp-routes.js +101 -8
- package/dist/http/mcp-session-eviction.d.ts +3 -0
- package/dist/http/mcp-session-eviction.js +24 -0
- package/dist/http/mcp-session-init.d.ts +7 -0
- package/dist/http/mcp-session-init.js +94 -0
- package/dist/http/mcp-session-slots.d.ts +17 -0
- package/dist/http/mcp-session-slots.js +55 -0
- package/dist/http/mcp-session-transport-init.d.ts +7 -0
- package/dist/http/mcp-session-transport-init.js +41 -0
- package/dist/http/mcp-session-types.d.ts +5 -0
- package/dist/http/mcp-session-types.js +1 -0
- package/dist/http/mcp-session.d.ts +9 -9
- package/dist/http/mcp-session.js +5 -114
- package/dist/http/mcp-sessions.d.ts +43 -0
- package/dist/http/mcp-sessions.js +392 -0
- package/dist/http/rate-limit.js +2 -2
- package/dist/http/server-middleware.d.ts +6 -1
- package/dist/http/server-middleware.js +3 -117
- package/dist/http/server-shutdown.js +1 -1
- package/dist/http/server.d.ts +10 -0
- package/dist/http/server.js +508 -11
- package/dist/http/session-cleanup.js +8 -5
- package/dist/middleware/error-handler.d.ts +1 -1
- package/dist/middleware/error-handler.js +31 -30
- package/dist/resources/cached-content-params.d.ts +5 -0
- package/dist/resources/cached-content-params.js +36 -0
- package/dist/resources/cached-content.js +33 -33
- package/dist/server.js +1 -1
- package/dist/services/cache-events.d.ts +8 -0
- package/dist/services/cache-events.js +19 -0
- package/dist/services/cache.d.ts +5 -4
- package/dist/services/cache.js +49 -45
- package/dist/services/extractor.js +49 -38
- package/dist/services/fetcher/agents.js +1 -1
- package/dist/services/fetcher/dns-selection.js +1 -1
- package/dist/services/fetcher/interceptors.js +29 -60
- package/dist/services/fetcher/redirects.js +12 -4
- package/dist/services/fetcher/response.js +18 -8
- package/dist/services/fetcher.d.ts +21 -0
- package/dist/services/fetcher.js +532 -13
- package/dist/tools/handlers/fetch-single.shared.d.ts +11 -3
- package/dist/tools/handlers/fetch-single.shared.js +131 -2
- package/dist/tools/handlers/fetch-url.tool.d.ts +6 -0
- package/dist/tools/handlers/fetch-url.tool.js +48 -6
- package/dist/tools/utils/content-shaping.js +19 -4
- package/dist/tools/utils/content-transform.d.ts +4 -1
- package/dist/tools/utils/content-transform.js +110 -96
- package/dist/tools/utils/fetch-pipeline.js +47 -56
- package/dist/tools/utils/frontmatter.d.ts +3 -0
- package/dist/tools/utils/frontmatter.js +73 -0
- package/dist/tools/utils/markdown-heuristics.d.ts +1 -0
- package/dist/tools/utils/markdown-heuristics.js +19 -0
- package/dist/tools/utils/markdown-signals.d.ts +1 -0
- package/dist/tools/utils/markdown-signals.js +19 -0
- package/dist/tools/utils/raw-markdown-frontmatter.d.ts +3 -0
- package/dist/tools/utils/raw-markdown-frontmatter.js +73 -0
- package/dist/tools/utils/raw-markdown.d.ts +6 -0
- package/dist/tools/utils/raw-markdown.js +135 -0
- package/dist/transformers/markdown/fenced-code-rule.d.ts +2 -0
- package/dist/transformers/markdown/fenced-code-rule.js +38 -0
- package/dist/transformers/markdown/frontmatter.d.ts +2 -0
- package/dist/transformers/markdown/frontmatter.js +45 -0
- package/dist/transformers/markdown/noise-rule.d.ts +2 -0
- package/dist/transformers/markdown/noise-rule.js +80 -0
- package/dist/transformers/markdown/turndown-instance.d.ts +2 -0
- package/dist/transformers/markdown/turndown-instance.js +19 -0
- package/dist/transformers/markdown.d.ts +2 -0
- package/dist/transformers/markdown.js +185 -0
- package/dist/transformers/markdown.transformer.js +2 -189
- package/dist/utils/code-language-bash.d.ts +1 -0
- package/dist/utils/code-language-bash.js +48 -0
- package/dist/utils/code-language-core.d.ts +2 -0
- package/dist/utils/code-language-core.js +13 -0
- package/dist/utils/code-language-detectors.d.ts +5 -0
- package/dist/utils/code-language-detectors.js +142 -0
- package/dist/utils/code-language-helpers.d.ts +5 -0
- package/dist/utils/code-language-helpers.js +62 -0
- package/dist/utils/code-language-parsing.d.ts +5 -0
- package/dist/utils/code-language-parsing.js +62 -0
- package/dist/utils/code-language.d.ts +9 -0
- package/dist/utils/code-language.js +250 -46
- package/dist/utils/error-details.d.ts +3 -0
- package/dist/utils/error-details.js +12 -0
- package/dist/utils/filename-generator.js +14 -3
- package/dist/utils/ip-address.d.ts +4 -0
- package/dist/utils/ip-address.js +6 -0
- package/dist/utils/tool-error-handler.js +12 -17
- package/dist/utils/url-validator.js +33 -21
- package/package.json +7 -5
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import { ErrorCode, McpError } from '@modelcontextprotocol/sdk/types.js';
|
|
2
|
+
import { isRecord } from '../utils/guards.js';
|
|
3
|
+
export const CACHE_NAMESPACE = 'markdown';
|
|
4
|
+
const HASH_PATTERN = /^[a-f0-9.]+$/i;
|
|
5
|
+
export function resolveCacheParams(params) {
|
|
6
|
+
const parsed = requireRecordParams(params);
|
|
7
|
+
const namespace = requireParamString(parsed, 'namespace');
|
|
8
|
+
const urlHash = requireParamString(parsed, 'urlHash');
|
|
9
|
+
if (!isValidNamespace(namespace) || !isValidHash(urlHash)) {
|
|
10
|
+
throw new McpError(ErrorCode.InvalidParams, 'Invalid cache resource parameters');
|
|
11
|
+
}
|
|
12
|
+
return { namespace, urlHash };
|
|
13
|
+
}
|
|
14
|
+
function requireRecordParams(value) {
|
|
15
|
+
if (!isRecord(value)) {
|
|
16
|
+
throw new McpError(ErrorCode.InvalidParams, 'Invalid cache resource parameters');
|
|
17
|
+
}
|
|
18
|
+
return value;
|
|
19
|
+
}
|
|
20
|
+
function requireParamString(params, key) {
|
|
21
|
+
const raw = params[key];
|
|
22
|
+
const resolved = resolveStringParam(raw);
|
|
23
|
+
if (!resolved) {
|
|
24
|
+
throw new McpError(ErrorCode.InvalidParams, 'Both namespace and urlHash parameters are required');
|
|
25
|
+
}
|
|
26
|
+
return resolved;
|
|
27
|
+
}
|
|
28
|
+
function isValidNamespace(namespace) {
|
|
29
|
+
return namespace === CACHE_NAMESPACE;
|
|
30
|
+
}
|
|
31
|
+
function isValidHash(hash) {
|
|
32
|
+
return HASH_PATTERN.test(hash) && hash.length >= 8 && hash.length <= 64;
|
|
33
|
+
}
|
|
34
|
+
function resolveStringParam(value) {
|
|
35
|
+
return typeof value === 'string' ? value : null;
|
|
36
|
+
}
|
|
@@ -4,10 +4,42 @@ import * as cache from '../services/cache.js';
|
|
|
4
4
|
import { parseCacheKey, toResourceUri } from '../services/cache-keys.js';
|
|
5
5
|
import { logWarn } from '../services/logger.js';
|
|
6
6
|
import { parseCachedPayload, resolveCachedPayloadContent, } from '../utils/cached-payload.js';
|
|
7
|
-
import { getErrorMessage } from '../utils/error-
|
|
7
|
+
import { getErrorMessage } from '../utils/error-details.js';
|
|
8
8
|
import { isRecord } from '../utils/guards.js';
|
|
9
9
|
const CACHE_NAMESPACE = 'markdown';
|
|
10
10
|
const HASH_PATTERN = /^[a-f0-9.]+$/i;
|
|
11
|
+
function resolveCacheParams(params) {
|
|
12
|
+
const parsed = requireRecordParams(params);
|
|
13
|
+
const namespace = requireParamString(parsed, 'namespace');
|
|
14
|
+
const urlHash = requireParamString(parsed, 'urlHash');
|
|
15
|
+
if (!isValidNamespace(namespace) || !isValidHash(urlHash)) {
|
|
16
|
+
throw new McpError(ErrorCode.InvalidParams, 'Invalid cache resource parameters');
|
|
17
|
+
}
|
|
18
|
+
return { namespace, urlHash };
|
|
19
|
+
}
|
|
20
|
+
function requireRecordParams(value) {
|
|
21
|
+
if (!isRecord(value)) {
|
|
22
|
+
throw new McpError(ErrorCode.InvalidParams, 'Invalid cache resource parameters');
|
|
23
|
+
}
|
|
24
|
+
return value;
|
|
25
|
+
}
|
|
26
|
+
function requireParamString(params, key) {
|
|
27
|
+
const raw = params[key];
|
|
28
|
+
const resolved = resolveStringParam(raw);
|
|
29
|
+
if (!resolved) {
|
|
30
|
+
throw new McpError(ErrorCode.InvalidParams, 'Both namespace and urlHash parameters are required');
|
|
31
|
+
}
|
|
32
|
+
return resolved;
|
|
33
|
+
}
|
|
34
|
+
function isValidNamespace(namespace) {
|
|
35
|
+
return namespace === CACHE_NAMESPACE;
|
|
36
|
+
}
|
|
37
|
+
function isValidHash(hash) {
|
|
38
|
+
return HASH_PATTERN.test(hash) && hash.length >= 8 && hash.length <= 64;
|
|
39
|
+
}
|
|
40
|
+
function resolveStringParam(value) {
|
|
41
|
+
return typeof value === 'string' ? value : null;
|
|
42
|
+
}
|
|
11
43
|
function buildResourceEntry(namespace, urlHash) {
|
|
12
44
|
return {
|
|
13
45
|
name: `${namespace}:${urlHash}`,
|
|
@@ -42,29 +74,6 @@ export function registerCachedContentResource(server) {
|
|
|
42
74
|
registerCacheContentResource(server);
|
|
43
75
|
registerCacheUpdateSubscription(server);
|
|
44
76
|
}
|
|
45
|
-
function resolveCacheParams(params) {
|
|
46
|
-
const parsed = requireRecordParams(params);
|
|
47
|
-
const namespace = requireParamString(parsed, 'namespace');
|
|
48
|
-
const urlHash = requireParamString(parsed, 'urlHash');
|
|
49
|
-
if (!isValidNamespace(namespace) || !isValidHash(urlHash)) {
|
|
50
|
-
throw new McpError(ErrorCode.InvalidParams, 'Invalid cache resource parameters');
|
|
51
|
-
}
|
|
52
|
-
return { namespace, urlHash };
|
|
53
|
-
}
|
|
54
|
-
function requireRecordParams(value) {
|
|
55
|
-
if (!isRecord(value)) {
|
|
56
|
-
throw new McpError(ErrorCode.InvalidParams, 'Invalid cache resource parameters');
|
|
57
|
-
}
|
|
58
|
-
return value;
|
|
59
|
-
}
|
|
60
|
-
function requireParamString(params, key) {
|
|
61
|
-
const raw = params[key];
|
|
62
|
-
const resolved = resolveStringParam(raw);
|
|
63
|
-
if (!resolved) {
|
|
64
|
-
throw new McpError(ErrorCode.InvalidParams, 'Both namespace and urlHash parameters are required');
|
|
65
|
-
}
|
|
66
|
-
return resolved;
|
|
67
|
-
}
|
|
68
77
|
function buildCachedContentResponse(uri, cacheKey) {
|
|
69
78
|
const cached = requireCacheEntry(cacheKey);
|
|
70
79
|
return buildMarkdownContentResponse(uri, cached.content);
|
|
@@ -98,15 +107,6 @@ function registerCacheUpdateSubscription(server) {
|
|
|
98
107
|
unsubscribe();
|
|
99
108
|
};
|
|
100
109
|
}
|
|
101
|
-
function isValidNamespace(namespace) {
|
|
102
|
-
return namespace === CACHE_NAMESPACE;
|
|
103
|
-
}
|
|
104
|
-
function isValidHash(hash) {
|
|
105
|
-
return HASH_PATTERN.test(hash) && hash.length >= 8 && hash.length <= 64;
|
|
106
|
-
}
|
|
107
|
-
function resolveStringParam(value) {
|
|
108
|
-
return typeof value === 'string' ? value : null;
|
|
109
|
-
}
|
|
110
110
|
function requireCacheEntry(cacheKey) {
|
|
111
111
|
const cached = cache.get(cacheKey);
|
|
112
112
|
if (!cached) {
|
package/dist/server.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
|
2
2
|
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
|
|
3
3
|
import { config } from './config/index.js';
|
|
4
|
-
import { destroyAgents } from './services/fetcher
|
|
4
|
+
import { destroyAgents } from './services/fetcher.js';
|
|
5
5
|
import { logError, logInfo } from './services/logger.js';
|
|
6
6
|
import { registerTools } from './tools/index.js';
|
|
7
7
|
import { registerCachedContentResource } from './resources/cached-content.js';
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import type { CacheKeyParts } from './cache-keys.js';
|
|
2
|
+
export interface CacheUpdateEvent extends CacheKeyParts {
|
|
3
|
+
cacheKey: string;
|
|
4
|
+
}
|
|
5
|
+
type CacheUpdateListener = (event: CacheUpdateEvent) => void;
|
|
6
|
+
export declare function onCacheUpdate(listener: CacheUpdateListener): () => void;
|
|
7
|
+
export declare function notifyCacheUpdate(cacheKey: string): void;
|
|
8
|
+
export {};
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import { parseCacheKey } from './cache-keys.js';
|
|
2
|
+
const updateListeners = new Set();
|
|
3
|
+
export function onCacheUpdate(listener) {
|
|
4
|
+
updateListeners.add(listener);
|
|
5
|
+
return () => {
|
|
6
|
+
updateListeners.delete(listener);
|
|
7
|
+
};
|
|
8
|
+
}
|
|
9
|
+
export function notifyCacheUpdate(cacheKey) {
|
|
10
|
+
if (updateListeners.size === 0)
|
|
11
|
+
return;
|
|
12
|
+
const parts = parseCacheKey(cacheKey);
|
|
13
|
+
if (!parts)
|
|
14
|
+
return;
|
|
15
|
+
const event = { cacheKey, ...parts };
|
|
16
|
+
for (const listener of updateListeners) {
|
|
17
|
+
listener(event);
|
|
18
|
+
}
|
|
19
|
+
}
|
package/dist/services/cache.d.ts
CHANGED
|
@@ -1,14 +1,15 @@
|
|
|
1
1
|
import type { CacheEntry } from '../config/types/content.js';
|
|
2
|
-
|
|
3
|
-
interface CacheUpdateEvent extends CacheKeyParts {
|
|
2
|
+
export interface CacheUpdateEvent {
|
|
4
3
|
cacheKey: string;
|
|
4
|
+
namespace: string;
|
|
5
|
+
urlHash: string;
|
|
5
6
|
}
|
|
7
|
+
type CacheUpdateListener = (event: CacheUpdateEvent) => void;
|
|
8
|
+
export declare function onCacheUpdate(listener: CacheUpdateListener): () => void;
|
|
6
9
|
interface CacheEntryMetadata {
|
|
7
10
|
url: string;
|
|
8
11
|
title?: string;
|
|
9
12
|
}
|
|
10
|
-
type CacheUpdateListener = (event: CacheUpdateEvent) => void;
|
|
11
|
-
export declare function onCacheUpdate(listener: CacheUpdateListener): () => void;
|
|
12
13
|
export declare function get(cacheKey: string | null): CacheEntry | undefined;
|
|
13
14
|
export declare function set(cacheKey: string | null, content: string, metadata: CacheEntryMetadata): void;
|
|
14
15
|
export declare function keys(): readonly string[];
|
package/dist/services/cache.js
CHANGED
|
@@ -1,10 +1,28 @@
|
|
|
1
1
|
import { setInterval as setIntervalPromise } from 'node:timers/promises';
|
|
2
2
|
import { config } from '../config/index.js';
|
|
3
|
-
import { getErrorMessage } from '../utils/error-
|
|
3
|
+
import { getErrorMessage } from '../utils/error-details.js';
|
|
4
4
|
import { parseCacheKey } from './cache-keys.js';
|
|
5
5
|
import { logWarn } from './logger.js';
|
|
6
6
|
const contentCache = new Map();
|
|
7
7
|
let cleanupController = null;
|
|
8
|
+
const updateListeners = new Set();
|
|
9
|
+
export function onCacheUpdate(listener) {
|
|
10
|
+
updateListeners.add(listener);
|
|
11
|
+
return () => {
|
|
12
|
+
updateListeners.delete(listener);
|
|
13
|
+
};
|
|
14
|
+
}
|
|
15
|
+
function notifyCacheUpdate(cacheKey) {
|
|
16
|
+
if (updateListeners.size === 0)
|
|
17
|
+
return;
|
|
18
|
+
const parts = parseCacheKey(cacheKey);
|
|
19
|
+
if (!parts)
|
|
20
|
+
return;
|
|
21
|
+
const event = { cacheKey, ...parts };
|
|
22
|
+
for (const listener of updateListeners) {
|
|
23
|
+
listener(event);
|
|
24
|
+
}
|
|
25
|
+
}
|
|
8
26
|
function startCleanupLoop() {
|
|
9
27
|
if (cleanupController)
|
|
10
28
|
return;
|
|
@@ -17,15 +35,14 @@ function startCleanupLoop() {
|
|
|
17
35
|
}
|
|
18
36
|
async function runCleanupLoop(signal) {
|
|
19
37
|
const intervalMs = Math.floor(config.cache.ttl * 1000);
|
|
20
|
-
for await (const
|
|
38
|
+
for await (const getNow of setIntervalPromise(intervalMs, Date.now, {
|
|
21
39
|
signal,
|
|
22
40
|
ref: false,
|
|
23
41
|
})) {
|
|
24
|
-
enforceCacheLimits();
|
|
42
|
+
enforceCacheLimits(getNow());
|
|
25
43
|
}
|
|
26
44
|
}
|
|
27
|
-
function enforceCacheLimits() {
|
|
28
|
-
const now = Date.now();
|
|
45
|
+
function enforceCacheLimits(now) {
|
|
29
46
|
for (const [key, item] of contentCache.entries()) {
|
|
30
47
|
if (now > item.expiresAt) {
|
|
31
48
|
contentCache.delete(key);
|
|
@@ -33,21 +50,6 @@ function enforceCacheLimits() {
|
|
|
33
50
|
}
|
|
34
51
|
trimCacheToMaxKeys();
|
|
35
52
|
}
|
|
36
|
-
const updateListeners = new Set();
|
|
37
|
-
export function onCacheUpdate(listener) {
|
|
38
|
-
updateListeners.add(listener);
|
|
39
|
-
return () => {
|
|
40
|
-
updateListeners.delete(listener);
|
|
41
|
-
};
|
|
42
|
-
}
|
|
43
|
-
function emitCacheUpdate(cacheKey) {
|
|
44
|
-
const parts = parseCacheKey(cacheKey);
|
|
45
|
-
if (!parts)
|
|
46
|
-
return;
|
|
47
|
-
for (const listener of updateListeners) {
|
|
48
|
-
listener({ cacheKey, ...parts });
|
|
49
|
-
}
|
|
50
|
-
}
|
|
51
53
|
export function get(cacheKey) {
|
|
52
54
|
if (!isCacheReadable(cacheKey))
|
|
53
55
|
return undefined;
|
|
@@ -69,16 +71,17 @@ function runCacheOperation(cacheKey, message, operation) {
|
|
|
69
71
|
}
|
|
70
72
|
}
|
|
71
73
|
function readCacheEntry(cacheKey) {
|
|
72
|
-
|
|
74
|
+
const now = Date.now();
|
|
75
|
+
return readCacheItem(cacheKey, now)?.entry;
|
|
73
76
|
}
|
|
74
|
-
function isExpired(item) {
|
|
75
|
-
return
|
|
77
|
+
function isExpired(item, now) {
|
|
78
|
+
return now > item.expiresAt;
|
|
76
79
|
}
|
|
77
|
-
function readCacheItem(cacheKey) {
|
|
80
|
+
function readCacheItem(cacheKey, now) {
|
|
78
81
|
const item = contentCache.get(cacheKey);
|
|
79
82
|
if (!item)
|
|
80
83
|
return undefined;
|
|
81
|
-
if (isExpired(item)) {
|
|
84
|
+
if (isExpired(item, now)) {
|
|
82
85
|
contentCache.delete(cacheKey);
|
|
83
86
|
return undefined;
|
|
84
87
|
}
|
|
@@ -89,8 +92,15 @@ export function set(cacheKey, content, metadata) {
|
|
|
89
92
|
return;
|
|
90
93
|
runCacheOperation(cacheKey, 'Cache set error', () => {
|
|
91
94
|
startCleanupLoop();
|
|
92
|
-
const
|
|
93
|
-
|
|
95
|
+
const now = Date.now();
|
|
96
|
+
const expiresAtMs = now + config.cache.ttl * 1000;
|
|
97
|
+
const entry = buildCacheEntry({
|
|
98
|
+
content,
|
|
99
|
+
metadata,
|
|
100
|
+
fetchedAtMs: now,
|
|
101
|
+
expiresAtMs,
|
|
102
|
+
});
|
|
103
|
+
persistCacheEntry(cacheKey, entry, expiresAtMs);
|
|
94
104
|
});
|
|
95
105
|
}
|
|
96
106
|
export function keys() {
|
|
@@ -99,20 +109,19 @@ export function keys() {
|
|
|
99
109
|
export function isEnabled() {
|
|
100
110
|
return config.cache.enabled;
|
|
101
111
|
}
|
|
102
|
-
function buildCacheEntry(content, metadata) {
|
|
112
|
+
function buildCacheEntry({ content, metadata, fetchedAtMs, expiresAtMs, }) {
|
|
103
113
|
return {
|
|
104
114
|
url: metadata.url,
|
|
105
115
|
content,
|
|
106
|
-
fetchedAt: new Date().toISOString(),
|
|
107
|
-
expiresAt: new Date(
|
|
116
|
+
fetchedAt: new Date(fetchedAtMs).toISOString(),
|
|
117
|
+
expiresAt: new Date(expiresAtMs).toISOString(),
|
|
108
118
|
...(metadata.title === undefined ? {} : { title: metadata.title }),
|
|
109
119
|
};
|
|
110
120
|
}
|
|
111
|
-
function persistCacheEntry(cacheKey, entry) {
|
|
112
|
-
|
|
113
|
-
contentCache.set(cacheKey, { entry, expiresAt });
|
|
121
|
+
function persistCacheEntry(cacheKey, entry, expiresAtMs) {
|
|
122
|
+
contentCache.set(cacheKey, { entry, expiresAt: expiresAtMs });
|
|
114
123
|
trimCacheToMaxKeys();
|
|
115
|
-
|
|
124
|
+
notifyCacheUpdate(cacheKey);
|
|
116
125
|
}
|
|
117
126
|
function trimCacheToMaxKeys() {
|
|
118
127
|
if (contentCache.size <= config.cache.maxKeys)
|
|
@@ -120,19 +129,14 @@ function trimCacheToMaxKeys() {
|
|
|
120
129
|
removeOldestEntries(contentCache.size - config.cache.maxKeys);
|
|
121
130
|
}
|
|
122
131
|
function removeOldestEntries(count) {
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
if (removed >= count)
|
|
130
|
-
return;
|
|
132
|
+
const iterator = contentCache.keys();
|
|
133
|
+
for (let removed = 0; removed < count; removed += 1) {
|
|
134
|
+
const next = iterator.next();
|
|
135
|
+
if (next.done)
|
|
136
|
+
break;
|
|
137
|
+
contentCache.delete(next.value);
|
|
131
138
|
}
|
|
132
139
|
}
|
|
133
|
-
function resolveExpiryTimestamp() {
|
|
134
|
-
return Date.now() + config.cache.ttl * 1000;
|
|
135
|
-
}
|
|
136
140
|
function logCacheError(message, cacheKey, error) {
|
|
137
141
|
logWarn(message, {
|
|
138
142
|
key: cacheKey.length > 100 ? cacheKey.slice(0, 100) : cacheKey,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { parseHTML } from 'linkedom';
|
|
2
2
|
import { Readability } from '@mozilla/readability';
|
|
3
|
-
import { getErrorMessage } from '../utils/error-
|
|
3
|
+
import { getErrorMessage } from '../utils/error-details.js';
|
|
4
4
|
import { isRecord } from '../utils/guards.js';
|
|
5
5
|
import { truncateHtml } from '../utils/html-truncator.js';
|
|
6
6
|
import { logError, logInfo, logWarn } from './logger.js';
|
|
@@ -8,21 +8,21 @@ import { extractMetadata } from './metadata-collector.js';
|
|
|
8
8
|
function isReadabilityCompatible(doc) {
|
|
9
9
|
if (!isRecord(doc))
|
|
10
10
|
return false;
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
return
|
|
11
|
+
return hasDocumentElement(doc) && hasQuerySelectors(doc);
|
|
12
|
+
}
|
|
13
|
+
function hasDocumentElement(record) {
|
|
14
|
+
return 'documentElement' in record;
|
|
15
|
+
}
|
|
16
|
+
function hasQuerySelectors(record) {
|
|
17
|
+
return (typeof record.querySelectorAll === 'function' &&
|
|
18
|
+
typeof record.querySelector === 'function');
|
|
18
19
|
}
|
|
19
20
|
function extractArticle(document) {
|
|
20
21
|
if (!isReadabilityCompatible(document)) {
|
|
21
22
|
logWarn('Document not compatible with Readability');
|
|
22
23
|
return null;
|
|
23
24
|
}
|
|
24
|
-
|
|
25
|
-
return parsed ? mapReadabilityResult(parsed) : null;
|
|
25
|
+
return mapParsedArticle(parseReadabilityArticle(document));
|
|
26
26
|
}
|
|
27
27
|
function parseReadabilityArticle(document) {
|
|
28
28
|
try {
|
|
@@ -31,31 +31,38 @@ function parseReadabilityArticle(document) {
|
|
|
31
31
|
return reader.parse();
|
|
32
32
|
}
|
|
33
33
|
catch (error) {
|
|
34
|
-
logError('Failed to extract article with Readability', error
|
|
34
|
+
logError('Failed to extract article with Readability', asError(error));
|
|
35
35
|
return null;
|
|
36
36
|
}
|
|
37
37
|
}
|
|
38
|
+
function asError(error) {
|
|
39
|
+
if (error instanceof Error) {
|
|
40
|
+
return error;
|
|
41
|
+
}
|
|
42
|
+
return undefined;
|
|
43
|
+
}
|
|
44
|
+
function mapParsedArticle(parsed) {
|
|
45
|
+
return parsed ? mapReadabilityResult(parsed) : null;
|
|
46
|
+
}
|
|
38
47
|
function mapReadabilityResult(parsed) {
|
|
39
|
-
|
|
48
|
+
return {
|
|
40
49
|
content: parsed.content ?? '',
|
|
41
50
|
textContent: parsed.textContent ?? '',
|
|
51
|
+
...buildOptionalArticleFields(parsed),
|
|
42
52
|
};
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
if (
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
}
|
|
57
|
-
function toOptional(value) {
|
|
58
|
-
return value ?? undefined;
|
|
53
|
+
}
|
|
54
|
+
function buildOptionalArticleFields(parsed) {
|
|
55
|
+
const optional = {};
|
|
56
|
+
addOptionalField(optional, 'title', parsed.title);
|
|
57
|
+
addOptionalField(optional, 'byline', parsed.byline);
|
|
58
|
+
addOptionalField(optional, 'excerpt', parsed.excerpt);
|
|
59
|
+
addOptionalField(optional, 'siteName', parsed.siteName);
|
|
60
|
+
return optional;
|
|
61
|
+
}
|
|
62
|
+
function addOptionalField(target, key, value) {
|
|
63
|
+
if (value == null)
|
|
64
|
+
return;
|
|
65
|
+
target[key] = value;
|
|
59
66
|
}
|
|
60
67
|
export function extractContent(html, url, options = { extractArticle: true }) {
|
|
61
68
|
if (!isValidInput(html, url)) {
|
|
@@ -69,7 +76,7 @@ function tryExtractContent(html, url, options) {
|
|
|
69
76
|
applyBaseUri(document, url);
|
|
70
77
|
const metadata = extractMetadata(document);
|
|
71
78
|
return {
|
|
72
|
-
article: options.extractArticle
|
|
79
|
+
article: resolveArticleExtraction(document, options.extractArticle),
|
|
73
80
|
metadata,
|
|
74
81
|
};
|
|
75
82
|
}
|
|
@@ -79,15 +86,19 @@ function tryExtractContent(html, url, options) {
|
|
|
79
86
|
}
|
|
80
87
|
}
|
|
81
88
|
function isValidInput(html, url) {
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
89
|
+
return (validateRequiredString(html, 'extractContent called with invalid HTML input') && validateRequiredString(url, 'extractContent called with invalid URL'));
|
|
90
|
+
}
|
|
91
|
+
function validateRequiredString(value, message) {
|
|
92
|
+
if (isNonEmptyString(value))
|
|
93
|
+
return true;
|
|
94
|
+
logWarn(message);
|
|
95
|
+
return false;
|
|
96
|
+
}
|
|
97
|
+
function isNonEmptyString(value) {
|
|
98
|
+
return typeof value === 'string' && value.length > 0;
|
|
99
|
+
}
|
|
100
|
+
function resolveArticleExtraction(document, shouldExtract) {
|
|
101
|
+
return shouldExtract ? extractArticle(document) : null;
|
|
91
102
|
}
|
|
92
103
|
function applyBaseUri(document, url) {
|
|
93
104
|
try {
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import dns from 'node:dns';
|
|
2
2
|
import os from 'node:os';
|
|
3
3
|
import { Agent } from 'undici';
|
|
4
|
-
import { createErrorWithCode } from '../../utils/error-
|
|
4
|
+
import { createErrorWithCode } from '../../utils/error-details.js';
|
|
5
5
|
import { isRecord } from '../../utils/guards.js';
|
|
6
6
|
import { handleLookupResult } from './dns-selection.js';
|
|
7
7
|
const DNS_LOOKUP_TIMEOUT_MS = 5000;
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { createErrorWithCode } from '../../utils/error-
|
|
1
|
+
import { createErrorWithCode } from '../../utils/error-details.js';
|
|
2
2
|
import { isBlockedIp } from '../../utils/url-validator.js';
|
|
3
3
|
function normalizeLookupResults(addresses, family) {
|
|
4
4
|
if (Array.isArray(addresses)) {
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { randomUUID } from 'node:crypto';
|
|
2
2
|
import diagnosticsChannel from 'node:diagnostics_channel';
|
|
3
3
|
import { performance } from 'node:perf_hooks';
|
|
4
|
-
import { isSystemError } from '../../utils/error-
|
|
4
|
+
import { isSystemError } from '../../utils/error-details.js';
|
|
5
5
|
import { logDebug, logError, logWarn } from '../logger.js';
|
|
6
6
|
const fetchChannel = diagnosticsChannel.channel('superfetch.fetch');
|
|
7
7
|
function redactUrl(rawUrl) {
|
|
@@ -27,7 +27,14 @@ function publishFetchEvent(event) {
|
|
|
27
27
|
// Avoid crashing the publisher if a subscriber throws.
|
|
28
28
|
}
|
|
29
29
|
}
|
|
30
|
-
function
|
|
30
|
+
export function startFetchTelemetry(url, method) {
|
|
31
|
+
const safeUrl = redactUrl(url);
|
|
32
|
+
const context = {
|
|
33
|
+
requestId: randomUUID(),
|
|
34
|
+
startTime: performance.now(),
|
|
35
|
+
url: safeUrl,
|
|
36
|
+
method: method.toUpperCase(),
|
|
37
|
+
};
|
|
31
38
|
publishFetchEvent({
|
|
32
39
|
v: 1,
|
|
33
40
|
type: 'start',
|
|
@@ -40,65 +47,40 @@ function publishAndLogFetchStart(context) {
|
|
|
40
47
|
method: context.method,
|
|
41
48
|
url: context.url,
|
|
42
49
|
});
|
|
43
|
-
}
|
|
44
|
-
export function startFetchTelemetry(url, method) {
|
|
45
|
-
const safeUrl = redactUrl(url);
|
|
46
|
-
const context = {
|
|
47
|
-
requestId: randomUUID(),
|
|
48
|
-
startTime: performance.now(),
|
|
49
|
-
url: safeUrl,
|
|
50
|
-
method: method.toUpperCase(),
|
|
51
|
-
};
|
|
52
|
-
publishAndLogFetchStart(context);
|
|
53
50
|
return context;
|
|
54
51
|
}
|
|
55
52
|
export function recordFetchResponse(context, response, contentSize) {
|
|
56
53
|
const duration = performance.now() - context.startTime;
|
|
57
|
-
|
|
58
|
-
logDebug('HTTP Response', {
|
|
59
|
-
requestId: context.requestId,
|
|
60
|
-
status: response.status,
|
|
61
|
-
url: context.url,
|
|
62
|
-
...buildResponseMeta(response, contentSize, duration),
|
|
63
|
-
});
|
|
64
|
-
logSlowRequestIfNeeded(context, duration);
|
|
65
|
-
}
|
|
66
|
-
function publishFetchEnd(context, status, duration) {
|
|
54
|
+
const durationLabel = `${Math.round(duration)}ms`;
|
|
67
55
|
publishFetchEvent({
|
|
68
56
|
v: 1,
|
|
69
57
|
type: 'end',
|
|
70
58
|
requestId: context.requestId,
|
|
71
|
-
status,
|
|
59
|
+
status: response.status,
|
|
72
60
|
duration,
|
|
73
61
|
});
|
|
74
|
-
}
|
|
75
|
-
function buildResponseMeta(response, contentSize, duration) {
|
|
76
|
-
const contentLength = response.headers.get('content-length') ?? contentSize?.toString();
|
|
77
|
-
const meta = {
|
|
78
|
-
duration: `${Math.round(duration)}ms`,
|
|
79
|
-
};
|
|
80
62
|
const contentType = response.headers.get('content-type');
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
if (contentLength !== undefined) {
|
|
85
|
-
meta.size = contentLength;
|
|
86
|
-
}
|
|
87
|
-
return meta;
|
|
88
|
-
}
|
|
89
|
-
function logSlowRequestIfNeeded(context, duration) {
|
|
90
|
-
if (duration <= 5000)
|
|
91
|
-
return;
|
|
92
|
-
logWarn('Slow HTTP request detected', {
|
|
63
|
+
const contentLength = response.headers.get('content-length') ??
|
|
64
|
+
(contentSize === undefined ? undefined : String(contentSize));
|
|
65
|
+
logDebug('HTTP Response', {
|
|
93
66
|
requestId: context.requestId,
|
|
67
|
+
status: response.status,
|
|
94
68
|
url: context.url,
|
|
95
|
-
duration:
|
|
69
|
+
duration: durationLabel,
|
|
70
|
+
...(contentType ? { contentType } : {}),
|
|
71
|
+
...(contentLength ? { size: contentLength } : {}),
|
|
96
72
|
});
|
|
73
|
+
if (duration > 5000) {
|
|
74
|
+
logWarn('Slow HTTP request detected', {
|
|
75
|
+
requestId: context.requestId,
|
|
76
|
+
url: context.url,
|
|
77
|
+
duration: durationLabel,
|
|
78
|
+
});
|
|
79
|
+
}
|
|
97
80
|
}
|
|
98
|
-
function
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
function buildFetchErrorEvent(context, err, duration, status) {
|
|
81
|
+
export function recordFetchError(context, error, status) {
|
|
82
|
+
const duration = performance.now() - context.startTime;
|
|
83
|
+
const err = error instanceof Error ? error : new Error(String(error));
|
|
102
84
|
const event = {
|
|
103
85
|
v: 1,
|
|
104
86
|
type: 'error',
|
|
@@ -107,10 +89,6 @@ function buildFetchErrorEvent(context, err, duration, status) {
|
|
|
107
89
|
error: err.message,
|
|
108
90
|
duration,
|
|
109
91
|
};
|
|
110
|
-
addOptionalErrorFields(event, err, status);
|
|
111
|
-
return event;
|
|
112
|
-
}
|
|
113
|
-
function addOptionalErrorFields(event, err, status) {
|
|
114
92
|
const code = isSystemError(err) ? err.code : undefined;
|
|
115
93
|
if (code !== undefined) {
|
|
116
94
|
event.code = code;
|
|
@@ -118,17 +96,8 @@ function addOptionalErrorFields(event, err, status) {
|
|
|
118
96
|
if (status !== undefined) {
|
|
119
97
|
event.status = status;
|
|
120
98
|
}
|
|
121
|
-
}
|
|
122
|
-
function selectErrorLogger(status) {
|
|
123
|
-
return status === 429 ? logWarn : logError;
|
|
124
|
-
}
|
|
125
|
-
export function recordFetchError(context, error, status) {
|
|
126
|
-
const duration = performance.now() - context.startTime;
|
|
127
|
-
const err = normalizeError(error);
|
|
128
|
-
const event = buildFetchErrorEvent(context, err, duration, status);
|
|
129
99
|
publishFetchEvent(event);
|
|
130
|
-
const log =
|
|
131
|
-
const code = isSystemError(err) ? err.code : undefined;
|
|
100
|
+
const log = status === 429 ? logWarn : logError;
|
|
132
101
|
log('HTTP Request Error', {
|
|
133
102
|
requestId: context.requestId,
|
|
134
103
|
url: context.url,
|