@j0hanz/superfetch 1.2.4 → 2.0.0
This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their public registries.
- package/README.md +116 -152
- package/dist/config/auth-config.d.ts +16 -0
- package/dist/config/auth-config.js +53 -0
- package/dist/config/constants.d.ts +11 -13
- package/dist/config/constants.js +1 -3
- package/dist/config/env-parsers.d.ts +7 -0
- package/dist/config/env-parsers.js +84 -0
- package/dist/config/formatting.d.ts +2 -2
- package/dist/config/index.d.ts +47 -53
- package/dist/config/index.js +25 -59
- package/dist/config/types/content.d.ts +1 -49
- package/dist/config/types/runtime.d.ts +8 -16
- package/dist/config/types/tools.d.ts +2 -28
- package/dist/http/accept-policy.d.ts +3 -0
- package/dist/http/accept-policy.js +45 -0
- package/dist/http/async-handler.d.ts +2 -0
- package/dist/http/async-handler.js +5 -0
- package/dist/http/auth-introspection.d.ts +2 -0
- package/dist/http/auth-introspection.js +141 -0
- package/dist/http/auth-static.d.ts +2 -0
- package/dist/http/auth-static.js +23 -0
- package/dist/http/auth.d.ts +3 -2
- package/dist/http/auth.js +98 -26
- package/dist/http/cors.d.ts +6 -6
- package/dist/http/cors.js +7 -42
- package/dist/http/download-routes.d.ts +0 -12
- package/dist/http/download-routes.js +21 -58
- package/dist/http/jsonrpc-http.d.ts +2 -0
- package/dist/http/jsonrpc-http.js +10 -0
- package/dist/http/mcp-routes.d.ts +0 -1
- package/dist/http/mcp-routes.js +43 -30
- package/dist/http/mcp-session-helpers.d.ts +0 -1
- package/dist/http/mcp-session-helpers.js +1 -1
- package/dist/http/mcp-session-transport.d.ts +7 -0
- package/dist/http/mcp-session-transport.js +57 -0
- package/dist/http/mcp-session.js +60 -73
- package/dist/http/mcp-validation.d.ts +1 -0
- package/dist/http/mcp-validation.js +11 -10
- package/dist/http/protocol-policy.d.ts +2 -0
- package/dist/http/protocol-policy.js +31 -0
- package/dist/http/rate-limit.js +5 -2
- package/dist/http/server-config.d.ts +1 -0
- package/dist/http/server-config.js +40 -0
- package/dist/http/server-middleware.d.ts +2 -9
- package/dist/http/server-middleware.js +96 -43
- package/dist/http/server-shutdown.d.ts +4 -0
- package/dist/http/server-shutdown.js +43 -0
- package/dist/http/server.js +52 -64
- package/dist/http/session-cleanup.js +1 -1
- package/dist/middleware/error-handler.js +1 -3
- package/dist/resources/cached-content.js +50 -108
- package/dist/resources/index.js +0 -82
- package/dist/server.js +51 -30
- package/dist/services/cache-keys.d.ts +7 -0
- package/dist/services/cache-keys.js +57 -0
- package/dist/services/cache.d.ts +1 -7
- package/dist/services/cache.js +53 -119
- package/dist/services/context.d.ts +0 -1
- package/dist/services/context.js +0 -7
- package/dist/services/extractor.js +10 -82
- package/dist/services/fetcher/agents.d.ts +2 -2
- package/dist/services/fetcher/agents.js +34 -95
- package/dist/services/fetcher/dns-selection.d.ts +2 -0
- package/dist/services/fetcher/dns-selection.js +72 -0
- package/dist/services/fetcher/interceptors.d.ts +0 -22
- package/dist/services/fetcher/interceptors.js +30 -13
- package/dist/services/fetcher/redirects.js +4 -3
- package/dist/services/fetcher/response.js +66 -31
- package/dist/services/fetcher.d.ts +1 -3
- package/dist/services/fetcher.js +14 -33
- package/dist/services/fifo-queue.d.ts +8 -0
- package/dist/services/fifo-queue.js +25 -0
- package/dist/services/logger.js +2 -2
- package/dist/services/metadata-collector.d.ts +1 -9
- package/dist/services/metadata-collector.js +71 -2
- package/dist/services/transform-worker-pool.d.ts +4 -14
- package/dist/services/transform-worker-pool.js +177 -129
- package/dist/services/transform-worker-types.d.ts +32 -0
- package/dist/services/transform-worker-types.js +14 -0
- package/dist/tools/handlers/fetch-markdown.tool.d.ts +3 -4
- package/dist/tools/handlers/fetch-markdown.tool.js +20 -72
- package/dist/tools/handlers/fetch-single.shared.d.ts +1 -20
- package/dist/tools/handlers/fetch-single.shared.js +44 -87
- package/dist/tools/handlers/fetch-url.tool.d.ts +1 -1
- package/dist/tools/handlers/fetch-url.tool.js +46 -123
- package/dist/tools/index.js +21 -40
- package/dist/tools/schemas.d.ts +1 -51
- package/dist/tools/schemas.js +2 -108
- package/dist/tools/utils/cached-markdown.d.ts +5 -0
- package/dist/tools/utils/cached-markdown.js +46 -0
- package/dist/tools/utils/content-shaping.d.ts +4 -0
- package/dist/tools/utils/content-shaping.js +52 -0
- package/dist/tools/utils/content-transform.d.ts +2 -17
- package/dist/tools/utils/content-transform.js +120 -114
- package/dist/tools/utils/fetch-pipeline.d.ts +0 -8
- package/dist/tools/utils/fetch-pipeline.js +65 -62
- package/dist/tools/utils/inline-content.d.ts +1 -2
- package/dist/tools/utils/inline-content.js +4 -7
- package/dist/transformers/markdown.transformer.js +109 -34
- package/dist/utils/cached-payload.d.ts +7 -0
- package/dist/utils/cached-payload.js +36 -0
- package/dist/utils/error-utils.js +1 -1
- package/dist/utils/filename-generator.js +21 -10
- package/dist/utils/guards.d.ts +1 -0
- package/dist/utils/guards.js +3 -0
- package/dist/utils/header-normalizer.d.ts +0 -3
- package/dist/utils/header-normalizer.js +3 -3
- package/dist/utils/tool-error-handler.d.ts +2 -2
- package/dist/utils/tool-error-handler.js +11 -38
- package/dist/utils/url-transformer.d.ts +7 -0
- package/dist/utils/url-transformer.js +147 -0
- package/dist/utils/url-validator.d.ts +1 -2
- package/dist/utils/url-validator.js +20 -93
- package/dist/workers/content-transform.worker.d.ts +1 -0
- package/dist/workers/content-transform.worker.js +40 -0
- package/package.json +13 -16
package/dist/services/fetcher/interceptors.js
CHANGED

@@ -27,14 +27,7 @@ function publishFetchEvent(event) {
         // Avoid crashing the publisher if a subscriber throws.
     }
 }
-
-    const safeUrl = redactUrl(url);
-    const context = {
-        requestId: randomUUID(),
-        startTime: performance.now(),
-        url: safeUrl,
-        method: method.toUpperCase(),
-    };
+function publishAndLogFetchStart(context) {
     publishFetchEvent({
         v: 1,
         type: 'start',
@@ -47,6 +40,16 @@ export function startFetchTelemetry(url, method) {
         method: context.method,
         url: context.url,
     });
+}
+export function startFetchTelemetry(url, method) {
+    const safeUrl = redactUrl(url);
+    const context = {
+        requestId: randomUUID(),
+        startTime: performance.now(),
+        url: safeUrl,
+        method: method.toUpperCase(),
+    };
+    publishAndLogFetchStart(context);
     return context;
 }
 export function recordFetchResponse(context, response, contentSize) {
@@ -92,10 +95,10 @@ function logSlowRequestIfNeeded(context, duration) {
         duration: `${Math.round(duration)}ms`,
     });
 }
-
-
-
-
+function normalizeError(error) {
+    return error instanceof Error ? error : new Error(String(error));
+}
+function buildFetchErrorEvent(context, err, duration, status) {
     const event = {
         v: 1,
         type: 'error',
@@ -104,14 +107,28 @@ export function recordFetchError(context, error, status) {
         error: err.message,
         duration,
     };
+    addOptionalErrorFields(event, err, status);
+    return event;
+}
+function addOptionalErrorFields(event, err, status) {
+    const code = isSystemError(err) ? err.code : undefined;
     if (code !== undefined) {
         event.code = code;
     }
     if (status !== undefined) {
         event.status = status;
     }
+}
+function selectErrorLogger(status) {
+    return status === 429 ? logWarn : logError;
+}
+export function recordFetchError(context, error, status) {
+    const duration = performance.now() - context.startTime;
+    const err = normalizeError(error);
+    const event = buildFetchErrorEvent(context, err, duration, status);
     publishFetchEvent(event);
-    const log = status
+    const log = selectErrorLogger(status);
+    const code = isSystemError(err) ? err.code : undefined;
     log('HTTP Request Error', {
         requestId: context.requestId,
         url: context.url,

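Reviewer note on the hunk above: recordFetchError now funnels every thrown value through normalizeError and routes rate-limit responses to the warning logger instead of the error logger. A small self-contained sketch of that pattern (logWarn/logError here are console stand-ins, not the package's logger):

```ts
// Stand-ins for the package's logger; illustration only.
const logWarn = (msg: string, meta: Record<string, unknown>) => console.warn(msg, meta);
const logError = (msg: string, meta: Record<string, unknown>) => console.error(msg, meta);

function normalizeError(error: unknown): Error {
  // Non-Error throw values (strings, plain objects) become real Error instances.
  return error instanceof Error ? error : new Error(String(error));
}

function selectErrorLogger(status?: number) {
  // 429s are expected under rate limiting, so they only warn.
  return status === 429 ? logWarn : logError;
}

// A thrown string still yields err.message, and a 429 goes through logWarn.
const err = normalizeError('socket hang up');
selectErrorLogger(429)('HTTP Request Error', { error: err.message, status: 429 });
```
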
package/dist/services/fetcher/redirects.js
CHANGED

@@ -1,5 +1,6 @@
 import { FetchError } from '../../errors/app-error.js';
 import { createErrorWithCode } from '../../utils/error-utils.js';
+import { isRecord } from '../../utils/guards.js';
 import { validateAndNormalizeUrl } from '../../utils/url-validator.js';
 const REDIRECT_STATUSES = new Set([301, 302, 303, 307, 308]);
 function isRedirectStatus(status) {
@@ -15,7 +16,7 @@ async function performFetchCycle(currentUrl, init, redirectLimit, redirectCount)
     void response.body?.cancel();
     return {
         response,
-        nextUrl:
+        nextUrl: resolveRedirectTarget(currentUrl, location),
     };
 }
 function assertRedirectWithinLimit(response, currentUrl, redirectLimit, redirectCount) {
@@ -32,11 +33,11 @@ function getRedirectLocation(response, currentUrl) {
     throw new FetchError('Redirect response missing Location header', currentUrl);
 }
 function annotateRedirectError(error, url) {
-    if (!error
+    if (!isRecord(error))
         return;
     error.requestUrl = url;
 }
-
+function resolveRedirectTarget(baseUrl, location) {
     if (!URL.canParse(location, baseUrl)) {
         throw createErrorWithCode('Invalid redirect target', 'EBADREDIRECT');
     }

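The truncated `nextUrl:` line above is now computed by the new resolveRedirectTarget, which validates the Location header with URL.canParse (Node 18.17+/20+) before resolving it against the current URL. A standalone sketch of that resolution step; the returned-string form is an assumption, since the tail of the function is not visible in this hunk:

```ts
// Sketch of Location-header resolution gated by URL.canParse; illustration only.
function resolveRedirect(baseUrl: string, location: string): string {
  if (!URL.canParse(location, baseUrl)) {
    throw new Error('Invalid redirect target'); // the package tags this EBADREDIRECT
  }
  return new URL(location, baseUrl).toString();
}

// Relative Location values resolve against the URL that issued the redirect.
console.log(resolveRedirect('https://example.com/a/b', '../c')); // https://example.com/c
```
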
package/dist/services/fetcher/response.js
CHANGED

@@ -1,5 +1,3 @@
-import { Readable, Writable } from 'node:stream';
-import { pipeline } from 'node:stream/promises';
 import { FetchError } from '../../errors/app-error.js';
 function assertContentLengthWithinLimit(response, url, maxBytes) {
     const contentLengthHeader = response.headers.get('content-length');
@@ -9,51 +7,88 @@ function assertContentLengthWithinLimit(response, url, maxBytes) {
     if (Number.isNaN(contentLength) || contentLength <= maxBytes) {
         return;
     }
+    void response.body?.cancel();
     throw new FetchError(`Response exceeds maximum size of ${maxBytes} bytes`, url);
 }
-
-
-
-
-
-    if (typeof chunk === 'string') {
-        return Buffer.from(chunk);
-    }
-    return Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
+function createReadState() {
+    return {
+        decoder: new TextDecoder(),
+        parts: [],
+        total: 0,
     };
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+}
+function appendChunk(state, chunk, maxBytes, url) {
+    state.total += chunk.byteLength;
+    if (state.total > maxBytes) {
+        throw new FetchError(`Response exceeds maximum size of ${maxBytes} bytes`, url);
+    }
+    const decoded = state.decoder.decode(chunk, { stream: true });
+    if (decoded)
+        state.parts.push(decoded);
+}
+function finalizeRead(state) {
+    const decoded = state.decoder.decode();
+    if (decoded)
+        state.parts.push(decoded);
+}
+function createAbortError(url) {
+    return new FetchError('Request was aborted during response read', url, 499, {
+        reason: 'aborted',
     });
+}
+async function cancelReaderQuietly(reader) {
     try {
-
-
+        await reader.cancel();
+    }
+    catch {
+        // Ignore cancel errors; we're already failing this read.
+    }
+}
+async function throwIfAborted(signal, url, reader) {
+    if (!signal?.aborted)
+        return;
+    await cancelReaderQuietly(reader);
+    throw createAbortError(url);
+}
+async function readAllChunks(reader, state, url, maxBytes, signal) {
+    await throwIfAborted(signal, url, reader);
+    let result = await reader.read();
+    while (!result.done) {
+        appendChunk(state, result.value, maxBytes, url);
+        await throwIfAborted(signal, url, reader);
+        result = await reader.read();
+    }
+}
+async function readStreamWithLimit(stream, url, maxBytes, signal) {
+    const state = createReadState();
+    const reader = stream.getReader();
+    try {
+        await readAllChunks(reader, state, url, maxBytes, signal);
     }
     catch (error) {
+        if (!signal?.aborted) {
+            await cancelReaderQuietly(reader);
+        }
         if (signal?.aborted) {
-            throw
+            throw createAbortError(url);
         }
         throw error;
     }
-
+    finally {
+        reader.releaseLock();
+    }
+    finalizeRead(state);
+    return { text: state.parts.join(''), size: state.total };
 }
 export async function readResponseText(response, url, maxBytes, signal) {
     assertContentLengthWithinLimit(response, url, maxBytes);
     if (!response.body) {
         const text = await response.text();
-
+        const size = Buffer.byteLength(text);
+        if (size > maxBytes) {
+            throw new FetchError(`Response exceeds maximum size of ${maxBytes} bytes`, url);
+        }
+        return { text, size };
     }
     return readStreamWithLimit(response.body, url, maxBytes, signal);
 }

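Reviewer note: readResponseText drops the node:stream/pipeline approach in favor of a manual reader loop that counts bytes, decodes incrementally, and bails out on the caller's AbortSignal. A condensed, self-contained sketch of that bounded-read pattern (error types and cancel handling simplified relative to the real code):

```ts
// Minimal sketch of the bounded-read loop introduced above; not the package's exact code.
async function readWithLimit(
  stream: ReadableStream<Uint8Array>,
  maxBytes: number,
  signal?: AbortSignal,
): Promise<{ text: string; size: number }> {
  const decoder = new TextDecoder();
  const parts: string[] = [];
  let total = 0;
  const reader = stream.getReader();
  try {
    for (;;) {
      if (signal?.aborted) throw new Error('Request was aborted during response read');
      const result = await reader.read();
      if (result.done) break;
      total += result.value.byteLength;
      if (total > maxBytes) throw new Error(`Response exceeds maximum size of ${maxBytes} bytes`);
      parts.push(decoder.decode(result.value, { stream: true }));
    }
  } finally {
    reader.releaseLock();
  }
  parts.push(decoder.decode()); // flush any buffered multi-byte sequence
  return { text: parts.join(''), size: total };
}
```

Decoding as chunks arrive, rather than buffering all bytes and decoding once, keeps peak memory roughly proportional to the decoded text instead of text plus raw bytes.
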
package/dist/services/fetcher.d.ts
CHANGED

@@ -1,4 +1,2 @@
 import type { FetchOptions } from '../config/types/runtime.js';
-
-export { destroyAgents };
-export declare function fetchNormalizedUrlWithRetry(normalizedUrl: string, options?: FetchOptions, maxRetries?: number): Promise<string>;
+export declare function fetchNormalizedUrl(normalizedUrl: string, options?: FetchOptions): Promise<string>;

package/dist/services/fetcher.js
CHANGED

@@ -1,12 +1,9 @@
 import { config } from '../config/index.js';
-import {
-import { destroyAgents, dispatcher } from './fetcher/agents.js';
+import { dispatcher } from './fetcher/agents.js';
 import { createHttpError, createRateLimitError, mapFetchError, } from './fetcher/errors.js';
 import { recordFetchError, recordFetchResponse, startFetchTelemetry, } from './fetcher/interceptors.js';
 import { fetchWithRedirects } from './fetcher/redirects.js';
 import { readResponseText } from './fetcher/response.js';
-import { executeWithRetry } from './fetcher/retry-policy.js';
-export { destroyAgents };
 const DEFAULT_HEADERS = {
     'User-Agent': config.fetcher.userAgent,
     Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
@@ -14,15 +11,8 @@ const DEFAULT_HEADERS = {
     'Accept-Encoding': 'gzip, deflate, br',
     Connection: 'keep-alive',
 };
-function buildHeaders(
-
-    const sanitized = normalizeHeaderRecord(customHeaders, config.security.blockedHeaders);
-    if (sanitized) {
-        for (const [key, value] of Object.entries(sanitized)) {
-            headers.set(key, value);
-        }
-    }
-    return headers;
+function buildHeaders() {
+    return new Headers(DEFAULT_HEADERS);
 }
 function buildRequestSignal(timeoutMs, external) {
     const timeoutSignal = AbortSignal.timeout(timeoutMs);
@@ -54,9 +44,7 @@ async function handleFetchResponse(response, finalUrl, telemetry, signal) {
 async function fetchWithTelemetry(normalizedUrl, requestInit, timeoutMs) {
     const telemetry = startFetchTelemetry(normalizedUrl, 'GET');
     try {
-
-        telemetry.url = finalUrl;
-        return await handleFetchResponse(response, finalUrl, telemetry, requestInit.signal ?? undefined);
+        return await fetchAndHandle(normalizedUrl, requestInit, telemetry);
     }
     catch (error) {
         const mapped = mapFetchError(error, normalizedUrl, timeoutMs);
@@ -65,22 +53,15 @@ async function fetchWithTelemetry(normalizedUrl, requestInit, timeoutMs) {
         throw mapped;
     }
 }
-
-const
-
-
-function buildRequestContext(options) {
-    const context = {
-        timeoutMs: options?.timeout ?? config.fetcher.timeout,
-        headers: buildHeaders(options?.customHeaders),
-    };
-    if (options?.signal) {
-        context.signal = options.signal;
-    }
-    return context;
+async function fetchAndHandle(normalizedUrl, requestInit, telemetry) {
+    const { response, url: finalUrl } = await fetchWithRedirects(normalizedUrl, requestInit, config.fetcher.maxRedirects);
+    telemetry.url = finalUrl;
+    return handleFetchResponse(response, finalUrl, telemetry, requestInit.signal ?? undefined);
 }
-async function
-const
-const
-
+export async function fetchNormalizedUrl(normalizedUrl, options) {
+    const timeoutMs = config.fetcher.timeout;
+    const headers = buildHeaders();
+    const signal = buildRequestSignal(timeoutMs, options?.signal);
+    const requestInit = buildRequestInit(headers, signal);
+    return fetchWithTelemetry(normalizedUrl, requestInit, timeoutMs);
 }

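Net effect of the two fetcher changes above: the retry wrapper (executeWithRetry), per-call custom headers, and per-call timeout are gone, and the exported entry point is the simpler fetchNormalizedUrl. A hedged usage sketch against the new declaration; the import specifier is a guess for illustration, since the package's export map is not shown in this diff:

```ts
// Illustrative only: the module specifier is assumed, not taken from package.json "exports".
import { fetchNormalizedUrl } from '@j0hanz/superfetch/dist/services/fetcher.js';

const controller = new AbortController();
const timer = setTimeout(() => controller.abort(), 10_000);

// 2.0.0 builds headers and timeout from its own config; of the per-call options,
// only the AbortSignal is still read by the new implementation.
const html = await fetchNormalizedUrl('https://example.com/', { signal: controller.signal });
clearTimeout(timer);
```
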
package/dist/services/fifo-queue.js
ADDED

@@ -0,0 +1,25 @@
+export class FifoQueue {
+    items = [];
+    head = 0;
+    get length() {
+        return this.items.length - this.head;
+    }
+    push(item) {
+        this.items.push(item);
+    }
+    shift() {
+        if (this.head >= this.items.length)
+            return undefined;
+        const item = this.items[this.head];
+        this.head += 1;
+        if (this.head > 64 && this.head * 2 >= this.items.length) {
+            this.items = this.items.slice(this.head);
+            this.head = 0;
+        }
+        return item;
+    }
+    clear() {
+        this.items.length = 0;
+        this.head = 0;
+    }
+}

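The queue added above trades a head index for Array.prototype.shift's O(n) cost and compacts lazily once at least 64 slots have been consumed and the head has passed the array's midpoint, so small queues never reallocate on dequeue. A typed rendering of the same class with a usage sketch:

```ts
// Self-contained TypeScript rendering of the FifoQueue added above, plus a usage sketch.
class FifoQueue<T> {
  private items: T[] = [];
  private head = 0;
  get length(): number {
    return this.items.length - this.head;
  }
  push(item: T): void {
    this.items.push(item);
  }
  shift(): T | undefined {
    if (this.head >= this.items.length) return undefined;
    const item = this.items[this.head];
    this.head += 1;
    // Compact lazily so shift() stays amortized O(1) without unbounded memory growth.
    if (this.head > 64 && this.head * 2 >= this.items.length) {
      this.items = this.items.slice(this.head);
      this.head = 0;
    }
    return item;
  }
  clear(): void {
    this.items.length = 0;
    this.head = 0;
  }
}

// Usage: drain in insertion order without the O(n) cost of Array.prototype.shift.
const queue = new FifoQueue<number>();
for (let i = 0; i < 3; i++) queue.push(i);
console.log(queue.shift(), queue.shift(), queue.length); // 0 1 1
```
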
package/dist/services/logger.js
CHANGED

@@ -18,10 +18,10 @@ function formatLogEntry(level, message, meta) {
     return `[${createTimestamp()}] ${level.toUpperCase()}: ${message}${formatMetadata(meta)}`;
 }
 function shouldLog(level) {
-
-        return false;
+    // Debug logs only when LOG_LEVEL=debug
     if (level === 'debug')
         return config.logging.level === 'debug';
+    // All other levels always log
     return true;
 }
 export function logInfo(message, meta) {

package/dist/services/metadata-collector.d.ts
CHANGED

@@ -1,10 +1,2 @@
 import type { ExtractedMetadata } from '../config/types/content.js';
-export
-export type MetaField = keyof ExtractedMetadata;
-export interface MetaCollectorState {
-    title: Partial<Record<MetaSource, string>>;
-    description: Partial<Record<MetaSource, string>>;
-    author: Partial<Record<MetaSource, string>>;
-}
-export declare function createMetaCollectorState(): MetaCollectorState;
-export declare function resolveMetaField(state: MetaCollectorState, field: MetaField): string | undefined;
+export declare function extractMetadata(document: Document): ExtractedMetadata;

package/dist/services/metadata-collector.js
CHANGED

@@ -1,11 +1,80 @@
-
+function createMetaCollectorState() {
     return {
         title: {},
         description: {},
         author: {},
     };
 }
-
+function resolveMetaField(state, field) {
     const sources = state[field];
     return sources.og ?? sources.twitter ?? sources.standard;
 }
+function parseOpenGraphKey(property) {
+    if (!property?.startsWith('og:'))
+        return null;
+    const key = property.replace('og:', '');
+    return key === 'title' || key === 'description' ? key : null;
+}
+function parseTwitterKey(name) {
+    if (!name?.startsWith('twitter:'))
+        return null;
+    const key = name.replace('twitter:', '');
+    return key === 'title' || key === 'description' ? key : null;
+}
+function parseStandardKey(name) {
+    if (name === 'description')
+        return 'description';
+    if (name === 'author')
+        return 'author';
+    return null;
+}
+function collectMetaTag(state, tag) {
+    const content = tag.getAttribute('content')?.trim();
+    if (!content)
+        return;
+    const ogKey = parseOpenGraphKey(tag.getAttribute('property'));
+    if (ogKey) {
+        state[ogKey].og = content;
+        return;
+    }
+    const name = tag.getAttribute('name');
+    const twitterKey = parseTwitterKey(name);
+    if (twitterKey) {
+        state[twitterKey].twitter = content;
+        return;
+    }
+    const standardKey = parseStandardKey(name);
+    if (standardKey) {
+        state[standardKey].standard = content;
+    }
+}
+function scanMetaTags(document, state) {
+    const metaTags = document.querySelectorAll('meta');
+    for (const tag of metaTags) {
+        collectMetaTag(state, tag);
+    }
+}
+function ensureTitleFallback(document, state) {
+    if (state.title.standard)
+        return;
+    const titleEl = document.querySelector('title');
+    if (titleEl?.textContent) {
+        state.title.standard = titleEl.textContent.trim();
+    }
+}
+export function extractMetadata(document) {
+    const state = createMetaCollectorState();
+    scanMetaTags(document, state);
+    ensureTitleFallback(document, state);
+    const metadata = {};
+    const title = resolveMetaField(state, 'title');
+    const description = resolveMetaField(state, 'description');
+    const author = resolveMetaField(state, 'author');
+    if (title !== undefined)
+        metadata.title = title;
+    if (description !== undefined)
+        metadata.description = description;
+    if (author !== undefined)
+        metadata.author = author;
+    return metadata;
+}

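extractMetadata now performs the whole collect-and-resolve pass itself, preferring Open Graph over Twitter over standard meta tags, with the document's <title> as a last resort for the title. A hedged usage sketch; jsdom only supplies the Document here and, like the deep import path, is an assumption for illustration, not a package dependency:

```ts
// Illustrative only: both the jsdom dependency and the deep import specifier are assumed.
import { JSDOM } from 'jsdom';
import { extractMetadata } from '@j0hanz/superfetch/dist/services/metadata-collector.js';

const { document } = new JSDOM(`
  <head>
    <title>Fallback title</title>
    <meta property="og:title" content="OG title">
    <meta name="description" content="Standard description">
    <meta name="author" content="Jane Doe">
  </head>`).window;

console.log(extractMetadata(document));
// Given the precedence above, this should resolve to:
// { title: 'OG title', description: 'Standard description', author: 'Jane Doe' }
```
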
package/dist/services/transform-worker-pool.d.ts
CHANGED

@@ -1,14 +1,4 @@
-import type {
-type
-export
-
-    html: string;
-    url: string;
-    options: TransformOptions & {
-        includeContentBlocks?: boolean;
-    };
-}
-type TransformResult = JsonlTransformResult | MarkdownTransformResult;
-export declare function runTransformInWorker(job: TransformJob): Promise<TransformResult | null>;
-export declare function destroyTransformWorkers(): void;
-export {};
+import type { MarkdownTransformResult } from '../config/types/content.js';
+import type { WorkerTransformRequest } from './transform-worker-types.js';
+export declare function transformInWorker(request: Omit<WorkerTransformRequest, 'id'>, signal?: AbortSignal): Promise<MarkdownTransformResult>;
+export declare function destroyTransformWorkers(): Promise<void>;
