@j0hanz/superfetch 1.2.4 → 2.0.0

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (116)
  1. package/README.md +116 -152
  2. package/dist/config/auth-config.d.ts +16 -0
  3. package/dist/config/auth-config.js +53 -0
  4. package/dist/config/constants.d.ts +11 -13
  5. package/dist/config/constants.js +1 -3
  6. package/dist/config/env-parsers.d.ts +7 -0
  7. package/dist/config/env-parsers.js +84 -0
  8. package/dist/config/formatting.d.ts +2 -2
  9. package/dist/config/index.d.ts +47 -53
  10. package/dist/config/index.js +25 -59
  11. package/dist/config/types/content.d.ts +1 -49
  12. package/dist/config/types/runtime.d.ts +8 -16
  13. package/dist/config/types/tools.d.ts +2 -28
  14. package/dist/http/accept-policy.d.ts +3 -0
  15. package/dist/http/accept-policy.js +45 -0
  16. package/dist/http/async-handler.d.ts +2 -0
  17. package/dist/http/async-handler.js +5 -0
  18. package/dist/http/auth-introspection.d.ts +2 -0
  19. package/dist/http/auth-introspection.js +141 -0
  20. package/dist/http/auth-static.d.ts +2 -0
  21. package/dist/http/auth-static.js +23 -0
  22. package/dist/http/auth.d.ts +3 -2
  23. package/dist/http/auth.js +98 -26
  24. package/dist/http/cors.d.ts +6 -6
  25. package/dist/http/cors.js +7 -42
  26. package/dist/http/download-routes.d.ts +0 -12
  27. package/dist/http/download-routes.js +21 -58
  28. package/dist/http/jsonrpc-http.d.ts +2 -0
  29. package/dist/http/jsonrpc-http.js +10 -0
  30. package/dist/http/mcp-routes.d.ts +0 -1
  31. package/dist/http/mcp-routes.js +43 -30
  32. package/dist/http/mcp-session-helpers.d.ts +0 -1
  33. package/dist/http/mcp-session-helpers.js +1 -1
  34. package/dist/http/mcp-session-transport.d.ts +7 -0
  35. package/dist/http/mcp-session-transport.js +57 -0
  36. package/dist/http/mcp-session.js +60 -73
  37. package/dist/http/mcp-validation.d.ts +1 -0
  38. package/dist/http/mcp-validation.js +11 -10
  39. package/dist/http/protocol-policy.d.ts +2 -0
  40. package/dist/http/protocol-policy.js +31 -0
  41. package/dist/http/rate-limit.js +5 -2
  42. package/dist/http/server-config.d.ts +1 -0
  43. package/dist/http/server-config.js +40 -0
  44. package/dist/http/server-middleware.d.ts +2 -9
  45. package/dist/http/server-middleware.js +96 -43
  46. package/dist/http/server-shutdown.d.ts +4 -0
  47. package/dist/http/server-shutdown.js +43 -0
  48. package/dist/http/server.js +52 -64
  49. package/dist/http/session-cleanup.js +1 -1
  50. package/dist/middleware/error-handler.js +1 -3
  51. package/dist/resources/cached-content.js +50 -108
  52. package/dist/resources/index.js +0 -82
  53. package/dist/server.js +51 -30
  54. package/dist/services/cache-keys.d.ts +7 -0
  55. package/dist/services/cache-keys.js +57 -0
  56. package/dist/services/cache.d.ts +1 -7
  57. package/dist/services/cache.js +53 -119
  58. package/dist/services/context.d.ts +0 -1
  59. package/dist/services/context.js +0 -7
  60. package/dist/services/extractor.js +10 -82
  61. package/dist/services/fetcher/agents.d.ts +2 -2
  62. package/dist/services/fetcher/agents.js +34 -95
  63. package/dist/services/fetcher/dns-selection.d.ts +2 -0
  64. package/dist/services/fetcher/dns-selection.js +72 -0
  65. package/dist/services/fetcher/interceptors.d.ts +0 -22
  66. package/dist/services/fetcher/interceptors.js +30 -13
  67. package/dist/services/fetcher/redirects.js +4 -3
  68. package/dist/services/fetcher/response.js +66 -31
  69. package/dist/services/fetcher.d.ts +1 -3
  70. package/dist/services/fetcher.js +14 -33
  71. package/dist/services/fifo-queue.d.ts +8 -0
  72. package/dist/services/fifo-queue.js +25 -0
  73. package/dist/services/logger.js +2 -2
  74. package/dist/services/metadata-collector.d.ts +1 -9
  75. package/dist/services/metadata-collector.js +71 -2
  76. package/dist/services/transform-worker-pool.d.ts +4 -14
  77. package/dist/services/transform-worker-pool.js +177 -129
  78. package/dist/services/transform-worker-types.d.ts +32 -0
  79. package/dist/services/transform-worker-types.js +14 -0
  80. package/dist/tools/handlers/fetch-markdown.tool.d.ts +3 -4
  81. package/dist/tools/handlers/fetch-markdown.tool.js +20 -72
  82. package/dist/tools/handlers/fetch-single.shared.d.ts +1 -20
  83. package/dist/tools/handlers/fetch-single.shared.js +44 -87
  84. package/dist/tools/handlers/fetch-url.tool.d.ts +1 -1
  85. package/dist/tools/handlers/fetch-url.tool.js +46 -123
  86. package/dist/tools/index.js +21 -40
  87. package/dist/tools/schemas.d.ts +1 -51
  88. package/dist/tools/schemas.js +2 -108
  89. package/dist/tools/utils/cached-markdown.d.ts +5 -0
  90. package/dist/tools/utils/cached-markdown.js +46 -0
  91. package/dist/tools/utils/content-shaping.d.ts +4 -0
  92. package/dist/tools/utils/content-shaping.js +52 -0
  93. package/dist/tools/utils/content-transform.d.ts +2 -17
  94. package/dist/tools/utils/content-transform.js +120 -114
  95. package/dist/tools/utils/fetch-pipeline.d.ts +0 -8
  96. package/dist/tools/utils/fetch-pipeline.js +65 -62
  97. package/dist/tools/utils/inline-content.d.ts +1 -2
  98. package/dist/tools/utils/inline-content.js +4 -7
  99. package/dist/transformers/markdown.transformer.js +109 -34
  100. package/dist/utils/cached-payload.d.ts +7 -0
  101. package/dist/utils/cached-payload.js +36 -0
  102. package/dist/utils/error-utils.js +1 -1
  103. package/dist/utils/filename-generator.js +21 -10
  104. package/dist/utils/guards.d.ts +1 -0
  105. package/dist/utils/guards.js +3 -0
  106. package/dist/utils/header-normalizer.d.ts +0 -3
  107. package/dist/utils/header-normalizer.js +3 -3
  108. package/dist/utils/tool-error-handler.d.ts +2 -2
  109. package/dist/utils/tool-error-handler.js +11 -38
  110. package/dist/utils/url-transformer.d.ts +7 -0
  111. package/dist/utils/url-transformer.js +147 -0
  112. package/dist/utils/url-validator.d.ts +1 -2
  113. package/dist/utils/url-validator.js +20 -93
  114. package/dist/workers/content-transform.worker.d.ts +1 -0
  115. package/dist/workers/content-transform.worker.js +40 -0
  116. package/package.json +13 -16
@@ -27,14 +27,7 @@ function publishFetchEvent(event) {
27
27
  // Avoid crashing the publisher if a subscriber throws.
28
28
  }
29
29
  }
30
- export function startFetchTelemetry(url, method) {
31
- const safeUrl = redactUrl(url);
32
- const context = {
33
- requestId: randomUUID(),
34
- startTime: performance.now(),
35
- url: safeUrl,
36
- method: method.toUpperCase(),
37
- };
30
+ function publishAndLogFetchStart(context) {
38
31
  publishFetchEvent({
39
32
  v: 1,
40
33
  type: 'start',
@@ -47,6 +40,16 @@ export function startFetchTelemetry(url, method) {
47
40
  method: context.method,
48
41
  url: context.url,
49
42
  });
43
+ }
44
+ export function startFetchTelemetry(url, method) {
45
+ const safeUrl = redactUrl(url);
46
+ const context = {
47
+ requestId: randomUUID(),
48
+ startTime: performance.now(),
49
+ url: safeUrl,
50
+ method: method.toUpperCase(),
51
+ };
52
+ publishAndLogFetchStart(context);
50
53
  return context;
51
54
  }
52
55
  export function recordFetchResponse(context, response, contentSize) {
@@ -92,10 +95,10 @@ function logSlowRequestIfNeeded(context, duration) {
92
95
  duration: `${Math.round(duration)}ms`,
93
96
  });
94
97
  }
95
- export function recordFetchError(context, error, status) {
96
- const duration = performance.now() - context.startTime;
97
- const err = error instanceof Error ? error : new Error(String(error));
98
- const code = isSystemError(err) ? err.code : undefined;
98
+ function normalizeError(error) {
99
+ return error instanceof Error ? error : new Error(String(error));
100
+ }
101
+ function buildFetchErrorEvent(context, err, duration, status) {
99
102
  const event = {
100
103
  v: 1,
101
104
  type: 'error',
@@ -104,14 +107,28 @@ export function recordFetchError(context, error, status) {
104
107
  error: err.message,
105
108
  duration,
106
109
  };
110
+ addOptionalErrorFields(event, err, status);
111
+ return event;
112
+ }
113
+ function addOptionalErrorFields(event, err, status) {
114
+ const code = isSystemError(err) ? err.code : undefined;
107
115
  if (code !== undefined) {
108
116
  event.code = code;
109
117
  }
110
118
  if (status !== undefined) {
111
119
  event.status = status;
112
120
  }
121
+ }
122
+ function selectErrorLogger(status) {
123
+ return status === 429 ? logWarn : logError;
124
+ }
125
+ export function recordFetchError(context, error, status) {
126
+ const duration = performance.now() - context.startTime;
127
+ const err = normalizeError(error);
128
+ const event = buildFetchErrorEvent(context, err, duration, status);
113
129
  publishFetchEvent(event);
114
- const log = status === 429 ? logWarn : logError;
130
+ const log = selectErrorLogger(status);
131
+ const code = isSystemError(err) ? err.code : undefined;
115
132
  log('HTTP Request Error', {
116
133
  requestId: context.requestId,
117
134
  url: context.url,
@@ -1,5 +1,6 @@
1
1
  import { FetchError } from '../../errors/app-error.js';
2
2
  import { createErrorWithCode } from '../../utils/error-utils.js';
3
+ import { isRecord } from '../../utils/guards.js';
3
4
  import { validateAndNormalizeUrl } from '../../utils/url-validator.js';
4
5
  const REDIRECT_STATUSES = new Set([301, 302, 303, 307, 308]);
5
6
  function isRedirectStatus(status) {
@@ -15,7 +16,7 @@ async function performFetchCycle(currentUrl, init, redirectLimit, redirectCount)
15
16
  void response.body?.cancel();
16
17
  return {
17
18
  response,
18
- nextUrl: await resolveRedirectTarget(currentUrl, location),
19
+ nextUrl: resolveRedirectTarget(currentUrl, location),
19
20
  };
20
21
  }
21
22
  function assertRedirectWithinLimit(response, currentUrl, redirectLimit, redirectCount) {
@@ -32,11 +33,11 @@ function getRedirectLocation(response, currentUrl) {
32
33
  throw new FetchError('Redirect response missing Location header', currentUrl);
33
34
  }
34
35
  function annotateRedirectError(error, url) {
35
- if (!error || typeof error !== 'object')
36
+ if (!isRecord(error))
36
37
  return;
37
38
  error.requestUrl = url;
38
39
  }
39
- async function resolveRedirectTarget(baseUrl, location) {
40
+ function resolveRedirectTarget(baseUrl, location) {
40
41
  if (!URL.canParse(location, baseUrl)) {
41
42
  throw createErrorWithCode('Invalid redirect target', 'EBADREDIRECT');
42
43
  }
@@ -1,5 +1,3 @@
1
- import { Readable, Writable } from 'node:stream';
2
- import { pipeline } from 'node:stream/promises';
3
1
  import { FetchError } from '../../errors/app-error.js';
4
2
  function assertContentLengthWithinLimit(response, url, maxBytes) {
5
3
  const contentLengthHeader = response.headers.get('content-length');
@@ -9,51 +7,88 @@ function assertContentLengthWithinLimit(response, url, maxBytes) {
9
7
  if (Number.isNaN(contentLength) || contentLength <= maxBytes) {
10
8
  return;
11
9
  }
10
+ void response.body?.cancel();
12
11
  throw new FetchError(`Response exceeds maximum size of ${maxBytes} bytes`, url);
13
12
  }
14
- async function readStreamWithLimit(stream, url, maxBytes, signal) {
15
- const decoder = new TextDecoder();
16
- let total = 0;
17
- let text = '';
18
- const toBuffer = (chunk) => {
19
- if (typeof chunk === 'string') {
20
- return Buffer.from(chunk);
21
- }
22
- return Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
13
+ function createReadState() {
14
+ return {
15
+ decoder: new TextDecoder(),
16
+ parts: [],
17
+ total: 0,
23
18
  };
24
- const sink = new Writable({
25
- write(chunk, _encoding, callback) {
26
- const buffer = toBuffer(chunk);
27
- total += buffer.length;
28
- if (total > maxBytes) {
29
- callback(new FetchError(`Response exceeds maximum size of ${maxBytes} bytes`, url));
30
- return;
31
- }
32
- text += decoder.decode(buffer, { stream: true });
33
- callback();
34
- },
35
- final(callback) {
36
- text += decoder.decode();
37
- callback();
38
- },
19
+ }
20
+ function appendChunk(state, chunk, maxBytes, url) {
21
+ state.total += chunk.byteLength;
22
+ if (state.total > maxBytes) {
23
+ throw new FetchError(`Response exceeds maximum size of ${maxBytes} bytes`, url);
24
+ }
25
+ const decoded = state.decoder.decode(chunk, { stream: true });
26
+ if (decoded)
27
+ state.parts.push(decoded);
28
+ }
29
+ function finalizeRead(state) {
30
+ const decoded = state.decoder.decode();
31
+ if (decoded)
32
+ state.parts.push(decoded);
33
+ }
34
+ function createAbortError(url) {
35
+ return new FetchError('Request was aborted during response read', url, 499, {
36
+ reason: 'aborted',
39
37
  });
38
+ }
39
+ async function cancelReaderQuietly(reader) {
40
40
  try {
41
- const readable = Readable.fromWeb(stream, { signal });
42
- await pipeline(readable, sink, { signal });
41
+ await reader.cancel();
42
+ }
43
+ catch {
44
+ // Ignore cancel errors; we're already failing this read.
45
+ }
46
+ }
47
+ async function throwIfAborted(signal, url, reader) {
48
+ if (!signal?.aborted)
49
+ return;
50
+ await cancelReaderQuietly(reader);
51
+ throw createAbortError(url);
52
+ }
53
+ async function readAllChunks(reader, state, url, maxBytes, signal) {
54
+ await throwIfAborted(signal, url, reader);
55
+ let result = await reader.read();
56
+ while (!result.done) {
57
+ appendChunk(state, result.value, maxBytes, url);
58
+ await throwIfAborted(signal, url, reader);
59
+ result = await reader.read();
60
+ }
61
+ }
62
+ async function readStreamWithLimit(stream, url, maxBytes, signal) {
63
+ const state = createReadState();
64
+ const reader = stream.getReader();
65
+ try {
66
+ await readAllChunks(reader, state, url, maxBytes, signal);
43
67
  }
44
68
  catch (error) {
69
+ if (!signal?.aborted) {
70
+ await cancelReaderQuietly(reader);
71
+ }
45
72
  if (signal?.aborted) {
46
- throw new FetchError('Request was aborted during response read', url, 499, { reason: 'aborted' });
73
+ throw createAbortError(url);
47
74
  }
48
75
  throw error;
49
76
  }
50
- return { text, size: total };
77
+ finally {
78
+ reader.releaseLock();
79
+ }
80
+ finalizeRead(state);
81
+ return { text: state.parts.join(''), size: state.total };
51
82
  }
52
83
  export async function readResponseText(response, url, maxBytes, signal) {
53
84
  assertContentLengthWithinLimit(response, url, maxBytes);
54
85
  if (!response.body) {
55
86
  const text = await response.text();
56
- return { text, size: Buffer.byteLength(text) };
87
+ const size = Buffer.byteLength(text);
88
+ if (size > maxBytes) {
89
+ throw new FetchError(`Response exceeds maximum size of ${maxBytes} bytes`, url);
90
+ }
91
+ return { text, size };
57
92
  }
58
93
  return readStreamWithLimit(response.body, url, maxBytes, signal);
59
94
  }
@@ -1,4 +1,2 @@
1
1
  import type { FetchOptions } from '../config/types/runtime.js';
2
- import { destroyAgents } from './fetcher/agents.js';
3
- export { destroyAgents };
4
- export declare function fetchNormalizedUrlWithRetry(normalizedUrl: string, options?: FetchOptions, maxRetries?: number): Promise<string>;
2
+ export declare function fetchNormalizedUrl(normalizedUrl: string, options?: FetchOptions): Promise<string>;
@@ -1,12 +1,9 @@
1
1
  import { config } from '../config/index.js';
2
- import { normalizeHeaderRecord } from '../utils/header-normalizer.js';
3
- import { destroyAgents, dispatcher } from './fetcher/agents.js';
2
+ import { dispatcher } from './fetcher/agents.js';
4
3
  import { createHttpError, createRateLimitError, mapFetchError, } from './fetcher/errors.js';
5
4
  import { recordFetchError, recordFetchResponse, startFetchTelemetry, } from './fetcher/interceptors.js';
6
5
  import { fetchWithRedirects } from './fetcher/redirects.js';
7
6
  import { readResponseText } from './fetcher/response.js';
8
- import { executeWithRetry } from './fetcher/retry-policy.js';
9
- export { destroyAgents };
10
7
  const DEFAULT_HEADERS = {
11
8
  'User-Agent': config.fetcher.userAgent,
12
9
  Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
@@ -14,15 +11,8 @@ const DEFAULT_HEADERS = {
14
11
  'Accept-Encoding': 'gzip, deflate, br',
15
12
  Connection: 'keep-alive',
16
13
  };
17
- function buildHeaders(customHeaders) {
18
- const headers = new Headers(DEFAULT_HEADERS);
19
- const sanitized = normalizeHeaderRecord(customHeaders, config.security.blockedHeaders);
20
- if (sanitized) {
21
- for (const [key, value] of Object.entries(sanitized)) {
22
- headers.set(key, value);
23
- }
24
- }
25
- return headers;
14
+ function buildHeaders() {
15
+ return new Headers(DEFAULT_HEADERS);
26
16
  }
27
17
  function buildRequestSignal(timeoutMs, external) {
28
18
  const timeoutSignal = AbortSignal.timeout(timeoutMs);
@@ -54,9 +44,7 @@ async function handleFetchResponse(response, finalUrl, telemetry, signal) {
54
44
  async function fetchWithTelemetry(normalizedUrl, requestInit, timeoutMs) {
55
45
  const telemetry = startFetchTelemetry(normalizedUrl, 'GET');
56
46
  try {
57
- const { response, url: finalUrl } = await fetchWithRedirects(normalizedUrl, requestInit, config.fetcher.maxRedirects);
58
- telemetry.url = finalUrl;
59
- return await handleFetchResponse(response, finalUrl, telemetry, requestInit.signal ?? undefined);
47
+ return await fetchAndHandle(normalizedUrl, requestInit, telemetry);
60
48
  }
61
49
  catch (error) {
62
50
  const mapped = mapFetchError(error, normalizedUrl, timeoutMs);
@@ -65,22 +53,15 @@ async function fetchWithTelemetry(normalizedUrl, requestInit, timeoutMs) {
65
53
  throw mapped;
66
54
  }
67
55
  }
68
- export async function fetchNormalizedUrlWithRetry(normalizedUrl, options, maxRetries = 3) {
69
- const context = buildRequestContext(options);
70
- return executeWithRetry(normalizedUrl, maxRetries, async () => runFetch(normalizedUrl, context), context.signal);
71
- }
72
- function buildRequestContext(options) {
73
- const context = {
74
- timeoutMs: options?.timeout ?? config.fetcher.timeout,
75
- headers: buildHeaders(options?.customHeaders),
76
- };
77
- if (options?.signal) {
78
- context.signal = options.signal;
79
- }
80
- return context;
56
+ async function fetchAndHandle(normalizedUrl, requestInit, telemetry) {
57
+ const { response, url: finalUrl } = await fetchWithRedirects(normalizedUrl, requestInit, config.fetcher.maxRedirects);
58
+ telemetry.url = finalUrl;
59
+ return handleFetchResponse(response, finalUrl, telemetry, requestInit.signal ?? undefined);
81
60
  }
82
- async function runFetch(normalizedUrl, context) {
83
- const signal = buildRequestSignal(context.timeoutMs, context.signal);
84
- const requestInit = buildRequestInit(context.headers, signal);
85
- return fetchWithTelemetry(normalizedUrl, requestInit, context.timeoutMs);
61
+ export async function fetchNormalizedUrl(normalizedUrl, options) {
62
+ const timeoutMs = config.fetcher.timeout;
63
+ const headers = buildHeaders();
64
+ const signal = buildRequestSignal(timeoutMs, options?.signal);
65
+ const requestInit = buildRequestInit(headers, signal);
66
+ return fetchWithTelemetry(normalizedUrl, requestInit, timeoutMs);
86
67
  }
@@ -0,0 +1,8 @@
1
+ export declare class FifoQueue<T> {
2
+ private items;
3
+ private head;
4
+ get length(): number;
5
+ push(item: T): void;
6
+ shift(): T | undefined;
7
+ clear(): void;
8
+ }
@@ -0,0 +1,25 @@
1
+ export class FifoQueue {
2
+ items = [];
3
+ head = 0;
4
+ get length() {
5
+ return this.items.length - this.head;
6
+ }
7
+ push(item) {
8
+ this.items.push(item);
9
+ }
10
+ shift() {
11
+ if (this.head >= this.items.length)
12
+ return undefined;
13
+ const item = this.items[this.head];
14
+ this.head += 1;
15
+ if (this.head > 64 && this.head * 2 >= this.items.length) {
16
+ this.items = this.items.slice(this.head);
17
+ this.head = 0;
18
+ }
19
+ return item;
20
+ }
21
+ clear() {
22
+ this.items.length = 0;
23
+ this.head = 0;
24
+ }
25
+ }
@@ -18,10 +18,10 @@ function formatLogEntry(level, message, meta) {
18
18
  return `[${createTimestamp()}] ${level.toUpperCase()}: ${message}${formatMetadata(meta)}`;
19
19
  }
20
20
  function shouldLog(level) {
21
- if (!config.logging.enabled)
22
- return false;
21
+ // Debug logs only when LOG_LEVEL=debug
23
22
  if (level === 'debug')
24
23
  return config.logging.level === 'debug';
24
+ // All other levels always log
25
25
  return true;
26
26
  }
27
27
  export function logInfo(message, meta) {
@@ -1,10 +1,2 @@
1
1
  import type { ExtractedMetadata } from '../config/types/content.js';
2
- export type MetaSource = 'og' | 'twitter' | 'standard';
3
- export type MetaField = keyof ExtractedMetadata;
4
- export interface MetaCollectorState {
5
- title: Partial<Record<MetaSource, string>>;
6
- description: Partial<Record<MetaSource, string>>;
7
- author: Partial<Record<MetaSource, string>>;
8
- }
9
- export declare function createMetaCollectorState(): MetaCollectorState;
10
- export declare function resolveMetaField(state: MetaCollectorState, field: MetaField): string | undefined;
2
+ export declare function extractMetadata(document: Document): ExtractedMetadata;
@@ -1,11 +1,80 @@
1
- export function createMetaCollectorState() {
1
+ function createMetaCollectorState() {
2
2
  return {
3
3
  title: {},
4
4
  description: {},
5
5
  author: {},
6
6
  };
7
7
  }
8
- export function resolveMetaField(state, field) {
8
+ function resolveMetaField(state, field) {
9
9
  const sources = state[field];
10
10
  return sources.og ?? sources.twitter ?? sources.standard;
11
11
  }
12
+ function parseOpenGraphKey(property) {
13
+ if (!property?.startsWith('og:'))
14
+ return null;
15
+ const key = property.replace('og:', '');
16
+ return key === 'title' || key === 'description' ? key : null;
17
+ }
18
+ function parseTwitterKey(name) {
19
+ if (!name?.startsWith('twitter:'))
20
+ return null;
21
+ const key = name.replace('twitter:', '');
22
+ return key === 'title' || key === 'description' ? key : null;
23
+ }
24
+ function parseStandardKey(name) {
25
+ if (name === 'description')
26
+ return 'description';
27
+ if (name === 'author')
28
+ return 'author';
29
+ return null;
30
+ }
31
+ function collectMetaTag(state, tag) {
32
+ const content = tag.getAttribute('content')?.trim();
33
+ if (!content)
34
+ return;
35
+ const ogKey = parseOpenGraphKey(tag.getAttribute('property'));
36
+ if (ogKey) {
37
+ state[ogKey].og = content;
38
+ return;
39
+ }
40
+ const name = tag.getAttribute('name');
41
+ const twitterKey = parseTwitterKey(name);
42
+ if (twitterKey) {
43
+ state[twitterKey].twitter = content;
44
+ return;
45
+ }
46
+ const standardKey = parseStandardKey(name);
47
+ if (standardKey) {
48
+ state[standardKey].standard = content;
49
+ }
50
+ }
51
+ function scanMetaTags(document, state) {
52
+ const metaTags = document.querySelectorAll('meta');
53
+ for (const tag of metaTags) {
54
+ collectMetaTag(state, tag);
55
+ }
56
+ }
57
+ function ensureTitleFallback(document, state) {
58
+ if (state.title.standard)
59
+ return;
60
+ const titleEl = document.querySelector('title');
61
+ if (titleEl?.textContent) {
62
+ state.title.standard = titleEl.textContent.trim();
63
+ }
64
+ }
65
+ export function extractMetadata(document) {
66
+ const state = createMetaCollectorState();
67
+ scanMetaTags(document, state);
68
+ ensureTitleFallback(document, state);
69
+ const metadata = {};
70
+ const title = resolveMetaField(state, 'title');
71
+ const description = resolveMetaField(state, 'description');
72
+ const author = resolveMetaField(state, 'author');
73
+ if (title !== undefined)
74
+ metadata.title = title;
75
+ if (description !== undefined)
76
+ metadata.description = description;
77
+ if (author !== undefined)
78
+ metadata.author = author;
79
+ return metadata;
80
+ }
@@ -1,14 +1,4 @@
1
- import type { JsonlTransformResult, MarkdownTransformResult, TransformOptions } from '../config/types/content.js';
2
- type TransformMode = 'jsonl' | 'markdown' | 'markdown-blocks';
3
- export interface TransformJob {
4
- mode: TransformMode;
5
- html: string;
6
- url: string;
7
- options: TransformOptions & {
8
- includeContentBlocks?: boolean;
9
- };
10
- }
11
- type TransformResult = JsonlTransformResult | MarkdownTransformResult;
12
- export declare function runTransformInWorker(job: TransformJob): Promise<TransformResult | null>;
13
- export declare function destroyTransformWorkers(): void;
14
- export {};
1
+ import type { MarkdownTransformResult } from '../config/types/content.js';
2
+ import type { WorkerTransformRequest } from './transform-worker-types.js';
3
+ export declare function transformInWorker(request: Omit<WorkerTransformRequest, 'id'>, signal?: AbortSignal): Promise<MarkdownTransformResult>;
4
+ export declare function destroyTransformWorkers(): Promise<void>;