@j0hanz/fetch-url-mcp 1.11.7 → 1.11.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +25 -33
- package/dist/http/health.d.ts.map +1 -1
- package/dist/http/health.js +0 -2
- package/dist/http/native.d.ts.map +1 -1
- package/dist/http/native.js +0 -25
- package/dist/lib/config.d.ts +0 -7
- package/dist/lib/config.d.ts.map +1 -1
- package/dist/lib/config.js +0 -9
- package/dist/lib/core.d.ts +0 -3
- package/dist/lib/core.d.ts.map +1 -1
- package/dist/lib/core.js +0 -7
- package/dist/lib/fetch-pipeline.d.ts +1 -14
- package/dist/lib/fetch-pipeline.d.ts.map +1 -1
- package/dist/lib/fetch-pipeline.js +4 -147
- package/dist/lib/http.d.ts +0 -3
- package/dist/lib/http.d.ts.map +1 -1
- package/dist/lib/http.js +2 -105
- package/dist/lib/utils.d.ts +0 -2
- package/dist/lib/utils.d.ts.map +1 -1
- package/dist/lib/utils.js +1 -36
- package/dist/resources/index.d.ts +1 -23
- package/dist/resources/index.d.ts.map +1 -1
- package/dist/resources/index.js +3 -294
- package/dist/schemas.d.ts +0 -14
- package/dist/schemas.d.ts.map +1 -1
- package/dist/schemas.js +1 -77
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +1 -2
- package/dist/tools/fetch-url.d.ts +0 -2
- package/dist/tools/fetch-url.d.ts.map +1 -1
- package/dist/tools/fetch-url.js +12 -43
- package/dist/transform/shared.js +4 -4
- package/dist/transform/transform.d.ts +1 -1
- package/dist/transform/transform.d.ts.map +1 -1
- package/dist/transform/transform.js +10 -10
- package/dist/transform/types.d.ts +2 -2
- package/dist/transform/types.d.ts.map +1 -1
- package/dist/transform/worker-pool.d.ts +3 -3
- package/dist/transform/worker-pool.d.ts.map +1 -1
- package/dist/transform/worker-pool.js +2 -2
- package/package.json +1 -1
- package/dist/lib/cache.d.ts +0 -48
- package/dist/lib/cache.d.ts.map +0 -1
- package/dist/lib/cache.js +0 -264
package/dist/schemas.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { z } from 'zod';
|
|
2
|
-
import { config
|
|
2
|
+
import { config } from './lib/core.js';
|
|
3
3
|
const URL_FIELD_MAX_LENGTH = 2048;
|
|
4
4
|
const METADATA_LIMITS = {
|
|
5
5
|
title: 512,
|
|
@@ -49,61 +49,12 @@ export function normalizeExtractedMetadata(value) {
|
|
|
49
49
|
export function normalizePageTitle(value) {
|
|
50
50
|
return normalizeWithSchema(pageTitleSchema, value);
|
|
51
51
|
}
|
|
52
|
-
function normalizeString(value) {
|
|
53
|
-
return typeof value === 'string' ? value : undefined;
|
|
54
|
-
}
|
|
55
|
-
function normalizeBoolean(value) {
|
|
56
|
-
return typeof value === 'boolean' ? value : undefined;
|
|
57
|
-
}
|
|
58
|
-
export const cachedPayloadValueSchema = z.strictObject({
|
|
59
|
-
markdown: z.string(),
|
|
60
|
-
title: pageTitleSchema.optional(),
|
|
61
|
-
metadata: extractedMetadataSchema.optional(),
|
|
62
|
-
truncated: z.boolean().optional(),
|
|
63
|
-
});
|
|
64
|
-
const cachedPayloadCompatSchema = z.object({
|
|
65
|
-
markdown: z.unknown().transform((value) => normalizeString(value)),
|
|
66
|
-
title: z
|
|
67
|
-
.unknown()
|
|
68
|
-
.transform((value) => normalizePageTitle(value))
|
|
69
|
-
.optional(),
|
|
70
|
-
metadata: z
|
|
71
|
-
.unknown()
|
|
72
|
-
.transform((value) => normalizeExtractedMetadata(value))
|
|
73
|
-
.optional(),
|
|
74
|
-
truncated: z
|
|
75
|
-
.unknown()
|
|
76
|
-
.transform((value) => normalizeBoolean(value))
|
|
77
|
-
.optional(),
|
|
78
|
-
});
|
|
79
|
-
const cachedPayloadSchema = cachedPayloadCompatSchema
|
|
80
|
-
.superRefine((value, ctx) => {
|
|
81
|
-
if (typeof value.markdown === 'string')
|
|
82
|
-
return;
|
|
83
|
-
ctx.addIssue({
|
|
84
|
-
code: 'custom',
|
|
85
|
-
message: 'Missing markdown',
|
|
86
|
-
path: ['markdown'],
|
|
87
|
-
});
|
|
88
|
-
})
|
|
89
|
-
.transform((value) => cachedPayloadValueSchema.parse({
|
|
90
|
-
markdown: value.markdown,
|
|
91
|
-
...(value.title !== undefined ? { title: value.title } : {}),
|
|
92
|
-
...(value.metadata ? { metadata: value.metadata } : {}),
|
|
93
|
-
...(value.truncated !== undefined
|
|
94
|
-
? { truncated: value.truncated }
|
|
95
|
-
: {}),
|
|
96
|
-
}));
|
|
97
52
|
export const fetchUrlInputSchema = z.strictObject({
|
|
98
53
|
url: z
|
|
99
54
|
.httpUrl('Expected HTTP or HTTPS URL')
|
|
100
55
|
.min(1, 'URL required')
|
|
101
56
|
.max(config.constants.maxUrlLength, `URL exceeds ${config.constants.maxUrlLength} chars`)
|
|
102
57
|
.describe(`Target URL. Max ${config.constants.maxUrlLength} chars. Example: https://example.com`),
|
|
103
|
-
forceRefresh: z
|
|
104
|
-
.boolean('Expected boolean')
|
|
105
|
-
.optional()
|
|
106
|
-
.describe('Bypass cache and fetch fresh content.'),
|
|
107
58
|
}, 'Invalid input');
|
|
108
59
|
export const fetchUrlOutputSchema = z.strictObject({
|
|
109
60
|
url: z.httpUrl().max(config.constants.maxUrlLength).describe('Fetched URL.'),
|
|
@@ -131,7 +82,6 @@ export const fetchUrlOutputSchema = z.strictObject({
|
|
|
131
82
|
: z.string())
|
|
132
83
|
.optional()
|
|
133
84
|
.describe('Extracted Markdown. May be truncated (check truncated field).'),
|
|
134
|
-
fromCache: z.boolean().optional().describe('True if served from cache.'),
|
|
135
85
|
fetchedAt: z.iso.datetime().optional().describe('ISO timestamp of fetch.'),
|
|
136
86
|
contentSize: z
|
|
137
87
|
.number()
|
|
@@ -142,29 +92,3 @@ export const fetchUrlOutputSchema = z.strictObject({
|
|
|
142
92
|
.describe('Markdown fragment size before final inline truncation.'),
|
|
143
93
|
truncated: z.boolean().optional().describe('True if markdown was truncated.'),
|
|
144
94
|
});
|
|
145
|
-
export function parseCachedPayload(raw) {
|
|
146
|
-
try {
|
|
147
|
-
const parsed = JSON.parse(raw);
|
|
148
|
-
const result = cachedPayloadSchema.safeParse(parsed);
|
|
149
|
-
if (!result.success) {
|
|
150
|
-
logWarn('Rejected invalid cached payload', {
|
|
151
|
-
issues: result.error.issues.map((issue) => ({
|
|
152
|
-
path: issue.path,
|
|
153
|
-
message: issue.message,
|
|
154
|
-
code: issue.code,
|
|
155
|
-
})),
|
|
156
|
-
});
|
|
157
|
-
return null;
|
|
158
|
-
}
|
|
159
|
-
return result.data;
|
|
160
|
-
}
|
|
161
|
-
catch {
|
|
162
|
-
return null;
|
|
163
|
-
}
|
|
164
|
-
}
|
|
165
|
-
export function stringifyCachedPayload(payload) {
|
|
166
|
-
return JSON.stringify(cachedPayloadValueSchema.parse(payload));
|
|
167
|
-
}
|
|
168
|
-
export function resolveCachedPayloadContent(payload) {
|
|
169
|
-
return typeof payload.markdown === 'string' ? payload.markdown : null;
|
|
170
|
-
}
|
package/dist/server.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"server.d.ts","sourceRoot":"","sources":["../src/server.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAC;
|
|
1
|
+
{"version":3,"file":"server.d.ts","sourceRoot":"","sources":["../src/server.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAC;AA8GpE,wBAAsB,eAAe,IAAI,OAAO,CAAC,SAAS,CAAC,CAE1D;AAkDD,wBAAsB,6BAA6B,IAAI,OAAO,CAAC,SAAS,CAAC,CAExE;AA4FD,wBAAsB,gBAAgB,IAAI,OAAO,CAAC,IAAI,CAAC,CAMtD"}
|
package/dist/server.js
CHANGED
|
@@ -7,7 +7,7 @@ import { config } from './lib/core.js';
|
|
|
7
7
|
import { getSessionId, logError, logInfo, setLogLevel, setMcpServer, } from './lib/core.js';
|
|
8
8
|
import { setTaskToolCallCapability } from './lib/mcp-interop.js';
|
|
9
9
|
import { toError } from './lib/utils.js';
|
|
10
|
-
import { buildServerInstructions,
|
|
10
|
+
import { buildServerInstructions, registerGetHelpPrompt, registerInstructionResource, } from './resources/index.js';
|
|
11
11
|
import { abortAllTaskExecutions, registerTaskHandlers, } from './tasks/handlers.js';
|
|
12
12
|
import { registerTools as registerFetchUrlTool } from './tools/fetch-url.js';
|
|
13
13
|
import { shutdownTransformWorkerPool } from './transform/transform.js';
|
|
@@ -89,7 +89,6 @@ async function createMcpServerWithOptions(options) {
|
|
|
89
89
|
const toolControls = registerFetchUrlTool(server);
|
|
90
90
|
registerGetHelpPrompt(server, serverInstructions, localIcon);
|
|
91
91
|
registerInstructionResource(server, serverInstructions, localIcon);
|
|
92
|
-
registerCacheResourceTemplate(server, localIcon);
|
|
93
92
|
const taskRegistration = registerTaskHandlers(server, {
|
|
94
93
|
requireInterception: config.tasks.requireInterception,
|
|
95
94
|
});
|
|
@@ -18,13 +18,11 @@ export declare function getFetchCompletionStatusMessage(result: ServerResult): s
|
|
|
18
18
|
export declare class FetchUrlProgressPlan {
|
|
19
19
|
private readonly reporter;
|
|
20
20
|
private readonly context;
|
|
21
|
-
private cacheStatus;
|
|
22
21
|
constructor(reporter: ProgressReporter, context: string);
|
|
23
22
|
reportStart(): void;
|
|
24
23
|
reportStage(stage: SharedFetchStage): void;
|
|
25
24
|
reportSuccess(contentSize: number): void;
|
|
26
25
|
reportFailure(cancelled: boolean): void;
|
|
27
|
-
private updateCacheStatus;
|
|
28
26
|
private mapStage;
|
|
29
27
|
}
|
|
30
28
|
export declare function fetchUrlToolHandler(input: FetchUrlInput, extra?: ToolHandlerExtra): Promise<ToolResponseBase>;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"fetch-url.d.ts","sourceRoot":"","sources":["../../src/tools/fetch-url.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAC;AACzE,OAAO,KAAK,EACV,YAAY,EAEb,MAAM,oCAAoC,CAAC;AAE5C,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,oCAAoC,CAAC;AACvE,OAAO,KAAK,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;
|
|
1
|
+
{"version":3,"file":"fetch-url.d.ts","sourceRoot":"","sources":["../../src/tools/fetch-url.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAC;AACzE,OAAO,KAAK,EACV,YAAY,EAEb,MAAM,oCAAoC,CAAC;AAE5C,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,oCAAoC,CAAC;AACvE,OAAO,KAAK,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAY7B,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,0BAA0B,CAAC;AACjE,OAAO,EAGL,KAAK,gBAAgB,EACrB,KAAK,gBAAgB,EACtB,MAAM,uBAAuB,CAAC;AAU/B,OAAO,EACL,mBAAmB,EAIpB,MAAM,eAAe,CAAC;AAEvB,OAAO,EAIL,KAAK,sBAAsB,EAE5B,MAAM,sBAAsB,CAAC;AAE9B,KAAK,aAAa,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,mBAAmB,CAAC,CAAC;AAEzD,UAAU,gBAAgB;IACxB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;IACvB,OAAO,EAAE,YAAY,EAAE,CAAC;IACxB,iBAAiB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,SAAS,CAAC;IACxD,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB;AAED,eAAO,MAAM,mBAAmB,cAAc,CAAC;AAoI/C,wBAAgB,+BAA+B,CAC7C,MAAM,EAAE,YAAY,GACnB,MAAM,GAAG,SAAS,CAUpB;AAED,qBAAa,oBAAoB;IAE7B,OAAO,CAAC,QAAQ,CAAC,QAAQ;IACzB,OAAO,CAAC,QAAQ,CAAC,OAAO;gBADP,QAAQ,EAAE,gBAAgB,EAC1B,OAAO,EAAE,MAAM;IAGlC,WAAW,IAAI,IAAI;IAInB,WAAW,CAAC,KAAK,EAAE,gBAAgB,GAAG,IAAI;IAM1C,aAAa,CAAC,WAAW,EAAE,MAAM,GAAG,IAAI;IAIxC,aAAa,CAAC,SAAS,EAAE,OAAO,GAAG,IAAI;IAIvC,OAAO,CAAC,QAAQ;CAqBjB;AAmED,wBAAsB,mBAAmB,CACvC,KAAK,EAAE,aAAa,EACpB,KAAK,CAAC,EAAE,gBAAgB,GACvB,OAAO,CAAC,gBAAgB,CAAC,CAQ3B;AAqBD,MAAM,WAAW,wBAAwB;IACvC,cAAc,EAAE,CAAC,OAAO,EAAE,sBAAsB,KAAK,IAAI,CAAC;CAC3D;AAqBD,wBAAgB,aAAa,CAAC,MAAM,EAAE,SAAS,GAAG,wBAAwB,CAoCzE"}
|
package/dist/tools/fetch-url.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { ErrorCode, McpError } from '@modelcontextprotocol/sdk/types.js';
|
|
2
2
|
import { config, logDebug, logError, logWarn } from '../lib/core.js';
|
|
3
|
-
import { finalizeInlineMarkdown, markdownTransform,
|
|
3
|
+
import { finalizeInlineMarkdown, markdownTransform, performSharedFetch, withSignal, } from '../lib/fetch-pipeline.js';
|
|
4
4
|
import { createProgressReporter, handleToolError, } from '../lib/mcp-interop.js';
|
|
5
5
|
import { composeAbortSignal, isAbortError, isObject, parseUrlOrNull, toError, } from '../lib/utils.js';
|
|
6
6
|
import { formatZodError } from '../lib/zod.js';
|
|
@@ -57,7 +57,6 @@ function buildStructuredContent(pipeline, inlineResult, inputUrl) {
|
|
|
57
57
|
...(title ? { title } : {}),
|
|
58
58
|
...(metadata ? { metadata } : {}),
|
|
59
59
|
markdown,
|
|
60
|
-
fromCache: pipeline.fromCache,
|
|
61
60
|
fetchedAt: pipeline.fetchedAt,
|
|
62
61
|
contentSize: inlineResult.contentSize,
|
|
63
62
|
...(truncated ? { truncated: true } : {}),
|
|
@@ -88,12 +87,11 @@ function buildResponse(pipeline, inlineResult, inputUrl) {
|
|
|
88
87
|
const Step = {
|
|
89
88
|
START: 1,
|
|
90
89
|
RESOLVE_URL: 2,
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
DONE: 8,
|
|
90
|
+
FETCH: 3,
|
|
91
|
+
RESPONSE: 4,
|
|
92
|
+
TRANSFORM: 5,
|
|
93
|
+
PREPARE: 6,
|
|
94
|
+
DONE: 7,
|
|
97
95
|
};
|
|
98
96
|
function formatContentSize(contentSize) {
|
|
99
97
|
if (contentSize < 1000)
|
|
@@ -119,7 +117,6 @@ export function getFetchCompletionStatusMessage(result) {
|
|
|
119
117
|
export class FetchUrlProgressPlan {
|
|
120
118
|
reporter;
|
|
121
119
|
context;
|
|
122
|
-
cacheStatus = 'unknown';
|
|
123
120
|
constructor(reporter, context) {
|
|
124
121
|
this.reporter = reporter;
|
|
125
122
|
this.context = context;
|
|
@@ -128,7 +125,6 @@ export class FetchUrlProgressPlan {
|
|
|
128
125
|
this.reporter.report(Step.START, 'Preparing request');
|
|
129
126
|
}
|
|
130
127
|
reportStage(stage) {
|
|
131
|
-
this.updateCacheStatus(stage);
|
|
132
128
|
const mapped = this.mapStage(stage);
|
|
133
129
|
if (!mapped)
|
|
134
130
|
return;
|
|
@@ -140,47 +136,23 @@ export class FetchUrlProgressPlan {
|
|
|
140
136
|
reportFailure(cancelled) {
|
|
141
137
|
this.reporter.report(Step.DONE, cancelled ? 'Cancelled' : 'Failed');
|
|
142
138
|
}
|
|
143
|
-
updateCacheStatus(stage) {
|
|
144
|
-
if (stage === 'cache_hit' || stage === 'cache_restore') {
|
|
145
|
-
this.cacheStatus = 'cache_hit';
|
|
146
|
-
}
|
|
147
|
-
else if (stage === 'fetch_remote' ||
|
|
148
|
-
stage === 'response_ready' ||
|
|
149
|
-
stage === 'transform_start') {
|
|
150
|
-
this.cacheStatus = 'cache_miss';
|
|
151
|
-
}
|
|
152
|
-
}
|
|
153
139
|
mapStage(stage) {
|
|
154
140
|
switch (stage) {
|
|
155
141
|
case 'resolve_url':
|
|
156
142
|
return { step: Step.RESOLVE_URL, message: 'Resolving URL' };
|
|
157
|
-
case 'check_cache':
|
|
158
|
-
return { step: Step.CHECK_CACHE, message: 'Checking cache' };
|
|
159
|
-
case 'cache_hit':
|
|
160
|
-
return { step: Step.CACHE_OR_FETCH, message: 'Loaded from cache' };
|
|
161
|
-
case 'cache_restore':
|
|
162
|
-
return {
|
|
163
|
-
step: Step.RESTORE_OR_RESPONSE,
|
|
164
|
-
message: 'Restoring cached content',
|
|
165
|
-
};
|
|
166
143
|
case 'fetch_remote':
|
|
167
144
|
return {
|
|
168
|
-
step: Step.
|
|
145
|
+
step: Step.FETCH,
|
|
169
146
|
message: `Fetching ${this.context}`,
|
|
170
147
|
};
|
|
171
148
|
case 'response_ready':
|
|
172
|
-
return { step: Step.
|
|
149
|
+
return { step: Step.RESPONSE, message: 'Received response' };
|
|
173
150
|
case 'transform_start':
|
|
174
151
|
return { step: Step.TRANSFORM, message: 'Parsing HTML -> Markdown' };
|
|
175
152
|
case 'prepare_output':
|
|
176
|
-
return {
|
|
177
|
-
step: this.cacheStatus === 'cache_miss' ? Step.PREPARE : Step.TRANSFORM,
|
|
178
|
-
message: 'Fetch completed',
|
|
179
|
-
};
|
|
153
|
+
return { step: Step.PREPARE, message: 'Fetch completed' };
|
|
180
154
|
case 'finalize_output':
|
|
181
|
-
|
|
182
|
-
return undefined;
|
|
183
|
-
return { step: Step.PREPARE, message: 'Finalizing output' };
|
|
155
|
+
return undefined;
|
|
184
156
|
}
|
|
185
157
|
}
|
|
186
158
|
}
|
|
@@ -195,19 +167,16 @@ function buildToolAbortSignal(extraSignal) {
|
|
|
195
167
|
}
|
|
196
168
|
return signal;
|
|
197
169
|
}
|
|
198
|
-
function buildFetchOptions(url, signal, progressPlan
|
|
170
|
+
function buildFetchOptions(url, signal, progressPlan) {
|
|
199
171
|
return {
|
|
200
172
|
url,
|
|
201
173
|
...withSignal(signal),
|
|
202
|
-
...(forceRefresh ? { forceRefresh: true } : {}),
|
|
203
174
|
onStage: (stage) => {
|
|
204
175
|
progressPlan.reportStage(stage);
|
|
205
176
|
},
|
|
206
177
|
transform: async ({ buffer, encoding, truncated }, normalizedUrl) => {
|
|
207
178
|
return markdownTransform({ buffer, encoding, ...(truncated ? { truncated } : {}) }, normalizedUrl, signal);
|
|
208
179
|
},
|
|
209
|
-
serialize: serializeMarkdownResult,
|
|
210
|
-
deserialize: parseCachedMarkdownResult,
|
|
211
180
|
};
|
|
212
181
|
}
|
|
213
182
|
async function executeFetch(input, extra) {
|
|
@@ -217,7 +186,7 @@ async function executeFetch(input, extra) {
|
|
|
217
186
|
logDebug('Fetching URL', { url });
|
|
218
187
|
try {
|
|
219
188
|
progressPlan.reportStart();
|
|
220
|
-
const { pipeline, inlineResult } = await performSharedFetch(buildFetchOptions(url, signal, progressPlan
|
|
189
|
+
const { pipeline, inlineResult } = await performSharedFetch(buildFetchOptions(url, signal, progressPlan));
|
|
221
190
|
progressPlan.reportSuccess(inlineResult.contentSize);
|
|
222
191
|
return buildResponse(pipeline, inlineResult, url);
|
|
223
192
|
}
|
package/dist/transform/shared.js
CHANGED
|
@@ -3,10 +3,10 @@ function isTransformMessage(message) {
|
|
|
3
3
|
if (!message || typeof message !== 'object')
|
|
4
4
|
return false;
|
|
5
5
|
const value = message;
|
|
6
|
-
const { id, url, html, htmlBuffer, encoding,
|
|
6
|
+
const { id, url, html, htmlBuffer, encoding, includeMetadataFooter, inputTruncated, } = value;
|
|
7
7
|
return (typeof id === 'string' &&
|
|
8
8
|
typeof url === 'string' &&
|
|
9
|
-
typeof
|
|
9
|
+
typeof includeMetadataFooter === 'boolean' &&
|
|
10
10
|
(html === undefined || typeof html === 'string') &&
|
|
11
11
|
(htmlBuffer === undefined || htmlBuffer instanceof Uint8Array) &&
|
|
12
12
|
(encoding === undefined || typeof encoding === 'string') &&
|
|
@@ -80,7 +80,7 @@ function handleCancelMessage(params) {
|
|
|
80
80
|
}
|
|
81
81
|
function executeTransformMessage(params) {
|
|
82
82
|
const { message, controllersById, decoder, runTransform, sendMessage } = params;
|
|
83
|
-
const { id, url, html, htmlBuffer, encoding,
|
|
83
|
+
const { id, url, html, htmlBuffer, encoding, includeMetadataFooter, inputTruncated, } = message;
|
|
84
84
|
if (!id.trim()) {
|
|
85
85
|
sendMessage(createValidationErrorMessage(id, url || '', 'Missing transform message id'));
|
|
86
86
|
return;
|
|
@@ -94,7 +94,7 @@ function executeTransformMessage(params) {
|
|
|
94
94
|
try {
|
|
95
95
|
const content = decodeHtml(html, htmlBuffer, encoding, decoder);
|
|
96
96
|
const result = runTransform(content, url, {
|
|
97
|
-
|
|
97
|
+
includeMetadataFooter,
|
|
98
98
|
signal: controller.signal,
|
|
99
99
|
...(inputTruncated ? { inputTruncated: true } : {}),
|
|
100
100
|
});
|
|
@@ -20,7 +20,7 @@ export declare function htmlToMarkdown(html: string, metadata?: MetadataBlock, o
|
|
|
20
20
|
}): string;
|
|
21
21
|
export declare function isExtractionSufficient(article: ExtractedArticle | null, originalHtmlOrDocument: string | Document): boolean;
|
|
22
22
|
export declare function determineContentExtractionSource(article: ExtractedArticle | null): article is ExtractedArticle;
|
|
23
|
-
export declare function createContentMetadataBlock(url: string, article: ExtractedArticle | null, extractedMeta: ExtractedMetadata, shouldExtractFromArticle: boolean,
|
|
23
|
+
export declare function createContentMetadataBlock(url: string, article: ExtractedArticle | null, extractedMeta: ExtractedMetadata, shouldExtractFromArticle: boolean, includeMetadataFooter: boolean): MetadataBlock | undefined;
|
|
24
24
|
export declare function transformHtmlToMarkdownInProcess(html: string, url: string, options: TransformOptions): MarkdownTransformResult;
|
|
25
25
|
interface TransformPoolStats {
|
|
26
26
|
queueDepth: number;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"transform.d.ts","sourceRoot":"","sources":["../../src/transform/transform.ts"],"names":[],"mappings":"AA2CA,OAAO,EACL,wBAAwB,EACxB,wBAAwB,EACxB,oBAAoB,EACrB,MAAM,uBAAuB,CAAC;AAkB/B,OAAO,KAAK,EACV,gBAAgB,EAChB,iBAAiB,EACjB,gBAAgB,EAChB,uBAAuB,EACvB,aAAa,EACb,gBAAgB,EAChB,qBAAqB,EAEtB,MAAM,YAAY,CAAC;AA+BpB,UAAU,WAAW;IACnB,aAAa,EAAE,MAAM,CAAC;IACtB,SAAS,EAAE,MAAM,CAAC;CACnB;AA4LD,wBAAgB,mBAAmB,CACjC,GAAG,EAAE,MAAM,EACX,KAAK,EAAE,MAAM,EACb,MAAM,CAAC,EAAE,WAAW,GACnB,qBAAqB,GAAG,IAAI,CAE9B;AAED,wBAAgB,iBAAiB,CAC/B,OAAO,EAAE,qBAAqB,GAAG,IAAI,EACrC,OAAO,CAAC,EAAE;IAAE,SAAS,CAAC,EAAE,OAAO,CAAA;CAAE,GAChC,MAAM,CAER;AA2aD,wBAAgB,cAAc,CAC5B,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,GAAE;IAAE,cAAc,CAAC,EAAE,OAAO,CAAC;IAAC,MAAM,CAAC,EAAE,WAAW,CAAA;CAExD,GACA,gBAAgB,CAGlB;AAuKD,wBAAgB,cAAc,CAC5B,IAAI,EAAE,MAAM,EACZ,QAAQ,CAAC,EAAE,aAAa,EACxB,OAAO,CAAC,EAAE;IACR,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,MAAM,CAAC,EAAE,WAAW,GAAG,SAAS,CAAC;IACjC,QAAQ,CAAC,EAAE,QAAQ,GAAG,SAAS,CAAC;IAChC,gBAAgB,CAAC,EAAE,OAAO,GAAG,SAAS,CAAC;CACxC,GACA,MAAM,CAyBR;AA2DD,wBAAgB,sBAAsB,CACpC,OAAO,EAAE,gBAAgB,GAAG,IAAI,EAChC,sBAAsB,EAAE,MAAM,GAAG,QAAQ,GACxC,OAAO,CAQT;AAKD,wBAAgB,gCAAgC,CAC9C,OAAO,EAAE,gBAAgB,GAAG,IAAI,GAC/B,OAAO,IAAI,gBAAgB,CAE7B;AAED,wBAAgB,0BAA0B,CACxC,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,gBAAgB,GAAG,IAAI,EAChC,aAAa,EAAE,iBAAiB,EAChC,wBAAwB,EAAE,OAAO,EACjC,
|
|
1
|
+
{"version":3,"file":"transform.d.ts","sourceRoot":"","sources":["../../src/transform/transform.ts"],"names":[],"mappings":"AA2CA,OAAO,EACL,wBAAwB,EACxB,wBAAwB,EACxB,oBAAoB,EACrB,MAAM,uBAAuB,CAAC;AAkB/B,OAAO,KAAK,EACV,gBAAgB,EAChB,iBAAiB,EACjB,gBAAgB,EAChB,uBAAuB,EACvB,aAAa,EACb,gBAAgB,EAChB,qBAAqB,EAEtB,MAAM,YAAY,CAAC;AA+BpB,UAAU,WAAW;IACnB,aAAa,EAAE,MAAM,CAAC;IACtB,SAAS,EAAE,MAAM,CAAC;CACnB;AA4LD,wBAAgB,mBAAmB,CACjC,GAAG,EAAE,MAAM,EACX,KAAK,EAAE,MAAM,EACb,MAAM,CAAC,EAAE,WAAW,GACnB,qBAAqB,GAAG,IAAI,CAE9B;AAED,wBAAgB,iBAAiB,CAC/B,OAAO,EAAE,qBAAqB,GAAG,IAAI,EACrC,OAAO,CAAC,EAAE;IAAE,SAAS,CAAC,EAAE,OAAO,CAAA;CAAE,GAChC,MAAM,CAER;AA2aD,wBAAgB,cAAc,CAC5B,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,GAAE;IAAE,cAAc,CAAC,EAAE,OAAO,CAAC;IAAC,MAAM,CAAC,EAAE,WAAW,CAAA;CAExD,GACA,gBAAgB,CAGlB;AAuKD,wBAAgB,cAAc,CAC5B,IAAI,EAAE,MAAM,EACZ,QAAQ,CAAC,EAAE,aAAa,EACxB,OAAO,CAAC,EAAE;IACR,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,MAAM,CAAC,EAAE,WAAW,GAAG,SAAS,CAAC;IACjC,QAAQ,CAAC,EAAE,QAAQ,GAAG,SAAS,CAAC;IAChC,gBAAgB,CAAC,EAAE,OAAO,GAAG,SAAS,CAAC;CACxC,GACA,MAAM,CAyBR;AA2DD,wBAAgB,sBAAsB,CACpC,OAAO,EAAE,gBAAgB,GAAG,IAAI,EAChC,sBAAsB,EAAE,MAAM,GAAG,QAAQ,GACxC,OAAO,CAQT;AAKD,wBAAgB,gCAAgC,CAC9C,OAAO,EAAE,gBAAgB,GAAG,IAAI,GAC/B,OAAO,IAAI,gBAAgB,CAE7B;AAED,wBAAgB,0BAA0B,CACxC,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,gBAAgB,GAAG,IAAI,EAChC,aAAa,EAAE,iBAAiB,EAChC,wBAAwB,EAAE,OAAO,EACjC,qBAAqB,EAAE,OAAO,GAC7B,aAAa,GAAG,SAAS,CAuB3B;AA6bD,wBAAgB,gCAAgC,CAC9C,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,gBAAgB,GACxB,uBAAuB,CAMzB;AAaD,UAAU,kBAAkB;IAC1B,UAAU,EAAE,MAAM,CAAC;IACnB,aAAa,EAAE,MAAM,CAAC;IACtB,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,wBAAgB,qBAAqB,IAAI,kBAAkB,GAAG,IAAI,CAEjE;AAED,wBAAsB,2BAA2B,IAAI,OAAO,CAAC,IAAI,CAAC,CAEjE;AAED,KAAK,yBAAyB,GAAG,gBAAgB,GAAG;IAAE,QAAQ,CAAC,EAAE,MAAM,CAAA;CAAE,CAAC;AAuG1E,wBAAsB,uBAAuB,CAC3C,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,gBAAgB,GACxB,OAAO,CAAC,uBAAuB,CAAC,CAElC;AAED,wBAAsB,yBAAyB,CAC7C,UAAU,EAAE,UAAU,EACtB,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,yBAAyB,GACjC,OAAO,CAAC,uBAAuB,CAAC,CAElC;AAED,OAAO,EACL,wBAAwB,EACxB,wBAAwB,EACxB,oBAAoB,GACrB,CAAC"}
|
|
@@ -631,7 +631,7 @@ function shouldPreserveRawContent(url, content) {
|
|
|
631
631
|
}
|
|
632
632
|
function buildRawMarkdownPayload(params) {
|
|
633
633
|
const title = extractTitleFromRawMarkdown(params.rawContent);
|
|
634
|
-
let content = params.
|
|
634
|
+
let content = params.includeMetadataFooter
|
|
635
635
|
? addSourceToMarkdown(params.rawContent, params.url)
|
|
636
636
|
: params.rawContent;
|
|
637
637
|
if (params.url) {
|
|
@@ -648,7 +648,7 @@ function tryTransformRawContent(params) {
|
|
|
648
648
|
const { content, title } = buildRawMarkdownPayload({
|
|
649
649
|
rawContent: params.html,
|
|
650
650
|
url: params.url,
|
|
651
|
-
|
|
651
|
+
includeMetadataFooter: params.includeMetadataFooter,
|
|
652
652
|
});
|
|
653
653
|
return {
|
|
654
654
|
markdown: content,
|
|
@@ -672,8 +672,8 @@ const BINARY_SAMPLE_SIZE = 2000;
|
|
|
672
672
|
export function determineContentExtractionSource(article) {
|
|
673
673
|
return article !== null;
|
|
674
674
|
}
|
|
675
|
-
export function createContentMetadataBlock(url, article, extractedMeta, shouldExtractFromArticle,
|
|
676
|
-
if (!
|
|
675
|
+
export function createContentMetadataBlock(url, article, extractedMeta, shouldExtractFromArticle, includeMetadataFooter) {
|
|
676
|
+
if (!includeMetadataFooter)
|
|
677
677
|
return undefined;
|
|
678
678
|
const metadata = {
|
|
679
679
|
type: 'metadata',
|
|
@@ -826,8 +826,8 @@ function buildRawSource(base, params) {
|
|
|
826
826
|
};
|
|
827
827
|
}
|
|
828
828
|
function resolveBaseContentSource(input) {
|
|
829
|
-
const { html, url, article, extractedMeta,
|
|
830
|
-
const metadata = createContentMetadataBlock(url, article, extractedMeta, evaluatedArticleDoc !== null,
|
|
829
|
+
const { html, url, article, extractedMeta, includeMetadataFooter, evaluatedArticleDoc, document, truncated, signal, } = input;
|
|
830
|
+
const metadata = createContentMetadataBlock(url, article, extractedMeta, evaluatedArticleDoc !== null, includeMetadataFooter);
|
|
831
831
|
const preparedDocument = document
|
|
832
832
|
? prepareContentSourceDocument(document, url, signal)
|
|
833
833
|
: undefined;
|
|
@@ -879,7 +879,7 @@ function resolveContentSource(params) {
|
|
|
879
879
|
url: params.url,
|
|
880
880
|
article,
|
|
881
881
|
extractedMeta,
|
|
882
|
-
|
|
882
|
+
includeMetadataFooter: params.includeMetadataFooter,
|
|
883
883
|
evaluatedArticleDoc,
|
|
884
884
|
document,
|
|
885
885
|
truncated: truncated ?? false,
|
|
@@ -915,7 +915,7 @@ function resolveTransformContentResult(html, url, options, signal) {
|
|
|
915
915
|
const rawResult = stageTracker.run(url, 'transform:raw', () => tryTransformRawContent({
|
|
916
916
|
html,
|
|
917
917
|
url,
|
|
918
|
-
|
|
918
|
+
includeMetadataFooter: options.includeMetadataFooter,
|
|
919
919
|
inputTruncated: options.inputTruncated,
|
|
920
920
|
}));
|
|
921
921
|
if (rawResult)
|
|
@@ -923,7 +923,7 @@ function resolveTransformContentResult(html, url, options, signal) {
|
|
|
923
923
|
const context = stageTracker.run(url, 'transform:extract', () => resolveContentSource({
|
|
924
924
|
html,
|
|
925
925
|
url,
|
|
926
|
-
|
|
926
|
+
includeMetadataFooter: options.includeMetadataFooter,
|
|
927
927
|
signal,
|
|
928
928
|
inputTruncated: options.inputTruncated,
|
|
929
929
|
}));
|
|
@@ -968,7 +968,7 @@ function transformInputInProcess(htmlOrBuffer, url, options) {
|
|
|
968
968
|
}
|
|
969
969
|
function workerTransformOptions(options) {
|
|
970
970
|
return {
|
|
971
|
-
|
|
971
|
+
includeMetadataFooter: options.includeMetadataFooter,
|
|
972
972
|
...(options.signal ? { signal: options.signal } : {}),
|
|
973
973
|
...(options.inputTruncated
|
|
974
974
|
? { inputTruncated: options.inputTruncated }
|
|
@@ -59,7 +59,7 @@ export interface MarkdownTransformResult extends MarkdownPayload {
|
|
|
59
59
|
* Options for transform operations.
|
|
60
60
|
*/
|
|
61
61
|
export interface TransformOptions {
|
|
62
|
-
|
|
62
|
+
includeMetadataFooter: boolean;
|
|
63
63
|
signal?: AbortSignal;
|
|
64
64
|
inputTruncated?: boolean;
|
|
65
65
|
}
|
|
@@ -96,7 +96,7 @@ export interface TransformWorkerTransformMessage {
|
|
|
96
96
|
htmlBuffer?: Uint8Array | undefined;
|
|
97
97
|
encoding?: string | undefined;
|
|
98
98
|
url: string;
|
|
99
|
-
|
|
99
|
+
includeMetadataFooter: boolean;
|
|
100
100
|
inputTruncated?: boolean | undefined;
|
|
101
101
|
}
|
|
102
102
|
export interface TransformWorkerCancelledMessage {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/transform/types.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,UAAU,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,GAAG,EAAE,MAAM,CAAC;IACZ,SAAS,EAAE,MAAM,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,KAAK,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC3B,WAAW,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IACjC,MAAM,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC5B,KAAK,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC3B,OAAO,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC7B,WAAW,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IACjC,UAAU,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;CACjC;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,OAAO,EAAE,gBAAgB,GAAG,IAAI,CAAC;IACjC,QAAQ,EAAE,iBAAiB,CAAC;CAC7B;AAED,UAAU,eAAe;IACvB,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC3B,SAAS,EAAE,OAAO,CAAC;IACnB,QAAQ,CAAC,EAAE,iBAAiB,GAAG,SAAS,CAAC;CAC1C;AAED;;GAEG;AACH,MAAM,WAAW,uBAAwB,SAAQ,eAAe;IAC9D,KAAK,EAAE,MAAM,GAAG,SAAS,CAAC;CAC3B;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/transform/types.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,UAAU,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,GAAG,EAAE,MAAM,CAAC;IACZ,SAAS,EAAE,MAAM,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,KAAK,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC3B,WAAW,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IACjC,MAAM,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC5B,KAAK,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC3B,OAAO,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC7B,WAAW,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IACjC,UAAU,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;CACjC;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,OAAO,EAAE,gBAAgB,GAAG,IAAI,CAAC;IACjC,QAAQ,EAAE,iBAAiB,CAAC;CAC7B;AAED,UAAU,eAAe;IACvB,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC3B,SAAS,EAAE,OAAO,CAAC;IACnB,QAAQ,CAAC,EAAE,iBAAiB,GAAG,SAAS,CAAC;CAC1C;AAED;;GAEG;AACH,MAAM,WAAW,uBAAwB,SAAQ,eAAe;IAC9D,KAAK,EAAE,MAAM,GAAG,SAAS,CAAC;CAC3B;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,qBAAqB,EAAE,OAAO,CAAC;IAC/B,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,cAAc,CAAC,EAAE,OAAO,CAAC;CAC1B;AAED;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAClC,CAAC,EAAE,CAAC,CAAC;IACL,IAAI,EAAE,OAAO,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,MAAM,CAAC;IACnB,GAAG,EAAE,MAAM,CAAC;IACZ,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,SAAS,CAAC,EAAE,OAAO,CAAC;CACrB;AAED;;GAEG;AACH,MAAM,WAAW,qBAAqB;IACpC,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,GAAG,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,aAAa,CAAC,EAAE,MAAM,CAAC;CACjC;AAED;;GAEG;AACH,MAAM,WAAW,+BAA+B;IAC9C,IAAI,EAAE,WAAW,CAAC;IAClB,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC1B,UAAU,CAAC,EAAE,UAAU,GAAG,SAAS,CAAC;IACpC,QAAQ,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC9B,GAAG,EAAE,MAAM,CAAC;IACZ,qBAAqB,EAAE,OAAO,CAAC;IAC/B,cAAc,CAAC,EAAE,OAAO,GAAG,SAAS,CAAC;CACtC;AAED,MAAM,WAAW,+BAA+B;IAC9C,IAAI,EAAE,WAAW,CAAC;IAClB,EAAE,EAAE,MAAM,CAAC;CACZ;AAED,MAAM,WAAW,4BAA4B;IAC3C,IAAI,EAAE,QAAQ,CAAC;IACf,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,EAAE,eAAe,CAAC;CACzB;AAED,MAAM,WAAW,2BAA2B;IAC1C,IAAI,EAAE,OAAO,CAAC;IACd,EAAE,EAAE,MAAM,CAAC;IACX,KAAK,EAAE;QACL,IAAI,EAAE,MAAM,CAAC;QACb,OAAO,EAAE,MAAM,CAAC;QAChB,GAAG,EAAE,MAAM,CAAC;QACZ,UAAU,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;QAChC,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,SAAS,CAAC;KAC/C,CAAC;CACH;AAED,MAAM,MAAM,8BAA8B,GACtC,4BAA4B,GAC5B,2BAA2B,GAC3B,+BAA+B,CAAC"}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import type { MarkdownTransformResult } from './types.js';
|
|
2
2
|
interface TransformWorkerPool {
|
|
3
3
|
transform(html: string, url: string, options: {
|
|
4
|
-
|
|
4
|
+
includeMetadataFooter: boolean;
|
|
5
5
|
signal?: AbortSignal;
|
|
6
6
|
inputTruncated?: boolean;
|
|
7
7
|
}): Promise<MarkdownTransformResult>;
|
|
@@ -28,12 +28,12 @@ declare class WorkerPool implements TransformWorkerPool {
|
|
|
28
28
|
private readonly restartBackoff;
|
|
29
29
|
constructor(size: number, timeoutMs: number);
|
|
30
30
|
transform(html: string, url: string, options: {
|
|
31
|
-
|
|
31
|
+
includeMetadataFooter: boolean;
|
|
32
32
|
signal?: AbortSignal;
|
|
33
33
|
inputTruncated?: boolean;
|
|
34
34
|
}): Promise<MarkdownTransformResult>;
|
|
35
35
|
transform(htmlBuffer: Uint8Array, url: string, options: {
|
|
36
|
-
|
|
36
|
+
includeMetadataFooter: boolean;
|
|
37
37
|
signal?: AbortSignal;
|
|
38
38
|
inputTruncated?: boolean;
|
|
39
39
|
encoding?: string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"worker-pool.d.ts","sourceRoot":"","sources":["../../src/transform/worker-pool.ts"],"names":[],"mappings":"AA2BA,OAAO,KAAK,EACV,uBAAuB,EAKxB,MAAM,YAAY,CAAC;AAqIpB,UAAU,mBAAmB;IAC3B,SAAS,CACP,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,EAAE;QACP,
|
|
1
|
+
{"version":3,"file":"worker-pool.d.ts","sourceRoot":"","sources":["../../src/transform/worker-pool.ts"],"names":[],"mappings":"AA2BA,OAAO,KAAK,EACV,uBAAuB,EAKxB,MAAM,YAAY,CAAC;AAqIpB,UAAU,mBAAmB;IAC3B,SAAS,CACP,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,EAAE;QACP,qBAAqB,EAAE,OAAO,CAAC;QAC/B,MAAM,CAAC,EAAE,WAAW,CAAC;QACrB,cAAc,CAAC,EAAE,OAAO,CAAC;KAC1B,GACA,OAAO,CAAC,uBAAuB,CAAC,CAAC;IACpC,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;IACvB,aAAa,IAAI,MAAM,CAAC;IACxB,gBAAgB,IAAI,MAAM,CAAC;IAC3B,WAAW,IAAI,MAAM,CAAC;CACvB;AA6JD,cAAM,UAAW,YAAW,mBAAmB;IAC7C,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,cAAc,CAAkC;IAExE,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAkC;IAC1D,OAAO,CAAC,QAAQ,CAAS;IACzB,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAoB;IAChD,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAoB;IAEhD,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAgC;IACtD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAmC;IAC5D,OAAO,CAAC,QAAQ,CAAC,UAAU,CAA0B;IAErD,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAClC,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,SAAS,CAAK;IACtB,OAAO,CAAC,SAAS,CAAK;IACtB,OAAO,CAAC,QAAQ,CAAS;IACzB,OAAO,CAAC,QAAQ,CAAC,cAAc,CAA6B;gBAEhD,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM;IASrC,SAAS,CACb,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,EAAE;QACP,qBAAqB,EAAE,OAAO,CAAC;QAC/B,MAAM,CAAC,EAAE,WAAW,CAAC;QACrB,cAAc,CAAC,EAAE,OAAO,CAAC;KAC1B,GACA,OAAO,CAAC,uBAAuB,CAAC;IAC7B,SAAS,CACb,UAAU,EAAE,UAAU,EACtB,GAAG,EAAE,MAAM,EACX,OAAO,EAAE;QACP,qBAAqB,EAAE,OAAO,CAAC;QAC/B,MAAM,CAAC,EAAE,WAAW,CAAC;QACrB,cAAc,CAAC,EAAE,OAAO,CAAC;QACzB,QAAQ,CAAC,EAAE,MAAM,CAAC;KACnB,GACA,OAAO,CAAC,uBAAuB,CAAC;IAyCnC,aAAa,IAAI,MAAM;IAIvB,gBAAgB,IAAI,MAAM;IAI1B,WAAW,IAAI,MAAM;IAIrB,MAAM,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI;IAWpB,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IA0B5B,OAAO,CAAC,UAAU;IAIlB,OAAO,CAAC,iBAAiB;IAoDzB,OAAO,CAAC,aAAa;YA4BP,aAAa;IA2B3B,OAAO,CAAC,kBAAkB;IAY1B,OAAO,CAAC,WAAW;IAmCnB,OAAO,CAAC,cAAc;IA6BtB,OAAO,CAAC,aAAa;IAyBrB,OAAO,CAAC,eAAe;IAgCvB,OAAO,CAAC,mBAAmB;IA8B3B,OAAO,CAAC,YAAY;IAWpB,OAAO,CAAC,QAAQ;IAUhB,OAAO,CAAC,QAAQ;IAUhB,OAAO,CAAC,QAAQ;IAShB,OAAO,CAAC,YAAY;IASpB,OAAO,CAAC,UAAU;IAgClB,OAAO,CAAC,iBAAiB;IAgCzB,OAAO,CAAC,gBAAgB;IA8CxB,OAAO,CAAC,YAAY;IAwBpB,OAAO,CAAC,YAAY;IAIpB,OAAO,CAAC,iBAAiB;CAS1B;AAMD,wBAAgB,qBAAqB,IAAI,UAAU,CAIlD;AAED,wBAAgB,kBAAkB,IAAI;IACpC,UAAU,EAAE,MAAM,CAAC;IACnB,aAAa,EAAE,MAAM,CAAC;IACtB,QAAQ,EAAE,MAAM,CAAC;CAClB,GAAG,IAAI,CAOP;AAED,wBAAsB,kBAAkB,IAAI,OAAO,CAAC,IAAI,CAAC,CAIxD"}
|
|
@@ -68,7 +68,7 @@ function buildWorkerDispatchPayload(task) {
|
|
|
68
68
|
type: 'transform',
|
|
69
69
|
id: task.id,
|
|
70
70
|
url: task.url,
|
|
71
|
-
|
|
71
|
+
includeMetadataFooter: task.includeMetadataFooter,
|
|
72
72
|
...(task.inputTruncated ? { inputTruncated: true } : {}),
|
|
73
73
|
};
|
|
74
74
|
if (!task.htmlBuffer) {
|
|
@@ -300,7 +300,7 @@ class WorkerPool {
|
|
|
300
300
|
const task = {
|
|
301
301
|
id,
|
|
302
302
|
url,
|
|
303
|
-
|
|
303
|
+
includeMetadataFooter: options.includeMetadataFooter,
|
|
304
304
|
...(options.inputTruncated
|
|
305
305
|
? { inputTruncated: options.inputTruncated }
|
|
306
306
|
: {}),
|
package/package.json
CHANGED
package/dist/lib/cache.d.ts
DELETED
|
@@ -1,48 +0,0 @@
|
|
|
1
|
-
export declare function toCacheScopeId(sessionId?: string): string;
|
|
2
|
-
interface CacheEntry {
|
|
3
|
-
url: string;
|
|
4
|
-
title?: string;
|
|
5
|
-
content: string;
|
|
6
|
-
fetchedAt: string;
|
|
7
|
-
expiresAt: string;
|
|
8
|
-
scopeIds?: string[];
|
|
9
|
-
}
|
|
10
|
-
interface CacheKeyParts {
|
|
11
|
-
namespace: string;
|
|
12
|
-
urlHash: string;
|
|
13
|
-
}
|
|
14
|
-
interface CacheSetOptions {
|
|
15
|
-
force?: boolean;
|
|
16
|
-
}
|
|
17
|
-
interface CacheGetOptions {
|
|
18
|
-
force?: boolean;
|
|
19
|
-
scopeId?: string;
|
|
20
|
-
}
|
|
21
|
-
interface CacheEntryMetadata {
|
|
22
|
-
url: string;
|
|
23
|
-
title?: string;
|
|
24
|
-
scopeIds?: string[];
|
|
25
|
-
}
|
|
26
|
-
interface CacheUpdateEvent {
|
|
27
|
-
cacheKey: string;
|
|
28
|
-
namespace: string;
|
|
29
|
-
urlHash: string;
|
|
30
|
-
listChanged: boolean;
|
|
31
|
-
scopeIds: string[];
|
|
32
|
-
}
|
|
33
|
-
type CacheUpdateListener = (event: CacheUpdateEvent) => unknown;
|
|
34
|
-
export declare function createCacheKey(namespace: string, url: string, vary?: Record<string, unknown> | string): string | null;
|
|
35
|
-
export declare function parseCacheKey(cacheKey: string): CacheKeyParts | null;
|
|
36
|
-
export declare function onCacheUpdate(listener: CacheUpdateListener): () => void;
|
|
37
|
-
export declare function get(cacheKey: string | null, options?: CacheGetOptions): CacheEntry | undefined;
|
|
38
|
-
export declare function set(cacheKey: string | null, content: string, metadata: CacheEntryMetadata, options?: CacheSetOptions): void;
|
|
39
|
-
export declare function keys(): readonly string[];
|
|
40
|
-
export declare function getEntryMeta(cacheKey: string): {
|
|
41
|
-
url: string;
|
|
42
|
-
title?: string;
|
|
43
|
-
fetchedAt?: string;
|
|
44
|
-
scopeIds: string[];
|
|
45
|
-
} | undefined;
|
|
46
|
-
export declare function isEnabled(): boolean;
|
|
47
|
-
export {};
|
|
48
|
-
//# sourceMappingURL=cache.d.ts.map
|
package/dist/lib/cache.d.ts.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"cache.d.ts","sourceRoot":"","sources":["../../src/lib/cache.ts"],"names":[],"mappings":"AAcA,wBAAgB,cAAc,CAAC,SAAS,CAAC,EAAE,MAAM,GAAG,MAAM,CAEzD;AAYD,UAAU,UAAU;IAClB,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;CACrB;AACD,UAAU,aAAa;IACrB,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;CACjB;AACD,UAAU,eAAe;IACvB,KAAK,CAAC,EAAE,OAAO,CAAC;CACjB;AACD,UAAU,eAAe;IACvB,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AACD,UAAU,kBAAkB;IAC1B,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;CACrB;AAID,UAAU,gBAAgB;IACxB,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,EAAE,OAAO,CAAC;IACrB,QAAQ,EAAE,MAAM,EAAE,CAAC;CACpB;AACD,KAAK,mBAAmB,GAAG,CAAC,KAAK,EAAE,gBAAgB,KAAK,OAAO,CAAC;AAEhE,wBAAgB,cAAc,CAC5B,SAAS,EAAE,MAAM,EACjB,GAAG,EAAE,MAAM,EACX,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,MAAM,GACtC,MAAM,GAAG,IAAI,CAwBf;AAED,wBAAgB,aAAa,CAAC,QAAQ,EAAE,MAAM,GAAG,aAAa,GAAG,IAAI,CASpE;AAgOD,wBAAgB,aAAa,CAAC,QAAQ,EAAE,mBAAmB,GAAG,MAAM,IAAI,CAEvE;AACD,wBAAgB,GAAG,CACjB,QAAQ,EAAE,MAAM,GAAG,IAAI,EACvB,OAAO,CAAC,EAAE,eAAe,GACxB,UAAU,GAAG,SAAS,CAExB;AACD,wBAAgB,GAAG,CACjB,QAAQ,EAAE,MAAM,GAAG,IAAI,EACvB,OAAO,EAAE,MAAM,EACf,QAAQ,EAAE,kBAAkB,EAC5B,OAAO,CAAC,EAAE,eAAe,GACxB,IAAI,CAEN;AACD,wBAAgB,IAAI,IAAI,SAAS,MAAM,EAAE,CAExC;AACD,wBAAgB,YAAY,CAC1B,QAAQ,EAAE,MAAM,GAEd;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IAAC,SAAS,CAAC,EAAE,MAAM,CAAC;IAAC,QAAQ,EAAE,MAAM,EAAE,CAAA;CAAE,GACvE,SAAS,CASZ;AACD,wBAAgB,SAAS,IAAI,OAAO,CAEnC"}
|