@j0hanz/fetch-url-mcp 1.12.6 → 1.12.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/http/auth.d.ts.map +1 -1
- package/dist/http/auth.js +12 -12
- package/dist/http/helpers.d.ts +1 -1
- package/dist/http/helpers.d.ts.map +1 -1
- package/dist/http/helpers.js +8 -10
- package/dist/http/native.d.ts.map +1 -1
- package/dist/http/native.js +32 -53
- package/dist/http/rate-limit.d.ts.map +1 -1
- package/dist/http/rate-limit.js +3 -3
- package/dist/index.js +3 -3
- package/dist/lib/core.d.ts.map +1 -1
- package/dist/lib/core.js +8 -8
- package/dist/lib/error-codes.d.ts +12 -10
- package/dist/lib/error-codes.d.ts.map +1 -1
- package/dist/lib/error-codes.js +12 -15
- package/dist/lib/error-messages.d.ts.map +1 -1
- package/dist/lib/error-messages.js +12 -12
- package/dist/lib/fetch-pipeline.d.ts.map +1 -1
- package/dist/lib/fetch-pipeline.js +5 -5
- package/dist/lib/http.d.ts.map +1 -1
- package/dist/lib/http.js +9 -9
- package/dist/lib/logger-names.d.ts +12 -10
- package/dist/lib/logger-names.d.ts.map +1 -1
- package/dist/lib/logger-names.js +12 -10
- package/dist/lib/mcp-interop.d.ts +1 -0
- package/dist/lib/mcp-interop.d.ts.map +1 -1
- package/dist/lib/mcp-interop.js +15 -5
- package/dist/lib/tool-errors.d.ts.map +1 -1
- package/dist/lib/tool-errors.js +22 -21
- package/dist/lib/url.d.ts +1 -1
- package/dist/lib/url.d.ts.map +1 -1
- package/dist/lib/url.js +5 -5
- package/dist/lib/utils.d.ts +1 -0
- package/dist/lib/utils.d.ts.map +1 -1
- package/dist/lib/utils.js +8 -5
- package/dist/resources/index.js +3 -3
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +6 -6
- package/dist/tasks/execution.js +9 -9
- package/dist/tasks/handlers.js +7 -7
- package/dist/tasks/manager.d.ts.map +1 -1
- package/dist/tasks/manager.js +9 -9
- package/dist/tools/fetch-url.d.ts.map +1 -1
- package/dist/tools/fetch-url.js +10 -25
- package/dist/transform/dom-prep.d.ts.map +1 -1
- package/dist/transform/dom-prep.js +7 -7
- package/dist/transform/transform.d.ts.map +1 -1
- package/dist/transform/transform.js +18 -17
- package/dist/transform/worker-pool.js +11 -11
- package/package.json +1 -1
package/dist/server.js
CHANGED
|
@@ -5,7 +5,7 @@ import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'
|
|
|
5
5
|
import { SetLevelRequestSchema } from '@modelcontextprotocol/sdk/types.js';
|
|
6
6
|
import { config } from './lib/core.js';
|
|
7
7
|
import { getSessionId, logError, logInfo, logNotice, setLogLevel, setMcpServer, } from './lib/core.js';
|
|
8
|
-
import {
|
|
8
|
+
import { Loggers } from './lib/logger-names.js';
|
|
9
9
|
import { setTaskToolCallCapability } from './lib/mcp-interop.js';
|
|
10
10
|
import { toError } from './lib/utils.js';
|
|
11
11
|
import { buildServerInstructions, registerGetHelpPrompt, registerInstructionResource, } from './resources/index.js';
|
|
@@ -121,7 +121,7 @@ function registerLoggingSetLevelHandler(server) {
|
|
|
121
121
|
}
|
|
122
122
|
function attachServerErrorHandler(server) {
|
|
123
123
|
server.server.onerror = (error) => {
|
|
124
|
-
logError('MCP server error', toError(error), LOG_SERVER);
|
|
124
|
+
logError('MCP server error', toError(error), Loggers.LOG_SERVER);
|
|
125
125
|
};
|
|
126
126
|
}
|
|
127
127
|
async function shutdownServer(server, signal) {
|
|
@@ -136,7 +136,7 @@ async function shutdownServer(server, signal) {
|
|
|
136
136
|
]);
|
|
137
137
|
for (const result of results) {
|
|
138
138
|
if (result.status === 'rejected') {
|
|
139
|
-
logError('Shutdown step failed', toError(result.reason), LOG_SERVER);
|
|
139
|
+
logError('Shutdown step failed', toError(result.reason), Loggers.LOG_SERVER);
|
|
140
140
|
}
|
|
141
141
|
}
|
|
142
142
|
}
|
|
@@ -149,7 +149,7 @@ function createShutdownHandler(server) {
|
|
|
149
149
|
logInfo('Shutdown already in progress; ignoring signal', {
|
|
150
150
|
signal,
|
|
151
151
|
initialSignal,
|
|
152
|
-
}, LOG_SERVER);
|
|
152
|
+
}, Loggers.LOG_SERVER);
|
|
153
153
|
return shutdownPromise ?? Promise.resolve();
|
|
154
154
|
}
|
|
155
155
|
shuttingDown = true;
|
|
@@ -158,7 +158,7 @@ function createShutdownHandler(server) {
|
|
|
158
158
|
.then(() => shutdownServer(server, signal))
|
|
159
159
|
.catch((err) => {
|
|
160
160
|
const error = toError(err);
|
|
161
|
-
logError('Error during shutdown', error, LOG_SERVER);
|
|
161
|
+
logError('Error during shutdown', error, Loggers.LOG_SERVER);
|
|
162
162
|
process.exitCode = 1;
|
|
163
163
|
})
|
|
164
164
|
.finally(() => {
|
|
@@ -178,7 +178,7 @@ function registerSignalHandlers(handler) {
|
|
|
178
178
|
async function connectStdioServer(server, transport) {
|
|
179
179
|
try {
|
|
180
180
|
await server.connect(transport);
|
|
181
|
-
logInfo('Fetch URL MCP server running on stdio', undefined, LOG_SERVER);
|
|
181
|
+
logInfo('Fetch URL MCP server running on stdio', undefined, Loggers.LOG_SERVER);
|
|
182
182
|
}
|
|
183
183
|
catch (error) {
|
|
184
184
|
const err = toError(error);
|
package/dist/tasks/execution.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { ErrorCode, McpError, } from '@modelcontextprotocol/sdk/types.js';
|
|
2
2
|
import { config } from '../lib/core.js';
|
|
3
3
|
import { logDebug, logError, logInfo, logWarn, runWithRequestContext, } from '../lib/core.js';
|
|
4
|
-
import {
|
|
4
|
+
import { Loggers } from '../lib/logger-names.js';
|
|
5
5
|
import { createMcpError } from '../lib/mcp-interop.js';
|
|
6
6
|
import {} from '../lib/mcp-interop.js';
|
|
7
7
|
import { tryReadToolErrorMessage } from '../lib/tool-errors.js';
|
|
@@ -23,7 +23,7 @@ function attachAbortController(taskId) {
|
|
|
23
23
|
logWarn('Abort controller map reached task capacity — possible leak', {
|
|
24
24
|
size: taskAbortControllers.size,
|
|
25
25
|
maxTotal: config.tasks.maxTotal,
|
|
26
|
-
}, LOG_TASKS);
|
|
26
|
+
}, Loggers.LOG_TASKS);
|
|
27
27
|
}
|
|
28
28
|
const controller = new AbortController();
|
|
29
29
|
taskAbortControllers.set(taskId, controller);
|
|
@@ -72,7 +72,7 @@ export function emitTaskStatusNotification(server, task) {
|
|
|
72
72
|
taskId: task.taskId,
|
|
73
73
|
status: task.status,
|
|
74
74
|
error: getErrorMessage(error),
|
|
75
|
-
}, LOG_TASKS);
|
|
75
|
+
}, Loggers.LOG_TASKS);
|
|
76
76
|
});
|
|
77
77
|
}
|
|
78
78
|
export function throwTaskNotFound() {
|
|
@@ -134,7 +134,7 @@ async function runTaskToolExecution(params) {
|
|
|
134
134
|
const controller = attachAbortController(taskId);
|
|
135
135
|
const progressState = { closed: false };
|
|
136
136
|
try {
|
|
137
|
-
logInfo('Task execution started', { taskId, tool: tool.name }, LOG_TASKS);
|
|
137
|
+
logInfo('Task execution started', { taskId, tool: tool.name }, Loggers.LOG_TASKS);
|
|
138
138
|
const relatedMeta = buildRelatedTaskMeta(taskId, meta);
|
|
139
139
|
const result = await tool.execute(args, {
|
|
140
140
|
signal: controller.signal,
|
|
@@ -160,10 +160,10 @@ async function runTaskToolExecution(params) {
|
|
|
160
160
|
const completionUpdate = buildTaskCompletionUpdate(result, tool);
|
|
161
161
|
updateTaskAndEmitStatus(server, taskId, completionUpdate);
|
|
162
162
|
if (completionUpdate.status === 'completed') {
|
|
163
|
-
logInfo('Task execution completed', { taskId, tool: tool.name }, LOG_TASKS);
|
|
163
|
+
logInfo('Task execution completed', { taskId, tool: tool.name }, Loggers.LOG_TASKS);
|
|
164
164
|
}
|
|
165
165
|
else {
|
|
166
|
-
logWarn('Task execution completed with tool error result', { taskId, tool: tool.name }, LOG_TASKS);
|
|
166
|
+
logWarn('Task execution completed with tool error result', { taskId, tool: tool.name }, Loggers.LOG_TASKS);
|
|
167
167
|
}
|
|
168
168
|
}
|
|
169
169
|
catch (error) {
|
|
@@ -171,7 +171,7 @@ async function runTaskToolExecution(params) {
|
|
|
171
171
|
taskId,
|
|
172
172
|
tool: tool.name,
|
|
173
173
|
error: getErrorMessage(error),
|
|
174
|
-
}, LOG_TASKS);
|
|
174
|
+
}, Loggers.LOG_TASKS);
|
|
175
175
|
updateTaskAndEmitStatus(server, taskId, buildTaskFailureState(error));
|
|
176
176
|
}
|
|
177
177
|
finally {
|
|
@@ -197,7 +197,7 @@ export async function handleToolCallRequest(server, request, context) {
|
|
|
197
197
|
taskId: task.taskId,
|
|
198
198
|
tool: params.name,
|
|
199
199
|
...(params.task.ttl !== undefined ? { ttl: params.task.ttl } : {}),
|
|
200
|
-
}, LOG_TASKS);
|
|
200
|
+
}, Loggers.LOG_TASKS);
|
|
201
201
|
void runTaskToolExecution({
|
|
202
202
|
server,
|
|
203
203
|
taskId: task.taskId,
|
|
@@ -228,7 +228,7 @@ export async function handleToolCallRequest(server, request, context) {
|
|
|
228
228
|
logDebug('Executing task-capable tool inline', {
|
|
229
229
|
tool: params.name,
|
|
230
230
|
hasProgressToken: params._meta?.progressToken !== undefined,
|
|
231
|
-
}, LOG_TASKS);
|
|
231
|
+
}, Loggers.LOG_TASKS);
|
|
232
232
|
try {
|
|
233
233
|
return await tool.execute(args, {
|
|
234
234
|
...buildToolHandlerExtra(context, params._meta),
|
package/dist/tasks/handlers.js
CHANGED
|
@@ -3,7 +3,7 @@ import {} from '@modelcontextprotocol/sdk/server/mcp.js';
|
|
|
3
3
|
import { CallToolRequestSchema, ErrorCode, } from '@modelcontextprotocol/sdk/types.js';
|
|
4
4
|
import { z } from 'zod';
|
|
5
5
|
import { logDebug, logWarn, runWithRequestContext } from '../lib/core.js';
|
|
6
|
-
import {
|
|
6
|
+
import { Loggers } from '../lib/logger-names.js';
|
|
7
7
|
import { createMcpError, getSdkCallToolHandler } from '../lib/mcp-interop.js';
|
|
8
8
|
import { parseExtendedCallToolRequest, withRelatedTaskMeta, } from './call-contract.js';
|
|
9
9
|
import { abortTaskExecution, emitTaskStatusNotification, handleToolCallRequest, throwTaskNotFound, toTaskSummary, } from './execution.js';
|
|
@@ -55,7 +55,7 @@ export function registerTaskHandlers(server, options) {
|
|
|
55
55
|
if (taskCapableToolsRegistered && requireInterception) {
|
|
56
56
|
throw Error('Task-capable tools are registered but SDK tools/call interception is unavailable. Upgrade compatibility or disable strict interception with TASKS_REQUIRE_INTERCEPTION=false.');
|
|
57
57
|
}
|
|
58
|
-
logWarn('Task call interception disabled: SDK tools/call handler unavailable; task-capable tools require MCP SDK compatibility update', { sdkVersion: 'unknown' }, LOG_TASKS);
|
|
58
|
+
logWarn('Task call interception disabled: SDK tools/call handler unavailable; task-capable tools require MCP SDK compatibility update', { sdkVersion: 'unknown' }, Loggers.LOG_TASKS);
|
|
59
59
|
}
|
|
60
60
|
if (sdkCallToolHandler) {
|
|
61
61
|
server.server.setRequestHandler(CallToolRequestSchema, async (request, extra) => {
|
|
@@ -82,7 +82,7 @@ export function registerTaskHandlers(server, options) {
|
|
|
82
82
|
tool: toolName,
|
|
83
83
|
taskRequested: parsed.params.task !== undefined,
|
|
84
84
|
hasProgressToken: parsed.params._meta?.progressToken !== undefined,
|
|
85
|
-
}, LOG_TASKS);
|
|
85
|
+
}, Loggers.LOG_TASKS);
|
|
86
86
|
return handleToolCallRequest(server, parsed, context);
|
|
87
87
|
});
|
|
88
88
|
});
|
|
@@ -90,7 +90,7 @@ export function registerTaskHandlers(server, options) {
|
|
|
90
90
|
server.server.setRequestHandler(TaskGetSchema, (request, extra) => {
|
|
91
91
|
const { taskId } = request.params;
|
|
92
92
|
const { ownerKey } = resolveOwnerScopedExtra(extra);
|
|
93
|
-
logDebug('tasks/get requested', { taskId }, LOG_TASKS);
|
|
93
|
+
logDebug('tasks/get requested', { taskId }, Loggers.LOG_TASKS);
|
|
94
94
|
const task = taskManager.getTask(taskId, ownerKey);
|
|
95
95
|
if (!task)
|
|
96
96
|
throwTaskNotFound();
|
|
@@ -99,7 +99,7 @@ export function registerTaskHandlers(server, options) {
|
|
|
99
99
|
server.server.setRequestHandler(TaskResultSchema, async (request, extra) => {
|
|
100
100
|
const { taskId } = request.params;
|
|
101
101
|
const { parsedExtra, ownerKey } = resolveOwnerScopedExtra(extra);
|
|
102
|
-
logDebug('tasks/result requested', { taskId }, LOG_TASKS);
|
|
102
|
+
logDebug('tasks/result requested', { taskId }, Loggers.LOG_TASKS);
|
|
103
103
|
const task = await taskManager.waitForTerminalTask(taskId, ownerKey, parsedExtra?.signal);
|
|
104
104
|
if (!task)
|
|
105
105
|
throwTaskNotFound();
|
|
@@ -132,7 +132,7 @@ export function registerTaskHandlers(server, options) {
|
|
|
132
132
|
server.server.setRequestHandler(TaskListSchema, (request, extra) => {
|
|
133
133
|
const { ownerKey } = resolveOwnerScopedExtra(extra);
|
|
134
134
|
const cursor = request.params?.cursor;
|
|
135
|
-
logDebug('tasks/list requested', { hasCursor: cursor !== undefined }, LOG_TASKS);
|
|
135
|
+
logDebug('tasks/list requested', { hasCursor: cursor !== undefined }, Loggers.LOG_TASKS);
|
|
136
136
|
const { tasks, nextCursor } = taskManager.listTasks(cursor === undefined ? { ownerKey } : { ownerKey, cursor });
|
|
137
137
|
return {
|
|
138
138
|
tasks: tasks.map((task) => toTaskSummary(task)),
|
|
@@ -142,7 +142,7 @@ export function registerTaskHandlers(server, options) {
|
|
|
142
142
|
server.server.setRequestHandler(TaskCancelSchema, (request, extra) => {
|
|
143
143
|
const { taskId } = request.params;
|
|
144
144
|
const { ownerKey } = resolveOwnerScopedExtra(extra);
|
|
145
|
-
logDebug('tasks/cancel requested', { taskId }, LOG_TASKS);
|
|
145
|
+
logDebug('tasks/cancel requested', { taskId }, Loggers.LOG_TASKS);
|
|
146
146
|
const task = taskManager.cancelTask(taskId, ownerKey);
|
|
147
147
|
if (!task)
|
|
148
148
|
throwTaskNotFound();
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"manager.d.ts","sourceRoot":"","sources":["../../src/tasks/manager.ts"],"names":[],"mappings":"AAgBA,MAAM,MAAM,UAAU,GAClB,SAAS,GACT,gBAAgB,GAChB,WAAW,GACX,QAAQ,GACR,WAAW,CAAC;AAEhB,MAAM,WAAW,SAAS;IACxB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,OAAO,CAAC;CAChB;AAED,MAAM,WAAW,SAAS;IACxB,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,UAAU,CAAC;IACnB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,aAAa,EAAE,MAAM,CAAC;IACtB,GAAG,EAAE,MAAM,CAAC;IACZ,YAAY,EAAE,MAAM,CAAC;IACrB,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,KAAK,CAAC,EAAE,SAAS,CAAC;CACnB;AAMD,UAAU,iBAAiB;IACzB,GAAG,CAAC,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,gBAAgB;IAC/B,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;IACvB,IAAI,EAAE;QACJ,MAAM,EAAE,MAAM,CAAC;QACf,MAAM,EAAE,UAAU,CAAC;QACnB,aAAa,CAAC,EAAE,MAAM,CAAC;QACvB,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,SAAS,EAAE,MAAM,CAAC;QAClB,aAAa,EAAE,MAAM,CAAC;QACtB,GAAG,EAAE,MAAM,CAAC;QACZ,YAAY,EAAE,MAAM,CAAC;KACtB,CAAC;CACH;AAiFD,cAAM,WAAW;IACf,OAAO,CAAC,KAAK,CAAwC;IACrD,OAAO,CAAC,WAAW,CAA6B;IAChD,OAAO,CAAC,QAAQ,CAAC,OAAO,CAEtB;IACF,OAAO,CAAC,eAAe,CAA+C;IAEtE,OAAO,CAAC,iBAAiB;IAQzB,OAAO,CAAC,eAAe;IAOvB,OAAO,CAAC,aAAa;IAIrB,OAAO,CAAC,kBAAkB;IAkB1B,OAAO,CAAC,UAAU;IAgBlB,OAAO,CAAC,eAAe;IAQvB,OAAO,CAAC,gBAAgB;IAgBxB,OAAO,CAAC,mBAAmB;IAU3B,OAAO,CAAC,mBAAmB;IAoB3B,UAAU,CACR,OAAO,CAAC,EAAE,iBAAiB,EAC3B,aAAa,SAAiB,EAC9B,QAAQ,GAAE,MAA0B,GACnC,SAAS;IAiCZ,OAAO,CAAC,gBAAgB;IAgBxB,OAAO,CAAC,MAAM,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,SAAS,GAAG,SAAS;IAIjE,UAAU,CACR,MAAM,EAAE,MAAM,EACd,OAAO,EAAE,OAAO,CAAC,IAAI,CAAC,SAAS,EAAE,QAAQ,GAAG,WAAW,CAAC,CAAC,GACxD,IAAI;
|
|
1
|
+
{"version":3,"file":"manager.d.ts","sourceRoot":"","sources":["../../src/tasks/manager.ts"],"names":[],"mappings":"AAgBA,MAAM,MAAM,UAAU,GAClB,SAAS,GACT,gBAAgB,GAChB,WAAW,GACX,QAAQ,GACR,WAAW,CAAC;AAEhB,MAAM,WAAW,SAAS;IACxB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,OAAO,CAAC;CAChB;AAED,MAAM,WAAW,SAAS;IACxB,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,UAAU,CAAC;IACnB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,aAAa,EAAE,MAAM,CAAC;IACtB,GAAG,EAAE,MAAM,CAAC;IACZ,YAAY,EAAE,MAAM,CAAC;IACrB,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,KAAK,CAAC,EAAE,SAAS,CAAC;CACnB;AAMD,UAAU,iBAAiB;IACzB,GAAG,CAAC,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,gBAAgB;IAC/B,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;IACvB,IAAI,EAAE;QACJ,MAAM,EAAE,MAAM,CAAC;QACf,MAAM,EAAE,UAAU,CAAC;QACnB,aAAa,CAAC,EAAE,MAAM,CAAC;QACvB,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,SAAS,EAAE,MAAM,CAAC;QAClB,aAAa,EAAE,MAAM,CAAC;QACtB,GAAG,EAAE,MAAM,CAAC;QACZ,YAAY,EAAE,MAAM,CAAC;KACtB,CAAC;CACH;AAiFD,cAAM,WAAW;IACf,OAAO,CAAC,KAAK,CAAwC;IACrD,OAAO,CAAC,WAAW,CAA6B;IAChD,OAAO,CAAC,QAAQ,CAAC,OAAO,CAEtB;IACF,OAAO,CAAC,eAAe,CAA+C;IAEtE,OAAO,CAAC,iBAAiB;IAQzB,OAAO,CAAC,eAAe;IAOvB,OAAO,CAAC,aAAa;IAIrB,OAAO,CAAC,kBAAkB;IAkB1B,OAAO,CAAC,UAAU;IAgBlB,OAAO,CAAC,eAAe;IAQvB,OAAO,CAAC,gBAAgB;IAgBxB,OAAO,CAAC,mBAAmB;IAU3B,OAAO,CAAC,mBAAmB;IAoB3B,UAAU,CACR,OAAO,CAAC,EAAE,iBAAiB,EAC3B,aAAa,SAAiB,EAC9B,QAAQ,GAAE,MAA0B,GACnC,SAAS;IAiCZ,OAAO,CAAC,gBAAgB;IAgBxB,OAAO,CAAC,MAAM,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,SAAS,GAAG,SAAS;IAIjE,UAAU,CACR,MAAM,EAAE,MAAM,EACd,OAAO,EAAE,OAAO,CAAC,IAAI,CAAC,SAAS,EAAE,QAAQ,GAAG,WAAW,CAAC,CAAC,GACxD,IAAI;IAmCP,UAAU,CAAC,MAAM,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,SAAS,GAAG,SAAS;IAuBpE,kBAAkB,CAChB,QAAQ,EAAE,MAAM,EAChB,aAAa,SAAkE,GAC9E,SAAS,EAAE;IAuBd,OAAO,CAAC,WAAW;IA2BnB,SAAS,CAAC,OAAO,EAAE;QAAE,QAAQ,EAAE,MAAM,CAAC;QAAC,MAAM,CAAC,EAAE,MAAM,CAAC;QAAC,KAAK,CAAC,EAAE,MAAM,CAAA;KAAE,GAAG;QACzE,KAAK,EAAE,SAAS,EAAE,CAAC;QACnB,UAAU,CAAC,EAAE,MAAM,CAAC;KACrB;IAgBD,OAAO,CAAC,mBAAmB;IAQrB,mBAAmB,CACvB,MAAM,EAAE,MAAM,EACd,QAAQ,EAAE,MAAM,EAChB,MAAM,CAAC,EAAE,WAAW,GACnB,OAAO,CAAC,SAAS,GAAG,SAAS,CAAC;IAcjC,sBAAsB,CAAC,MAAM,EAAE,MAAM,GAAG,IAAI;CAU7C;AAED,eAAO,MAAM,WAAW,aAAoB,CAAC;AAY7C,wBAAgB,gBAAgB,CAAC,YAAY,EAAE,MAAM,GAAG,MAAM,CAO7D;AAED,wBAAgB,gBAAgB,CAC9B,MAAM,EAAE,MAAM,GACb;IAAE,YAAY,EAAE,MAAM,CAAA;CAAE,GAAG,IAAI,CA0BjC"}
|
package/dist/tasks/manager.js
CHANGED
|
@@ -3,7 +3,7 @@ import { createHmac, randomBytes } from 'node:crypto';
|
|
|
3
3
|
import { setInterval } from 'node:timers';
|
|
4
4
|
import { ErrorCode } from '@modelcontextprotocol/sdk/types.js';
|
|
5
5
|
import { config, logInfo, logWarn } from '../lib/core.js';
|
|
6
|
-
import {
|
|
6
|
+
import { Loggers } from '../lib/logger-names.js';
|
|
7
7
|
import { createMcpError } from '../lib/mcp-interop.js';
|
|
8
8
|
import { isObject, timingSafeEqualUtf8 } from '../lib/utils.js';
|
|
9
9
|
import { TaskWaiterRegistry, waitForTerminalTask as waitForTerminalTaskWithDeadline, } from './waiters.js';
|
|
@@ -58,10 +58,10 @@ function logTaskStatusTransition(task, previousStatus, nextStatus) {
|
|
|
58
58
|
...(task.statusMessage ? { statusMessage: task.statusMessage } : {}),
|
|
59
59
|
};
|
|
60
60
|
if (nextStatus === 'failed') {
|
|
61
|
-
logWarn('Task status changed to failed', meta, LOG_TASKS);
|
|
61
|
+
logWarn('Task status changed to failed', meta, Loggers.LOG_TASKS);
|
|
62
62
|
return;
|
|
63
63
|
}
|
|
64
|
-
logInfo('Task status changed', meta, LOG_TASKS);
|
|
64
|
+
logInfo('Task status changed', meta, Loggers.LOG_TASKS);
|
|
65
65
|
}
|
|
66
66
|
class TaskManager {
|
|
67
67
|
tasks = new Map();
|
|
@@ -94,7 +94,7 @@ class TaskManager {
|
|
|
94
94
|
taskId: task.taskId,
|
|
95
95
|
ownerKey: task.ownerKey,
|
|
96
96
|
status: task.status,
|
|
97
|
-
}, LOG_TASKS);
|
|
97
|
+
}, Loggers.LOG_TASKS);
|
|
98
98
|
this.removeTask(task.taskId);
|
|
99
99
|
}
|
|
100
100
|
}
|
|
@@ -171,7 +171,7 @@ class TaskManager {
|
|
|
171
171
|
taskId: task.taskId,
|
|
172
172
|
ownerKey,
|
|
173
173
|
ttl: task.ttl,
|
|
174
|
-
}, LOG_TASKS);
|
|
174
|
+
}, Loggers.LOG_TASKS);
|
|
175
175
|
return task;
|
|
176
176
|
}
|
|
177
177
|
lookupActiveTask(taskId, ownerKey) {
|
|
@@ -192,14 +192,14 @@ class TaskManager {
|
|
|
192
192
|
updateTask(taskId, updates) {
|
|
193
193
|
const task = this.tasks.get(taskId);
|
|
194
194
|
if (!task) {
|
|
195
|
-
logWarn('updateTask called for unknown task', { taskId }, LOG_TASKS);
|
|
195
|
+
logWarn('updateTask called for unknown task', { taskId }, Loggers.LOG_TASKS);
|
|
196
196
|
return;
|
|
197
197
|
}
|
|
198
198
|
if (isTerminalStatus(task.status)) {
|
|
199
199
|
logWarn('updateTask called for terminal task', {
|
|
200
200
|
taskId,
|
|
201
201
|
currentStatus: task.status,
|
|
202
|
-
}, LOG_TASKS);
|
|
202
|
+
}, Loggers.LOG_TASKS);
|
|
203
203
|
return;
|
|
204
204
|
}
|
|
205
205
|
const nextStatus = resolveNextTaskStatus(task, updates);
|
|
@@ -222,7 +222,7 @@ class TaskManager {
|
|
|
222
222
|
logInfo('Task cancelled by request', {
|
|
223
223
|
taskId: task.taskId,
|
|
224
224
|
ownerKey: task.ownerKey,
|
|
225
|
-
}, LOG_TASKS);
|
|
225
|
+
}, Loggers.LOG_TASKS);
|
|
226
226
|
return task;
|
|
227
227
|
}
|
|
228
228
|
cancelTasksByOwner(ownerKey, statusMessage = 'The task was cancelled because its owner is no longer active.') {
|
|
@@ -239,7 +239,7 @@ class TaskManager {
|
|
|
239
239
|
logInfo('Tasks cancelled for owner', {
|
|
240
240
|
ownerKey,
|
|
241
241
|
count: cancelled.length,
|
|
242
|
-
}, LOG_TASKS);
|
|
242
|
+
}, Loggers.LOG_TASKS);
|
|
243
243
|
}
|
|
244
244
|
return cancelled;
|
|
245
245
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"fetch-url.d.ts","sourceRoot":"","sources":["../../src/tools/fetch-url.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAC;AACzE,OAAO,KAAK,EACV,YAAY,EAEb,MAAM,oCAAoC,CAAC;AAE5C,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,oCAAoC,CAAC;AACvE,OAAO,KAAK,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAY7B,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,0BAA0B,CAAC;AAEjE,OAAO,EAGL,KAAK,gBAAgB,EAErB,KAAK,gBAAgB,
|
|
1
|
+
{"version":3,"file":"fetch-url.d.ts","sourceRoot":"","sources":["../../src/tools/fetch-url.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAC;AACzE,OAAO,KAAK,EACV,YAAY,EAEb,MAAM,oCAAoC,CAAC;AAE5C,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,oCAAoC,CAAC;AACvE,OAAO,KAAK,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAY7B,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,0BAA0B,CAAC;AAEjE,OAAO,EAGL,KAAK,gBAAgB,EAErB,KAAK,gBAAgB,EAEtB,MAAM,uBAAuB,CAAC;AAS/B,OAAO,EACL,mBAAmB,EAIpB,MAAM,eAAe,CAAC;AAEvB,OAAO,EAIL,KAAK,sBAAsB,EAE5B,MAAM,sBAAsB,CAAC;AAE9B,KAAK,aAAa,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,mBAAmB,CAAC,CAAC;AAEzD,UAAU,gBAAgB;IACxB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;IACvB,OAAO,EAAE,YAAY,EAAE,CAAC;IACxB,iBAAiB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,SAAS,CAAC;IACxD,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB;AAED,eAAO,MAAM,mBAAmB,cAAc,CAAC;AAkF/C,wBAAgB,0BAA0B,CACxC,iBAAiB,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GACzC,YAAY,EAAE,CAUhB;AAuCD,wBAAgB,+BAA+B,CAC7C,MAAM,EAAE,YAAY,GACnB,MAAM,GAAG,SAAS,CAUpB;AAED,qBAAa,oBAAoB;IAI7B,OAAO,CAAC,QAAQ,CAAC,QAAQ;IACzB,OAAO,CAAC,QAAQ,CAAC,OAAO;IAJ1B,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAa;gBAGhB,QAAQ,EAAE,gBAAgB,EAC1B,OAAO,EAAE,MAAM;IAGlC,WAAW,IAAI,IAAI;IAInB,WAAW,CAAC,KAAK,EAAE,gBAAgB,GAAG,IAAI;IAM1C,aAAa,CAAC,WAAW,EAAE,MAAM,GAAG,IAAI;IAQxC,aAAa,CAAC,SAAS,EAAE,OAAO,GAAG,IAAI;IAQvC,OAAO,CAAC,QAAQ;CAiCjB;AA8FD,wBAAsB,mBAAmB,CACvC,KAAK,EAAE,aAAa,EACpB,KAAK,CAAC,EAAE,gBAAgB,GACvB,OAAO,CAAC,gBAAgB,CAAC,CAa3B;AAqBD,MAAM,WAAW,wBAAwB;IACvC,cAAc,EAAE,CAAC,OAAO,EAAE,sBAAsB,KAAK,IAAI,CAAC;CAC3D;AAoBD,wBAAgB,aAAa,CAAC,MAAM,EAAE,SAAS,GAAG,wBAAwB,CAuCzE"}
|
package/dist/tools/fetch-url.js
CHANGED
|
@@ -1,11 +1,10 @@
|
|
|
1
1
|
import { ErrorCode } from '@modelcontextprotocol/sdk/types.js';
|
|
2
|
-
import { config, logInfo
|
|
2
|
+
import { config, logInfo } from '../lib/core.js';
|
|
3
3
|
import { finalizeInlineMarkdown, markdownTransform, performSharedFetch, withSignal, } from '../lib/fetch-pipeline.js';
|
|
4
|
-
import {
|
|
5
|
-
import { createMcpError, createProgressReporter, registerToolPresentation, } from '../lib/mcp-interop.js';
|
|
4
|
+
import { Loggers } from '../lib/logger-names.js';
|
|
5
|
+
import { createMcpError, createProgressReporter, registerToolPresentation, validateOrThrow, } from '../lib/mcp-interop.js';
|
|
6
6
|
import { classifyAndLogToolError } from '../lib/tool-errors.js';
|
|
7
7
|
import { composeAbortSignal, isAbortError, isObject, parseUrlOrNull, } from '../lib/utils.js';
|
|
8
|
-
import { formatZodError } from '../lib/zod.js';
|
|
9
8
|
import { fetchUrlInputSchema, fetchUrlOutputSchema, normalizeExtractedMetadata, normalizePageTitle, } from '../schemas.js';
|
|
10
9
|
import { withRequestContextIfMissing } from '../tasks/owner.js';
|
|
11
10
|
import { registerTaskCapableTool, setTaskCapableToolSupport, unregisterTaskCapableTool, } from '../tasks/registry.js';
|
|
@@ -64,18 +63,8 @@ function buildStructuredContent(pipeline, inlineResult, inputUrl) {
|
|
|
64
63
|
...(truncated ? { truncated: true } : {}),
|
|
65
64
|
};
|
|
66
65
|
}
|
|
67
|
-
function validateStructuredContent(structuredContent
|
|
68
|
-
|
|
69
|
-
if (validation.success)
|
|
70
|
-
return;
|
|
71
|
-
const issues = formatZodError(validation.error);
|
|
72
|
-
logWarn('Tool output schema validation failed', {
|
|
73
|
-
url: inputUrl,
|
|
74
|
-
issues,
|
|
75
|
-
}, LOG_FETCH_URL);
|
|
76
|
-
throw createMcpError(ErrorCode.InternalError, 'Output validation failed', {
|
|
77
|
-
issues,
|
|
78
|
-
});
|
|
66
|
+
function validateStructuredContent(structuredContent) {
|
|
67
|
+
validateOrThrow(fetchUrlOutputSchema, structuredContent, ErrorCode.InternalError, 'Output validation failed', Loggers.LOG_FETCH_URL);
|
|
79
68
|
}
|
|
80
69
|
export function buildFetchUrlContentBlocks(structuredContent) {
|
|
81
70
|
const markdown = typeof structuredContent['markdown'] === 'string'
|
|
@@ -88,7 +77,7 @@ export function buildFetchUrlContentBlocks(structuredContent) {
|
|
|
88
77
|
}
|
|
89
78
|
function buildResponse(pipeline, inlineResult, inputUrl) {
|
|
90
79
|
const structuredContent = buildStructuredContent(pipeline, inlineResult, inputUrl);
|
|
91
|
-
validateStructuredContent(structuredContent
|
|
80
|
+
validateStructuredContent(structuredContent);
|
|
92
81
|
return {
|
|
93
82
|
content: buildFetchUrlContentBlocks(structuredContent),
|
|
94
83
|
structuredContent,
|
|
@@ -216,7 +205,7 @@ async function executeFetch(input, extra) {
|
|
|
216
205
|
...(isObject(relatedTask) && typeof relatedTask['taskId'] === 'string'
|
|
217
206
|
? { taskId: relatedTask['taskId'] }
|
|
218
207
|
: {}),
|
|
219
|
-
}, LOG_FETCH_URL);
|
|
208
|
+
}, Loggers.LOG_FETCH_URL);
|
|
220
209
|
progressPlan.reportStart();
|
|
221
210
|
const { pipeline, inlineResult } = await performSharedFetch(buildFetchOptions(url, signal, progressPlan));
|
|
222
211
|
const truncated = inlineResult.truncated ?? pipeline.data.truncated;
|
|
@@ -227,7 +216,7 @@ async function executeFetch(input, extra) {
|
|
|
227
216
|
contentSize: inlineResult.contentSize,
|
|
228
217
|
durationMs: Math.round(performance.now() - startedAt),
|
|
229
218
|
...(truncated ? { truncated: true } : {}),
|
|
230
|
-
}, LOG_FETCH_URL);
|
|
219
|
+
}, Loggers.LOG_FETCH_URL);
|
|
231
220
|
const response = buildResponse(pipeline, inlineResult, url);
|
|
232
221
|
progressPlan.reportSuccess(inlineResult.contentSize);
|
|
233
222
|
return response;
|
|
@@ -241,7 +230,7 @@ export async function fetchUrlToolHandler(input, extra) {
|
|
|
241
230
|
const startedAt = performance.now();
|
|
242
231
|
return executeFetch(input, extra).catch((error) => {
|
|
243
232
|
const durationMs = Math.round(performance.now() - startedAt);
|
|
244
|
-
return classifyAndLogToolError(error, { url: input.url, durationMs }, LOG_FETCH_URL, 'fetch-url', 'Failed to fetch URL');
|
|
233
|
+
return classifyAndLogToolError(error, { url: input.url, durationMs }, Loggers.LOG_FETCH_URL, 'fetch-url', 'Failed to fetch URL');
|
|
245
234
|
});
|
|
246
235
|
}
|
|
247
236
|
/* -------------------------------------------------------------------------------------------------
|
|
@@ -265,11 +254,7 @@ function createTaskCapableDescriptor() {
|
|
|
265
254
|
return {
|
|
266
255
|
name: TOOL_DEFINITION.name,
|
|
267
256
|
parseArguments: (args) => {
|
|
268
|
-
|
|
269
|
-
if (!parsed.success) {
|
|
270
|
-
throw createMcpError(ErrorCode.InvalidParams, formatZodError(parsed.error));
|
|
271
|
-
}
|
|
272
|
-
return parsed.data;
|
|
257
|
+
return validateOrThrow(TOOL_DEFINITION.inputSchema, args, ErrorCode.InvalidParams, 'Invalid parameters for fetch-url', Loggers.LOG_FETCH_URL);
|
|
273
258
|
},
|
|
274
259
|
execute: TOOL_DEFINITION.handler,
|
|
275
260
|
getCompletionStatusMessage: getFetchCompletionStatusMessage,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"dom-prep.d.ts","sourceRoot":"","sources":["../../src/transform/dom-prep.ts"],"names":[],"mappings":"AAMA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;AA+mBnD,eAAO,MAAM,sBAAsB,QAAmB,CAAC;AAyCvD,wBAAgB,qBAAqB,CAAC,QAAQ,EAAE,QAAQ,GAAG,IAAI,CA6B9D;AAuBD,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,QAAQ,GAAG,OAAO,CAY/D;AAED,wBAAgB,4BAA4B,CAC1C,QAAQ,EAAE,QAAQ,EAClB,QAAQ,EAAE,MAAM,GACf,MAAM,CASR;AA0CD,qEAAqE;AACrE,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,QAAQ,GAAG,IAAI,CAG5D;AA0RD,wBAAgB,wBAAwB,CAAC,QAAQ,EAAE,QAAQ,GAAG,IAAI,CA2BjE;AAED,wBAAgB,iBAAiB,CAAC,QAAQ,EAAE,QAAQ,GAAG,IAAI,CAE1D;AAED,wBAAgB,qBAAqB,CAAC,QAAQ,EAAE,QAAQ,GAAG,IAAI,CAY9D;AAWD,wBAAgB,kBAAkB,CAAC,QAAQ,EAAE,QAAQ,GAAG,IAAI,CAQ3D;AAuDD,wBAAgB,0BAA0B,CACxC,QAAQ,EAAE,QAAQ,EAClB,OAAO,CAAC,EAAE,MAAM,EAChB,MAAM,CAAC,EAAE,WAAW,GACnB,IAAI,CAON;AA4BD,wBAAgB,mBAAmB,CACjC,IAAI,EAAE,MAAM,EACZ,QAAQ,CAAC,EAAE,QAAQ,EACnB,OAAO,CAAC,EAAE,MAAM,EAChB,MAAM,CAAC,EAAE,WAAW,GACnB,MAAM,CAcR;AA0ED,wBAAgB,oBAAoB,CAClC,cAAc,EAAE,MAAM,GAAG,QAAQ,GAChC,MAAM,CAaR;AAiMD,wBAAgB,sBAAsB,CACpC,OAAO,EAAE,gBAAgB,EACzB,QAAQ,EAAE,QAAQ,GACjB,QAAQ,GAAG,IAAI,
|
|
1
|
+
{"version":3,"file":"dom-prep.d.ts","sourceRoot":"","sources":["../../src/transform/dom-prep.ts"],"names":[],"mappings":"AAMA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;AA+mBnD,eAAO,MAAM,sBAAsB,QAAmB,CAAC;AAyCvD,wBAAgB,qBAAqB,CAAC,QAAQ,EAAE,QAAQ,GAAG,IAAI,CA6B9D;AAuBD,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,QAAQ,GAAG,OAAO,CAY/D;AAED,wBAAgB,4BAA4B,CAC1C,QAAQ,EAAE,QAAQ,EAClB,QAAQ,EAAE,MAAM,GACf,MAAM,CASR;AA0CD,qEAAqE;AACrE,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,QAAQ,GAAG,IAAI,CAG5D;AA0RD,wBAAgB,wBAAwB,CAAC,QAAQ,EAAE,QAAQ,GAAG,IAAI,CA2BjE;AAED,wBAAgB,iBAAiB,CAAC,QAAQ,EAAE,QAAQ,GAAG,IAAI,CAE1D;AAED,wBAAgB,qBAAqB,CAAC,QAAQ,EAAE,QAAQ,GAAG,IAAI,CAY9D;AAWD,wBAAgB,kBAAkB,CAAC,QAAQ,EAAE,QAAQ,GAAG,IAAI,CAQ3D;AAuDD,wBAAgB,0BAA0B,CACxC,QAAQ,EAAE,QAAQ,EAClB,OAAO,CAAC,EAAE,MAAM,EAChB,MAAM,CAAC,EAAE,WAAW,GACnB,IAAI,CAON;AA4BD,wBAAgB,mBAAmB,CACjC,IAAI,EAAE,MAAM,EACZ,QAAQ,CAAC,EAAE,QAAQ,EACnB,OAAO,CAAC,EAAE,MAAM,EAChB,MAAM,CAAC,EAAE,WAAW,GACnB,MAAM,CAcR;AA0ED,wBAAgB,oBAAoB,CAClC,cAAc,EAAE,MAAM,GAAG,QAAQ,GAChC,MAAM,CAaR;AAiMD,wBAAgB,sBAAsB,CACpC,OAAO,EAAE,gBAAgB,EACzB,QAAQ,EAAE,QAAQ,GACjB,QAAQ,GAAG,IAAI,CA0CjB"}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { parseHTML } from 'linkedom';
|
|
2
2
|
import { config, logDebug } from '../lib/core.js';
|
|
3
|
-
import {
|
|
3
|
+
import { Loggers } from '../lib/logger-names.js';
|
|
4
4
|
import { CharCode, isWhitespaceChar } from '../lib/utils.js';
|
|
5
5
|
// ── Thresholds ──────────────────────────────────────────────────────
|
|
6
6
|
const NOISE_SCAN_LIMIT = 50_000;
|
|
@@ -459,7 +459,7 @@ function stripNoise(document, signal) {
|
|
|
459
459
|
if (config.noiseRemoval.debug) {
|
|
460
460
|
logDebug('Noise removal audit enabled', {
|
|
461
461
|
categories: [...(context.flags.navFooter ? ['nav-footer'] : [])],
|
|
462
|
-
}, LOG_TRANSFORM);
|
|
462
|
+
}, Loggers.LOG_TRANSFORM);
|
|
463
463
|
}
|
|
464
464
|
// Structural Removal
|
|
465
465
|
removeNodes(document.querySelectorAll(context.noiseSelector));
|
|
@@ -1263,23 +1263,23 @@ function passesEmptySectionRatio(articleDoc) {
|
|
|
1263
1263
|
}
|
|
1264
1264
|
export function evaluateArticleContent(article, document) {
|
|
1265
1265
|
if (!passesContentRatioGate(article.textContent.length, document)) {
|
|
1266
|
-
logDebug('FAILED passesContentRatioGate', undefined, LOG_TRANSFORM);
|
|
1266
|
+
logDebug('FAILED passesContentRatioGate', undefined, Loggers.LOG_TRANSFORM);
|
|
1267
1267
|
return null;
|
|
1268
1268
|
}
|
|
1269
1269
|
if (!passesRetentionRulesFromHtml(document, article.content)) {
|
|
1270
|
-
logDebug('FAILED passesRetentionRulesFromHtml', undefined, LOG_TRANSFORM);
|
|
1270
|
+
logDebug('FAILED passesRetentionRulesFromHtml', undefined, Loggers.LOG_TRANSFORM);
|
|
1271
1271
|
return null;
|
|
1272
1272
|
}
|
|
1273
1273
|
if (hasTruncatedSentences(article.textContent)) {
|
|
1274
|
-
logDebug('FAILED hasTruncatedSentences', undefined, LOG_TRANSFORM);
|
|
1274
|
+
logDebug('FAILED hasTruncatedSentences', undefined, Loggers.LOG_TRANSFORM);
|
|
1275
1275
|
return null;
|
|
1276
1276
|
}
|
|
1277
1277
|
const articleDoc = parseHTML(`<!DOCTYPE html><html><body>${article.content}</body></html>`).document;
|
|
1278
1278
|
if (!passesEmptySectionRatio(articleDoc)) {
|
|
1279
1279
|
const headings = articleDoc.querySelectorAll('h1,h2,h3,h4,h5,h6');
|
|
1280
|
-
logDebug(`FAILED passesEmptySectionRatio: ${headings.length} headings`, undefined, LOG_TRANSFORM);
|
|
1280
|
+
logDebug(`FAILED passesEmptySectionRatio: ${headings.length} headings`, undefined, Loggers.LOG_TRANSFORM);
|
|
1281
1281
|
for (const h of headings) {
|
|
1282
|
-
logDebug(`H: ${h.textContent} ${String(hasSectionContent(h))}`, undefined, LOG_TRANSFORM);
|
|
1282
|
+
logDebug(`H: ${h.textContent} ${String(hasSectionContent(h))}`, undefined, Loggers.LOG_TRANSFORM);
|
|
1283
1283
|
}
|
|
1284
1284
|
return null;
|
|
1285
1285
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"transform.d.ts","sourceRoot":"","sources":["../../src/transform/transform.ts"],"names":[],"mappings":"AA6CA,OAAO,EACL,wBAAwB,EACxB,wBAAwB,EACxB,oBAAoB,EACrB,MAAM,uBAAuB,CAAC;AAkB/B,OAAO,KAAK,EACV,gBAAgB,EAChB,iBAAiB,EACjB,gBAAgB,EAChB,uBAAuB,EACvB,aAAa,EACb,gBAAgB,EAChB,qBAAqB,EAEtB,MAAM,YAAY,CAAC;AAoCpB,UAAU,WAAW;IACnB,aAAa,EAAE,MAAM,CAAC;IACtB,SAAS,EAAE,MAAM,CAAC;CACnB;AAqMD,wBAAgB,mBAAmB,CACjC,GAAG,EAAE,MAAM,EACX,KAAK,EAAE,MAAM,EACb,MAAM,CAAC,EAAE,WAAW,GACnB,qBAAqB,GAAG,IAAI,CAE9B;AAED,wBAAgB,iBAAiB,CAC/B,OAAO,EAAE,qBAAqB,GAAG,IAAI,EACrC,OAAO,CAAC,EAAE;IAAE,SAAS,CAAC,EAAE,OAAO,CAAA;CAAE,GAChC,MAAM,CAER;
|
|
1
|
+
{"version":3,"file":"transform.d.ts","sourceRoot":"","sources":["../../src/transform/transform.ts"],"names":[],"mappings":"AA6CA,OAAO,EACL,wBAAwB,EACxB,wBAAwB,EACxB,oBAAoB,EACrB,MAAM,uBAAuB,CAAC;AAkB/B,OAAO,KAAK,EACV,gBAAgB,EAChB,iBAAiB,EACjB,gBAAgB,EAChB,uBAAuB,EACvB,aAAa,EACb,gBAAgB,EAChB,qBAAqB,EAEtB,MAAM,YAAY,CAAC;AAoCpB,UAAU,WAAW;IACnB,aAAa,EAAE,MAAM,CAAC;IACtB,SAAS,EAAE,MAAM,CAAC;CACnB;AAqMD,wBAAgB,mBAAmB,CACjC,GAAG,EAAE,MAAM,EACX,KAAK,EAAE,MAAM,EACb,MAAM,CAAC,EAAE,WAAW,GACnB,qBAAqB,GAAG,IAAI,CAE9B;AAED,wBAAgB,iBAAiB,CAC/B,OAAO,EAAE,qBAAqB,GAAG,IAAI,EACrC,OAAO,CAAC,EAAE;IAAE,SAAS,CAAC,EAAE,OAAO,CAAA;CAAE,GAChC,MAAM,CAER;AAkcD,wBAAgB,cAAc,CAC5B,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,GAAE;IAAE,cAAc,CAAC,EAAE,OAAO,CAAC;IAAC,MAAM,CAAC,EAAE,WAAW,CAAA;CAExD,GACA,gBAAgB,CAGlB;AAuKD,wBAAgB,cAAc,CAC5B,IAAI,EAAE,MAAM,EACZ,QAAQ,CAAC,EAAE,aAAa,EACxB,OAAO,CAAC,EAAE;IACR,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,MAAM,CAAC,EAAE,WAAW,GAAG,SAAS,CAAC;IACjC,QAAQ,CAAC,EAAE,QAAQ,GAAG,SAAS,CAAC;IAChC,gBAAgB,CAAC,EAAE,OAAO,GAAG,SAAS,CAAC;CACxC,GACA,MAAM,CAgCR;AA+DD,wBAAgB,sBAAsB,CACpC,OAAO,EAAE,gBAAgB,GAAG,IAAI,EAChC,sBAAsB,EAAE,MAAM,GAAG,QAAQ,GACxC,OAAO,CAQT;AAKD,wBAAgB,gCAAgC,CAC9C,OAAO,EAAE,gBAAgB,GAAG,IAAI,GAC/B,OAAO,IAAI,gBAAgB,CAE7B;AAED,wBAAgB,0BAA0B,CACxC,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,gBAAgB,GAAG,IAAI,EAChC,aAAa,EAAE,iBAAiB,EAChC,wBAAwB,EAAE,OAAO,EACjC,qBAAqB,EAAE,OAAO,GAC7B,aAAa,GAAG,SAAS,CAuB3B;AA6bD,wBAAgB,gCAAgC,CAC9C,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,gBAAgB,GACxB,uBAAuB,CAMzB;AAcD,UAAU,kBAAkB;IAC1B,UAAU,EAAE,MAAM,CAAC;IACnB,aAAa,EAAE,MAAM,CAAC;IACtB,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,wBAAgB,qBAAqB,IAAI,kBAAkB,GAAG,IAAI,CAEjE;AAED,wBAAsB,2BAA2B,IAAI,OAAO,CAAC,IAAI,CAAC,CAEjE;AAED,KAAK,yBAAyB,GAAG,gBAAgB,GAAG;IAAE,QAAQ,CAAC,EAAE,MAAM,CAAA;CAAE,CAAC;AAmH1E,wBAAsB,uBAAuB,CAC3C,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,gBAAgB,GACxB,OAAO,CAAC,uBAAuB,CAAC,CAElC;AAED,wBAAsB,yBAAyB,CAC7C,UAAU,EAAE,UAAU,EACtB,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,yBAAyB,GACjC,OAAO,CAAC,uBAAuB,CAAC,CAElC;AAED,OAAO,EACL,wBAAwB,EACxB,wBAAwB,EACxB,oBAAoB,GACrB,CAAC"}
|
|
@@ -2,9 +2,9 @@ import diagnosticsChannel from 'node:diagnostics_channel';
|
|
|
2
2
|
import { isProbablyReaderable, Readability } from '@mozilla/readability';
|
|
3
3
|
import { parseHTML } from 'linkedom';
|
|
4
4
|
import { config, getOperationId, getRequestId, logDebug, logError, logInfo, logWarn, redactUrl, } from '../lib/core.js';
|
|
5
|
-
import {
|
|
5
|
+
import { SystemErrors } from '../lib/error-codes.js';
|
|
6
6
|
import { isRawTextContentUrl } from '../lib/http.js';
|
|
7
|
-
import {
|
|
7
|
+
import { Loggers } from '../lib/logger-names.js';
|
|
8
8
|
import { composeAbortSignal, FetchError, getErrorMessage, getUtf8ByteLength, isAsciiOnly, isObject, throwIfAborted, toError, trimDanglingTagFragment, truncateToUtf8Boundary, } from '../lib/utils.js';
|
|
9
9
|
import { evaluateArticleContent, extractNoscriptImages, getVisibleTextLength, normalizeTabContent, prepareDocumentForMarkdown, removeNoiseFromHtml, serializeDocumentForMarkdown, stripDocsControls, stripScreenReaderText, surfaceCodeEditorContent, } from './dom-prep.js';
|
|
10
10
|
import { extractLanguageFromClassName } from './html-translators.js';
|
|
@@ -72,7 +72,7 @@ class StageTracker {
|
|
|
72
72
|
durationMs: Math.round(durationMs),
|
|
73
73
|
thresholdMs: Math.round(warnThresholdMs),
|
|
74
74
|
url: context.url,
|
|
75
|
-
}, LOG_TRANSFORM);
|
|
75
|
+
}, Loggers.LOG_TRANSFORM);
|
|
76
76
|
}
|
|
77
77
|
}
|
|
78
78
|
const event = {
|
|
@@ -140,7 +140,7 @@ class StageTracker {
|
|
|
140
140
|
logDebug('Diagnostic channel publish failed', {
|
|
141
141
|
stage: event.stage,
|
|
142
142
|
error: getErrorMessage(error),
|
|
143
|
-
}, LOG_TRANSFORM);
|
|
143
|
+
}, Loggers.LOG_TRANSFORM);
|
|
144
144
|
}
|
|
145
145
|
}
|
|
146
146
|
runTrackedSync(url, signal, fn) {
|
|
@@ -201,7 +201,7 @@ function truncateHtml(html, inputTruncated = false) {
|
|
|
201
201
|
size: getUtf8ByteLength(html),
|
|
202
202
|
maxSize,
|
|
203
203
|
truncatedSize: getUtf8ByteLength(content),
|
|
204
|
-
}, LOG_TRANSFORM);
|
|
204
|
+
}, Loggers.LOG_TRANSFORM);
|
|
205
205
|
return { html: content, truncated: true };
|
|
206
206
|
}
|
|
207
207
|
const MIN_SPA_CONTENT_LENGTH = 100;
|
|
@@ -340,7 +340,7 @@ function validateReaderability(doc, url, signal) {
|
|
|
340
340
|
if (textLength < MIN_SPA_CONTENT_LENGTH) {
|
|
341
341
|
logWarn('Very minimal server-rendered content detected (< 100 chars). ' +
|
|
342
342
|
'This might be a client-side rendered (SPA) application. ' +
|
|
343
|
-
'Content extraction may be incomplete.', { textLength }, LOG_TRANSFORM);
|
|
343
|
+
'Content extraction may be incomplete.', { textLength }, Loggers.LOG_TRANSFORM);
|
|
344
344
|
}
|
|
345
345
|
throwIfAborted(signal, url, 'extract:article:readabilityCheck');
|
|
346
346
|
if (textLength >= MIN_READERABLE_TEXT_LENGTH && !isProbablyReaderable(doc)) {
|
|
@@ -391,7 +391,7 @@ function mapReadabilityResult(parsed) {
|
|
|
391
391
|
// runs later in buildContentSource), so this clone starts from raw HTML.
|
|
392
392
|
function extractArticle(document, url, signal) {
|
|
393
393
|
if (!isReadabilityCompatible(document)) {
|
|
394
|
-
logWarn('Document not compatible with Readability', undefined, LOG_TRANSFORM);
|
|
394
|
+
logWarn('Document not compatible with Readability', undefined, Loggers.LOG_TRANSFORM);
|
|
395
395
|
return null;
|
|
396
396
|
}
|
|
397
397
|
try {
|
|
@@ -404,17 +404,17 @@ function extractArticle(document, url, signal) {
|
|
|
404
404
|
return mapReadabilityResult(parsed);
|
|
405
405
|
}
|
|
406
406
|
catch (error) {
|
|
407
|
-
logError('Failed to extract article with Readability', error instanceof Error ? error : undefined, LOG_TRANSFORM);
|
|
407
|
+
logError('Failed to extract article with Readability', error instanceof Error ? error : undefined, Loggers.LOG_TRANSFORM);
|
|
408
408
|
return null;
|
|
409
409
|
}
|
|
410
410
|
}
|
|
411
411
|
function isValidInput(html, url) {
|
|
412
412
|
if (typeof html !== 'string' || html.length === 0) {
|
|
413
|
-
logWarn('extractContent called with invalid HTML input', undefined, LOG_TRANSFORM);
|
|
413
|
+
logWarn('extractContent called with invalid HTML input', undefined, Loggers.LOG_TRANSFORM);
|
|
414
414
|
return false;
|
|
415
415
|
}
|
|
416
416
|
if (typeof url !== 'string' || url.length === 0) {
|
|
417
|
-
logWarn('extractContent called with invalid URL', undefined, LOG_TRANSFORM);
|
|
417
|
+
logWarn('extractContent called with invalid URL', undefined, Loggers.LOG_TRANSFORM);
|
|
418
418
|
return false;
|
|
419
419
|
}
|
|
420
420
|
return true;
|
|
@@ -427,7 +427,7 @@ function applyBaseUri(document, url) {
|
|
|
427
427
|
logInfo('Failed to set baseURI (non-critical)', {
|
|
428
428
|
url: url.substring(0, 100),
|
|
429
429
|
error: getErrorMessage(error),
|
|
430
|
-
}, LOG_TRANSFORM);
|
|
430
|
+
}, Loggers.LOG_TRANSFORM);
|
|
431
431
|
}
|
|
432
432
|
}
|
|
433
433
|
function createEmptyExtractionContext() {
|
|
@@ -483,7 +483,7 @@ function extractContentContext(html, url, options) {
|
|
|
483
483
|
if (error instanceof FetchError)
|
|
484
484
|
throw error;
|
|
485
485
|
throwIfAborted(options.signal, url, 'extract:error');
|
|
486
|
-
logError('Failed to extract content', error instanceof Error ? error : undefined, LOG_TRANSFORM);
|
|
486
|
+
logError('Failed to extract content', error instanceof Error ? error : undefined, Loggers.LOG_TRANSFORM);
|
|
487
487
|
return createEmptyExtractionContext();
|
|
488
488
|
}
|
|
489
489
|
}
|
|
@@ -619,7 +619,7 @@ export function htmlToMarkdown(html, metadata, options) {
|
|
|
619
619
|
catch (error) {
|
|
620
620
|
if (error instanceof FetchError)
|
|
621
621
|
throw error;
|
|
622
|
-
logError('Failed to convert HTML to markdown', error instanceof Error ? error : undefined, LOG_TRANSFORM);
|
|
622
|
+
logError('Failed to convert HTML to markdown', error instanceof Error ? error : undefined, Loggers.LOG_TRANSFORM);
|
|
623
623
|
const fetchError = new FetchError('Failed to convert HTML to markdown', url, 500, {
|
|
624
624
|
reason: 'markdown_convert_failed',
|
|
625
625
|
});
|
|
@@ -651,7 +651,7 @@ function tryTransformRawContent(params) {
|
|
|
651
651
|
return null;
|
|
652
652
|
logDebug('Preserving raw markdown content', {
|
|
653
653
|
url: params.url.substring(0, 80),
|
|
654
|
-
}, LOG_TRANSFORM);
|
|
654
|
+
}, Loggers.LOG_TRANSFORM);
|
|
655
655
|
const { content, title } = buildRawMarkdownPayload({
|
|
656
656
|
rawContent: params.html,
|
|
657
657
|
url: params.url,
|
|
@@ -998,12 +998,13 @@ async function transformWithWorkerPool(htmlOrBuffer, url, options) {
|
|
|
998
998
|
}
|
|
999
999
|
function resolveWorkerFallback(error, htmlOrBuffer, url, options) {
|
|
1000
1000
|
const poolStats = getWorkerPoolStats();
|
|
1001
|
-
const isQueueFull = error instanceof FetchError &&
|
|
1001
|
+
const isQueueFull = error instanceof FetchError &&
|
|
1002
|
+
error.details['reason'] === SystemErrors.QUEUE_FULL;
|
|
1002
1003
|
if (isQueueFull) {
|
|
1003
1004
|
logWarn('Transform worker queue full; falling back to in-process', {
|
|
1004
1005
|
url: redactUrl(url),
|
|
1005
1006
|
...(poolStats ?? {}),
|
|
1006
|
-
}, LOG_TRANSFORM);
|
|
1007
|
+
}, Loggers.LOG_TRANSFORM);
|
|
1007
1008
|
return transformInputInProcess(htmlOrBuffer, url, options);
|
|
1008
1009
|
}
|
|
1009
1010
|
throwIfAborted(options.signal, url, 'transform:worker-fallback');
|
|
@@ -1016,7 +1017,7 @@ function resolveWorkerFallback(error, htmlOrBuffer, url, options) {
|
|
|
1016
1017
|
url: redactUrl(url),
|
|
1017
1018
|
error: message,
|
|
1018
1019
|
...(poolStats ?? {}),
|
|
1019
|
-
}, LOG_TRANSFORM);
|
|
1020
|
+
}, Loggers.LOG_TRANSFORM);
|
|
1020
1021
|
return transformInputInProcess(htmlOrBuffer, url, options);
|
|
1021
1022
|
}
|
|
1022
1023
|
async function runWorkerTransformWithFallback(htmlOrBuffer, url, options) {
|