@j0hanz/superfetch 2.3.0 → 2.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -9
- package/dist/assets/logo.svg +24835 -0
- package/dist/cache.js +58 -4
- package/dist/config.d.ts +2 -0
- package/dist/config.js +2 -0
- package/dist/dom-noise-removal.js +15 -13
- package/dist/fetch.js +16 -25
- package/dist/http-native.js +19 -3
- package/dist/markdown-cleanup.d.ts +6 -12
- package/dist/markdown-cleanup.js +259 -25
- package/dist/mcp.js +27 -10
- package/dist/observability.d.ts +2 -0
- package/dist/observability.js +25 -0
- package/dist/tools.d.ts +6 -4
- package/dist/tools.js +39 -13
- package/dist/transform-types.d.ts +38 -0
- package/dist/transform.d.ts +12 -6
- package/dist/transform.js +158 -267
- package/package.json +1 -2
package/dist/observability.js
CHANGED
|
@@ -1,6 +1,10 @@
|
|
|
1
1
|
import { AsyncLocalStorage } from 'node:async_hooks';
|
|
2
2
|
import { config } from './config.js';
|
|
3
3
|
const requestContext = new AsyncLocalStorage();
|
|
4
|
+
let mcpServer;
|
|
5
|
+
export function setMcpServer(server) {
|
|
6
|
+
mcpServer = server;
|
|
7
|
+
}
|
|
4
8
|
export function runWithRequestContext(context, fn) {
|
|
5
9
|
return requestContext.run(context, fn);
|
|
6
10
|
}
|
|
@@ -43,10 +47,31 @@ function shouldLog(level) {
|
|
|
43
47
|
// All other levels always log
|
|
44
48
|
return true;
|
|
45
49
|
}
|
|
50
|
+
function mapToMcpLevel(level) {
|
|
51
|
+
switch (level) {
|
|
52
|
+
case 'warn':
|
|
53
|
+
return 'warning';
|
|
54
|
+
case 'error':
|
|
55
|
+
return 'error';
|
|
56
|
+
case 'debug':
|
|
57
|
+
return 'debug';
|
|
58
|
+
case 'info':
|
|
59
|
+
default:
|
|
60
|
+
return 'info';
|
|
61
|
+
}
|
|
62
|
+
}
|
|
46
63
|
function writeLog(level, message, meta) {
|
|
47
64
|
if (!shouldLog(level))
|
|
48
65
|
return;
|
|
49
66
|
process.stderr.write(`${formatLogEntry(level, message, meta)}\n`);
|
|
67
|
+
if (mcpServer) {
|
|
68
|
+
mcpServer.server
|
|
69
|
+
.sendLoggingMessage({
|
|
70
|
+
level: mapToMcpLevel(level),
|
|
71
|
+
data: meta ? { message, ...meta } : message,
|
|
72
|
+
})
|
|
73
|
+
.catch(() => { });
|
|
74
|
+
}
|
|
50
75
|
}
|
|
51
76
|
export function logInfo(message, meta) {
|
|
52
77
|
writeLog('info', message, meta);
|
package/dist/tools.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
|
2
|
-
import {
|
|
2
|
+
import type { MarkdownTransformResult } from './transform-types.js';
|
|
3
3
|
export interface FetchUrlInput {
|
|
4
4
|
url: string;
|
|
5
5
|
}
|
|
@@ -25,7 +25,6 @@ export interface ToolContentResourceBlock {
|
|
|
25
25
|
}
|
|
26
26
|
export type ToolContentBlockUnion = ToolContentBlock | ToolContentResourceLinkBlock | ToolContentResourceBlock;
|
|
27
27
|
export interface ToolErrorResponse {
|
|
28
|
-
[x: string]: unknown;
|
|
29
28
|
content: ToolContentBlockUnion[];
|
|
30
29
|
structuredContent: {
|
|
31
30
|
error: string;
|
|
@@ -34,7 +33,6 @@ export interface ToolErrorResponse {
|
|
|
34
33
|
isError: true;
|
|
35
34
|
}
|
|
36
35
|
export interface ToolResponseBase {
|
|
37
|
-
[x: string]: unknown;
|
|
38
36
|
content: ToolContentBlockUnion[];
|
|
39
37
|
structuredContent?: Record<string, unknown>;
|
|
40
38
|
isError?: boolean;
|
|
@@ -86,6 +84,10 @@ export interface ToolHandlerExtra {
|
|
|
86
84
|
}
|
|
87
85
|
export declare const FETCH_URL_TOOL_NAME = "fetch-url";
|
|
88
86
|
export declare const FETCH_URL_TOOL_DESCRIPTION = "Fetches a webpage and converts it to clean Markdown format";
|
|
87
|
+
interface ProgressReporter {
|
|
88
|
+
report: (progress: number, message: string) => Promise<void>;
|
|
89
|
+
}
|
|
90
|
+
export declare function createProgressReporter(extra?: ToolHandlerExtra): ProgressReporter;
|
|
89
91
|
interface InlineContentResult {
|
|
90
92
|
content?: string;
|
|
91
93
|
contentSize: number;
|
|
@@ -123,5 +125,5 @@ type MarkdownPipelineResult = MarkdownTransformResult & {
|
|
|
123
125
|
export declare function parseCachedMarkdownResult(cached: string): MarkdownPipelineResult | undefined;
|
|
124
126
|
export declare function fetchUrlToolHandler(input: FetchUrlInput, extra?: ToolHandlerExtra): Promise<ToolResponseBase>;
|
|
125
127
|
export declare function withRequestContextIfMissing<TParams, TResult, TExtra = unknown>(handler: (params: TParams, extra?: TExtra) => Promise<TResult>): (params: TParams, extra?: TExtra) => Promise<TResult>;
|
|
126
|
-
export declare function registerTools(server: McpServer): void;
|
|
128
|
+
export declare function registerTools(server: McpServer, serverIcon?: string): void;
|
|
127
129
|
export {};
|
package/dist/tools.js
CHANGED
|
@@ -5,10 +5,11 @@ import { config } from './config.js';
|
|
|
5
5
|
import { FetchError, getErrorMessage, isSystemError } from './errors.js';
|
|
6
6
|
import { fetchNormalizedUrl, normalizeUrl, transformToRawUrl, } from './fetch.js';
|
|
7
7
|
import { getRequestId, logDebug, logError, logWarn, runWithRequestContext, } from './observability.js';
|
|
8
|
-
import { transformHtmlToMarkdown
|
|
8
|
+
import { transformHtmlToMarkdown } from './transform.js';
|
|
9
9
|
import { isObject } from './type-guards.js';
|
|
10
10
|
const TRUNCATION_MARKER = '...[truncated]';
|
|
11
11
|
const FETCH_PROGRESS_TOTAL = 4;
|
|
12
|
+
const PROGRESS_NOTIFICATION_TIMEOUT_MS = 5000;
|
|
12
13
|
const fetchUrlInputSchema = z.strictObject({
|
|
13
14
|
url: z
|
|
14
15
|
.url({ protocol: /^https?$/i })
|
|
@@ -46,7 +47,7 @@ const fetchUrlOutputSchema = z.strictObject({
|
|
|
46
47
|
});
|
|
47
48
|
export const FETCH_URL_TOOL_NAME = 'fetch-url';
|
|
48
49
|
export const FETCH_URL_TOOL_DESCRIPTION = 'Fetches a webpage and converts it to clean Markdown format';
|
|
49
|
-
function createProgressReporter(extra) {
|
|
50
|
+
export function createProgressReporter(extra) {
|
|
50
51
|
const token = extra?._meta?.progressToken ?? null;
|
|
51
52
|
const sendNotification = extra?.sendNotification;
|
|
52
53
|
if (token === null || !sendNotification) {
|
|
@@ -55,19 +56,33 @@ function createProgressReporter(extra) {
|
|
|
55
56
|
return {
|
|
56
57
|
report: async (progress, message) => {
|
|
57
58
|
try {
|
|
58
|
-
await
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
59
|
+
await Promise.race([
|
|
60
|
+
sendNotification({
|
|
61
|
+
method: 'notifications/progress',
|
|
62
|
+
params: {
|
|
63
|
+
progressToken: token,
|
|
64
|
+
progress,
|
|
65
|
+
total: FETCH_PROGRESS_TOTAL,
|
|
66
|
+
message,
|
|
67
|
+
},
|
|
68
|
+
}),
|
|
69
|
+
new Promise((_, reject) => {
|
|
70
|
+
setTimeout(() => {
|
|
71
|
+
reject(new Error('Progress notification timeout'));
|
|
72
|
+
}, PROGRESS_NOTIFICATION_TIMEOUT_MS);
|
|
73
|
+
}),
|
|
74
|
+
]);
|
|
67
75
|
}
|
|
68
76
|
catch (error) {
|
|
69
|
-
|
|
77
|
+
const isTimeout = error instanceof Error &&
|
|
78
|
+
error.message === 'Progress notification timeout';
|
|
79
|
+
const logMessage = isTimeout
|
|
80
|
+
? 'Progress notification timed out'
|
|
81
|
+
: 'Failed to send progress notification';
|
|
82
|
+
logWarn(logMessage, {
|
|
70
83
|
error: getErrorMessage(error),
|
|
84
|
+
progress,
|
|
85
|
+
message,
|
|
71
86
|
});
|
|
72
87
|
}
|
|
73
88
|
},
|
|
@@ -467,12 +482,23 @@ function resolveRequestIdFromExtra(extra) {
|
|
|
467
482
|
return String(requestId);
|
|
468
483
|
return undefined;
|
|
469
484
|
}
|
|
470
|
-
export function registerTools(server) {
|
|
485
|
+
export function registerTools(server, serverIcon) {
|
|
471
486
|
server.registerTool(TOOL_DEFINITION.name, {
|
|
472
487
|
title: TOOL_DEFINITION.title,
|
|
473
488
|
description: TOOL_DEFINITION.description,
|
|
474
489
|
inputSchema: TOOL_DEFINITION.inputSchema,
|
|
475
490
|
outputSchema: TOOL_DEFINITION.outputSchema,
|
|
476
491
|
annotations: TOOL_DEFINITION.annotations,
|
|
492
|
+
...(serverIcon
|
|
493
|
+
? {
|
|
494
|
+
icons: [
|
|
495
|
+
{
|
|
496
|
+
src: serverIcon,
|
|
497
|
+
mimeType: 'image/svg+xml',
|
|
498
|
+
sizes: ['any'],
|
|
499
|
+
},
|
|
500
|
+
],
|
|
501
|
+
}
|
|
502
|
+
: {}),
|
|
477
503
|
}, withRequestContextIfMissing(TOOL_DEFINITION.handler));
|
|
478
504
|
}
|
package/dist/transform-types.d.ts
CHANGED
|
@@ -77,4 +77,42 @@ export interface TransformStageContext {
|
|
|
77
77
|
readonly stage: string;
|
|
78
78
|
readonly startTime: number;
|
|
79
79
|
readonly url: string;
|
|
80
|
+
readonly budgetMs?: number;
|
|
81
|
+
readonly totalBudgetMs?: number;
|
|
80
82
|
}
|
|
83
|
+
/**
|
|
84
|
+
* Worker message types for transform workers.
|
|
85
|
+
*/
|
|
86
|
+
export interface TransformWorkerTransformMessage {
|
|
87
|
+
type: 'transform';
|
|
88
|
+
id: string;
|
|
89
|
+
html: string;
|
|
90
|
+
url: string;
|
|
91
|
+
includeMetadata: boolean;
|
|
92
|
+
}
|
|
93
|
+
export interface TransformWorkerCancelMessage {
|
|
94
|
+
type: 'cancel';
|
|
95
|
+
id: string;
|
|
96
|
+
}
|
|
97
|
+
export interface TransformWorkerResultMessage {
|
|
98
|
+
type: 'result';
|
|
99
|
+
id: string;
|
|
100
|
+
result: {
|
|
101
|
+
markdown: string;
|
|
102
|
+
title?: string;
|
|
103
|
+
truncated: boolean;
|
|
104
|
+
};
|
|
105
|
+
}
|
|
106
|
+
export interface TransformWorkerErrorMessage {
|
|
107
|
+
type: 'error';
|
|
108
|
+
id: string;
|
|
109
|
+
error: {
|
|
110
|
+
name: string;
|
|
111
|
+
message: string;
|
|
112
|
+
url: string;
|
|
113
|
+
statusCode?: number;
|
|
114
|
+
details?: Record<string, unknown>;
|
|
115
|
+
};
|
|
116
|
+
}
|
|
117
|
+
export type TransformWorkerIncomingMessage = TransformWorkerTransformMessage | TransformWorkerCancelMessage;
|
|
118
|
+
export type TransformWorkerOutgoingMessage = TransformWorkerResultMessage | TransformWorkerErrorMessage;
|
package/dist/transform.d.ts
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
import type { ExtractedArticle, ExtractedMetadata, ExtractionResult, MarkdownTransformResult, MetadataBlock, TransformOptions, TransformStageContext } from './transform-types.js';
|
|
2
|
-
export
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
export declare function startTransformStage(url: string, stage: string): TransformStageContext | null;
|
|
2
|
+
export interface StageBudget {
|
|
3
|
+
totalBudgetMs: number;
|
|
4
|
+
elapsedMs: number;
|
|
5
|
+
}
|
|
6
|
+
export declare function startTransformStage(url: string, stage: string, budget?: StageBudget): TransformStageContext | null;
|
|
7
7
|
export declare function endTransformStage(context: TransformStageContext | null, options?: {
|
|
8
8
|
truncated?: boolean;
|
|
9
|
-
}):
|
|
9
|
+
}): number;
|
|
10
10
|
export declare function extractContent(html: string, url: string, options?: {
|
|
11
11
|
extractArticle?: boolean;
|
|
12
12
|
signal?: AbortSignal;
|
|
@@ -22,4 +22,10 @@ export declare function determineContentExtractionSource(article: ExtractedArtic
|
|
|
22
22
|
export declare function createContentMetadataBlock(url: string, article: ExtractedArticle | null, extractedMeta: ExtractedMetadata, shouldExtractFromArticle: boolean, includeMetadata: boolean): MetadataBlock | undefined;
|
|
23
23
|
export declare function transformHtmlToMarkdownInProcess(html: string, url: string, options: TransformOptions): MarkdownTransformResult;
|
|
24
24
|
export declare function shutdownTransformWorkerPool(): Promise<void>;
|
|
25
|
+
export interface TransformPoolStats {
|
|
26
|
+
queueDepth: number;
|
|
27
|
+
activeWorkers: number;
|
|
28
|
+
capacity: number;
|
|
29
|
+
}
|
|
30
|
+
export declare function getTransformPoolStats(): TransformPoolStats | null;
|
|
25
31
|
export declare function transformHtmlToMarkdown(html: string, url: string, options: TransformOptions): Promise<MarkdownTransformResult>;
|