@llmindset/hf-mcp 0.3.1 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/docs-search/doc-fetch.d.ts +1 -0
- package/dist/docs-search/doc-fetch.d.ts.map +1 -1
- package/dist/docs-search/doc-fetch.js +9 -12
- package/dist/docs-search/doc-fetch.js.map +1 -1
- package/dist/docs-search/doc-fetch.test.js +56 -11
- package/dist/docs-search/doc-fetch.test.js.map +1 -1
- package/dist/docs-search/docs-semantic-search.d.ts.map +1 -1
- package/dist/docs-search/docs-semantic-search.js +7 -1
- package/dist/docs-search/docs-semantic-search.js.map +1 -1
- package/dist/file-icons.d.ts +3 -0
- package/dist/file-icons.d.ts.map +1 -0
- package/dist/file-icons.js +38 -0
- package/dist/file-icons.js.map +1 -0
- package/dist/gradio-files.d.ts +0 -1
- package/dist/gradio-files.d.ts.map +1 -1
- package/dist/gradio-files.js +2 -35
- package/dist/gradio-files.js.map +1 -1
- package/dist/hf-api-call.d.ts.map +1 -1
- package/dist/hf-api-call.js +7 -7
- package/dist/hf-api-call.js.map +1 -1
- package/dist/hub-inspect.d.ts +2 -2
- package/dist/hub-inspect.d.ts.map +1 -1
- package/dist/hub-inspect.js +1 -1
- package/dist/hub-inspect.js.map +1 -1
- package/dist/index.browser.d.ts +48 -0
- package/dist/index.browser.d.ts.map +1 -0
- package/dist/index.browser.js +153 -0
- package/dist/index.browser.js.map +1 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -0
- package/dist/index.js.map +1 -1
- package/dist/jobs/commands/uv-utils.d.ts +0 -3
- package/dist/jobs/commands/uv-utils.d.ts.map +1 -1
- package/dist/jobs/commands/uv-utils.js +2 -2
- package/dist/jobs/commands/uv-utils.js.map +1 -1
- package/dist/jobs/jobs-tool.d.ts.map +1 -1
- package/dist/jobs/jobs-tool.js +11 -12
- package/dist/jobs/jobs-tool.js.map +1 -1
- package/dist/jobs/schema-help.d.ts +2 -9
- package/dist/jobs/schema-help.d.ts.map +1 -1
- package/dist/jobs/schema-help.js +3 -3
- package/dist/jobs/schema-help.js.map +1 -1
- package/dist/jobs/sse-handler.d.ts +3 -2
- package/dist/jobs/sse-handler.d.ts.map +1 -1
- package/dist/jobs/sse-handler.js +8 -4
- package/dist/jobs/sse-handler.js.map +1 -1
- package/dist/jobs/types.d.ts +1 -1
- package/dist/logger.d.ts +2 -2
- package/dist/logger.d.ts.map +1 -1
- package/dist/network/fetch-profile.d.ts +24 -0
- package/dist/network/fetch-profile.d.ts.map +1 -0
- package/dist/network/fetch-profile.js +80 -0
- package/dist/network/fetch-profile.js.map +1 -0
- package/dist/network/index.d.ts +5 -0
- package/dist/network/index.d.ts.map +1 -0
- package/dist/network/index.js +5 -0
- package/dist/network/index.js.map +1 -0
- package/dist/network/ip-policy.d.ts +6 -0
- package/dist/network/ip-policy.d.ts.map +1 -0
- package/dist/network/ip-policy.js +166 -0
- package/dist/network/ip-policy.js.map +1 -0
- package/dist/network/ip-policy.test.d.ts +2 -0
- package/dist/network/ip-policy.test.d.ts.map +1 -0
- package/dist/network/ip-policy.test.js +26 -0
- package/dist/network/ip-policy.test.js.map +1 -0
- package/dist/network/safe-fetch.d.ts +16 -0
- package/dist/network/safe-fetch.d.ts.map +1 -0
- package/dist/network/safe-fetch.js +124 -0
- package/dist/network/safe-fetch.js.map +1 -0
- package/dist/network/safe-fetch.test.d.ts +2 -0
- package/dist/network/safe-fetch.test.d.ts.map +1 -0
- package/dist/network/safe-fetch.test.js +136 -0
- package/dist/network/safe-fetch.test.js.map +1 -0
- package/dist/network/url-policy.d.ts +32 -0
- package/dist/network/url-policy.d.ts.map +1 -0
- package/dist/network/url-policy.js +230 -0
- package/dist/network/url-policy.js.map +1 -0
- package/dist/network/url-policy.test.d.ts +2 -0
- package/dist/network/url-policy.test.d.ts.map +1 -0
- package/dist/network/url-policy.test.js +57 -0
- package/dist/network/url-policy.test.js.map +1 -0
- package/dist/readme-utils.d.ts.map +1 -1
- package/dist/readme-utils.js +3 -4
- package/dist/readme-utils.js.map +1 -1
- package/dist/repo-search.d.ts +46 -0
- package/dist/repo-search.d.ts.map +1 -0
- package/dist/repo-search.js +310 -0
- package/dist/repo-search.js.map +1 -0
- package/dist/repo-search.test.d.ts +2 -0
- package/dist/repo-search.test.d.ts.map +1 -0
- package/dist/repo-search.test.js +130 -0
- package/dist/repo-search.test.js.map +1 -0
- package/dist/space/commands/discover.d.ts +0 -5
- package/dist/space/commands/discover.d.ts.map +1 -1
- package/dist/space/commands/discover.js +9 -2
- package/dist/space/commands/discover.js.map +1 -1
- package/dist/space/commands/invoke.js +1 -59
- package/dist/space/commands/invoke.js.map +1 -1
- package/dist/space/commands/view-parameters.d.ts.map +1 -1
- package/dist/space/commands/view-parameters.js +3 -98
- package/dist/space/commands/view-parameters.js.map +1 -1
- package/dist/space/dynamic-space-tool.d.ts.map +1 -1
- package/dist/space/dynamic-space-tool.js +5 -2
- package/dist/space/dynamic-space-tool.js.map +1 -1
- package/dist/space/utils/gradio-caller.d.ts.map +1 -1
- package/dist/space/utils/gradio-caller.js +13 -6
- package/dist/space/utils/gradio-caller.js.map +1 -1
- package/dist/space/utils/space-http.d.ts +8 -0
- package/dist/space/utils/space-http.d.ts.map +1 -0
- package/dist/space/utils/space-http.js +49 -0
- package/dist/space/utils/space-http.js.map +1 -0
- package/dist/space-files.d.ts +0 -1
- package/dist/space-files.d.ts.map +1 -1
- package/dist/space-files.js +3 -36
- package/dist/space-files.js.map +1 -1
- package/dist/tool-ids.d.ts +6 -5
- package/dist/tool-ids.d.ts.map +1 -1
- package/dist/tool-ids.js +9 -14
- package/dist/tool-ids.js.map +1 -1
- package/package.json +7 -3
- package/src/docs-search/doc-fetch.test.ts +98 -28
- package/src/docs-search/doc-fetch.ts +9 -16
- package/src/docs-search/docs-semantic-search.ts +8 -1
- package/src/file-icons.ts +39 -0
- package/src/gradio-files.ts +2 -40
- package/src/hf-api-call.ts +8 -10
- package/src/hub-inspect.ts +2 -2
- package/src/index.browser.ts +183 -0
- package/src/index.ts +2 -0
- package/src/jobs/commands/uv-utils.ts +2 -2
- package/src/jobs/jobs-tool.ts +13 -12
- package/src/jobs/schema-help.ts +4 -4
- package/src/jobs/sse-handler.ts +12 -7
- package/src/logger.ts +2 -2
- package/src/network/fetch-profile.ts +112 -0
- package/src/network/index.ts +4 -0
- package/src/network/ip-policy.test.ts +29 -0
- package/src/network/ip-policy.ts +206 -0
- package/src/network/safe-fetch.test.ts +181 -0
- package/src/network/safe-fetch.ts +174 -0
- package/src/network/url-policy.test.ts +100 -0
- package/src/network/url-policy.ts +304 -0
- package/src/readme-utils.ts +11 -10
- package/src/repo-search.test.ts +155 -0
- package/src/repo-search.ts +414 -0
- package/src/space/commands/discover.ts +10 -2
- package/src/space/commands/invoke.ts +1 -88
- package/src/space/commands/view-parameters.ts +3 -136
- package/src/space/dynamic-space-tool.ts +6 -2
- package/src/space/utils/gradio-caller.ts +25 -12
- package/src/space/utils/space-http.ts +75 -0
- package/src/space-files.ts +3 -41
- package/src/tool-ids.ts +10 -14
- package/test/fetch-guard.spec.ts +70 -0
- package/test/jobs/sse-handler.spec.ts +60 -0
- package/dist/space/utils/result-formatter.d.ts +0 -4
- package/dist/space/utils/result-formatter.d.ts.map +0 -1
- package/dist/space/utils/result-formatter.js +0 -146
- package/dist/space/utils/result-formatter.js.map +0 -1
- package/src/space/utils/result-formatter.ts +0 -226
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
import { assertExternalAddress } from './ip-policy.js';
|
|
2
|
+
import { parseAndValidateUrl, type UrlPolicy } from './url-policy.js';
|
|
3
|
+
|
|
4
|
+
/**
 * Options accepted by `safeFetch`.
 */
export interface SafeFetchOptions {
  /** Policy applied to the initial URL and to every redirect target. */
  urlPolicy: UrlPolicy;

  /** Per-request timeout in milliseconds (default 12500); a value <= 0 disables the timeout. */
  timeoutMs?: number;

  /** Maximum number of redirects to follow (default 5); negative values are rejected. */
  maxRedirects?: number;

  /** When true, each hostname is checked with `assertExternalAddress` before it is requested (default false). */
  externalOnly?: boolean;

  /** Base `fetch` init (method, headers, body, signal, ...) used for every hop. */
  requestInit?: RequestInit;

  /** When true (the default), sensitive headers are dropped once a redirect changes origin. */
  stripSensitiveHeadersOnCrossHostRedirect?: boolean;
}
|
|
12
|
+
|
|
13
|
+
/**
 * Outcome of a successful `safeFetch` call.
 */
export interface SafeFetchResult {
  /** The first non-redirect response that was received. */
  response: Response;

  /** URL that produced `response` (the last hop after any redirects). */
  finalUrl: URL;

  /** Number of redirects that were followed to reach `finalUrl`. */
  redirectsFollowed: number;
}
|
|
18
|
+
|
|
19
|
+
/** Timeout applied to each individual request when the caller does not supply one (milliseconds). */
const DEFAULT_TIMEOUT_MS = 12_500;

/** Redirect hops followed when the caller does not supply a limit. */
const DEFAULT_MAX_REDIRECTS = 5;

/** HTTP status codes that are treated as redirects. */
const REDIRECT_STATUSES = new Set<number>([301, 302, 303, 307, 308]);

/** Lower-case names of headers that must not be forwarded to a different origin. */
const SENSITIVE_HEADERS = new Set<string>([
  'authorization',
  'proxy-authorization',
  'cookie',
  'x-hf-authorization',
]);
|
|
23
|
+
|
|
24
|
+
/**
 * Reports whether an HTTP status code is one of the redirect codes in `REDIRECT_STATUSES`.
 *
 * @param status HTTP status code of a response.
 * @returns `true` when the status is listed in `REDIRECT_STATUSES`.
 */
function isRedirectStatus(status: number): boolean {
  const isKnownRedirect = REDIRECT_STATUSES.has(status);
  return isKnownRedirect;
}
|
|
27
|
+
|
|
28
|
+
/**
 * Builds a new `Headers` object from `headersInit` with every header named in
 * `SENSITIVE_HEADERS` removed. The input is not modified.
 *
 * @param headersInit Headers to copy; `undefined` yields an empty `Headers`.
 * @returns A fresh `Headers` instance without the sensitive entries.
 */
function dropSensitiveHeaders(headersInit: HeadersInit | undefined): Headers {
  const sanitized = new Headers(headersInit);
  SENSITIVE_HEADERS.forEach((name) => {
    sanitized.delete(name);
  });
  return sanitized;
}
|
|
35
|
+
|
|
36
|
+
/**
 * Derives the init for one hop: copies `requestInit`, forces the given method and
 * `redirect: 'manual'`, and attaches `body` only when one is present and the method
 * is neither GET nor HEAD. Any body carried by `requestInit` itself is discarded.
 *
 * @param requestInit Base init to copy (not mutated).
 * @param method HTTP method for this hop; compared against 'GET' and 'HEAD' exactly.
 * @param body Body for this hop, or null/undefined for none.
 * @returns A new `RequestInit` for the hop.
 */
function withMethodAndBody(requestInit: RequestInit, method: string, body: BodyInit | null | undefined): RequestInit {
  // Strip whatever body the base init carried; it is re-attached below only when allowed.
  const { body: _discardedBody, ...withoutBody } = requestInit;
  const hopInit: RequestInit = { ...withoutBody, method, redirect: 'manual' };

  const methodForbidsBody = method === 'GET' || method === 'HEAD';
  if (!methodForbidsBody && body !== undefined && body !== null) {
    hopInit.body = body;
  }

  return hopInit;
}
|
|
51
|
+
|
|
52
|
+
/**
 * Performs a single `fetch` with `redirect: 'manual'` and an optional timeout.
 *
 * Abort-related failures are normalized into plain `Error`s:
 * - `Request was aborted` when the caller's own signal is (or becomes) aborted;
 * - `Request timed out after <n>ms` when the internal timeout fires.
 * Any other failure is rethrown untouched.
 *
 * @param url Target URL.
 * @param requestInit Init forwarded to `fetch`; its `signal`, if any, is combined with the timeout.
 * @param timeoutMs Timeout in milliseconds; a value <= 0 disables the timeout entirely.
 * @returns The response of this single hop (redirects are not followed).
 * @throws Error with one of the messages above, or the original fetch error.
 */
async function fetchWithTimeout(url: URL, requestInit: RequestInit, timeoutMs: number): Promise<Response> {
  if (timeoutMs <= 0) {
    return fetch(url.toString(), {
      ...requestInit,
      redirect: 'manual',
    });
  }

  const outerSignal = requestInit.signal;
  if (outerSignal?.aborted) {
    throw new Error('Request was aborted');
  }

  const timeoutSignal = AbortSignal.timeout(timeoutMs);
  const signal = outerSignal ? AbortSignal.any([outerSignal, timeoutSignal]) : timeoutSignal;

  try {
    return await fetch(url.toString(), {
      ...requestInit,
      signal,
      redirect: 'manual',
    });
  } catch (error) {
    // fetch rejects with the signal's abort reason. For AbortSignal.timeout() that is a
    // DOMException named 'TimeoutError' (not 'AbortError'), and a caller may abort with an
    // arbitrary custom reason, so recognise both names and the reason object itself.
    const hasAbortName = error instanceof Error && (error.name === 'AbortError' || error.name === 'TimeoutError');
    const isAbortReason = signal.aborted && error === signal.reason;
    if (!hasAbortName && !isAbortReason) {
      throw error;
    }

    if (outerSignal?.aborted) {
      throw new Error('Request was aborted');
    }

    throw new Error(`Request timed out after ${timeoutMs.toString()}ms`);
  }
}
|
|
92
|
+
|
|
93
|
+
/**
 * Fetches `url` while enforcing `options.urlPolicy` on the initial URL and on every
 * redirect target. Redirects are followed manually so each hop can be validated.
 *
 * Behaviour per redirect hop:
 * - the `Location` header is resolved against the current URL and validated;
 * - with `externalOnly`, the target hostname is checked via `assertExternalAddress`;
 * - when the origin changes and `stripSensitiveHeadersOnCrossHostRedirect` is set,
 *   sensitive headers are removed for this and all later hops;
 * - a 303, or a 301/302 answering a POST, switches to GET and drops the body together
 *   with the `content-length` / `content-type` headers.
 *
 * @param url URL to request.
 * @param options See `SafeFetchOptions`.
 * @returns The first non-redirect response, its URL and the number of redirects followed.
 * @throws Error when `maxRedirects` is negative, the redirect limit is exceeded, a redirect
 *   lacks a `Location` header, or a URL/address check rejects a hop; fetch and timeout
 *   errors from `fetchWithTimeout` propagate as well.
 */
export async function safeFetch(url: string | URL, options: SafeFetchOptions): Promise<SafeFetchResult> {
  const {
    urlPolicy,
    timeoutMs = DEFAULT_TIMEOUT_MS,
    maxRedirects = DEFAULT_MAX_REDIRECTS,
    externalOnly = false,
    requestInit = {},
    stripSensitiveHeadersOnCrossHostRedirect = true,
  } = options;

  if (maxRedirects < 0) {
    throw new Error('maxRedirects must be >= 0');
  }

  let currentUrl = parseAndValidateUrl(url, urlPolicy);
  if (externalOnly) {
    await assertExternalAddress(currentUrl.hostname);
  }

  let baseHeaders = new Headers(requestInit.headers);
  let currentMethod = (requestInit.method || 'GET').toUpperCase();
  let currentBody = requestInit.body;
  let redirectsFollowed = 0;

  while (true) {
    const currentInit = withMethodAndBody(
      {
        ...requestInit,
        headers: baseHeaders,
      },
      currentMethod,
      currentBody
    );

    const response = await fetchWithTimeout(currentUrl, currentInit, timeoutMs);

    if (!isRedirectStatus(response.status)) {
      return {
        response,
        finalUrl: currentUrl,
        redirectsFollowed,
      };
    }

    let nextUrl: URL;
    try {
      if (redirectsFollowed >= maxRedirects) {
        throw new Error(`Redirect limit exceeded (${maxRedirects.toString()})`);
      }

      const location = response.headers.get('location');
      if (!location) {
        throw new Error(`Redirect response missing Location header (status ${response.status.toString()})`);
      }

      const nextCandidate = new URL(location, currentUrl);
      nextUrl = parseAndValidateUrl(nextCandidate, urlPolicy);
      if (externalOnly) {
        await assertExternalAddress(nextUrl.hostname);
      }
    } catch (error) {
      // The redirect is being rejected: release the unread body so the connection is not
      // left dangling, but never let a cancel failure mask the real error.
      try {
        await response.body?.cancel();
      } catch {
        // best effort only
      }
      throw error;
    }

    if (stripSensitiveHeadersOnCrossHostRedirect && currentUrl.origin !== nextUrl.origin) {
      // Swap in a filtered copy. Deleting entries from a Headers object while iterating it
      // with forEach can skip entries and leave a sensitive header behind.
      baseHeaders = dropSensitiveHeaders(baseHeaders);
    }

    if (response.status === 303 || ((response.status === 301 || response.status === 302) && currentMethod === 'POST')) {
      currentMethod = 'GET';
      currentBody = undefined;
      baseHeaders.delete('content-length');
      baseHeaders.delete('content-type');
    }

    redirectsFollowed += 1;
    currentUrl = nextUrl;

    // Discard the redirect response body before issuing the next request.
    await response.body?.cancel();
  }
}
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
import { describe, expect, it } from 'vitest';
|
|
2
|
+
import {
|
|
3
|
+
createExactHostPolicy,
|
|
4
|
+
createExternalHttpsPolicy,
|
|
5
|
+
createGradioMcpHostPolicy,
|
|
6
|
+
createGradioSchemaHostPolicy,
|
|
7
|
+
createHuggingFaceHubPolicy,
|
|
8
|
+
createLocalhostHttpPolicy,
|
|
9
|
+
createGradioMcpPolicy,
|
|
10
|
+
createHfDocsPolicy,
|
|
11
|
+
isLocalhostHostname,
|
|
12
|
+
parseAndValidateUrl,
|
|
13
|
+
} from './url-policy.js';
|
|
14
|
+
|
|
15
|
+
describe('url-policy', () => {
  type Policy = Parameters<typeof parseAndValidateUrl>[1];

  // Wraps a validation call in the thunk form that `expect(...).toThrow()` requires.
  const validating =
    (candidate: string, policy: Policy): (() => URL) =>
    () =>
      parseAndValidateUrl(candidate, policy);

  describe('HF docs policy', () => {
    it('accepts valid Hugging Face docs and Gradio docs URLs', () => {
      const accepted = [
        'https://huggingface.co/docs/transformers',
        'https://www.huggingface.co/docs/datasets',
        'https://www.gradio.app/guides',
      ];

      for (const candidate of accepted) {
        expect(validating(candidate, createHfDocsPolicy())).not.toThrow();
      }
    });

    it('rejects non-https or non-allowlisted hosts', () => {
      const plainHttp = validating('http://huggingface.co/docs/transformers', createHfDocsPolicy());
      expect(plainHttp).toThrow('URL protocol is not allowed');

      const foreignHost = validating('https://example.com/docs/transformers', createHfDocsPolicy());
      expect(foreignHost).toThrow('URL hostname is not allowed');
    });

    it('rejects traversal and encoded traversal variants', () => {
      // Literal, single-encoded, mixed and double-encoded spellings of "../".
      const traversalSuffixes = ['../x', '%2e%2e/x', '%2e%2e%2fx', '..%2fx', '%2e%2e%5cx', '%252e%252e%252fx'];

      traversalSuffixes
        .map((suffix) => `https://huggingface.co/docs/${suffix}`)
        .forEach((candidate) => {
          expect(validating(candidate, createHfDocsPolicy())).toThrow();
        });
    });

    it('enforces /docs/ prefix on HF hosts', () => {
      const outsideDocs = validating('https://huggingface.co/models/some-model', createHfDocsPolicy());
      expect(outsideDocs).toThrow('Hugging Face docs URLs must remain under /docs/');
    });
  });

  describe('Gradio MCP policy', () => {
    it('accepts mcp endpoint URLs over https', () => {
      const endpoints = ['https://demo-space.hf.space/gradio_api/mcp/', 'https://fake-mcp.local/gradio_api/mcp/'];

      for (const endpoint of endpoints) {
        expect(validating(endpoint, createGradioMcpPolicy())).not.toThrow();
      }
    });

    it('rejects invalid paths', () => {
      const wrongPath = validating('https://demo-space.hf.space/not-mcp', createGradioMcpPolicy());
      expect(wrongPath).toThrow('URL path must start with /gradio_api/mcp');
    });
  });

  describe('external https policy', () => {
    it('rejects credentials in URL', () => {
      const withCredentials = validating('https://user:pass@example.com/file.wav', createExternalHttpsPolicy());
      expect(withCredentials).toThrow('URL credentials are not allowed');
    });
  });

  it('supports huggingface hub and localhost policy helpers', () => {
    // Each pair is a URL together with the helper-built policy that must accept it.
    const acceptedPairs: Array<[string, Policy]> = [
      ['https://huggingface.co/api/models', createHuggingFaceHubPolicy()],
      ['http://localhost:7860/health', createLocalhostHttpPolicy()],
      ['https://example.com/x', createExactHostPolicy('example.com', 'https:')],
      ['https://demo-space.hf.space/gradio_api/mcp/schema', createGradioSchemaHostPolicy('demo-space.hf.space')],
      ['https://demo-space.hf.space/gradio_api/mcp/', createGradioMcpHostPolicy('demo-space.hf.space', 'https:')],
    ];

    for (const [candidate, policy] of acceptedPairs) {
      expect(validating(candidate, policy)).not.toThrow();
    }

    for (const loopback of ['localhost', '127.0.0.1', '[::1]']) {
      expect(isLocalhostHostname(loopback)).toBe(true);
    }
    expect(isLocalhostHostname('example.com')).toBe(false);
  });
});
|
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
/**
 * Path constraints that a `UrlPolicy` can impose.
 */
export interface UrlPathRules {
  /** Prefix the URL path must start with; when omitted no prefix is enforced. */
  requiredPrefix?: string;
}
|
|
4
|
+
|
|
5
|
+
/** URL schemes a policy may allow, written as `URL.protocol` reports them (with trailing colon). */
export type UrlProtocol = 'http:' | 'https:';
|
|
6
|
+
|
|
7
|
+
/**
 * Query-string constraints that a `UrlPolicy` can impose.
 */
export interface UrlQueryRules {
  /** When `true`, every query string is accepted and `allowKeys` is not consulted. */
  allowAny?: boolean;

  /**
   * Parameter names that may appear. When this is omitted (and `allowAny` is not `true`)
   * any non-empty query string is rejected.
   */
  allowKeys?: ReadonlySet<string>;
}
|
|
11
|
+
|
|
12
|
+
/**
 * Declarative description of which URLs are acceptable; enforced by `validateUrlAgainstPolicy`.
 */
export interface UrlPolicy {
  /** Schemes that are accepted; any other scheme is rejected. */
  allowedProtocols: ReadonlySet<UrlProtocol>;

  /** Hostnames accepted by exact match (case-insensitive, trailing dots ignored). */
  allowedHosts?: ReadonlySet<string>;

  /** Domains accepted together with all of their subdomains. */
  allowSubdomainsOf?: readonly string[];

  /** When true, a URL carrying an explicit port is rejected. */
  requireDefaultPort?: boolean;

  /** Optional constraints on the URL path. */
  pathRules?: UrlPathRules;

  /** Optional constraints on the query string; when omitted any query is accepted. */
  queryRules?: UrlQueryRules;

  /** When falsy, URLs that embed a username or password are rejected. */
  allowCredentials?: boolean;

  /** Extra check run after all built-in checks; it signals rejection by throwing. */
  customValidator?: (url: URL) => void;
}
|
|
22
|
+
|
|
23
|
+
/** Hostnames treated as the local machine (IPv6 loopback with and without brackets). */
const LOCALHOST_HOSTS = new Set<string>(['localhost', '127.0.0.1', '[::1]', '::1']);

/** Matches a percent-encoded '/' (%2f) or '\' (%5c), in either letter case. */
const ENCODED_SEPARATOR_RE = /%(?:2f|5c)/i;

/** Matches any well-formed percent-encoded byte. */
const ENCODED_BYTE_RE = /%[0-9a-f]{2}/i;

/** Matches a '%' that is not followed by two hexadecimal digits. */
const INVALID_PERCENT_ENCODING_RE = /%(?![0-9a-f]{2})/i;
|
|
28
|
+
|
|
29
|
+
/**
 * Canonicalizes a hostname for comparison: lower-cases it and strips trailing dots.
 *
 * @param hostname Hostname to canonicalize.
 * @returns The lower-cased hostname without any trailing '.' characters.
 */
function normalizeHostname(hostname: string): string {
  const lowered = hostname.toLowerCase();
  return lowered.replace(/\.+$/, '');
}
|
|
32
|
+
|
|
33
|
+
/**
 * Percent-decodes `value`, replacing the native decoding failure with a plain,
 * descriptive `Error`.
 *
 * @param value Percent-encoded text.
 * @returns The decoded text.
 * @throws Error `URL contains invalid percent-encoding` when decoding fails.
 */
function safeDecodeURIComponent(value: string): string {
  let decoded: string;
  try {
    decoded = decodeURIComponent(value);
  } catch {
    throw new Error('URL contains invalid percent-encoding');
  }
  return decoded;
}
|
|
40
|
+
|
|
41
|
+
/**
 * Returns the path as given plus up to two further percent-decoded forms of it
 * (decoded once, then decoded again). Decoding stops early when no '%' is left or
 * when a pass changes nothing.
 *
 * @param pathname Raw URL path.
 * @returns The raw path first, followed by each successive decoded form.
 * @throws Error when a decoding pass hits invalid percent-encoding.
 */
function collectDecodedPathVariants(pathname: string): string[] {
  const forms: string[] = [pathname];
  let latest = pathname;
  let passesLeft = 2;

  while (passesLeft > 0 && latest.includes('%')) {
    const next = safeDecodeURIComponent(latest);
    if (next === latest) {
      break;
    }

    forms.push(next);
    latest = next;
    passesLeft -= 1;
  }

  return forms;
}
|
|
61
|
+
|
|
62
|
+
/**
 * Reports whether any segment of `pathname` is exactly `.` or `..`.
 * Both '/' and '\' are treated as segment separators.
 *
 * @param pathname Path to inspect.
 * @returns `true` when a dot-segment is present.
 */
function hasDotSegments(pathname: string): boolean {
  for (const segment of pathname.split(/[\\/]/)) {
    if (segment === '.' || segment === '..') {
      return true;
    }
  }
  return false;
}
|
|
67
|
+
|
|
68
|
+
/**
 * Checks that `pathname` lies at or below `requiredPrefix`, comparing on path-segment
 * boundaries. Backslashes in either argument are treated as '/'.
 *
 * Accepted forms:
 * - the path equals the prefix;
 * - the prefix ends with '/' and the path equals it without that slash (`/docs` for `/docs/`);
 * - the path continues the prefix at a segment boundary (`/gradio_api/mcp/sse` for
 *   `/gradio_api/mcp`).
 *
 * A path that merely shares leading characters with the prefix (`/gradio_api/mcp-evil`
 * for `/gradio_api/mcp`) is rejected; a plain `startsWith` would wrongly accept it.
 *
 * @param pathname URL path to test.
 * @param requiredPrefix Required path prefix; an empty prefix matches every path.
 * @returns `true` when the path satisfies the prefix requirement.
 */
function matchesRequiredPrefix(pathname: string, requiredPrefix: string): boolean {
  const normalizedPath = pathname.replace(/\\/g, '/');
  const normalizedPrefix = requiredPrefix.replace(/\\/g, '/');

  // An empty prefix imposes no constraint.
  if (normalizedPrefix === '' || normalizedPath === normalizedPrefix) {
    return true;
  }

  if (normalizedPrefix.endsWith('/')) {
    // The prefix already ends on a segment boundary, so startsWith is safe here.
    return normalizedPath === normalizedPrefix.slice(0, -1) || normalizedPath.startsWith(normalizedPrefix);
  }

  // Require the next character after the prefix to be a separator.
  return normalizedPath.startsWith(`${normalizedPrefix}/`);
}
|
|
82
|
+
|
|
83
|
+
/**
 * Verifies `hostname` against the host rules of `policy`.
 *
 * A policy that defines neither `allowedHosts` nor a non-empty `allowSubdomainsOf`
 * accepts every host. Otherwise the host must equal one of `allowedHosts`, or equal or
 * be a subdomain of one of `allowSubdomainsOf`. Comparison is case-insensitive and
 * ignores trailing dots.
 *
 * @param hostname Hostname taken from the URL.
 * @param policy Policy supplying the host rules.
 * @throws Error `URL hostname is not allowed: <hostname>` when no rule matches.
 */
function assertHostAllowed(hostname: string, policy: UrlPolicy): void {
  const exactHosts = policy.allowedHosts;
  const parentDomains = policy.allowSubdomainsOf ?? [];

  if (!exactHosts && parentDomains.length === 0) {
    return;
  }

  const candidate = normalizeHostname(hostname);

  const matchesExactHost = exactHosts ? [...exactHosts].some((host) => normalizeHostname(host) === candidate) : false;
  if (matchesExactHost) {
    return;
  }

  const matchesParentDomain = parentDomains.some((domain) => {
    const parent = normalizeHostname(domain);
    return candidate === parent || candidate.endsWith(`.${parent}`);
  });
  if (matchesParentDomain) {
    return;
  }

  throw new Error(`URL hostname is not allowed: ${hostname}`);
}
|
|
109
|
+
|
|
110
|
+
/**
 * Validates the path of `url`. The checks run in this order, each against the raw
 * path and its decoded forms where noted:
 * 1. the raw path must not contain a malformed percent escape;
 * 2. no form may contain an encoded '/' or '\';
 * 3. no form may contain a `.` or `..` segment;
 * 4. the once-decoded form must not still contain a percent-encoded byte;
 * 5. when `pathRules.requiredPrefix` is set, at least one form must match it.
 *
 * @param url URL whose `pathname` is checked.
 * @param pathRules Optional path constraints.
 * @throws Error describing the first check that fails.
 */
function assertPathAllowed(url: URL, pathRules?: UrlPathRules): void {
  const rawPath = url.pathname;

  if (rawPath.includes('%') && INVALID_PERCENT_ENCODING_RE.test(rawPath)) {
    throw new Error('URL path contains invalid percent-encoding');
  }

  const forms = collectDecodedPathVariants(rawPath);

  for (const form of forms) {
    if (ENCODED_SEPARATOR_RE.test(form)) {
      throw new Error('URL path contains encoded separators');
    }
  }

  for (const form of forms) {
    if (hasDotSegments(form)) {
      throw new Error('URL path contains dot-segments');
    }
  }

  // forms[1], when present, is the path after exactly one decoding pass.
  const [, decodedOnce] = forms;
  if (decodedOnce !== undefined && ENCODED_BYTE_RE.test(decodedOnce)) {
    throw new Error('URL path appears to use double-encoding');
  }

  const requiredPrefix = pathRules?.requiredPrefix;
  if (requiredPrefix) {
    const anyFormMatches = forms.some((form) => matchesRequiredPrefix(form, requiredPrefix));
    if (!anyFormMatches) {
      throw new Error(`URL path must start with ${requiredPrefix}`);
    }
  }
}
|
|
143
|
+
|
|
144
|
+
/**
 * Validates the query string of `url` against `policy.queryRules`.
 *
 * - No rules, or `allowAny === true`: everything is accepted.
 * - Rules without `allowKeys`: any non-empty query string is rejected.
 * - Rules with `allowKeys`: every parameter name must be in the set.
 *
 * @param url URL whose query is checked.
 * @param policy Policy supplying the query rules.
 * @throws Error when the query string or one of its parameters is not allowed.
 */
function assertQueryAllowed(url: URL, policy: UrlPolicy): void {
  const { queryRules } = policy;
  if (!queryRules || queryRules.allowAny === true) {
    return;
  }

  const permittedKeys = queryRules.allowKeys;
  if (!permittedKeys) {
    if (url.search.length > 0) {
      throw new Error('URL query string is not allowed');
    }
    return;
  }

  // Report the first offending parameter in query order.
  const offendingKey = [...url.searchParams.keys()].find((key) => !permittedKeys.has(key));
  if (offendingKey !== undefined) {
    throw new Error(`URL query parameter is not allowed: ${offendingKey}`);
  }
}
|
|
163
|
+
|
|
164
|
+
/**
 * Enforces `policy.requireDefaultPort`: when it is set, a URL whose `port` is non-empty
 * must carry the default port of its scheme (443 for https, 80 for http).
 *
 * @param url URL whose port is checked.
 * @param policy Policy supplying `requireDefaultPort`.
 * @throws Error `URL port is not allowed for protocol <protocol>` on a mismatch.
 */
function assertPortAllowed(url: URL, policy: UrlPolicy): void {
  const explicitPort = url.port;
  if (!policy.requireDefaultPort || explicitPort.length === 0) {
    return;
  }

  let expectedPort = '';
  switch (url.protocol) {
    case 'https:':
      expectedPort = '443';
      break;
    case 'http:':
      expectedPort = '80';
      break;
  }

  if (!expectedPort || explicitPort !== expectedPort) {
    throw new Error(`URL port is not allowed for protocol ${url.protocol}`);
  }
}
|
|
174
|
+
|
|
175
|
+
/**
 * Runs every check of `policy` against an already-parsed URL, in this order:
 * protocol, embedded credentials, host, port, path, query, then the policy's
 * `customValidator` (if any).
 *
 * @param url Parsed URL to validate.
 * @param policy Policy to enforce.
 * @throws Error describing the first check that fails.
 */
export function validateUrlAgainstPolicy(url: URL, policy: UrlPolicy): void {
  const protocolAllowed = policy.allowedProtocols.has(url.protocol as UrlProtocol);
  if (!protocolAllowed) {
    throw new Error(`URL protocol is not allowed: ${url.protocol}`);
  }

  const carriesCredentials = url.username.length > 0 || url.password.length > 0;
  if (carriesCredentials && !policy.allowCredentials) {
    throw new Error('URL credentials are not allowed');
  }

  assertHostAllowed(url.hostname, policy);
  assertPortAllowed(url, policy);
  assertPathAllowed(url, policy.pathRules);
  assertQueryAllowed(url, policy);

  // Policy-specific check runs last, after all generic checks have passed.
  policy.customValidator?.(url);
}
|
|
191
|
+
|
|
192
|
+
/**
 * Parses `input` into a fresh `URL` and validates it against `policy`.
 * String input is trimmed before parsing; a `URL` input is copied, never mutated.
 *
 * @param input URL text or an existing `URL`.
 * @param policy Policy the URL must satisfy.
 * @returns A new `URL` instance that passed validation.
 * @throws When the input cannot be parsed as a URL or the policy rejects it.
 */
export function parseAndValidateUrl(input: string | URL, policy: UrlPolicy): URL {
  const href = input instanceof URL ? input.toString() : input.trim();
  const parsed = new URL(href);
  validateUrlAgainstPolicy(parsed, policy);
  return parsed;
}
|
|
197
|
+
|
|
198
|
+
/**
 * Policy for documentation pages: https only, limited to the Hugging Face and Gradio
 * hosts, any query string allowed. On the Hugging Face hosts the path must additionally
 * stay under `/docs/`.
 *
 * @returns A new policy object.
 */
export function createHfDocsPolicy(): UrlPolicy {
  const hfHosts = new Set(['huggingface.co', 'www.huggingface.co']);
  const gradioHosts = ['gradio.app', 'www.gradio.app'];

  // Only the Hugging Face hosts are confined to /docs/; Gradio hosts are not path-restricted.
  const enforceDocsPrefix = (url: URL): void => {
    const host = normalizeHostname(url.hostname);
    if (!hfHosts.has(host)) {
      return;
    }
    if (!matchesRequiredPrefix(url.pathname, '/docs/')) {
      throw new Error('Hugging Face docs URLs must remain under /docs/');
    }
  };

  const policy: UrlPolicy = {
    allowedProtocols: new Set(['https:']),
    allowedHosts: new Set([...hfHosts, ...gradioHosts]),
    allowCredentials: false,
    queryRules: { allowAny: true },
    customValidator: enforceDocsPrefix,
  };
  return policy;
}
|
|
214
|
+
|
|
215
|
+
/**
 * Policy for Gradio MCP endpoints on any host: http or https, path under
 * `/gradio_api/mcp`, any query string allowed.
 *
 * When `NODE_ENV` is `production`, plain http is accepted only for localhost
 * hostnames; in any other environment http is accepted for every host.
 *
 * @returns A new policy object.
 */
export function createGradioMcpPolicy(): UrlPolicy {
  const policy: UrlPolicy = {
    allowedProtocols: new Set(['https:', 'http:']),
    pathRules: { requiredPrefix: '/gradio_api/mcp' },
    allowCredentials: false,
    queryRules: { allowAny: true },
    customValidator: (url) => {
      // The environment is read on every validation, not when the policy is created.
      if (process.env.NODE_ENV !== 'production') {
        return;
      }
      if (url.protocol !== 'http:') {
        return;
      }
      if (isLocalhostHostname(url.hostname)) {
        return;
      }
      throw new Error('HTTP is only allowed for localhost Gradio MCP endpoints');
    },
  };
  return policy;
}
|
|
231
|
+
|
|
232
|
+
/**
 * Reports whether `hostname` is one of the names in `LOCALHOST_HOSTS`, compared
 * after normalization (case-insensitive, trailing dots ignored).
 *
 * @param hostname Hostname to test, e.g. `URL.hostname`.
 * @returns `true` for a listed local hostname.
 */
export function isLocalhostHostname(hostname: string): boolean {
  const canonical = normalizeHostname(hostname);
  return LOCALHOST_HOSTS.has(canonical);
}
|
|
235
|
+
|
|
236
|
+
/**
 * Policy for arbitrary external resources: https only, no embedded credentials,
 * any host and any query string.
 *
 * @returns A new policy object.
 */
export function createExternalHttpsPolicy(): UrlPolicy {
  const policy: UrlPolicy = {
    allowedProtocols: new Set(['https:']),
    allowCredentials: false,
    queryRules: { allowAny: true },
  };
  return policy;
}
|
|
243
|
+
|
|
244
|
+
/**
 * Policy for the Hugging Face Hub: https only, restricted to `huggingface.co`,
 * `www.huggingface.co` and `hf.co`, no embedded credentials, any query string.
 *
 * @returns A new policy object.
 */
export function createHuggingFaceHubPolicy(): UrlPolicy {
  const policy: UrlPolicy = {
    allowedProtocols: new Set(['https:']),
    allowedHosts: new Set(['huggingface.co', 'www.huggingface.co', 'hf.co']),
    allowCredentials: false,
    queryRules: { allowAny: true },
  };
  return policy;
}
|
|
252
|
+
|
|
253
|
+
/**
 * Policy for services on the local machine: http or https, restricted to
 * `localhost`, `127.0.0.1` and `[::1]`, no embedded credentials, any query string.
 *
 * @returns A new policy object.
 */
export function createLocalhostHttpPolicy(): UrlPolicy {
  const policy: UrlPolicy = {
    allowedProtocols: new Set(['https:', 'http:']),
    allowedHosts: new Set(['localhost', '127.0.0.1', '[::1]']),
    allowCredentials: false,
    queryRules: { allowAny: true },
  };
  return policy;
}
|
|
261
|
+
|
|
262
|
+
/**
 * Policy that accepts exactly one host over exactly one protocol, with no embedded
 * credentials and any query string.
 *
 * @param hostname The only hostname to accept; stored lower-cased.
 * @param allowedProtocol The only protocol to accept.
 * @returns A new policy object.
 */
export function createExactHostPolicy(hostname: string, allowedProtocol: UrlProtocol): UrlPolicy {
  const onlyHost = hostname.toLowerCase();
  const policy: UrlPolicy = {
    allowedProtocols: new Set([allowedProtocol]),
    allowedHosts: new Set([onlyHost]),
    allowCredentials: false,
    queryRules: { allowAny: true },
  };
  return policy;
}
|
|
270
|
+
|
|
271
|
+
/**
 * Policy that accepts a single host over a single protocol and additionally requires
 * the path to start with `requiredPrefix`. Embedded credentials are rejected and any
 * query string is allowed.
 *
 * @param hostname The only hostname to accept; stored lower-cased.
 * @param requiredPrefix Prefix the URL path must start with.
 * @param allowedProtocol The only protocol to accept (defaults to `'https:'`).
 * @returns A new policy object.
 */
export function createHostPrefixPolicy(
  hostname: string,
  requiredPrefix: string,
  allowedProtocol: UrlProtocol = 'https:'
): UrlPolicy {
  const onlyHost = hostname.toLowerCase();
  const policy: UrlPolicy = {
    allowedProtocols: new Set([allowedProtocol]),
    allowedHosts: new Set([onlyHost]),
    pathRules: { requiredPrefix },
    allowCredentials: false,
    queryRules: { allowAny: true },
  };
  return policy;
}
|
|
286
|
+
|
|
287
|
+
/**
 * Policy for the Gradio MCP endpoint of one specific host: the path must start
 * with `/gradio_api/mcp`.
 *
 * @param hostname The only hostname to accept.
 * @param allowedProtocol The only protocol to accept.
 * @returns A new policy object built by `createHostPrefixPolicy`.
 */
export function createGradioMcpHostPolicy(
  hostname: string,
  allowedProtocol: UrlProtocol
): UrlPolicy {
  const mcpPrefix = '/gradio_api/mcp';
  return createHostPrefixPolicy(hostname, mcpPrefix, allowedProtocol);
}
|
|
293
|
+
|
|
294
|
+
/**
 * Policy for the Gradio MCP schema endpoint of one specific host: the path must start
 * with `/gradio_api/mcp/schema`, using the default protocol of `createHostPrefixPolicy`.
 *
 * @param hostname The only hostname to accept.
 * @returns A new policy object built by `createHostPrefixPolicy`.
 */
export function createGradioSchemaHostPolicy(hostname: string): UrlPolicy {
  const schemaPrefix = '/gradio_api/mcp/schema';
  return createHostPrefixPolicy(hostname, schemaPrefix);
}
|
|
297
|
+
|
|
298
|
+
/**
 * Most permissive policy: http or https to any host with any query string;
 * only embedded credentials are rejected.
 *
 * @returns A new policy object.
 */
export function createHttpOrHttpsPolicy(): UrlPolicy {
  const policy: UrlPolicy = {
    allowedProtocols: new Set(['https:', 'http:']),
    allowCredentials: false,
    queryRules: { allowAny: true },
  };
  return policy;
}
|
package/src/readme-utils.ts
CHANGED
|
@@ -2,12 +2,14 @@
|
|
|
2
2
|
* Utility functions for fetching and processing README files from Hugging Face repositories
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
|
+
import { fetchWithProfile, NETWORK_FETCH_PROFILES } from './network/fetch-profile.js';
|
|
6
|
+
|
|
5
7
|
// Maximum number of characters to include from a README
|
|
6
8
|
const DEFAULT_MAX_README_CHARS = 10_000;
|
|
7
9
|
|
|
8
10
|
/**
|
|
9
11
|
* Fetches README content from a Hugging Face repository
|
|
10
|
-
*
|
|
12
|
+
*
|
|
11
13
|
* @param repoName The resolved repository name (e.g., 'rajpurkar/squad', 'openai-community/gpt2')
|
|
12
14
|
* @param type The repository type ('models' or 'datasets')
|
|
13
15
|
* @param includeYaml Whether to include YAML frontmatter (default: false)
|
|
@@ -20,14 +22,13 @@ export async function fetchReadmeContent(
|
|
|
20
22
|
): Promise<string | null> {
|
|
21
23
|
try {
|
|
22
24
|
// Construct the URL based on repository type
|
|
23
|
-
const baseUrl =
|
|
24
|
-
? `https://huggingface.co/datasets/${repoName}`
|
|
25
|
-
|
|
26
|
-
|
|
25
|
+
const baseUrl =
|
|
26
|
+
type === 'datasets' ? `https://huggingface.co/datasets/${repoName}` : `https://huggingface.co/${repoName}`;
|
|
27
|
+
|
|
27
28
|
const url = `${baseUrl}/resolve/main/README.md`;
|
|
28
29
|
|
|
29
|
-
const response = await
|
|
30
|
-
|
|
30
|
+
const { response } = await fetchWithProfile(url, NETWORK_FETCH_PROFILES.hfHub());
|
|
31
|
+
|
|
31
32
|
if (!response.ok) {
|
|
32
33
|
if (response.status === 404) {
|
|
33
34
|
// README doesn't exist, return null silently
|
|
@@ -64,7 +65,7 @@ export async function fetchReadmeContent(
|
|
|
64
65
|
|
|
65
66
|
/**
|
|
66
67
|
* Strips YAML frontmatter from markdown content
|
|
67
|
-
*
|
|
68
|
+
*
|
|
68
69
|
* @param content The full markdown content
|
|
69
70
|
* @returns The content with YAML frontmatter removed
|
|
70
71
|
*/
|
|
@@ -72,12 +73,12 @@ function stripYamlFrontmatter(content: string): string {
|
|
|
72
73
|
// Match YAML frontmatter: starts with ---, ends with ---
|
|
73
74
|
const yamlPattern = /^(\s*---[\r\n]+)([\S\s]*?)([\r\n]+---(\r\n|\n|$))/;
|
|
74
75
|
const match = content.match(yamlPattern);
|
|
75
|
-
|
|
76
|
+
|
|
76
77
|
if (match) {
|
|
77
78
|
// Return everything after the closing ---
|
|
78
79
|
return content.substring(match[0].length);
|
|
79
80
|
}
|
|
80
|
-
|
|
81
|
+
|
|
81
82
|
// No YAML frontmatter found, return original content
|
|
82
83
|
return content;
|
|
83
84
|
}
|