@llmindset/hf-mcp 0.2.28 → 0.2.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/docs-search/doc-fetch.d.ts.map +1 -1
- package/dist/docs-search/doc-fetch.js +24 -13
- package/dist/docs-search/doc-fetch.js.map +1 -1
- package/dist/docs-search/doc-fetch.test.js +60 -17
- package/dist/docs-search/doc-fetch.test.js.map +1 -1
- package/package.json +1 -1
- package/src/docs-search/doc-fetch.test.ts +94 -17
- package/src/docs-search/doc-fetch.ts +52 -36
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"doc-fetch.d.ts","sourceRoot":"","sources":["../../src/docs-search/doc-fetch.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAIxB,eAAO,MAAM,gBAAgB;;;;;;;;;;;;;;;;;;;CAkBnB,CAAC;AAEX,MAAM,MAAM,cAAc,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,gBAAgB,CAAC,MAAM,CAAC,CAAC;AAErE,qBAAa,YAAY;IACxB,OAAO,CAAC,eAAe,CAAkB;;
|
|
1
|
+
{"version":3,"file":"doc-fetch.d.ts","sourceRoot":"","sources":["../../src/docs-search/doc-fetch.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAIxB,eAAO,MAAM,gBAAgB;;;;;;;;;;;;;;;;;;;CAkBnB,CAAC;AAEX,MAAM,MAAM,cAAc,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,gBAAgB,CAAC,MAAM,CAAC,CAAC;AAErE,qBAAa,YAAY;IACxB,OAAO,CAAC,eAAe,CAAkB;;IA6FzC,WAAW,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI;IAuB1B,KAAK,CAAC,MAAM,EAAE,cAAc,GAAG,OAAO,CAAC,MAAM,CAAC;IAqCpD,OAAO,CAAC,aAAa;CAoCrB;AAMD,wBAAgB,eAAe,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAoBrD"}
|
|
@@ -80,7 +80,8 @@ export class DocFetchTool {
|
|
|
80
80
|
return false;
|
|
81
81
|
const text = (n.textContent || '').trim();
|
|
82
82
|
const children = n.childNodes || [];
|
|
83
|
-
const onlyIcons = children.length > 0 &&
|
|
83
|
+
const onlyIcons = children.length > 0 &&
|
|
84
|
+
children.every((c) => (c.nodeName || '').toLowerCase() === 'img' || (c.nodeName || '').toLowerCase() === 'svg');
|
|
84
85
|
const looksLikeEncodedSvg = /data:image\/svg\+xml|%3csvg|svg%2bxml/i.test(text);
|
|
85
86
|
const noAlnumText = text.length <= 3 && !/[a-z0-9]/i.test(text);
|
|
86
87
|
return onlyIcons || looksLikeEncodedSvg || noAlnumText;
|
|
@@ -89,7 +90,7 @@ export class DocFetchTool {
|
|
|
89
90
|
return false;
|
|
90
91
|
}
|
|
91
92
|
},
|
|
92
|
-
replacement: () => ''
|
|
93
|
+
replacement: () => '',
|
|
93
94
|
});
|
|
94
95
|
}
|
|
95
96
|
validateUrl(hfUrl) {
|
|
@@ -113,18 +114,21 @@ export class DocFetchTool {
|
|
|
113
114
|
try {
|
|
114
115
|
const normalizedUrl = normalizeDocUrl(params.doc_url);
|
|
115
116
|
this.validateUrl(normalizedUrl);
|
|
116
|
-
const response = await fetch(normalizedUrl);
|
|
117
|
+
const response = await fetch(normalizedUrl, { headers: { accept: 'text/markdown' } });
|
|
117
118
|
if (!response.ok) {
|
|
118
119
|
throw new Error(`Failed to fetch document: ${response.status} ${response.statusText}`);
|
|
119
120
|
}
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
.
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
121
|
+
let content = await response.text();
|
|
122
|
+
const contentType = response.headers.get('content-type') || '';
|
|
123
|
+
const isPlainOrMarkdown = contentType.includes('text/plain') || contentType.includes('text/markdown');
|
|
124
|
+
if (!isPlainOrMarkdown) {
|
|
125
|
+
content = this.turndownService.turndown(content);
|
|
126
|
+
content = content
|
|
127
|
+
.replace(/!\[[^\]]*\]\(\s*(?:data:image\/svg\+xml[^)]*|[^)]*\.svg(?:\?[^)]*)?)\s*\)/gi, '')
|
|
128
|
+
.replace(/\[\s*\]\(\s*[^)]*\s*\)/g, '');
|
|
129
|
+
content = content.replace(/\[[^\]]*(?:data:image\/svg\+xml|%3csvg|svg%2bxml)[^\]]*\]\([^)]*\)/gi, '');
|
|
130
|
+
}
|
|
131
|
+
return this.applyChunking(content, params.offset || 0);
|
|
128
132
|
}
|
|
129
133
|
catch (error) {
|
|
130
134
|
throw new Error(`Failed to fetch document: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
@@ -156,13 +160,20 @@ export class DocFetchTool {
|
|
|
156
160
|
}
|
|
157
161
|
export function normalizeDocUrl(input) {
|
|
158
162
|
try {
|
|
159
|
-
const
|
|
163
|
+
const trimmed = input.trim();
|
|
164
|
+
if (trimmed.startsWith('/docs')) {
|
|
165
|
+
return `https://huggingface.co${trimmed}`;
|
|
166
|
+
}
|
|
167
|
+
if (trimmed.startsWith('./docs')) {
|
|
168
|
+
return `https://huggingface.co/${trimmed.slice(2)}`;
|
|
169
|
+
}
|
|
170
|
+
const url = new URL(trimmed);
|
|
160
171
|
const host = url.hostname.toLowerCase();
|
|
161
172
|
if (host === 'gradio.app') {
|
|
162
173
|
url.hostname = 'www.gradio.app';
|
|
163
174
|
return url.toString();
|
|
164
175
|
}
|
|
165
|
-
return
|
|
176
|
+
return trimmed;
|
|
166
177
|
}
|
|
167
178
|
catch {
|
|
168
179
|
return input;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"doc-fetch.js","sourceRoot":"","sources":["../../src/docs-search/doc-fetch.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,eAAe,MAAM,UAAU,CAAC;AACvC,OAAO,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAEjD,MAAM,CAAC,MAAM,gBAAgB,GAAG;IAC/B,IAAI,EAAE,cAAc;IACpB,WAAW,EACV,mIAAmI;IACpI,MAAM,EAAE,CAAC,CAAC,MAAM,CAAC;QAChB,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,GAAG,EAAE,gBAAgB,CAAC,CAAC,QAAQ,CAAC,4CAA4C,CAAC;QACrG,MAAM,EAAE,CAAC;aACP,MAAM,EAAE;aACR,GAAG,CAAC,CAAC,CAAC;aACN,QAAQ,EAAE;aACV,QAAQ,CAAC,2EAA2E,CAAC;KACvF,CAAC;IACF,WAAW,EAAE;QACZ,KAAK,EAAE,8DAA8D;QACrE,eAAe,EAAE,KAAK;QACtB,YAAY,EAAE,IAAI;QAClB,aAAa,EAAE,IAAI;KACnB;CACQ,CAAC;AAIX,MAAM,OAAO,YAAY;IAChB,eAAe,CAAkB;IAEzC;QACC,IAAI,CAAC,eAAe,GAAG,IAAI,eAAe,CAAC;YAC1C,YAAY,EAAE,KAAK;YACnB,cAAc,EAAE,QAAQ;SACxB,CAAC,CAAC;QACH,IAAI,CAAC,eAAe,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;QACpC,IAAI,CAAC,eAAe,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QAGtC,IAAI,CAAC,eAAe,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE;YACpC,IAAI,CAAC;gBACJ,MAAM,GAAG,GAAG,CAAE,IAAyC,CAAC,QAAQ,IAAI,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC;gBACtF,IAAI,CAAC,QAAQ,EAAE,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,OAAO,EAAE,UAAU,EAAE,QAAQ,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;oBACzG,OAAO,IAAI,CAAC;gBACb,CAAC;YACF,CAAC;YAAC,MAAM,CAAC;YAET,CAAC;YACD,OAAO,KAAK,CAAC;QACd,CAAC,CAAC,CAAC;QACH,IAAI,CAAC,eAAe,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE;YAEpC,IAAI,CAAC;gBACJ,IAAI,OAAQ,IAAyC,CAAC,QAAQ,KAAK,QAAQ,EAAE,CAAC;oBAC7E,MAAM,GAAG,GAAG,CAAE,IAAwC,CAAC,QAAQ,IAAI,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC;oBACrF,IAAI,GAAG,KAAK,KAAK,EAAE,CAAC;wBACnB,OAAO,IAAI,CAAC;oBACb,CAAC;gBACF,CAAC;YACF,CAAC;YAAC,MAAM,CAAC;YAET,CAAC;YAED,IAAI,IAAI,CAAC,QAAQ,KAAK,GAAG,IAAI,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,yBAAyB,CAAC,EAAE,CAAC;gBACjF,OAAO,IAAI,CAAC;YACb,CAAC;YAED,MAAM,QAAQ,GAAG,CAAE,IAAyC,CAAC,QAAQ,IAAI,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC;YAC3F,IAAI,QAAQ,KAAK,KAAK,EAAE,CAAC;gBACxB,IAAI,CAAC;oBACJ,MAAM,GAAG,
|
|
1
|
+
{"version":3,"file":"doc-fetch.js","sourceRoot":"","sources":["../../src/docs-search/doc-fetch.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,eAAe,MAAM,UAAU,CAAC;AACvC,OAAO,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAEjD,MAAM,CAAC,MAAM,gBAAgB,GAAG;IAC/B,IAAI,EAAE,cAAc;IACpB,WAAW,EACV,mIAAmI;IACpI,MAAM,EAAE,CAAC,CAAC,MAAM,CAAC;QAChB,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,GAAG,EAAE,gBAAgB,CAAC,CAAC,QAAQ,CAAC,4CAA4C,CAAC;QACrG,MAAM,EAAE,CAAC;aACP,MAAM,EAAE;aACR,GAAG,CAAC,CAAC,CAAC;aACN,QAAQ,EAAE;aACV,QAAQ,CAAC,2EAA2E,CAAC;KACvF,CAAC;IACF,WAAW,EAAE;QACZ,KAAK,EAAE,8DAA8D;QACrE,eAAe,EAAE,KAAK;QACtB,YAAY,EAAE,IAAI;QAClB,aAAa,EAAE,IAAI;KACnB;CACQ,CAAC;AAIX,MAAM,OAAO,YAAY;IAChB,eAAe,CAAkB;IAEzC;QACC,IAAI,CAAC,eAAe,GAAG,IAAI,eAAe,CAAC;YAC1C,YAAY,EAAE,KAAK;YACnB,cAAc,EAAE,QAAQ;SACxB,CAAC,CAAC;QACH,IAAI,CAAC,eAAe,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;QACpC,IAAI,CAAC,eAAe,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QAGtC,IAAI,CAAC,eAAe,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE;YACpC,IAAI,CAAC;gBACJ,MAAM,GAAG,GAAG,CAAE,IAAyC,CAAC,QAAQ,IAAI,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC;gBACtF,IAAI,CAAC,QAAQ,EAAE,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,OAAO,EAAE,UAAU,EAAE,QAAQ,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;oBACzG,OAAO,IAAI,CAAC;gBACb,CAAC;YACF,CAAC;YAAC,MAAM,CAAC;YAET,CAAC;YACD,OAAO,KAAK,CAAC;QACd,CAAC,CAAC,CAAC;QACH,IAAI,CAAC,eAAe,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE;YAEpC,IAAI,CAAC;gBACJ,IAAI,OAAQ,IAAyC,CAAC,QAAQ,KAAK,QAAQ,EAAE,CAAC;oBAC7E,MAAM,GAAG,GAAG,CAAE,IAAwC,CAAC,QAAQ,IAAI,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC;oBACrF,IAAI,GAAG,KAAK,KAAK,EAAE,CAAC;wBACnB,OAAO,IAAI,CAAC;oBACb,CAAC;gBACF,CAAC;YACF,CAAC;YAAC,MAAM,CAAC;YAET,CAAC;YAED,IAAI,IAAI,CAAC,QAAQ,KAAK,GAAG,IAAI,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,yBAAyB,CAAC,EAAE,CAAC;gBACjF,OAAO,IAAI,CAAC;YACb,CAAC;YAED,MAAM,QAAQ,GAAG,CAAE,IAAyC,CAAC,QAAQ,IAAI,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC;YAC3F,IAAI,QAAQ,KAAK,KAAK,EAAE,CAAC;gBACxB,IAAI,CAAC;oBACJ,MAAM,GAAG,GACP,IAAsE,CAAC,YAAY,EAAE,CAAC,KAAK,CAAC;wBAC7F,CAAE,IAAoC,CAAC,GAAG,IAAI,EAAE,CAAC,CAAC;oBACnD,IACC,cAAc,CAAC,IAAI,CAAC,GAAG,CAAC;wBACxB,4BAA4B,CAAC,IAAI,CAAC,GAAG,CAAC;wBACtC,GAAG,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,eAAe,CAAC,EAC1C,CAAC;wBACF,OAAO,IAAI,CAAC;oBACb,CAAC;gBACF,CAAC;gBAAC,MAAM,CAAC;gBAET,CAAC;YACF,CAAC;YACD,OAAO,KAAK,CAAC;QACd,CAAC,CAAC,CAAC;QAGH,IAAI,CAAC,eAAe,CAAC,OAAO,CAAC,oBAAoB,EAAE;YAClD,MAAM,EAAE,CAAC,IAAI,EAAE,EAAE;gBAChB,IAAI,CAAC;oBACJ,MAAM,CAAC,GAAG,IAKT,CAAC;oBACF,IAAI,CAAC,CAAC,CAAC,QAAQ,IAAI,EAAE,CAAC,CAAC,WAAW,EAAE,KAAK,GAAG;wBAAE,OAAO,KAAK,CAAC;oBAC3D,MAAM,IAAI,GAAG,CAAC,CAAC,YAAY,EAAE,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;oBAC5C,IAAI,CAAC,IAAI,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC;wBAAE,OAAO,KAAK,CAAC;oBACjD,MAAM,IAAI,GAAG,CAAC,CAAC,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;oBAC1C,MAAM,QAAQ,GAAI,CAA8D,CAAC,UAAU,IAAI,EAAE,CAAC;oBAClG,MAAM,SAAS,GACd,QAAQ,CAAC,MAAM,GAAG,CAAC;wBACnB,QAAQ,CAAC,KAAK,CACb,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,QAAQ,IAAI,EAAE,CAAC,CAAC,WAAW,EAAE,KAAK,KAAK,IAAI,CAAC,CAAC,CAAC,QAAQ,IAAI,EAAE,CAAC,CAAC,WAAW,EAAE,KAAK,KAAK,CAC/F,CAAC;oBACH,MAAM,mBAAmB,GAAG,wCAAwC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;oBAChF,MAAM,WAAW,GAAG,IAAI,CAAC,MAAM,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;oBAChE,OAAO,SAAS,IAAI,mBAAmB,IAAI,WAAW,CAAC;gBACxD,CAAC;gBAAC,MAAM,CAAC;oBACR,OAAO,KAAK,CAAC;gBACd,CAAC;YACF,CAAC;YACD,WAAW,EAAE,GAAG,EAAE,CAAC,EAAE;SACrB,CAAC,CAAC;IACJ,CAAC;IAKD,WAAW,CAAC,KAAa;QACxB,IAAI,CAAC;YACJ,MAAM,GAAG,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,CAAC;YAC3B,IAAI,GAAG,CAAC,QAAQ,KAAK,QAAQ,EAAE,CAAC;gBAC/B,MAAM,IAAI,KAAK,CAAC,wCAAwC,CAAC,CAAC;YAC3D,CAAC;YAED,MAAM,QAAQ,GAAG,GAAG,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC;YAC5C,MAAM,QAAQ,GACb,CAAC,QAAQ,KAAK,gBAAgB,IAAI,QAAQ,KAAK,oBAAoB,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC;YAC3G,MAAM,QAAQ,GAAG,QAAQ,KAAK,YAAY,IAAI,QAAQ,KAAK,gBAAgB,CAAC;YAE5E,IAAI,CAAC,QAAQ,IAAI,CAAC,QAAQ,EAAE,CAAC;gBAC5B,MAAM,IAAI,KAAK,CAAC,wCAAwC,CAAC,CAAC;YAC3D,CAAC;QACF,CAAC;QAAC,MAAM,CAAC;YACR,MAAM,IAAI,KAAK,CAAC,wCAAwC,CAAC,CAAC;QAC3D,CAAC;IACF,CAAC;IAKD,KAAK,CAAC,KAAK,CAAC,MAAsB;QACjC,IAAI,CAAC;YACJ,MAAM,aAAa,GAAG,eAAe,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;YACtD,IAAI,CAAC,WAAW,CAAC,aAAa,CAAC,CAAC;YAEhC,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,aAAa,EAAE,EAAE,OAAO,EAAE,EAAE,MAAM,EAAE,eAAe,EAAE,EAAE,CAAC,CAAC;YACtF,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;gBAClB,MAAM,IAAI,KAAK,CAAC,6BAA6B,QAAQ,CAAC,MAAM,IAAI,QAAQ,CAAC,UAAU,EAAE,CAAC,CAAC;YACxF,CAAC;YACD,IAAI,OAAO,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YACpC,MAAM,WAAW,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,EAAE,CAAC;YAC/D,MAAM,iBAAiB,GAAG,WAAW,CAAC,QAAQ,CAAC,YAAY,CAAC,IAAI,WAAW,CAAC,QAAQ,CAAC,eAAe,CAAC,CAAC;YACtG,IAAI,CAAC,iBAAiB,EAAE,CAAC;gBAExB,OAAO,GAAG,IAAI,CAAC,eAAe,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;gBAKjD,OAAO,GAAG,OAAO;qBACf,OAAO,CAAC,6EAA6E,EAAE,EAAE,CAAC;qBAC1F,OAAO,CAAC,yBAAyB,EAAE,EAAE,CAAC,CAAC;gBAGzC,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,sEAAsE,EAAE,EAAE,CAAC,CAAC;YACvG,CAAC;YAGD,OAAO,IAAI,CAAC,aAAa,CAAC,OAAO,EAAE,MAAM,CAAC,MAAM,IAAI,CAAC,CAAC,CAAC;QACxD,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YAChB,MAAM,IAAI,KAAK,CAAC,6BAA6B,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,EAAE,CAAC,CAAC;QAC1G,CAAC;IACF,CAAC;IAKO,aAAa,CAAC,eAAuB,EAAE,MAAc;QAC5D,MAAM,WAAW,GAAG,cAAc,CAAC,eAAe,CAAC,CAAC;QACpD,MAAM,iBAAiB,GAAG,IAAI,CAAC;QAG/B,MAAM,UAAU,GAAG,eAAe,CAAC,MAAM,CAAC;QAC1C,MAAM,aAAa,GAAG,UAAU,GAAG,WAAW,CAAC;QAC/C,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,aAAa,CAAC,CAAC;QAGrD,IAAI,SAAS,IAAI,UAAU,EAAE,CAAC;YAC7B,OAAO,iBAAiB,MAAM,mCAAmC,WAAW,iBAAiB,CAAC;QAC/F,CAAC;QAGD,IAAI,WAAW,IAAI,iBAAiB,IAAI,MAAM,KAAK,CAAC,EAAE,CAAC;YACtD,OAAO,eAAe,CAAC;QACxB,CAAC;QAED,MAAM,gBAAgB,GAAG,IAAI,CAAC,KAAK,CAAC,iBAAiB,GAAG,aAAa,CAAC,CAAC;QACvE,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,SAAS,GAAG,gBAAgB,EAAE,UAAU,CAAC,CAAC;QACnE,MAAM,KAAK,GAAG,eAAe,CAAC,KAAK,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;QAGxD,MAAM,UAAU,GAAG,MAAM,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC;QAClD,MAAM,OAAO,GAAG,UAAU,GAAG,WAAW,CAAC;QAEzC,IAAI,MAAM,GAAG,KAAK,CAAC;QAGnB,IAAI,OAAO,EAAE,CAAC;YACb,MAAM,IAAI,oCAAoC,gBAAgB,CAAC,IAAI,sBAAsB,UAAU,yBAAyB,CAAC;QAC9H,CAAC;QAED,OAAO,MAAM,CAAC;IACf,CAAC;CACD;AAMD,MAAM,UAAU,eAAe,CAAC,KAAa;IAC5C,IAAI,CAAC;QACJ,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,EAAE,CAAC;QAC7B,IAAI,OAAO,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC;YACjC,OAAO,yBAAyB,OAAO,EAAE,CAAC;QAC3C,CAAC;QACD,IAAI,OAAO,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;YAClC,OAAO,0BAA0B,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC;QACrD,CAAC;QAED,MAAM,GAAG,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC;QAC7B,MAAM,IAAI,GAAG,GAAG,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC;QACxC,IAAI,IAAI,KAAK,YAAY,EAAE,CAAC;YAC3B,GAAG,CAAC,QAAQ,GAAG,gBAAgB,CAAC;YAChC,OAAO,GAAG,CAAC,QAAQ,EAAE,CAAC;QACvB,CAAC;QACD,OAAO,OAAO,CAAC;IAChB,CAAC;IAAC,MAAM,CAAC;QACR,OAAO,KAAK,CAAC;IACd,CAAC;AACF,CAAC"}
|
|
@@ -1,7 +1,21 @@
|
|
|
1
|
-
import { describe, it, expect, vi } from 'vitest';
|
|
1
|
+
import { describe, it, expect, vi, afterEach } from 'vitest';
|
|
2
2
|
import { DocFetchTool, normalizeDocUrl } from './doc-fetch.js';
|
|
3
|
+
const createMockResponse = ({ content, contentType = 'text/html', status = 200, statusText = 'OK', }) => new Response(content, {
|
|
4
|
+
status,
|
|
5
|
+
statusText,
|
|
6
|
+
headers: { 'content-type': contentType },
|
|
7
|
+
});
|
|
8
|
+
const stubFetch = (factory) => {
|
|
9
|
+
const fetchMock = vi.fn().mockImplementation(() => Promise.resolve(factory()));
|
|
10
|
+
vi.stubGlobal('fetch', fetchMock);
|
|
11
|
+
return fetchMock;
|
|
12
|
+
};
|
|
3
13
|
describe('DocFetchTool', () => {
|
|
4
14
|
const tool = new DocFetchTool();
|
|
15
|
+
afterEach(() => {
|
|
16
|
+
vi.clearAllMocks();
|
|
17
|
+
vi.unstubAllGlobals();
|
|
18
|
+
});
|
|
5
19
|
describe('URL validation', () => {
|
|
6
20
|
it('should accept valid HF and Gradio docs URLs', () => {
|
|
7
21
|
const validUrls = [
|
|
@@ -32,11 +46,22 @@ describe('DocFetchTool', () => {
|
|
|
32
46
|
});
|
|
33
47
|
});
|
|
34
48
|
describe('document chunking', () => {
|
|
35
|
-
it('
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
49
|
+
it('uses markdown content from host when available', async () => {
|
|
50
|
+
const markdown = '# Heading\nBody content';
|
|
51
|
+
const fetchMock = stubFetch(() => createMockResponse({
|
|
52
|
+
content: markdown,
|
|
53
|
+
contentType: 'text/markdown',
|
|
54
|
+
}));
|
|
55
|
+
const result = await tool.fetch({ doc_url: 'https://huggingface.co/docs/test' });
|
|
56
|
+
expect(fetchMock).toHaveBeenCalledWith('https://huggingface.co/docs/test', {
|
|
57
|
+
headers: { accept: 'text/markdown' },
|
|
39
58
|
});
|
|
59
|
+
expect(result).toBe(markdown);
|
|
60
|
+
});
|
|
61
|
+
it('should return small documents without chunking', async () => {
|
|
62
|
+
stubFetch(() => createMockResponse({
|
|
63
|
+
content: '<h1>Short Document</h1><p>This is a short document.</p>',
|
|
64
|
+
}));
|
|
40
65
|
const result = await tool.fetch({ doc_url: 'https://huggingface.co/docs/test' });
|
|
41
66
|
expect(result).toContain('# Short Document');
|
|
42
67
|
expect(result).toContain('This is a short document');
|
|
@@ -44,10 +69,9 @@ describe('DocFetchTool', () => {
|
|
|
44
69
|
});
|
|
45
70
|
it('should chunk large documents and show truncation message', async () => {
|
|
46
71
|
const longHtml = '<h1>Long Document</h1>' + '<p>This is a very long sentence that will be repeated many times to create a document that exceeds the 7500 token limit for testing chunking functionality.</p>'.repeat(200);
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
});
|
|
72
|
+
stubFetch(() => createMockResponse({
|
|
73
|
+
content: longHtml,
|
|
74
|
+
}));
|
|
51
75
|
const result = await tool.fetch({ doc_url: 'https://huggingface.co/docs/test' });
|
|
52
76
|
expect(result).toContain('# Long Document');
|
|
53
77
|
expect(result).toContain('DOCUMENT TRUNCATED');
|
|
@@ -58,18 +82,38 @@ describe('DocFetchTool', () => {
|
|
|
58
82
|
{ in: 'https://gradio.app/guides/x', out: 'https://www.gradio.app/guides/x' },
|
|
59
83
|
{ in: 'https://www.gradio.app/guides/x', out: 'https://www.gradio.app/guides/x' },
|
|
60
84
|
{ in: 'https://huggingface.co/docs/transformers', out: 'https://huggingface.co/docs/transformers' },
|
|
85
|
+
{ in: '/docs/diffusers/index', out: 'https://huggingface.co/docs/diffusers/index' },
|
|
86
|
+
{ in: './docs/diffusers/index', out: 'https://huggingface.co/docs/diffusers/index' },
|
|
61
87
|
{ in: 'not a url', out: 'not a url' },
|
|
62
88
|
];
|
|
63
89
|
for (const c of cases) {
|
|
64
90
|
expect(normalizeDocUrl(c.in)).toBe(c.out);
|
|
65
91
|
}
|
|
66
92
|
});
|
|
93
|
+
it('normalizes relative doc paths to the huggingface docs host', async () => {
|
|
94
|
+
const fetchMock = stubFetch(() => createMockResponse({
|
|
95
|
+
content: '<h1>Title</h1><p>Body</p>',
|
|
96
|
+
}));
|
|
97
|
+
const result = await tool.fetch({ doc_url: '/docs/test' });
|
|
98
|
+
expect(fetchMock).toHaveBeenCalledWith('https://huggingface.co/docs/test', {
|
|
99
|
+
headers: { accept: 'text/markdown' },
|
|
100
|
+
});
|
|
101
|
+
expect(result).toContain('# Title');
|
|
102
|
+
});
|
|
103
|
+
it('normalizes ./docs paths to the huggingface docs host', async () => {
|
|
104
|
+
const fetchMock = stubFetch(() => createMockResponse({
|
|
105
|
+
content: '<h1>Another Title</h1><p>Body</p>',
|
|
106
|
+
}));
|
|
107
|
+
await tool.fetch({ doc_url: './docs/another' });
|
|
108
|
+
expect(fetchMock).toHaveBeenCalledWith('https://huggingface.co/docs/another', {
|
|
109
|
+
headers: { accept: 'text/markdown' },
|
|
110
|
+
});
|
|
111
|
+
});
|
|
67
112
|
it('should return subsequent chunks with offset', async () => {
|
|
68
113
|
const longHtml = '<h1>Long Document</h1>' + '<p>This is a very long sentence that will be repeated many times to create a document that exceeds the 7500 token limit for testing chunking functionality.</p>'.repeat(200);
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
});
|
|
114
|
+
stubFetch(() => createMockResponse({
|
|
115
|
+
content: longHtml,
|
|
116
|
+
}));
|
|
73
117
|
const firstChunk = await tool.fetch({ doc_url: 'https://huggingface.co/docs/test' });
|
|
74
118
|
const offsetMatch = firstChunk.match(/OFFSET OF (\d+)/);
|
|
75
119
|
expect(offsetMatch).toBeTruthy();
|
|
@@ -79,10 +123,9 @@ describe('DocFetchTool', () => {
|
|
|
79
123
|
expect(secondChunk.length).toBeGreaterThan(0);
|
|
80
124
|
});
|
|
81
125
|
it('should handle offset beyond document length', async () => {
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
});
|
|
126
|
+
stubFetch(() => createMockResponse({
|
|
127
|
+
content: '<h1>Short Document</h1><p>This is short.</p>',
|
|
128
|
+
}));
|
|
86
129
|
const result = await tool.fetch({ doc_url: 'https://huggingface.co/docs/test', offset: 10000 });
|
|
87
130
|
expect(result).toContain('Error: Offset 10000 is beyond');
|
|
88
131
|
});
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"doc-fetch.test.js","sourceRoot":"","sources":["../../src/docs-search/doc-fetch.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,QAAQ,CAAC;
|
|
1
|
+
{"version":3,"file":"doc-fetch.test.js","sourceRoot":"","sources":["../../src/docs-search/doc-fetch.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AAC7D,OAAO,EAAE,YAAY,EAAE,eAAe,EAAE,MAAM,gBAAgB,CAAC;AAE/D,MAAM,kBAAkB,GAAG,CAAC,EAC3B,OAAO,EACP,WAAW,GAAG,WAAW,EACzB,MAAM,GAAG,GAAG,EACZ,UAAU,GAAG,IAAI,GAMjB,EAAE,EAAE,CACJ,IAAI,QAAQ,CAAC,OAAO,EAAE;IACrB,MAAM;IACN,UAAU;IACV,OAAO,EAAE,EAAE,cAAc,EAAE,WAAW,EAAE;CACxC,CAAC,CAAC;AAEJ,MAAM,SAAS,GAAG,CAAC,OAAuB,EAAE,EAAE;IAC7C,MAAM,SAAS,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,kBAAkB,CAAC,GAAG,EAAE,CAAC,OAAO,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;IAC/E,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;IAClC,OAAO,SAAS,CAAC;AAClB,CAAC,CAAC;AAEF,QAAQ,CAAC,cAAc,EAAE,GAAG,EAAE;IAC1B,MAAM,IAAI,GAAG,IAAI,YAAY,EAAE,CAAC;IAEnC,SAAS,CAAC,GAAG,EAAE;QACd,EAAE,CAAC,aAAa,EAAE,CAAC;QACnB,EAAE,CAAC,gBAAgB,EAAE,CAAC;IACvB,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,gBAAgB,EAAE,GAAG,EAAE;QAC/B,EAAE,CAAC,6CAA6C,EAAE,GAAG,EAAE;YACtD,MAAM,SAAS,GAAG;gBACjB,kDAAkD;gBAClD,0EAA0E;gBAC1E,yDAAyD;gBACzD,sEAAsE;gBACtE,yCAAyC;gBACzC,0CAA0C;gBAC1C,oBAAoB;gBACpB,+BAA+B;aAC/B,CAAC;YAEF,KAAK,MAAM,GAAG,IAAI,SAAS,EAAE,CAAC;gBAC7B,MAAM,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,EAAE,CAAC;YACnD,CAAC;QACF,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,8DAA8D,EAAE,GAAG,EAAE;YACvE,MAAM,WAAW,GAAG;gBACnB,oCAAoC;gBACpC,6CAA6C;gBAC7C,yCAAyC;gBACzC,kCAAkC;gBAClC,iDAAiD;aACjD,CAAC;YAEF,KAAK,MAAM,GAAG,IAAI,WAAW,EAAE,CAAC;gBAC/B,MAAM,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,wCAAwC,CAAC,CAAC;YACvF,CAAC;QACF,CAAC,CAAC,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,mBAAmB,EAAE,GAAG,EAAE;QAClC,EAAE,CAAC,gDAAgD,EAAE,KAAK,IAAI,EAAE;YAC/D,MAAM,QAAQ,GAAG,yBAAyB,CAAC;YAC3C,MAAM,SAAS,GAAG,SAAS,CAAC,GAAG,EAAE,CAChC,kBAAkB,CAAC;gBAClB,OAAO,EAAE,QAAQ;gBACjB,WAAW,EAAE,eAAe;aAC5B,CAAC,CACF,CAAC;YAEF,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,EAAE,OAAO,EAAE,kCAAkC,EAAE,CAAC,CAAC;YACjF,MAAM,CAAC,SAAS,CAAC,CAAC,oBAAoB,CAAC,kCAAkC,EAAE;gBAC1E,OAAO,EAAE,EAAE,MAAM,EAAE,eAAe,EAAE;aACpC,CAAC,CAAC;YACH,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAC/B,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,gDAAgD,EAAE,KAAK,IAAI,EAAE;YAG/D,SAAS,CAAC,GAAG,EAAE,CACd,kBAAkB,CAAC;gBAClB,OAAO,EAAE,yDAAyD;aAClE,CAAC,CACF,CAAC;YAEF,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,EAAE,OAAO,EAAE,kCAAkC,EAAE,CAAC,CAAC;YAEjF,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,kBAAkB,CAAC,CAAC;YAC7C,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,0BAA0B,CAAC,CAAC;YACrD,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,oBAAoB,CAAC,CAAC;QACpD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,0DAA0D,EAAE,KAAK,IAAI,EAAE;YAEzE,MAAM,QAAQ,GAAG,wBAAwB,GAAG,iKAAiK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YAE1N,SAAS,CAAC,GAAG,EAAE,CACd,kBAAkB,CAAC;gBAClB,OAAO,EAAE,QAAQ;aACjB,CAAC,CACF,CAAC;YAEF,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,EAAE,OAAO,EAAE,kCAAkC,EAAE,CAAC,CAAC;YAEjF,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,iBAAiB,CAAC,CAAC;YAC5C,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,oBAAoB,CAAC,CAAC;YAC/C,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,qCAAqC,CAAC,CAAC;QACjE,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,yDAAyD,EAAE,GAAG,EAAE;YAClE,MAAM,KAAK,GAAuC;gBACjD,EAAE,EAAE,EAAE,6BAA6B,EAAE,GAAG,EAAE,iCAAiC,EAAE;gBAC7E,EAAE,EAAE,EAAE,iCAAiC,EAAE,GAAG,EAAE,iCAAiC,EAAE;gBACjF,EAAE,EAAE,EAAE,0CAA0C,EAAE,GAAG,EAAE,0CAA0C,EAAE;gBACnG,EAAE,EAAE,EAAE,uBAAuB,EAAE,GAAG,EAAE,6CAA6C,EAAE;gBACnF,EAAE,EAAE,EAAE,wBAAwB,EAAE,GAAG,EAAE,6CAA6C,EAAE;gBACpF,EAAE,EAAE,EAAE,WAAW,EAAE,GAAG,EAAE,WAAW,EAAE;aACrC,CAAC;YACF,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;gBACvB,MAAM,CAAC,eAAe,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;YAC3C,CAAC;QACF,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,4DAA4D,EAAE,KAAK,IAAI,EAAE;YAC3E,MAAM,SAAS,GAAG,SAAS,CAAC,GAAG,EAAE,CAChC,kBAAkB,CAAC;gBAClB,OAAO,EAAE,2BAA2B;aACpC,CAAC,CACF,CAAC;YAEF,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,EAAE,OAAO,EAAE,YAAY,EAAE,CAAC,CAAC;YAC3D,MAAM,CAAC,SAAS,CAAC,CAAC,oBAAoB,CAAC,kCAAkC,EAAE;gBAC1E,OAAO,EAAE,EAAE,MAAM,EAAE,eAAe,EAAE;aACpC,CAAC,CAAC;YACH,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,SAAS,CAAC,CAAC;QACrC,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,sDAAsD,EAAE,KAAK,IAAI,EAAE;YACrE,MAAM,SAAS,GAAG,SAAS,CAAC,GAAG,EAAE,CAChC,kBAAkB,CAAC;gBAClB,OAAO,EAAE,mCAAmC;aAC5C,CAAC,CACF,CAAC;YAEF,MAAM,IAAI,CAAC,KAAK,CAAC,EAAE,OAAO,EAAE,gBAAgB,EAAE,CAAC,CAAC;YAChD,MAAM,CAAC,SAAS,CAAC,CAAC,oBAAoB,CAAC,qCAAqC,EAAE;gBAC7E,OAAO,EAAE,EAAE,MAAM,EAAE,eAAe,EAAE;aACpC,CAAC,CAAC;QACJ,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,6CAA6C,EAAE,KAAK,IAAI,EAAE;YAE5D,MAAM,QAAQ,GAAG,wBAAwB,GAAG,iKAAiK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YAE1N,SAAS,CAAC,GAAG,EAAE,CACd,kBAAkB,CAAC;gBAClB,OAAO,EAAE,QAAQ;aACjB,CAAC,CACF,CAAC;YAGF,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,EAAE,OAAO,EAAE,kCAAkC,EAAE,CAAC,CAAC;YAGrF,MAAM,WAAW,GAAG,UAAU,CAAC,KAAK,CAAC,iBAAiB,CAAC,CAAC;YACxD,MAAM,CAAC,WAAW,CAAC,CAAC,UAAU,EAAE,CAAC;YACjC,MAAM,MAAM,GAAG,QAAQ,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,IAAI,GAAG,EAAE,EAAE,CAAC,CAAC;YAGrD,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,EAAE,OAAO,EAAE,kCAAkC,EAAE,MAAM,EAAE,CAAC,CAAC;YAE9F,MAAM,CAAC,WAAW,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;YAC5C,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QAC/C,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,6CAA6C,EAAE,KAAK,IAAI,EAAE;YAC5D,SAAS,CAAC,GAAG,EAAE,CACd,kBAAkB,CAAC;gBAClB,OAAO,EAAE,8CAA8C;aACvD,CAAC,CACF,CAAC;YAEF,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,EAAE,OAAO,EAAE,kCAAkC,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,CAAC;YAEhG,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,+BAA+B,CAAC,CAAC;QAC3D,CAAC,CAAC,CAAC;IACJ,CAAC,CAAC,CAAC;AACJ,CAAC,CAAC,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,9 +1,37 @@
|
|
|
1
|
-
import { describe, it, expect, vi } from 'vitest';
|
|
1
|
+
import { describe, it, expect, vi, afterEach } from 'vitest';
|
|
2
2
|
import { DocFetchTool, normalizeDocUrl } from './doc-fetch.js';
|
|
3
3
|
|
|
4
|
+
const createMockResponse = ({
|
|
5
|
+
content,
|
|
6
|
+
contentType = 'text/html',
|
|
7
|
+
status = 200,
|
|
8
|
+
statusText = 'OK',
|
|
9
|
+
}: {
|
|
10
|
+
content: string;
|
|
11
|
+
contentType?: string;
|
|
12
|
+
status?: number;
|
|
13
|
+
statusText?: string;
|
|
14
|
+
}) =>
|
|
15
|
+
new Response(content, {
|
|
16
|
+
status,
|
|
17
|
+
statusText,
|
|
18
|
+
headers: { 'content-type': contentType },
|
|
19
|
+
});
|
|
20
|
+
|
|
21
|
+
const stubFetch = (factory: () => Response) => {
|
|
22
|
+
const fetchMock = vi.fn().mockImplementation(() => Promise.resolve(factory()));
|
|
23
|
+
vi.stubGlobal('fetch', fetchMock);
|
|
24
|
+
return fetchMock;
|
|
25
|
+
};
|
|
26
|
+
|
|
4
27
|
describe('DocFetchTool', () => {
|
|
5
28
|
const tool = new DocFetchTool();
|
|
6
29
|
|
|
30
|
+
afterEach(() => {
|
|
31
|
+
vi.clearAllMocks();
|
|
32
|
+
vi.unstubAllGlobals();
|
|
33
|
+
});
|
|
34
|
+
|
|
7
35
|
describe('URL validation', () => {
|
|
8
36
|
it('should accept valid HF and Gradio docs URLs', () => {
|
|
9
37
|
const validUrls = [
|
|
@@ -38,13 +66,30 @@ describe('DocFetchTool', () => {
|
|
|
38
66
|
});
|
|
39
67
|
|
|
40
68
|
describe('document chunking', () => {
|
|
69
|
+
it('uses markdown content from host when available', async () => {
|
|
70
|
+
const markdown = '# Heading\nBody content';
|
|
71
|
+
const fetchMock = stubFetch(() =>
|
|
72
|
+
createMockResponse({
|
|
73
|
+
content: markdown,
|
|
74
|
+
contentType: 'text/markdown',
|
|
75
|
+
}),
|
|
76
|
+
);
|
|
77
|
+
|
|
78
|
+
const result = await tool.fetch({ doc_url: 'https://huggingface.co/docs/test' });
|
|
79
|
+
expect(fetchMock).toHaveBeenCalledWith('https://huggingface.co/docs/test', {
|
|
80
|
+
headers: { accept: 'text/markdown' },
|
|
81
|
+
});
|
|
82
|
+
expect(result).toBe(markdown);
|
|
83
|
+
});
|
|
84
|
+
|
|
41
85
|
it('should return small documents without chunking', async () => {
|
|
42
86
|
|
|
43
87
|
// Mock fetch to return HTML that converts to short markdown
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
88
|
+
stubFetch(() =>
|
|
89
|
+
createMockResponse({
|
|
90
|
+
content: '<h1>Short Document</h1><p>This is a short document.</p>',
|
|
91
|
+
}),
|
|
92
|
+
);
|
|
48
93
|
|
|
49
94
|
const result = await tool.fetch({ doc_url: 'https://huggingface.co/docs/test' });
|
|
50
95
|
|
|
@@ -57,10 +102,11 @@ describe('DocFetchTool', () => {
|
|
|
57
102
|
// Mock fetch to return HTML that converts to long markdown
|
|
58
103
|
const longHtml = '<h1>Long Document</h1>' + '<p>This is a very long sentence that will be repeated many times to create a document that exceeds the 7500 token limit for testing chunking functionality.</p>'.repeat(200);
|
|
59
104
|
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
105
|
+
stubFetch(() =>
|
|
106
|
+
createMockResponse({
|
|
107
|
+
content: longHtml,
|
|
108
|
+
}),
|
|
109
|
+
);
|
|
64
110
|
|
|
65
111
|
const result = await tool.fetch({ doc_url: 'https://huggingface.co/docs/test' });
|
|
66
112
|
|
|
@@ -74,6 +120,8 @@ describe('DocFetchTool', () => {
|
|
|
74
120
|
{ in: 'https://gradio.app/guides/x', out: 'https://www.gradio.app/guides/x' },
|
|
75
121
|
{ in: 'https://www.gradio.app/guides/x', out: 'https://www.gradio.app/guides/x' },
|
|
76
122
|
{ in: 'https://huggingface.co/docs/transformers', out: 'https://huggingface.co/docs/transformers' },
|
|
123
|
+
{ in: '/docs/diffusers/index', out: 'https://huggingface.co/docs/diffusers/index' },
|
|
124
|
+
{ in: './docs/diffusers/index', out: 'https://huggingface.co/docs/diffusers/index' },
|
|
77
125
|
{ in: 'not a url', out: 'not a url' },
|
|
78
126
|
];
|
|
79
127
|
for (const c of cases) {
|
|
@@ -81,14 +129,42 @@ describe('DocFetchTool', () => {
|
|
|
81
129
|
}
|
|
82
130
|
});
|
|
83
131
|
|
|
132
|
+
it('normalizes relative doc paths to the huggingface docs host', async () => {
|
|
133
|
+
const fetchMock = stubFetch(() =>
|
|
134
|
+
createMockResponse({
|
|
135
|
+
content: '<h1>Title</h1><p>Body</p>',
|
|
136
|
+
}),
|
|
137
|
+
);
|
|
138
|
+
|
|
139
|
+
const result = await tool.fetch({ doc_url: '/docs/test' });
|
|
140
|
+
expect(fetchMock).toHaveBeenCalledWith('https://huggingface.co/docs/test', {
|
|
141
|
+
headers: { accept: 'text/markdown' },
|
|
142
|
+
});
|
|
143
|
+
expect(result).toContain('# Title');
|
|
144
|
+
});
|
|
145
|
+
|
|
146
|
+
it('normalizes ./docs paths to the huggingface docs host', async () => {
|
|
147
|
+
const fetchMock = stubFetch(() =>
|
|
148
|
+
createMockResponse({
|
|
149
|
+
content: '<h1>Another Title</h1><p>Body</p>',
|
|
150
|
+
}),
|
|
151
|
+
);
|
|
152
|
+
|
|
153
|
+
await tool.fetch({ doc_url: './docs/another' });
|
|
154
|
+
expect(fetchMock).toHaveBeenCalledWith('https://huggingface.co/docs/another', {
|
|
155
|
+
headers: { accept: 'text/markdown' },
|
|
156
|
+
});
|
|
157
|
+
});
|
|
158
|
+
|
|
84
159
|
it('should return subsequent chunks with offset', async () => {
|
|
85
160
|
// Mock fetch to return the same long HTML
|
|
86
161
|
const longHtml = '<h1>Long Document</h1>' + '<p>This is a very long sentence that will be repeated many times to create a document that exceeds the 7500 token limit for testing chunking functionality.</p>'.repeat(200);
|
|
87
162
|
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
163
|
+
stubFetch(() =>
|
|
164
|
+
createMockResponse({
|
|
165
|
+
content: longHtml,
|
|
166
|
+
}),
|
|
167
|
+
);
|
|
92
168
|
|
|
93
169
|
// Get first chunk
|
|
94
170
|
const firstChunk = await tool.fetch({ doc_url: 'https://huggingface.co/docs/test' });
|
|
@@ -106,10 +182,11 @@ describe('DocFetchTool', () => {
|
|
|
106
182
|
});
|
|
107
183
|
|
|
108
184
|
it('should handle offset beyond document length', async () => {
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
185
|
+
stubFetch(() =>
|
|
186
|
+
createMockResponse({
|
|
187
|
+
content: '<h1>Short Document</h1><p>This is short.</p>',
|
|
188
|
+
}),
|
|
189
|
+
);
|
|
113
190
|
|
|
114
191
|
const result = await tool.fetch({ doc_url: 'https://huggingface.co/docs/test', offset: 10000 });
|
|
115
192
|
|
|
@@ -67,7 +67,8 @@ export class DocFetchTool {
|
|
|
67
67
|
const nodeName = ((node as unknown as { nodeName?: string }).nodeName || '').toLowerCase();
|
|
68
68
|
if (nodeName === 'img') {
|
|
69
69
|
try {
|
|
70
|
-
const src =
|
|
70
|
+
const src =
|
|
71
|
+
(node as unknown as { getAttribute?: (name: string) => string | null }).getAttribute?.('src') ??
|
|
71
72
|
((node as unknown as { src?: string }).src || '');
|
|
72
73
|
if (
|
|
73
74
|
/\.svg(\?|$)/i.test(src) ||
|
|
@@ -87,13 +88,22 @@ export class DocFetchTool {
|
|
|
87
88
|
this.turndownService.addRule('dropHeadingAnchors', {
|
|
88
89
|
filter: (node) => {
|
|
89
90
|
try {
|
|
90
|
-
const n = node as unknown as {
|
|
91
|
+
const n = node as unknown as {
|
|
92
|
+
nodeName?: string;
|
|
93
|
+
getAttribute?: (k: string) => string | null;
|
|
94
|
+
textContent?: string;
|
|
95
|
+
childNodes?: Array<{ nodeName?: string }>;
|
|
96
|
+
};
|
|
91
97
|
if ((n.nodeName || '').toLowerCase() !== 'a') return false;
|
|
92
98
|
const href = n.getAttribute?.('href') || '';
|
|
93
99
|
if (!href || !href.startsWith('#')) return false;
|
|
94
100
|
const text = (n.textContent || '').trim();
|
|
95
101
|
const children = (n as unknown as { childNodes?: Array<{ nodeName?: string }> }).childNodes || [];
|
|
96
|
-
const onlyIcons =
|
|
102
|
+
const onlyIcons =
|
|
103
|
+
children.length > 0 &&
|
|
104
|
+
children.every(
|
|
105
|
+
(c) => (c.nodeName || '').toLowerCase() === 'img' || (c.nodeName || '').toLowerCase() === 'svg'
|
|
106
|
+
);
|
|
97
107
|
const looksLikeEncodedSvg = /data:image\/svg\+xml|%3csvg|svg%2bxml/i.test(text);
|
|
98
108
|
const noAlnumText = text.length <= 3 && !/[a-z0-9]/i.test(text);
|
|
99
109
|
return onlyIcons || looksLikeEncodedSvg || noAlnumText;
|
|
@@ -101,7 +111,7 @@ export class DocFetchTool {
|
|
|
101
111
|
return false;
|
|
102
112
|
}
|
|
103
113
|
},
|
|
104
|
-
replacement: () => ''
|
|
114
|
+
replacement: () => '',
|
|
105
115
|
});
|
|
106
116
|
}
|
|
107
117
|
|
|
@@ -136,37 +146,35 @@ export class DocFetchTool {
|
|
|
136
146
|
const normalizedUrl = normalizeDocUrl(params.doc_url);
|
|
137
147
|
this.validateUrl(normalizedUrl);
|
|
138
148
|
|
|
139
|
-
const response = await fetch(normalizedUrl);
|
|
140
|
-
|
|
149
|
+
const response = await fetch(normalizedUrl, { headers: { accept: 'text/markdown' } });
|
|
141
150
|
if (!response.ok) {
|
|
142
151
|
throw new Error(`Failed to fetch document: ${response.status} ${response.statusText}`);
|
|
143
152
|
}
|
|
144
|
-
|
|
145
|
-
const
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
.replace(/\[[^\]]*(?:data:image\/svg\+xml|%3csvg|svg%2bxml)[^\]]*\]\([^)]*\)/gi, '');
|
|
153
|
+
let content = await response.text();
|
|
154
|
+
const contentType = response.headers.get('content-type') || '';
|
|
155
|
+
const isPlainOrMarkdown = contentType.includes('text/plain') || contentType.includes('text/markdown');
|
|
156
|
+
if (!isPlainOrMarkdown) {
|
|
157
|
+
// attempt conversion to markdown
|
|
158
|
+
content = this.turndownService.turndown(content);
|
|
159
|
+
|
|
160
|
+
// Post-process: strip any leftover SVG images that slipped past DOM filters
|
|
161
|
+
// - Markdown images pointing to data:image/svg+xml or *.svg
|
|
162
|
+
// - Empty links left behind after image removal: [](...)
|
|
163
|
+
content = content
|
|
164
|
+
.replace(/!\[[^\]]*\]\(\s*(?:data:image\/svg\+xml[^)]*|[^)]*\.svg(?:\?[^)]*)?)\s*\)/gi, '')
|
|
165
|
+
.replace(/\[\s*\]\(\s*[^)]*\s*\)/g, '');
|
|
166
|
+
|
|
167
|
+
// Remove anchors whose link text still contains encoded SVG payloads (edge cases)
|
|
168
|
+
content = content.replace(/\[[^\]]*(?:data:image\/svg\+xml|%3csvg|svg%2bxml)[^\]]*\]\([^)]*\)/gi, '');
|
|
169
|
+
}
|
|
160
170
|
|
|
161
171
|
// Apply chunking logic
|
|
162
|
-
return this.applyChunking(
|
|
172
|
+
return this.applyChunking(content, params.offset || 0);
|
|
163
173
|
} catch (error) {
|
|
164
174
|
throw new Error(`Failed to fetch document: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
165
175
|
}
|
|
166
176
|
}
|
|
167
177
|
|
|
168
|
-
|
|
169
|
-
|
|
170
178
|
/**
|
|
171
179
|
* Apply chunking logic to markdown content
|
|
172
180
|
*/
|
|
@@ -213,15 +221,23 @@ export class DocFetchTool {
|
|
|
213
221
|
* - Convert gradio.app → www.gradio.app so pages resolve correctly
|
|
214
222
|
*/
|
|
215
223
|
export function normalizeDocUrl(input: string): string {
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
224
|
+
try {
|
|
225
|
+
const trimmed = input.trim();
|
|
226
|
+
if (trimmed.startsWith('/docs')) {
|
|
227
|
+
return `https://huggingface.co${trimmed}`;
|
|
228
|
+
}
|
|
229
|
+
if (trimmed.startsWith('./docs')) {
|
|
230
|
+
return `https://huggingface.co/${trimmed.slice(2)}`;
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
const url = new URL(trimmed);
|
|
234
|
+
const host = url.hostname.toLowerCase();
|
|
235
|
+
if (host === 'gradio.app') {
|
|
236
|
+
url.hostname = 'www.gradio.app';
|
|
237
|
+
return url.toString();
|
|
238
|
+
}
|
|
239
|
+
return trimmed;
|
|
240
|
+
} catch {
|
|
241
|
+
return input;
|
|
242
|
+
}
|
|
227
243
|
}
|