@llmindset/hf-mcp 0.1.20 → 0.1.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/docs-search/doc-fetch.d.ts +9 -3
- package/dist/docs-search/doc-fetch.d.ts.map +1 -1
- package/dist/docs-search/doc-fetch.js +45 -20
- package/dist/docs-search/doc-fetch.js.map +1 -1
- package/dist/docs-search/doc-fetch.test.js +66 -40
- package/dist/docs-search/doc-fetch.test.js.map +1 -1
- package/dist/docs-search/docs-semantic-search.d.ts +3 -2
- package/dist/docs-search/docs-semantic-search.d.ts.map +1 -1
- package/dist/docs-search/docs-semantic-search.js +78 -32
- package/dist/docs-search/docs-semantic-search.js.map +1 -1
- package/dist/docs-search/docs-semantic-search.test.js +32 -0
- package/dist/docs-search/docs-semantic-search.test.js.map +1 -1
- package/dist/utilities.d.ts +1 -0
- package/dist/utilities.d.ts.map +1 -1
- package/dist/utilities.js +4 -0
- package/dist/utilities.js.map +1 -1
- package/package.json +3 -1
- package/src/docs-search/doc-fetch.test.ts +87 -56
- package/src/docs-search/doc-fetch.ts +70 -49
- package/src/docs-search/docs-semantic-search.test.ts +65 -0
- package/src/docs-search/docs-semantic-search.ts +121 -50
- package/src/utilities.ts +13 -0
- package/dist/docs-search/doc-mappings.d.ts +0 -7
- package/dist/docs-search/doc-mappings.d.ts.map +0 -1
- package/dist/docs-search/doc-mappings.js +0 -75
- package/dist/docs-search/doc-mappings.js.map +0 -1
- package/src/docs-search/doc-mappings.ts +0 -79
|
@@ -1,13 +1,16 @@
|
|
|
1
1
|
import { z } from 'zod';
|
|
2
2
|
export declare const DOC_FETCH_CONFIG: {
|
|
3
3
|
readonly name: "hf_doc_fetch";
|
|
4
|
-
readonly description: "Fetch a document from the Hugging Face documentation library.";
|
|
4
|
+
readonly description: "Fetch a document from the Hugging Face documentation library. For large documents, use offset to get subsequent chunks.";
|
|
5
5
|
readonly schema: z.ZodObject<{
|
|
6
6
|
doc_url: z.ZodString;
|
|
7
|
+
offset: z.ZodOptional<z.ZodNumber>;
|
|
7
8
|
}, "strip", z.ZodTypeAny, {
|
|
8
9
|
doc_url: string;
|
|
10
|
+
offset?: number | undefined;
|
|
9
11
|
}, {
|
|
10
12
|
doc_url: string;
|
|
13
|
+
offset?: number | undefined;
|
|
11
14
|
}>;
|
|
12
15
|
readonly annotations: {
|
|
13
16
|
readonly title: "Fetch a document from the Hugging Face library";
|
|
@@ -18,7 +21,10 @@ export declare const DOC_FETCH_CONFIG: {
|
|
|
18
21
|
};
|
|
19
22
|
export type DocFetchParams = z.infer<typeof DOC_FETCH_CONFIG.schema>;
|
|
20
23
|
export declare class DocFetchTool {
|
|
21
|
-
|
|
22
|
-
|
|
24
|
+
private turndownService;
|
|
25
|
+
constructor();
|
|
26
|
+
validateUrl(hfUrl: string): void;
|
|
27
|
+
fetch(params: DocFetchParams): Promise<string>;
|
|
28
|
+
private applyChunking;
|
|
23
29
|
}
|
|
24
30
|
//# sourceMappingURL=doc-fetch.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"doc-fetch.d.ts","sourceRoot":"","sources":["../../src/docs-search/doc-fetch.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;
|
|
1
|
+
{"version":3,"file":"doc-fetch.d.ts","sourceRoot":"","sources":["../../src/docs-search/doc-fetch.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAIxB,eAAO,MAAM,gBAAgB;;;;;;;;;;;;;;;;;;;CAqBnB,CAAC;AAEX,MAAM,MAAM,cAAc,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,gBAAgB,CAAC,MAAM,CAAC,CAAC;AAErE,qBAAa,YAAY;IACxB,OAAO,CAAC,eAAe,CAAkB;;IAYzC,WAAW,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI;IAS1B,KAAK,CAAC,MAAM,EAAE,cAAc,GAAG,OAAO,CAAC,MAAM,CAAC;IAyBpD,OAAO,CAAC,aAAa;CAoCrB"}
|
|
@@ -1,14 +1,20 @@
|
|
|
1
1
|
import { z } from 'zod';
|
|
2
|
-
import
|
|
2
|
+
import TurndownService from 'turndown';
|
|
3
|
+
import { estimateTokens } from '../utilities.js';
|
|
3
4
|
export const DOC_FETCH_CONFIG = {
|
|
4
5
|
name: 'hf_doc_fetch',
|
|
5
|
-
description: 'Fetch a document from the Hugging Face documentation library.',
|
|
6
|
+
description: 'Fetch a document from the Hugging Face documentation library. For large documents, use offset to get subsequent chunks.',
|
|
6
7
|
schema: z.object({
|
|
7
8
|
doc_url: z
|
|
8
9
|
.string()
|
|
9
10
|
.min(28, 'Url should start with https://huggingface.co/docs/')
|
|
10
11
|
.max(200, 'Query too long')
|
|
11
12
|
.describe('Hugging Face documentation URL'),
|
|
13
|
+
offset: z
|
|
14
|
+
.number()
|
|
15
|
+
.min(0)
|
|
16
|
+
.optional()
|
|
17
|
+
.describe('Token offset for large documents (use the offset from truncation message)'),
|
|
12
18
|
}),
|
|
13
19
|
annotations: {
|
|
14
20
|
title: 'Fetch a document from the Hugging Face library',
|
|
@@ -18,36 +24,55 @@ export const DOC_FETCH_CONFIG = {
|
|
|
18
24
|
},
|
|
19
25
|
};
|
|
20
26
|
export class DocFetchTool {
|
|
21
|
-
|
|
27
|
+
turndownService;
|
|
28
|
+
constructor() {
|
|
29
|
+
this.turndownService = new TurndownService({
|
|
30
|
+
headingStyle: 'atx',
|
|
31
|
+
codeBlockStyle: 'fenced',
|
|
32
|
+
});
|
|
33
|
+
}
|
|
34
|
+
validateUrl(hfUrl) {
|
|
22
35
|
if (!hfUrl.startsWith('https://huggingface.co/docs/')) {
|
|
23
36
|
throw new Error('That was not a valid Hugging Face document URL');
|
|
24
37
|
}
|
|
25
|
-
const urlPath = hfUrl.replace('https://huggingface.co/docs/', '').split('#')[0] || '';
|
|
26
|
-
const parts = urlPath.split('/');
|
|
27
|
-
const packageName = parts[0] || '';
|
|
28
|
-
let fetchUrl = hfUrl;
|
|
29
|
-
const mapping = DOC_MAPPINGS[packageName];
|
|
30
|
-
if (mapping) {
|
|
31
|
-
const remainingPath = parts.slice(1).join('/');
|
|
32
|
-
const filePath = remainingPath ? `${remainingPath}.md` : 'index.md';
|
|
33
|
-
const githubUrl = `https://raw.githubusercontent.com/${mapping.repo_id}/refs/heads/main/${mapping.doc_folder}/${filePath}`;
|
|
34
|
-
fetchUrl = githubUrl;
|
|
35
|
-
}
|
|
36
|
-
return fetchUrl;
|
|
37
38
|
}
|
|
38
|
-
async fetch(
|
|
39
|
+
async fetch(params) {
|
|
39
40
|
try {
|
|
40
|
-
|
|
41
|
-
const response = await fetch(
|
|
41
|
+
this.validateUrl(params.doc_url);
|
|
42
|
+
const response = await fetch(params.doc_url);
|
|
42
43
|
if (!response.ok) {
|
|
43
44
|
throw new Error(`Failed to fetch document: ${response.status} ${response.statusText}`);
|
|
44
45
|
}
|
|
45
|
-
const
|
|
46
|
-
|
|
46
|
+
const htmlContent = await response.text();
|
|
47
|
+
const fullMarkdownContent = this.turndownService.turndown(htmlContent);
|
|
48
|
+
return this.applyChunking(fullMarkdownContent, params.offset || 0);
|
|
47
49
|
}
|
|
48
50
|
catch (error) {
|
|
49
51
|
throw new Error(`Failed to fetch document: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
50
52
|
}
|
|
51
53
|
}
|
|
54
|
+
applyChunking(markdownContent, offset) {
|
|
55
|
+
const totalTokens = estimateTokens(markdownContent);
|
|
56
|
+
const maxTokensPerChunk = 7500;
|
|
57
|
+
const totalChars = markdownContent.length;
|
|
58
|
+
const charsPerToken = totalChars / totalTokens;
|
|
59
|
+
const startChar = Math.floor(offset * charsPerToken);
|
|
60
|
+
if (startChar >= totalChars) {
|
|
61
|
+
return `Error: Offset ${offset} is beyond the document length (${totalTokens} tokens total).`;
|
|
62
|
+
}
|
|
63
|
+
if (totalTokens <= maxTokensPerChunk && offset === 0) {
|
|
64
|
+
return markdownContent;
|
|
65
|
+
}
|
|
66
|
+
const maxCharsPerChunk = Math.floor(maxTokensPerChunk * charsPerToken);
|
|
67
|
+
const endChar = Math.min(startChar + maxCharsPerChunk, totalChars);
|
|
68
|
+
const chunk = markdownContent.slice(startChar, endChar);
|
|
69
|
+
const nextOffset = offset + estimateTokens(chunk);
|
|
70
|
+
const hasMore = nextOffset < totalTokens;
|
|
71
|
+
let result = chunk;
|
|
72
|
+
if (hasMore) {
|
|
73
|
+
result += `\n\n=== DOCUMENT TRUNCATED. CALL ${DOC_FETCH_CONFIG.name} WITH AN OFFSET OF ${nextOffset} FOR THE NEXT CHUNK ===`;
|
|
74
|
+
}
|
|
75
|
+
return result;
|
|
76
|
+
}
|
|
52
77
|
}
|
|
53
78
|
//# sourceMappingURL=doc-fetch.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"doc-fetch.js","sourceRoot":"","sources":["../../src/docs-search/doc-fetch.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,
|
|
1
|
+
{"version":3,"file":"doc-fetch.js","sourceRoot":"","sources":["../../src/docs-search/doc-fetch.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,eAAe,MAAM,UAAU,CAAC;AACvC,OAAO,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAEjD,MAAM,CAAC,MAAM,gBAAgB,GAAG;IAC/B,IAAI,EAAE,cAAc;IACpB,WAAW,EAAE,yHAAyH;IACtI,MAAM,EAAE,CAAC,CAAC,MAAM,CAAC;QAChB,OAAO,EAAE,CAAC;aACR,MAAM,EAAE;aACR,GAAG,CAAC,EAAE,EAAE,oDAAoD,CAAC;aAC7D,GAAG,CAAC,GAAG,EAAE,gBAAgB,CAAC;aAC1B,QAAQ,CAAC,gCAAgC,CAAC;QAC5C,MAAM,EAAE,CAAC;aACP,MAAM,EAAE;aACR,GAAG,CAAC,CAAC,CAAC;aACN,QAAQ,EAAE;aACV,QAAQ,CAAC,2EAA2E,CAAC;KACvF,CAAC;IACF,WAAW,EAAE;QACZ,KAAK,EAAE,gDAAgD;QACvD,eAAe,EAAE,KAAK;QACtB,YAAY,EAAE,IAAI;QAClB,aAAa,EAAE,IAAI;KACnB;CACQ,CAAC;AAIX,MAAM,OAAO,YAAY;IAChB,eAAe,CAAkB;IAEzC;QACC,IAAI,CAAC,eAAe,GAAG,IAAI,eAAe,CAAC;YAC1C,YAAY,EAAE,KAAK;YACnB,cAAc,EAAE,QAAQ;SACxB,CAAC,CAAC;IACJ,CAAC;IAKD,WAAW,CAAC,KAAa;QACxB,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,8BAA8B,CAAC,EAAE,CAAC;YACvD,MAAM,IAAI,KAAK,CAAC,gDAAgD,CAAC,CAAC;QACnE,CAAC;IACF,CAAC;IAKD,KAAK,CAAC,KAAK,CAAC,MAAsB;QACjC,IAAI,CAAC;YACJ,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;YAEjC,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;YAE7C,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;gBAClB,MAAM,IAAI,KAAK,CAAC,6BAA6B,QAAQ,CAAC,MAAM,IAAI,QAAQ,CAAC,UAAU,EAAE,CAAC,CAAC;YACxF,CAAC;YAED,MAAM,WAAW,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YAG1C,MAAM,mBAAmB,GAAG,IAAI,CAAC,eAAe,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC;YAGvE,OAAO,IAAI,CAAC,aAAa,CAAC,mBAAmB,EAAE,MAAM,CAAC,MAAM,IAAI,CAAC,CAAC,CAAC;QACpE,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YAChB,MAAM,IAAI,KAAK,CAAC,6BAA6B,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,EAAE,CAAC,CAAC;QAC1G,CAAC;IACF,CAAC;IAKO,aAAa,CAAC,eAAuB,EAAE,MAAc;QAC5D,MAAM,WAAW,GAAG,cAAc,CAAC,eAAe,CAAC,CAAC;QACpD,MAAM,iBAAiB,GAAG,IAAI,CAAC;QAG/B,MAAM,UAAU,GAAG,eAAe,CAAC,MAAM,CAAC;QAC1C,MAAM,aAAa,GAAG,UAAU,GAAG,WAAW,CAAC;QAC/C,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,aAAa,CAAC,CAAC;QAGrD,IAAI,SAAS,IAAI,UAAU,EAAE,CAAC;YAC7B,OAAO,iBAAiB,MAAM,mCAAmC,WAAW,iBAAiB,CAAC;QAC/F,CAAC;QAGD,IAAI,WAAW,IAAI,iBAAiB,IAAI,MAAM,KAAK,CAAC,EAAE,CAAC;YACtD,OAAO,eAAe,CAAC;QACxB,CAAC;QAED,MAAM,gBAAgB,GAAG,IAAI,CAAC,KAAK,CAAC,iBAAiB,GAAG,aAAa,CAAC,CAAC;QACvE,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,SAAS,GAAG,gBAAgB,EAAE,UAAU,CAAC,CAAC;QACnE,MAAM,KAAK,GAAG,eAAe,CAAC,KAAK,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;QAGxD,MAAM,UAAU,GAAG,MAAM,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC;QAClD,MAAM,OAAO,GAAG,UAAU,GAAG,WAAW,CAAC;QAEzC,IAAI,MAAM,GAAG,KAAK,CAAC;QAGnB,IAAI,OAAO,EAAE,CAAC;YACb,MAAM,IAAI,oCAAoC,gBAAgB,CAAC,IAAI,sBAAsB,UAAU,yBAAyB,CAAC;QAC9H,CAAC;QAED,OAAO,MAAM,CAAC;IACf,CAAC;CACD"}
|
|
@@ -1,51 +1,77 @@
|
|
|
1
|
-
import { describe, it, expect } from 'vitest';
|
|
1
|
+
import { describe, it, expect, vi } from 'vitest';
|
|
2
2
|
import { DocFetchTool } from './doc-fetch.js';
|
|
3
3
|
describe('DocFetchTool', () => {
|
|
4
4
|
const tool = new DocFetchTool();
|
|
5
|
-
describe('URL
|
|
6
|
-
it('should
|
|
7
|
-
const
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
5
|
+
describe('URL validation', () => {
|
|
6
|
+
it('should accept valid HF docs URLs', () => {
|
|
7
|
+
const validUrls = [
|
|
8
|
+
'https://huggingface.co/docs/dataset-viewer/index',
|
|
9
|
+
'https://huggingface.co/docs/huggingface_hub/guides/upload#faster-uploads',
|
|
10
|
+
'https://huggingface.co/docs/transformers/model_doc/bert',
|
|
11
|
+
'https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion',
|
|
12
|
+
'https://huggingface.co/docs/timm/models',
|
|
13
|
+
'https://huggingface.co/docs/transformers',
|
|
14
|
+
];
|
|
15
|
+
for (const url of validUrls) {
|
|
16
|
+
expect(() => tool.validateUrl(url)).not.toThrow();
|
|
17
|
+
}
|
|
11
18
|
});
|
|
12
|
-
it('should
|
|
13
|
-
const
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
const
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
});
|
|
24
|
-
it('should handle diffusers documentation', () => {
|
|
25
|
-
const hfUrl = 'https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion';
|
|
26
|
-
const expectedUrl = 'https://raw.githubusercontent.com/huggingface/diffusers/refs/heads/main/docs/source/en/api/pipelines/stable_diffusion.md';
|
|
27
|
-
const result = tool.processUrl(hfUrl);
|
|
28
|
-
expect(result).toBe(expectedUrl);
|
|
19
|
+
it('should throw error for URLs not starting with correct prefix', () => {
|
|
20
|
+
const invalidUrls = [
|
|
21
|
+
'https://example.com/docs/something',
|
|
22
|
+
'https://github.com/huggingface/transformers',
|
|
23
|
+
'http://huggingface.co/docs/transformers',
|
|
24
|
+
'huggingface.co/docs/transformers',
|
|
25
|
+
'https://huggingface.co/models/bert-base-uncased',
|
|
26
|
+
];
|
|
27
|
+
for (const url of invalidUrls) {
|
|
28
|
+
expect(() => tool.validateUrl(url)).toThrow('That was not a valid Hugging Face document URL');
|
|
29
|
+
}
|
|
29
30
|
});
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
31
|
+
});
|
|
32
|
+
describe('document chunking', () => {
|
|
33
|
+
it('should return small documents without chunking', async () => {
|
|
34
|
+
global.fetch = vi.fn().mockResolvedValue({
|
|
35
|
+
ok: true,
|
|
36
|
+
text: () => Promise.resolve('<h1>Short Document</h1><p>This is a short document.</p>'),
|
|
37
|
+
});
|
|
38
|
+
const result = await tool.fetch({ doc_url: 'https://huggingface.co/docs/test' });
|
|
39
|
+
expect(result).toContain('# Short Document');
|
|
40
|
+
expect(result).toContain('This is a short document');
|
|
41
|
+
expect(result).not.toContain('DOCUMENT TRUNCATED');
|
|
35
42
|
});
|
|
36
|
-
it('should
|
|
37
|
-
const
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
43
|
+
it('should chunk large documents and show truncation message', async () => {
|
|
44
|
+
const longHtml = '<h1>Long Document</h1>' + '<p>This is a very long sentence that will be repeated many times to create a document that exceeds the 7500 token limit for testing chunking functionality.</p>'.repeat(200);
|
|
45
|
+
global.fetch = vi.fn().mockResolvedValue({
|
|
46
|
+
ok: true,
|
|
47
|
+
text: () => Promise.resolve(longHtml),
|
|
48
|
+
});
|
|
49
|
+
const result = await tool.fetch({ doc_url: 'https://huggingface.co/docs/test' });
|
|
50
|
+
expect(result).toContain('# Long Document');
|
|
51
|
+
expect(result).toContain('DOCUMENT TRUNCATED');
|
|
52
|
+
expect(result).toContain('CALL hf_doc_fetch WITH AN OFFSET OF');
|
|
41
53
|
});
|
|
42
|
-
it('should
|
|
43
|
-
const
|
|
44
|
-
|
|
54
|
+
it('should return subsequent chunks with offset', async () => {
|
|
55
|
+
const longHtml = '<h1>Long Document</h1>' + '<p>This is a very long sentence that will be repeated many times to create a document that exceeds the 7500 token limit for testing chunking functionality.</p>'.repeat(200);
|
|
56
|
+
global.fetch = vi.fn().mockResolvedValue({
|
|
57
|
+
ok: true,
|
|
58
|
+
text: () => Promise.resolve(longHtml),
|
|
59
|
+
});
|
|
60
|
+
const firstChunk = await tool.fetch({ doc_url: 'https://huggingface.co/docs/test' });
|
|
61
|
+
const offsetMatch = firstChunk.match(/OFFSET OF (\d+)/);
|
|
62
|
+
expect(offsetMatch).toBeTruthy();
|
|
63
|
+
const offset = parseInt(offsetMatch?.[1] || '0', 10);
|
|
64
|
+
const secondChunk = await tool.fetch({ doc_url: 'https://huggingface.co/docs/test', offset });
|
|
65
|
+
expect(secondChunk).not.toEqual(firstChunk);
|
|
66
|
+
expect(secondChunk.length).toBeGreaterThan(0);
|
|
45
67
|
});
|
|
46
|
-
it('should
|
|
47
|
-
|
|
48
|
-
|
|
68
|
+
it('should handle offset beyond document length', async () => {
|
|
69
|
+
global.fetch = vi.fn().mockResolvedValue({
|
|
70
|
+
ok: true,
|
|
71
|
+
text: () => Promise.resolve('<h1>Short Document</h1><p>This is short.</p>'),
|
|
72
|
+
});
|
|
73
|
+
const result = await tool.fetch({ doc_url: 'https://huggingface.co/docs/test', offset: 10000 });
|
|
74
|
+
expect(result).toContain('Error: Offset 10000 is beyond');
|
|
49
75
|
});
|
|
50
76
|
});
|
|
51
77
|
});
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"doc-fetch.test.js","sourceRoot":"","sources":["../../src/docs-search/doc-fetch.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;
|
|
1
|
+
{"version":3,"file":"doc-fetch.test.js","sourceRoot":"","sources":["../../src/docs-search/doc-fetch.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,QAAQ,CAAC;AAClD,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAE9C,QAAQ,CAAC,cAAc,EAAE,GAAG,EAAE;IAC7B,MAAM,IAAI,GAAG,IAAI,YAAY,EAAE,CAAC;IAEhC,QAAQ,CAAC,gBAAgB,EAAE,GAAG,EAAE;QAC/B,EAAE,CAAC,kCAAkC,EAAE,GAAG,EAAE;YAC3C,MAAM,SAAS,GAAG;gBACjB,kDAAkD;gBAClD,0EAA0E;gBAC1E,yDAAyD;gBACzD,sEAAsE;gBACtE,yCAAyC;gBACzC,0CAA0C;aAC1C,CAAC;YAEF,KAAK,MAAM,GAAG,IAAI,SAAS,EAAE,CAAC;gBAC7B,MAAM,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,EAAE,CAAC;YACnD,CAAC;QACF,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,8DAA8D,EAAE,GAAG,EAAE;YACvE,MAAM,WAAW,GAAG;gBACnB,oCAAoC;gBACpC,6CAA6C;gBAC7C,yCAAyC;gBACzC,kCAAkC;gBAClC,iDAAiD;aACjD,CAAC;YAEF,KAAK,MAAM,GAAG,IAAI,WAAW,EAAE,CAAC;gBAC/B,MAAM,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,gDAAgD,CAAC,CAAC;YAC/F,CAAC;QACF,CAAC,CAAC,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,mBAAmB,EAAE,GAAG,EAAE;QAClC,EAAE,CAAC,gDAAgD,EAAE,KAAK,IAAI,EAAE;YAG/D,MAAM,CAAC,KAAK,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC;gBACxC,EAAE,EAAE,IAAI;gBACR,IAAI,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,OAAO,CAAC,yDAAyD,CAAC;aACtF,CAAC,CAAC;YAEH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,EAAE,OAAO,EAAE,kCAAkC,EAAE,CAAC,CAAC;YAEjF,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,kBAAkB,CAAC,CAAC;YAC7C,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,0BAA0B,CAAC,CAAC;YACrD,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,oBAAoB,CAAC,CAAC;QACpD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,0DAA0D,EAAE,KAAK,IAAI,EAAE;YAEzE,MAAM,QAAQ,GAAG,wBAAwB,GAAG,iKAAiK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YAE1N,MAAM,CAAC,KAAK,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC;gBACxC,EAAE,EAAE,IAAI;gBACR,IAAI,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,OAAO,CAAC,QAAQ,CAAC;aACrC,CAAC,CAAC;YAEH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,EAAE,OAAO,EAAE,kCAAkC,EAAE,CAAC,CAAC;YAEjF,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,iBAAiB,CAAC,CAAC;YAC5C,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,oBAAoB,CAAC,CAAC;YAC/C,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,qCAAqC,CAAC,CAAC;QACjE,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,6CAA6C,EAAE,KAAK,IAAI,EAAE;YAE5D,MAAM,QAAQ,GAAG,wBAAwB,GAAG,iKAAiK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YAE1N,MAAM,CAAC,KAAK,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC;gBACxC,EAAE,EAAE,IAAI;gBACR,IAAI,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,OAAO,CAAC,QAAQ,CAAC;aACrC,CAAC,CAAC;YAGH,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,EAAE,OAAO,EAAE,kCAAkC,EAAE,CAAC,CAAC;YAGrF,MAAM,WAAW,GAAG,UAAU,CAAC,KAAK,CAAC,iBAAiB,CAAC,CAAC;YACxD,MAAM,CAAC,WAAW,CAAC,CAAC,UAAU,EAAE,CAAC;YACjC,MAAM,MAAM,GAAG,QAAQ,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,IAAI,GAAG,EAAE,EAAE,CAAC,CAAC;YAGrD,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,EAAE,OAAO,EAAE,kCAAkC,EAAE,MAAM,EAAE,CAAC,CAAC;YAE9F,MAAM,CAAC,WAAW,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;YAC5C,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QAC/C,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,6CAA6C,EAAE,KAAK,IAAI,EAAE;YAC5D,MAAM,CAAC,KAAK,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC;gBACxC,EAAE,EAAE,IAAI;gBACR,IAAI,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,OAAO,CAAC,8CAA8C,CAAC;aAC3E,CAAC,CAAC;YAEH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,EAAE,OAAO,EAAE,kCAAkC,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,CAAC;YAEhG,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,+BAA+B,CAAC,CAAC;QAC3D,CAAC,CAAC,CAAC;IACJ,CAAC,CAAC,CAAC;AACJ,CAAC,CAAC,CAAC"}
|
|
@@ -2,7 +2,7 @@ import { z } from 'zod';
|
|
|
2
2
|
import { HfApiCall } from '../hf-api-call.js';
|
|
3
3
|
export declare const DOCS_SEMANTIC_SEARCH_CONFIG: {
|
|
4
4
|
readonly name: "hf_doc_search";
|
|
5
|
-
readonly description:
|
|
5
|
+
readonly description: string;
|
|
6
6
|
readonly schema: z.ZodObject<{
|
|
7
7
|
query: z.ZodString;
|
|
8
8
|
product: z.ZodOptional<z.ZodString>;
|
|
@@ -34,7 +34,8 @@ interface DocSearchApiParams {
|
|
|
34
34
|
product?: string;
|
|
35
35
|
}
|
|
36
36
|
export declare class DocSearchTool extends HfApiCall<DocSearchApiParams, DocSearchResult[]> {
|
|
37
|
-
|
|
37
|
+
private tokenBudget;
|
|
38
|
+
constructor(hfToken?: string, apiUrl?: string, tokenBudget?: number);
|
|
38
39
|
search(params: DocSearchParams): Promise<string>;
|
|
39
40
|
}
|
|
40
41
|
export {};
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"docs-semantic-search.d.ts","sourceRoot":"","sources":["../../src/docs-search/docs-semantic-search.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC;
|
|
1
|
+
{"version":3,"file":"docs-semantic-search.d.ts","sourceRoot":"","sources":["../../src/docs-search/docs-semantic-search.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC;AAO9C,eAAO,MAAM,2BAA2B;;;;;;;;;;;;;;;;;;;CAwB9B,CAAC;AAEX,MAAM,MAAM,eAAe,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,2BAA2B,CAAC,MAAM,CAAC,CAAC;AAEjF,UAAU,eAAe;IACxB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,eAAe,EAAE,MAAM,CAAC;IACxB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,QAAQ,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,UAAU,kBAAkB;IAC3B,CAAC,EAAE,MAAM,CAAC;IACV,OAAO,CAAC,EAAE,MAAM,CAAC;CACjB;AASD,qBAAa,aAAc,SAAQ,SAAS,CAAC,kBAAkB,EAAE,eAAe,EAAE,CAAC;IAClF,OAAO,CAAC,WAAW,CAAS;gBAOhB,OAAO,CAAC,EAAE,MAAM,EAAE,MAAM,SAAkC,EAAE,WAAW,SAAuB;IASpG,MAAM,CAAC,MAAM,EAAE,eAAe,GAAG,OAAO,CAAC,MAAM,CAAC;CAyBtD"}
|
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
import { z } from 'zod';
|
|
2
2
|
import { HfApiCall } from '../hf-api-call.js';
|
|
3
|
-
import { escapeMarkdown } from '../utilities.js';
|
|
3
|
+
import { escapeMarkdown, estimateTokens } from '../utilities.js';
|
|
4
4
|
import { DOC_FETCH_CONFIG } from './doc-fetch.js';
|
|
5
5
|
export const DOCS_SEMANTIC_SEARCH_CONFIG = {
|
|
6
6
|
name: 'hf_doc_search',
|
|
7
|
-
description: 'Search the Hugging Face documentation library.
|
|
7
|
+
description: 'Search the Hugging Face documentation library. Use this for the most up-to-date information ' +
|
|
8
|
+
'Returns excerpts grouped by Product and Document.',
|
|
8
9
|
schema: z.object({
|
|
9
10
|
query: z
|
|
10
11
|
.string()
|
|
@@ -23,9 +24,13 @@ export const DOCS_SEMANTIC_SEARCH_CONFIG = {
|
|
|
23
24
|
openWorldHint: true,
|
|
24
25
|
},
|
|
25
26
|
};
|
|
27
|
+
const DEFAULT_TOKEN_BUDGET = 12500;
|
|
28
|
+
const TRUNCATE_EXCERPT_LENGTH = 400;
|
|
26
29
|
export class DocSearchTool extends HfApiCall {
|
|
27
|
-
|
|
30
|
+
tokenBudget;
|
|
31
|
+
constructor(hfToken, apiUrl = 'https://hf.co/api/docs/search', tokenBudget = DEFAULT_TOKEN_BUDGET) {
|
|
28
32
|
super(apiUrl, hfToken);
|
|
33
|
+
this.tokenBudget = tokenBudget;
|
|
29
34
|
}
|
|
30
35
|
async search(params) {
|
|
31
36
|
try {
|
|
@@ -41,7 +46,7 @@ export class DocSearchTool extends HfApiCall {
|
|
|
41
46
|
? `No documentation found for query '${params.query}' in product '${params.product}'`
|
|
42
47
|
: `No documentation found for query '${params.query}'`;
|
|
43
48
|
}
|
|
44
|
-
return formatSearchResults(params.query, results, params.product);
|
|
49
|
+
return formatSearchResults(params.query, results, params.product, this.tokenBudget);
|
|
45
50
|
}
|
|
46
51
|
catch (error) {
|
|
47
52
|
if (error instanceof Error) {
|
|
@@ -85,34 +90,42 @@ function groupBySection(pageResults) {
|
|
|
85
90
|
}
|
|
86
91
|
return sectionGroups;
|
|
87
92
|
}
|
|
88
|
-
function formatSectionExcerpts(section, results) {
|
|
93
|
+
function formatSectionExcerpts(section, results, useTruncatedMode, hasAlreadyShownTruncation) {
|
|
89
94
|
const lines = [];
|
|
95
|
+
let tokensUsed = 0;
|
|
96
|
+
let wasContentTruncated = false;
|
|
90
97
|
if (section) {
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
}
|
|
97
|
-
lines.push('');
|
|
98
|
+
const heading = results.length > 1
|
|
99
|
+
? `#### Excerpts from the "${escapeMarkdown(section)}" section`
|
|
100
|
+
: `#### Excerpt from the "${escapeMarkdown(section)}" section`;
|
|
101
|
+
lines.push(heading, '');
|
|
102
|
+
tokensUsed += estimateTokens(heading + '\n\n');
|
|
98
103
|
}
|
|
99
104
|
for (const result of results) {
|
|
100
|
-
|
|
105
|
+
let cleanText = result.text
|
|
101
106
|
.replace(/<[^>]*>/g, '')
|
|
102
107
|
.replace(/\n\s*\n/g, '\n')
|
|
103
108
|
.trim();
|
|
104
|
-
|
|
105
|
-
|
|
109
|
+
if (useTruncatedMode && cleanText.length > TRUNCATE_EXCERPT_LENGTH && !hasAlreadyShownTruncation) {
|
|
110
|
+
cleanText =
|
|
111
|
+
cleanText.substring(0, TRUNCATE_EXCERPT_LENGTH) +
|
|
112
|
+
`...\n\n*[Content truncated - use ${DOC_FETCH_CONFIG.name} for full text or narrow search terms]*`;
|
|
113
|
+
wasContentTruncated = true;
|
|
114
|
+
}
|
|
115
|
+
lines.push(cleanText, '');
|
|
116
|
+
tokensUsed += estimateTokens(cleanText + '\n\n');
|
|
106
117
|
}
|
|
107
|
-
|
|
118
|
+
if (lines.length > 0 && lines[lines.length - 1] === '') {
|
|
119
|
+
lines.pop();
|
|
120
|
+
}
|
|
121
|
+
return { text: lines.join('\n'), tokensUsed, wasContentTruncated };
|
|
108
122
|
}
|
|
109
|
-
function formatSearchResults(query, results, productFilter) {
|
|
123
|
+
function formatSearchResults(query, results, productFilter, tokenBudget = DEFAULT_TOKEN_BUDGET) {
|
|
110
124
|
const lines = [];
|
|
125
|
+
let hasShownTruncationMessage = false;
|
|
111
126
|
const filterText = productFilter ? ` (filtered by product: ${productFilter})` : '';
|
|
112
|
-
|
|
113
|
-
lines.push(
|
|
114
|
-
lines.push(`Found ${results.length} results`);
|
|
115
|
-
lines.push('');
|
|
127
|
+
const header = `# Documentation Library Search Results for "${escapeMarkdown(query)}"${filterText}\n\nFound ${results.length} results\n`;
|
|
128
|
+
lines.push(header);
|
|
116
129
|
const grouped = groupResults(results);
|
|
117
130
|
const sortedProducts = Array.from(grouped.keys()).sort((a, b) => {
|
|
118
131
|
const productGroupA = grouped.get(a);
|
|
@@ -123,13 +136,29 @@ function formatSearchResults(query, results, productFilter) {
|
|
|
123
136
|
const countB = Array.from(productGroupB.values()).reduce((sum, arr) => sum + arr.length, 0);
|
|
124
137
|
return countB - countA;
|
|
125
138
|
});
|
|
139
|
+
const linkOnlyResults = [];
|
|
126
140
|
for (const product of sortedProducts) {
|
|
127
141
|
const productGroup = grouped.get(product);
|
|
128
142
|
if (!productGroup)
|
|
129
143
|
continue;
|
|
144
|
+
const currentText = lines.join('\n');
|
|
145
|
+
if (estimateTokens(currentText) > tokenBudget) {
|
|
146
|
+
for (const url of productGroup.keys()) {
|
|
147
|
+
const pageResults = productGroup.get(url);
|
|
148
|
+
if (!pageResults?.[0])
|
|
149
|
+
continue;
|
|
150
|
+
linkOnlyResults.push({
|
|
151
|
+
product,
|
|
152
|
+
url,
|
|
153
|
+
title: pageResults[0].heading1 || pageResults[0].source_page_title,
|
|
154
|
+
count: pageResults.length,
|
|
155
|
+
});
|
|
156
|
+
}
|
|
157
|
+
continue;
|
|
158
|
+
}
|
|
130
159
|
const totalProductHits = Array.from(productGroup.values()).reduce((sum, arr) => sum + arr.length, 0);
|
|
131
|
-
|
|
132
|
-
lines.push(
|
|
160
|
+
const productHeader = `## Results for Product: ${escapeMarkdown(product)} (${totalProductHits} results)\n`;
|
|
161
|
+
lines.push(productHeader);
|
|
133
162
|
const sortedUrls = Array.from(productGroup.keys()).sort((a, b) => {
|
|
134
163
|
const pageResultsA = productGroup.get(a);
|
|
135
164
|
const pageResultsB = productGroup.get(b);
|
|
@@ -139,23 +168,40 @@ function formatSearchResults(query, results, productFilter) {
|
|
|
139
168
|
});
|
|
140
169
|
for (const url of sortedUrls) {
|
|
141
170
|
const pageResults = productGroup.get(url);
|
|
142
|
-
if (!pageResults
|
|
171
|
+
if (!pageResults?.[0])
|
|
143
172
|
continue;
|
|
144
|
-
const
|
|
145
|
-
|
|
173
|
+
const pageTitle = pageResults[0].heading1 || pageResults[0].source_page_title;
|
|
174
|
+
const currentText = lines.join('\n');
|
|
175
|
+
if (estimateTokens(currentText) > tokenBudget) {
|
|
176
|
+
linkOnlyResults.push({ product, url, title: pageTitle, count: pageResults.length });
|
|
146
177
|
continue;
|
|
147
|
-
|
|
178
|
+
}
|
|
148
179
|
const hitCount = pageResults.length > 1 ? ` (${pageResults.length} results)` : '';
|
|
149
|
-
|
|
150
|
-
lines.push(
|
|
180
|
+
const pageHeader = `### Results from [${escapeMarkdown(pageTitle)}](${url})${hitCount}\n`;
|
|
181
|
+
lines.push(pageHeader);
|
|
151
182
|
const sectionGroups = groupBySection(pageResults);
|
|
152
183
|
for (const [section, sectionResults] of sectionGroups) {
|
|
153
|
-
lines.
|
|
184
|
+
const currentTokens = estimateTokens(lines.join('\n'));
|
|
185
|
+
const useTruncatedMode = currentTokens > tokenBudget * 0.7;
|
|
186
|
+
const result = formatSectionExcerpts(section, sectionResults, useTruncatedMode, hasShownTruncationMessage);
|
|
187
|
+
if (result.text.trim()) {
|
|
188
|
+
lines.push(result.text);
|
|
189
|
+
if (result.wasContentTruncated) {
|
|
190
|
+
hasShownTruncationMessage = true;
|
|
191
|
+
}
|
|
192
|
+
}
|
|
154
193
|
}
|
|
155
194
|
}
|
|
156
195
|
}
|
|
157
|
-
|
|
158
|
-
|
|
196
|
+
if (linkOnlyResults.length > 0) {
|
|
197
|
+
lines.push(`\n## Further results were found in:\n`);
|
|
198
|
+
for (const linkResult of linkOnlyResults) {
|
|
199
|
+
const hitText = linkResult.count > 1 ? ` (${linkResult.count} results)` : '';
|
|
200
|
+
lines.push(`- [${escapeMarkdown(linkResult.title)}](${linkResult.url})${hitText} *(${linkResult.product})*`);
|
|
201
|
+
}
|
|
202
|
+
lines.push('');
|
|
203
|
+
}
|
|
204
|
+
lines.push('---\n');
|
|
159
205
|
lines.push(`Use the "${DOC_FETCH_CONFIG.name}" tool to fetch a document from the library.`);
|
|
160
206
|
return lines.join('\n');
|
|
161
207
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"docs-semantic-search.js","sourceRoot":"","sources":["../../src/docs-search/docs-semantic-search.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC;AAC9C,OAAO,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;
|
|
1
|
+
{"version":3,"file":"docs-semantic-search.js","sourceRoot":"","sources":["../../src/docs-search/docs-semantic-search.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC;AAC9C,OAAO,EAAE,cAAc,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AACjE,OAAO,EAAE,gBAAgB,EAAE,MAAM,gBAAgB,CAAC;AAKlD,MAAM,CAAC,MAAM,2BAA2B,GAAG;IAC1C,IAAI,EAAE,eAAe;IACrB,WAAW,EACV,8FAA8F;QAC9F,mDAAmD;IACpD,MAAM,EAAE,CAAC,CAAC,MAAM,CAAC;QAChB,KAAK,EAAE,CAAC;aACN,MAAM,EAAE;aACR,GAAG,CAAC,CAAC,EAAE,iCAAiC,CAAC;aACzC,GAAG,CAAC,GAAG,EAAE,gBAAgB,CAAC;aAC1B,QAAQ,CAAC,uBAAuB,CAAC;QACnC,OAAO,EAAE,CAAC;aACR,MAAM,EAAE;aACR,QAAQ,EAAE;aACV,QAAQ,CACR,0GAA0G,CAC1G;KACF,CAAC;IACF,WAAW,EAAE;QACZ,KAAK,EAAE,2CAA2C;QAClD,eAAe,EAAE,KAAK;QACtB,YAAY,EAAE,IAAI;QAClB,aAAa,EAAE,IAAI;KACnB;CACQ,CAAC;AAmBX,MAAM,oBAAoB,GAAG,KAAK,CAAC;AACnC,MAAM,uBAAuB,GAAG,GAAG,CAAC;AAKpC,MAAM,OAAO,aAAc,SAAQ,SAAgD;IAC1E,WAAW,CAAS;IAO5B,YAAY,OAAgB,EAAE,MAAM,GAAG,+BAA+B,EAAE,WAAW,GAAG,oBAAoB;QACzG,KAAK,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QACvB,IAAI,CAAC,WAAW,GAAG,WAAW,CAAC;IAChC,CAAC;IAMD,KAAK,CAAC,MAAM,CAAC,MAAuB;QACnC,IAAI,CAAC;YACJ,IAAI,CAAC,MAAM,CAAC,KAAK;gBAAE,OAAO,mBAAmB,CAAC;YAE9C,MAAM,SAAS,GAAuB,EAAE,CAAC,EAAE,MAAM,CAAC,KAAK,CAAC,WAAW,EAAE,EAAE,CAAC;YACxE,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;gBACpB,SAAS,CAAC,OAAO,GAAG,MAAM,CAAC,OAAO,CAAC;YACpC,CAAC;YAED,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,OAAO,CAAoB,SAAS,CAAC,CAAC;YAEjE,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBAC1B,OAAO,MAAM,CAAC,OAAO;oBACpB,CAAC,CAAC,qCAAqC,MAAM,CAAC,KAAK,iBAAiB,MAAM,CAAC,OAAO,GAAG;oBACrF,CAAC,CAAC,qCAAqC,MAAM,CAAC,KAAK,GAAG,CAAC;YACzD,CAAC;YAED,OAAO,mBAAmB,CAAC,MAAM,CAAC,KAAK,EAAE,OAAO,EAAE,MAAM,CAAC,OAAO,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC;QACrF,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YAChB,IAAI,KAAK,YAAY,KAAK,EAAE,CAAC;gBAC5B,MAAM,IAAI,KAAK,CAAC,mCAAmC,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;YACrE,CAAC;YACD,MAAM,KAAK,CAAC;QACb,CAAC;IACF,CAAC;CACD;AAKD,SAAS,YAAY,CAAC,OAA0B;IAC/C,MAAM,OAAO,GAAG,IAAI,GAAG,EAA0C,CAAC;IAElE,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC9B,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,EAAE,CAAC;YAClC,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,OAAO,EAAE,IAAI,GAAG,EAAE,CAAC,CAAC;QACxC,CAAC;QAED,MAAM,YAAY,GAAG,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QACjD,IAAI,CAAC,YAAY;YAAE,SAAS;QAG5B,MAAM,OAAO,GAAG,MAAM,CAAC,eAAe,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC,eAAe,CAAC;QAE/E,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC;YAChC,YAAY,CAAC,GAAG,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;QAC/B,CAAC;QAED,MAAM,WAAW,GAAG,YAAY,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;QAC9C,IAAI,WAAW,EAAE,CAAC;YACjB,WAAW,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAC1B,CAAC;IACF,CAAC;IAED,OAAO,OAAO,CAAC;AAChB,CAAC;AAKD,SAAS,cAAc,CAAC,WAA8B;IACrD,MAAM,aAAa,GAAG,IAAI,GAAG,EAAyC,CAAC;IAEvE,KAAK,MAAM,MAAM,IAAI,WAAW,EAAE,CAAC;QAClC,MAAM,OAAO,GAAG,MAAM,CAAC,QAAQ,CAAC;QAChC,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC;YACjC,aAAa,CAAC,GAAG,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;QAChC,CAAC;QACD,MAAM,cAAc,GAAG,aAAa,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;QAClD,IAAI,cAAc,EAAE,CAAC;YACpB,cAAc,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAC7B,CAAC;IACF,CAAC;IAED,OAAO,aAAa,CAAC;AACtB,CAAC;AAKD,SAAS,qBAAqB,CAC7B,OAA2B,EAC3B,OAA0B,EAC1B,gBAAyB,EACzB,yBAAkC;IAElC,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,IAAI,UAAU,GAAG,CAAC,CAAC;IACnB,IAAI,mBAAmB,GAAG,KAAK,CAAC;IAGhC,IAAI,OAAO,EAAE,CAAC;QACb,MAAM,OAAO,GACZ,OAAO,CAAC,MAAM,GAAG,CAAC;YACjB,CAAC,CAAC,2BAA2B,cAAc,CAAC,OAAO,CAAC,WAAW;YAC/D,CAAC,CAAC,0BAA0B,cAAc,CAAC,OAAO,CAAC,WAAW,CAAC;QAEjE,KAAK,CAAC,IAAI,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;QACxB,UAAU,IAAI,cAAc,CAAC,OAAO,GAAG,MAAM,CAAC,CAAC;IAChD,CAAC;IAED,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC9B,IAAI,SAAS,GAAG,MAAM,CAAC,IAAI;aACzB,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC;aACvB,OAAO,CAAC,UAAU,EAAE,IAAI,CAAC;aACzB,IAAI,EAAE,CAAC;QAGT,IAAI,gBAAgB,IAAI,SAAS,CAAC,MAAM,GAAG,uBAAuB,IAAI,CAAC,yBAAyB,EAAE,CAAC;YAClG,SAAS;gBACR,SAAS,CAAC,SAAS,CAAC,CAAC,EAAE,uBAAuB,CAAC;oBAC/C,oCAAoC,gBAAgB,CAAC,IAAI,yCAAyC,CAAC;YACpG,mBAAmB,GAAG,IAAI,CAAC;QAC5B,CAAC;QAED,KAAK,CAAC,IAAI,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;QAC1B,UAAU,IAAI,cAAc,CAAC,SAAS,GAAG,MAAM,CAAC,CAAC;IAClD,CAAC;IAGD,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,IAAI,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC;QACxD,KAAK,CAAC,GAAG,EAAE,CAAC;IACb,CAAC;IAED,OAAO,EAAE,IAAI,EAAE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,UAAU,EAAE,mBAAmB,EAAE,CAAC;AACpE,CAAC;AAKD,SAAS,mBAAmB,CAC3B,KAAa,EACb,OAA0B,EAC1B,aAAsB,EACtB,WAAW,GAAG,oBAAoB;IAElC,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,IAAI,yBAAyB,GAAG,KAAK,CAAC;IAGtC,MAAM,UAAU,GAAG,aAAa,CAAC,CAAC,CAAC,0BAA0B,aAAa,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;IACnF,MAAM,MAAM,GAAG,+CAA+C,cAAc,CAAC,KAAK,CAAC,IAAI,UAAU,aAAa,OAAO,CAAC,MAAM,YAAY,CAAC;IACzI,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAGnB,MAAM,OAAO,GAAG,YAAY,CAAC,OAAO,CAAC,CAAC;IACtC,MAAM,cAAc,GAAG,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QAC/D,MAAM,aAAa,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;QACrC,MAAM,aAAa,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;QACrC,IAAI,CAAC,aAAa,IAAI,CAAC,aAAa;YAAE,OAAO,CAAC,CAAC;QAC/C,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,aAAa,CAAC,MAAM,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE,CAAC,GAAG,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;QAC5F,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,aAAa,CAAC,MAAM,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE,CAAC,GAAG,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;QAC5F,OAAO,MAAM,GAAG,MAAM,CAAC;IACxB,CAAC,CAAC,CAAC;IAEH,MAAM,eAAe,GAA0E,EAAE,CAAC;IAElG,KAAK,MAAM,OAAO,IAAI,cAAc,EAAE,CAAC;QACtC,MAAM,YAAY,GAAG,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;QAC1C,IAAI,CAAC,YAAY;YAAE,SAAS;QAG5B,MAAM,WAAW,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACrC,IAAI,cAAc,CAAC,WAAW,CAAC,GAAG,WAAW,EAAE,CAAC;YAE/C,KAAK,MAAM,GAAG,IAAI,YAAY,CAAC,IAAI,EAAE,EAAE,CAAC;gBACvC,MAAM,WAAW,GAAG,YAAY,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;gBAC1C,IAAI,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC;oBAAE,SAAS;gBAChC,eAAe,CAAC,IAAI,CAAC;oBACpB,OAAO;oBACP,GAAG;oBACH,KAAK,EAAE,WAAW,CAAC,CAAC,CAAC,CAAC,QAAQ,IAAI,WAAW,CAAC,CAAC,CAAC,CAAC,iBAAiB;oBAClE,KAAK,EAAE,WAAW,CAAC,MAAM;iBACzB,CAAC,CAAC;YACJ,CAAC;YACD,SAAS;QACV,CAAC;QAGD,MAAM,gBAAgB,GAAG,KAAK,CAAC,IAAI,CAAC,YAAY,CAAC,MAAM,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE,CAAC,GAAG,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;QACrG,MAAM,aAAa,GAAG,2BAA2B,cAAc,CAAC,OAAO,CAAC,KAAK,gBAAgB,aAAa,CAAC;QAC3G,KAAK,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QAG1B,MAAM,UAAU,GAAG,KAAK,CAAC,IAAI,CAAC,YAAY,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;YAChE,MAAM,YAAY,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;YACzC,MAAM,YAAY,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;YACzC,IAAI,CAAC,YAAY,IAAI,CAAC,YAAY;gBAAE,OAAO,CAAC,CAAC;YAC7C,OAAO,YAAY,CAAC,MAAM,GAAG,YAAY,CAAC,MAAM,CAAC;QAClD,CAAC,CAAC,CAAC;QAEH,KAAK,MAAM,GAAG,IAAI,UAAU,EAAE,CAAC;YAC9B,MAAM,WAAW,GAAG,YAAY,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YAC1C,IAAI,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC;gBAAE,SAAS;YAEhC,MAAM,SAAS,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC,QAAQ,IAAI,WAAW,CAAC,CAAC,CAAC,CAAC,iBAAiB,CAAC;YAG9E,MAAM,WAAW,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACrC,IAAI,cAAc,CAAC,WAAW,CAAC,GAAG,WAAW,EAAE,CAAC;gBAC/C,eAAe,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,GAAG,EAAE,KAAK,EAAE,SAAS,EAAE,KAAK,EAAE,WAAW,CAAC,MAAM,EAAE,CAAC,CAAC;gBACpF,SAAS;YACV,CAAC;YAED,MAAM,QAAQ,GAAG,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,WAAW,CAAC,MAAM,WAAW,CAAC,CAAC,CAAC,EAAE,CAAC;YAClF,MAAM,UAAU,GAAG,qBAAqB,cAAc,CAAC,SAAS,CAAC,KAAK,GAAG,IAAI,QAAQ,IAAI,CAAC;YAC1F,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;YAGvB,MAAM,aAAa,GAAG,cAAc,CAAC,WAAW,CAAC,CAAC;YAClD,KAAK,MAAM,CAAC,OAAO,EAAE,cAAc,CAAC,IAAI,aAAa,EAAE,CAAC;gBACvD,MAAM,aAAa,GAAG,cAAc,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;gBACvD,MAAM,gBAAgB,GAAG,aAAa,GAAG,WAAW,GAAG,GAAG,CAAC;gBAE3D,MAAM,MAAM,GAAG,qBAAqB,CAAC,OAAO,EAAE,cAAc,EAAE,gBAAgB,EAAE,yBAAyB,CAAC,CAAC;gBAE3G,IAAI,MAAM,CAAC,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC;oBACxB,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;oBACxB,IAAI,MAAM,CAAC,mBAAmB,EAAE,CAAC;wBAChC,yBAAyB,GAAG,IAAI,CAAC;oBAClC,CAAC;gBACF,CAAC;YACF,CAAC;QACF,CAAC;IACF,CAAC;IAGD,IAAI,eAAe,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAChC,KAAK,CAAC,IAAI,CAAC,uCAAuC,CAAC,CAAC;QACpD,KAAK,MAAM,UAAU,IAAI,eAAe,EAAE,CAAC;YAC1C,MAAM,OAAO,GAAG,UAAU,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,UAAU,CAAC,KAAK,WAAW,CAAC,CAAC,CAAC,EAAE,CAAC;YAC7E,KAAK,CAAC,IAAI,CAAC,MAAM,cAAc,CAAC,UAAU,CAAC,KAAK,CAAC,KAAK,UAAU,CAAC,GAAG,IAAI,OAAO,MAAM,UAAU,CAAC,OAAO,IAAI,CAAC,CAAC;QAC9G,CAAC;QACD,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAChB,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IACpB,KAAK,CAAC,IAAI,CAAC,YAAY,gBAAgB,CAAC,IAAI,8CAA8C,CAAC,CAAC;IAE5F,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACzB,CAAC"}
|
|
@@ -239,6 +239,38 @@ describe('DocSearchTool', () => {
|
|
|
239
239
|
mockFetch.mockRejectedValueOnce(new Error('Network error'));
|
|
240
240
|
await expect(docSearchTool.search({ query: 'test' })).rejects.toThrow('Failed to search documentation:');
|
|
241
241
|
});
|
|
242
|
+
it('should handle token budget by truncating or switching to links', async () => {
|
|
243
|
+
const smallBudgetTool = new DocSearchTool(undefined, undefined, 5000);
|
|
244
|
+
const longText = 'This is a very long text that repeats. '.repeat(100);
|
|
245
|
+
const manyResults = [];
|
|
246
|
+
for (let i = 0; i < 8; i++) {
|
|
247
|
+
manyResults.push({
|
|
248
|
+
text: longText,
|
|
249
|
+
product: 'hub',
|
|
250
|
+
heading1: `Page ${i}`,
|
|
251
|
+
source_page_url: `https://huggingface.co/docs/hub/page${i}`,
|
|
252
|
+
source_page_title: `Page ${i}`,
|
|
253
|
+
heading2: `Section ${i}`,
|
|
254
|
+
});
|
|
255
|
+
}
|
|
256
|
+
mockFetch.mockResolvedValueOnce({
|
|
257
|
+
ok: true,
|
|
258
|
+
json: () => Promise.resolve(manyResults),
|
|
259
|
+
});
|
|
260
|
+
const result = await smallBudgetTool.search({ query: 'test' });
|
|
261
|
+
expect(result).toContain('# Documentation Library Search Results for "test"');
|
|
262
|
+
expect(result).toContain('Found 8 results');
|
|
263
|
+
expect(result).toContain('This is a very long text that repeats');
|
|
264
|
+
console.log('Result length:', result.length);
|
|
265
|
+
console.log('Estimated tokens:', Math.ceil(result.length / 3.3));
|
|
266
|
+
expect(result).toContain('#### Excerpt from the "Section 0" section');
|
|
267
|
+
expect(result).toContain('This is a very long text that repeats');
|
|
268
|
+
const hasTruncation = result.includes(`*[Content truncated - use ${DOC_FETCH_CONFIG.name} for full text or narrow search terms]*`);
|
|
269
|
+
const hasAdditionalResults = result.includes('## Further results were found in:');
|
|
270
|
+
expect(hasTruncation || hasAdditionalResults).toBeTruthy();
|
|
271
|
+
expect(result).toContain(`Use the "${DOC_FETCH_CONFIG.name}" tool to fetch a document from the library.`);
|
|
272
|
+
expect(result.length).toBeLessThan(25000);
|
|
273
|
+
});
|
|
242
274
|
});
|
|
243
275
|
describe('groupResults', () => {
|
|
244
276
|
it('should group results by product and page URL', async () => {
|