@llmindset/hf-mcp 0.1.17 → 0.1.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. package/dist/docs-search/doc-fetch.d.ts +24 -0
  2. package/dist/docs-search/doc-fetch.d.ts.map +1 -0
  3. package/dist/docs-search/doc-fetch.js +53 -0
  4. package/dist/docs-search/doc-fetch.js.map +1 -0
  5. package/dist/docs-search/doc-fetch.test.d.ts +2 -0
  6. package/dist/docs-search/doc-fetch.test.d.ts.map +1 -0
  7. package/dist/docs-search/doc-fetch.test.js +52 -0
  8. package/dist/docs-search/doc-fetch.test.js.map +1 -0
  9. package/dist/docs-search/doc-mappings.d.ts +7 -0
  10. package/dist/docs-search/doc-mappings.d.ts.map +1 -0
  11. package/dist/docs-search/doc-mappings.js +75 -0
  12. package/dist/docs-search/doc-mappings.js.map +1 -0
  13. package/dist/docs-search/docs-semantic-search.d.ts +41 -0
  14. package/dist/docs-search/docs-semantic-search.d.ts.map +1 -0
  15. package/dist/docs-search/docs-semantic-search.js +162 -0
  16. package/dist/docs-search/docs-semantic-search.js.map +1 -0
  17. package/dist/docs-search/docs-semantic-search.test.d.ts +2 -0
  18. package/dist/docs-search/docs-semantic-search.test.d.ts.map +1 -0
  19. package/dist/docs-search/docs-semantic-search.test.js +283 -0
  20. package/dist/docs-search/docs-semantic-search.test.js.map +1 -0
  21. package/dist/index.d.ts +2 -0
  22. package/dist/index.d.ts.map +1 -1
  23. package/dist/index.js +2 -0
  24. package/dist/index.js.map +1 -1
  25. package/dist/tool-ids.d.ts +6 -4
  26. package/dist/tool-ids.d.ts.map +1 -1
  27. package/dist/tool-ids.js +7 -16
  28. package/dist/tool-ids.js.map +1 -1
  29. package/package.json +2 -2
  30. package/src/docs-search/doc-fetch.test.ts +74 -0
  31. package/src/docs-search/doc-fetch.ts +92 -0
  32. package/src/docs-search/doc-mappings.ts +79 -0
  33. package/src/docs-search/docs-semantic-search.test.ts +365 -0
  34. package/src/docs-search/docs-semantic-search.ts +244 -0
  35. package/src/index.ts +2 -0
  36. package/src/tool-ids.ts +9 -18
@@ -0,0 +1,79 @@
1
/**
 * Where the documentation sources for one product live.
 */
interface DocMapping {
	/** Repository identifier in `owner/name` form. */
	repo_id: string;
	/** Path of the documentation folder inside that repository. */
	doc_folder: string;
}

/**
 * Lookup table from product name to the repository and folder holding its
 * documentation. Note that two products ('hub' and 'inference-providers')
 * share the same repository and differ only in folder.
 */
export const DOC_MAPPINGS: Record<string, DocMapping> = {
	tokenizers: { repo_id: 'huggingface/tokenizers', doc_folder: 'docs/source-doc-builder' },
	diffusers: { repo_id: 'huggingface/diffusers', doc_folder: 'docs/source/en' },
	accelerate: { repo_id: 'huggingface/accelerate', doc_folder: 'docs/source' },
	huggingface_hub: { repo_id: 'huggingface/huggingface_hub', doc_folder: 'docs/source/en' },
	transformers: { repo_id: 'huggingface/transformers', doc_folder: 'docs/source/en' },
	hub: { repo_id: 'huggingface/hub-docs', doc_folder: 'docs/hub' },
	'huggingface.js': { repo_id: 'huggingface/huggingface.js', doc_folder: 'docs' },
	'transformers.js': { repo_id: 'huggingface/transformers.js', doc_folder: 'docs/source' },
	smolagents: { repo_id: 'huggingface/smolagents', doc_folder: 'docs/source/en' },
	peft: { repo_id: 'huggingface/peft', doc_folder: 'docs/source' },
	trl: { repo_id: 'huggingface/trl', doc_folder: 'docs/source' },
	bitsandbytes: { repo_id: 'bitsandbytes-foundation/bitsandbytes', doc_folder: 'docs/source' },
	lerobot: { repo_id: 'huggingface/lerobot', doc_folder: 'docs/source' },
	timm: { repo_id: 'huggingface/pytorch-image-models', doc_folder: 'hfdocs/source' },
	'inference-providers': { repo_id: 'huggingface/hub-docs', doc_folder: 'docs/inference-providers' },
	safetensors: { repo_id: 'huggingface/safetensors', doc_folder: 'docs/source' },
	'inference-endpoints': { repo_id: 'huggingface/hf-endpoints-documentation', doc_folder: 'docs/source' },
	'dataset-viewer': { repo_id: 'huggingface/dataset-viewer', doc_folder: 'docs/source' },
};
@@ -0,0 +1,365 @@
1
+ import { describe, it, expect, vi, beforeEach } from 'vitest';
2
+ import { DocSearchTool } from './docs-semantic-search.js';
3
+ import { DOC_FETCH_CONFIG } from './doc-fetch.js';
4
+
5
// Swap the global fetch for a mock so that no test reaches the network
const mockFetch = vi.fn();
global.fetch = mockFetch as typeof fetch;

describe('DocSearchTool', () => {
	let docSearchTool: DocSearchTool;

	/** Queues one successful fetch response whose JSON body is `payload`. */
	const respondWith = (payload: unknown[]): void => {
		mockFetch.mockResolvedValueOnce({
			ok: true,
			json: () => Promise.resolve(payload),
		});
	};

	beforeEach(() => {
		docSearchTool = new DocSearchTool();
		vi.clearAllMocks();
	});

	describe('search', () => {
		it('should return error when query is too short', async () => {
			// NOTE(review): no response is queued here, so the rejection presumably
			// originates from the unmocked fetch rather than from a length check -
			// confirm against HfApiCall.
			const pending = docSearchTool.search({ query: 'ab' });
			await expect(pending).rejects.toThrow();
		});

		it('should return no results message when API returns empty array', async () => {
			respondWith([]);

			const output = await docSearchTool.search({ query: 'nonexistent' });
			expect(output).toBe(`No documentation found for query 'nonexistent'`);
		});

		it('should return no results message with product filter', async () => {
			respondWith([]);

			const output = await docSearchTool.search({ query: 'nonexistent', product: 'hub' });
			expect(output).toBe(`No documentation found for query 'nonexistent' in product 'hub'`);
		});

		it('should format results grouped by product and page', async () => {
			const analyticsUrl = 'https://huggingface.co/docs/hub/enterprise-hub-analytics#export-analytics-as-csv';
			respondWith([
				{
					text: 'Download a comprehensive CSV file containing analytics',
					product: 'hub',
					heading1: 'Analytics',
					source_page_url: analyticsUrl,
					source_page_title: 'Enterprise-hub-analytics',
					heading2: 'Export Analytics as CSV',
				},
				{
					text: 'View analytics for your repositories',
					product: 'hub',
					heading1: 'Analytics',
					source_page_url: analyticsUrl,
					source_page_title: 'Enterprise-hub-analytics',
					heading2: 'View Analytics',
				},
				{
					text: 'In this quickstart, you will learn how to use the dataset viewer REST API',
					product: 'dataset-viewer',
					heading1: 'Quickstart',
					source_page_url: 'https://huggingface.co/docs/dataset-viewer/quick_start#quickstart',
					source_page_title: 'Quick start',
				},
			]);

			const output = await docSearchTool.search({ query: 'analytics' });

			// Header
			expect(output).toContain('# Documentation Library Search Results for "analytics"');
			expect(output).toContain('Found 3 results');

			// Product grouping: hub (2 results) is listed ahead of dataset-viewer (1 result)
			const hubPos = output.indexOf('## Results for Product: hub');
			const datasetViewerPos = output.indexOf('## Results for Product: dataset-viewer');
			expect(hubPos).toBeLessThan(datasetViewerPos);
			expect(hubPos).toBeGreaterThan(-1);
			expect(datasetViewerPos).toBeGreaterThan(-1);

			// Per-product result counts
			expect(output).toContain('## Results for Product: hub (2 results)');
			expect(output).toContain('## Results for Product: dataset-viewer (1 results)');

			// Page links are emitted without their anchors
			expect(output).toContain(
				'### Results from [Analytics](https://huggingface.co/docs/hub/enterprise-hub-analytics)'
			);
			expect(output).toContain(
				'### Results from [Quickstart](https://huggingface.co/docs/dataset-viewer/quick_start)'
			);

			// Section headings derived from heading2
			expect(output).toContain('#### Excerpt from the "Export Analytics as CSV" section');
			expect(output).toContain('#### Excerpt from the "View Analytics" section');

			// Excerpt bodies appear as plain text
			expect(output).toContain('Download a comprehensive CSV file containing analytics');
			expect(output).toContain('View analytics for your repositories');
			expect(output).toContain('In this quickstart, you will learn how to use the dataset viewer REST API');

			// Footer
			expect(output).toContain(`Use the "${DOC_FETCH_CONFIG.name}" tool to fetch a document from the library.`);
		});

		it('should handle results without heading2', async () => {
			respondWith([
				{
					text: 'This is a simple text without heading2',
					product: 'transformers',
					heading1: 'Introduction',
					source_page_url: 'https://huggingface.co/docs/transformers/index',
					source_page_title: 'Transformers',
				},
			]);

			const output = await docSearchTool.search({ query: 'transformers' });

			// No section heading is produced when heading2 is absent
			expect(output).not.toContain('#### Excerpt from');
			expect(output).toContain('This is a simple text without heading2');
		});

		it('should properly escape markdown special characters', async () => {
			respondWith([
				{
					text: 'Text with [brackets] and *asterisks* and _underscores_',
					product: 'hub',
					heading1: 'Special * Characters',
					source_page_url: 'https://huggingface.co/docs/hub/test',
					source_page_title: 'Test',
					heading2: 'Section with [brackets]',
				},
			]);

			const output = await docSearchTool.search({ query: 'special' });

			// Special characters in headings and page titles come out escaped
			expect(output).toContain('Special \\* Characters');
			// heading2 is shown in the section header; its brackets stay unescaped
			expect(output).toContain('#### Excerpt from the "Section with [brackets]" section');
		});

		it('should clean HTML tags from text', async () => {
			respondWith([
				{
					text: 'Text with <div class="test">HTML tags</div> and <img src="test.png" alt="image"/>',
					product: 'hub',
					heading1: 'HTML Test',
					source_page_url: 'https://huggingface.co/docs/hub/html-test',
					source_page_title: 'HTML Test',
				},
			]);

			const output = await docSearchTool.search({ query: 'html' });

			// Tags are stripped, surrounding text is kept
			expect(output).toContain('Text with HTML tags and');
			expect(output).not.toContain('<div');
			expect(output).not.toContain('<img');
		});

		it('should sort multiple products and pages correctly by count', async () => {
			respondWith([
				// hub: 3 results in total (expected first)
				{
					text: 'Result from hub page 1 - first',
					product: 'hub',
					heading1: 'Page 1',
					source_page_url: 'https://huggingface.co/docs/hub/page1',
					source_page_title: 'Page 1',
				},
				{
					text: 'Result from hub page 1 - second',
					product: 'hub',
					heading1: 'Page 1',
					source_page_url: 'https://huggingface.co/docs/hub/page1',
					source_page_title: 'Page 1',
				},
				{
					text: 'Result from hub page 2',
					product: 'hub',
					heading1: 'Page 2',
					source_page_url: 'https://huggingface.co/docs/hub/page2',
					source_page_title: 'Page 2',
				},
				// transformers: 1 result
				{
					text: 'Result from transformers',
					product: 'transformers',
					heading1: 'Transformers Page',
					source_page_url: 'https://huggingface.co/docs/transformers/page1',
					source_page_title: 'Transformers',
				},
				// datasets: 1 result
				{
					text: 'Result from datasets',
					product: 'datasets',
					heading1: 'Datasets Page',
					source_page_url: 'https://huggingface.co/docs/datasets/page1',
					source_page_title: 'Datasets',
				},
			]);

			const output = await docSearchTool.search({ query: 'test' });

			// Product order by count: hub (3) > transformers (1) = datasets (1)
			const hubPos = output.indexOf('## Results for Product: hub');
			const transformersPos = output.indexOf('## Results for Product: transformers');
			const datasetsPos = output.indexOf('## Results for Product: datasets');

			expect(hubPos).toBeLessThan(transformersPos);
			expect(hubPos).toBeLessThan(datasetsPos);

			// hub reports its total
			expect(output).toContain('## Results for Product: hub (3 results)');

			// Within hub, page1 (2 results) precedes page2 (1 result)
			const page1Pos = output.indexOf('https://huggingface.co/docs/hub/page1');
			const page2Pos = output.indexOf('https://huggingface.co/docs/hub/page2');
			expect(page1Pos).toBeLessThan(page2Pos);

			// page1 reports its own count
			expect(output).toContain('### Results from [Page 1](https://huggingface.co/docs/hub/page1) (2 results)');
		});

		it('should include product filter in API call when provided', async () => {
			respondWith([]);

			await docSearchTool.search({ query: 'test', product: 'hub' });

			expect(mockFetch).toHaveBeenCalledWith(expect.stringContaining('q=test&product=hub'), expect.any(Object));
		});

		it('should group results from the same page with different anchors together', async () => {
			respondWith([
				{
					text: 'First result from section 1',
					product: 'hub',
					heading1: 'Analytics',
					source_page_url: 'https://huggingface.co/docs/hub/analytics#section1',
					source_page_title: 'Analytics Page',
					heading2: 'Section 1',
				},
				{
					text: 'Second result from section 2',
					product: 'hub',
					heading1: 'Analytics',
					source_page_url: 'https://huggingface.co/docs/hub/analytics#section2',
					source_page_title: 'Analytics Page',
					heading2: 'Section 2',
				},
				{
					text: 'Third result from section 3',
					product: 'hub',
					heading1: 'Analytics',
					source_page_url: 'https://huggingface.co/docs/hub/analytics#section3',
					source_page_title: 'Analytics Page',
					heading2: 'Section 3',
				},
			]);

			const output = await docSearchTool.search({ query: 'analytics' });

			// One page heading (anchor removed) covers all three hits
			expect(output).toContain(
				'### Results from [Analytics](https://huggingface.co/docs/hub/analytics) (3 results)'
			);

			// Every excerpt is present
			expect(output).toContain('First result from section 1');
			expect(output).toContain('Second result from section 2');
			expect(output).toContain('Third result from section 3');

			// Exactly one "Results from" heading exists for the page
			const pageHeadings = output.match(/### Results from/g) || [];
			expect(pageHeadings.length).toBe(1);
		});

		it('should handle API errors gracefully', async () => {
			mockFetch.mockRejectedValueOnce(new Error('Network error'));

			const pending = docSearchTool.search({ query: 'test' });
			await expect(pending).rejects.toThrow('Failed to search documentation:');
		});
	});

	describe('groupResults', () => {
		it('should group results by product and page URL', async () => {
			respondWith([
				{
					text: 'Result 1',
					product: 'hub',
					heading1: 'Page 1',
					source_page_url: 'https://example.com/page1',
					source_page_title: 'Page 1',
				},
				{
					text: 'Result 2',
					product: 'hub',
					heading1: 'Page 1',
					source_page_url: 'https://example.com/page1',
					source_page_title: 'Page 1',
				},
				{
					text: 'Result 3',
					product: 'transformers',
					heading1: 'Page 2',
					source_page_url: 'https://example.com/page2',
					source_page_title: 'Page 2',
				},
			]);

			const output = await docSearchTool.search({ query: 'test' });

			// Both products get their own heading
			expect(output).toContain('## Results for Product: hub');
			expect(output).toContain('## Results for Product: transformers');

			// Grouping is checked indirectly through the distance between excerpts
			const firstPos = output.indexOf('Result 1');
			const secondPos = output.indexOf('Result 2');
			const thirdPos = output.indexOf('Result 3');

			// Results 1 and 2 share a page, so they sit close together
			expect(Math.abs(secondPos - firstPos)).toBeLessThan(100);
			// Result 3 belongs to another product, so it sits further away
			expect(Math.abs(thirdPos - firstPos)).toBeGreaterThan(50);
		});
	});
});
@@ -0,0 +1,244 @@
1
+ import { z } from 'zod';
2
+ import { HfApiCall } from '../hf-api-call.js';
3
+ import { escapeMarkdown } from '../utilities.js';
4
+ import { DOC_FETCH_CONFIG } from './doc-fetch.js';
5
+
6
/**
 * Definition of the documentation search tool: its name, description,
 * zod input schema and annotation hints.
 */
export const DOCS_SEMANTIC_SEARCH_CONFIG = {
	name: 'hf_doc_search',
	description: 'Search the Hugging Face documentation library. Returns excerpts grouped by Product and Document.',
	schema: z.object({
		// Required search text, between 3 and 200 characters long
		query: z.string().min(3, 'Supply at least one search term').max(200, 'Query too long').describe('Semantic search query'),
		// Optional product name used to narrow the search
		product: z
			.string()
			.optional()
			.describe(
				'Filter by Product (e.g., "hub", "dataset-viewer", "transformers"). Supply when known for focused results'
			),
	}),
	annotations: {
		title: 'Hugging Face Documentation Library Search',
		destructiveHint: false,
		readOnlyHint: true,
		openWorldHint: true,
	},
} as const;

/** Input accepted by the search tool, inferred from the schema above. */
export type DocSearchParams = z.infer<typeof DOCS_SEMANTIC_SEARCH_CONFIG.schema>;
31
+
32
/**
 * One hit as returned by the documentation search API.
 */
interface DocSearchResult {
	/** Excerpt text; may contain HTML tags, which are stripped before display. */
	text: string;
	/** Product the hit belongs to; results are grouped by this value. */
	product: string;
	/** Used as the page title when non-empty. */
	heading1: string;
	/** Section heading; hits on a page are grouped by this value when present. */
	heading2?: string;
	/** Page URL, possibly carrying a `#anchor` that is dropped for grouping. */
	source_page_url: string;
	/** Fallback page title for when `heading1` is empty. */
	source_page_title: string;
}

/**
 * Parameters passed to the search API call.
 */
interface DocSearchApiParams {
	/** Search text. */
	q: string;
	/** Optional product filter. */
	product?: string;
}
45
+
46
/**
 * Client for the Hugging Face semantic documentation search API that
 * renders the hits it receives as a markdown report.
 */
export class DocSearchTool extends HfApiCall<DocSearchApiParams, DocSearchResult[]> {
	/**
	 * @param hfToken Optional Hugging Face token for API access
	 * @param apiUrl The URL of the Hugging Face document search API
	 */
	constructor(hfToken?: string, apiUrl = 'https://hf.co/api/docs/search') {
		super(apiUrl, hfToken);
	}

	/**
	 * Runs a search and formats the outcome.
	 *
	 * @param params Search input: `query` (e.g. "rate limits", "analytics") and an optional `product` filter
	 * @returns The formatted results, a "No documentation found ..." message when the API
	 *   returns an empty list, or 'No query provided' when the query is empty
	 * @throws Error prefixed with "Failed to search documentation: " when an `Error` is raised
	 *   along the way; any other thrown value is rethrown unchanged
	 */
	async search(params: DocSearchParams): Promise<string> {
		try {
			const { query, product } = params;
			if (!query) {
				return 'No query provided';
			}

			// The query is lower-cased before being sent; the product is only sent when supplied
			const q = query.toLowerCase();
			const apiParams: DocSearchApiParams = product ? { q, product } : { q };

			const hits = await this.callApi<DocSearchResult[]>(apiParams);
			if (hits.length > 0) {
				return formatSearchResults(query, hits, product);
			}

			if (product) {
				return `No documentation found for query '${query}' in product '${product}'`;
			}
			return `No documentation found for query '${query}'`;
		} catch (error) {
			if (!(error instanceof Error)) {
				throw error;
			}
			throw new Error(`Failed to search documentation: ${error.message}`);
		}
	}
}
88
+
89
/**
 * Buckets search hits first by product and then by page URL.
 *
 * The `#anchor` part of each URL is dropped so that hits from different
 * sections of one page share a bucket. Both map levels keep the order in
 * which their keys were first seen.
 *
 * @param results Hits to group
 * @returns Map of product -> (anchor-less page URL -> hits)
 */
function groupResults(results: DocSearchResult[]): Map<string, Map<string, DocSearchResult[]>> {
	const byProduct = new Map<string, Map<string, DocSearchResult[]>>();

	for (const hit of results) {
		let pages = byProduct.get(hit.product);
		if (pages === undefined) {
			pages = new Map<string, DocSearchResult[]>();
			byProduct.set(hit.product, pages);
		}

		// Fall back to the full URL when nothing precedes the '#'
		const [beforeAnchor] = hit.source_page_url.split('#');
		const pageUrl = beforeAnchor || hit.source_page_url;

		const bucket = pages.get(pageUrl);
		if (bucket === undefined) {
			pages.set(pageUrl, [hit]);
		} else {
			bucket.push(hit);
		}
	}

	return byProduct;
}
118
+
119
/**
 * Buckets the hits of one page by their section heading (`heading2`).
 *
 * Hits without a `heading2` are collected under the `undefined` key.
 * Sections keep the order in which they were first seen.
 *
 * @param pageResults Hits belonging to a single page
 * @returns Map of section heading -> hits
 */
function groupBySection(pageResults: DocSearchResult[]): Map<string | undefined, DocSearchResult[]> {
	const bySection = new Map<string | undefined, DocSearchResult[]>();

	for (const hit of pageResults) {
		const bucket = bySection.get(hit.heading2);
		if (bucket === undefined) {
			bySection.set(hit.heading2, [hit]);
		} else {
			bucket.push(hit);
		}
	}

	return bySection;
}
138
+
139
/**
 * Renders the excerpts of one section as markdown.
 *
 * When `section` is given, a level-4 heading is emitted first ("Excerpts"
 * for several hits, "Excerpt" for one). Each excerpt follows as plain text
 * with a blank line after it.
 *
 * @param section Section heading, or `undefined` for hits without one
 * @param results Hits belonging to that section
 * @returns The rendered lines joined with newlines
 */
function formatSectionExcerpts(section: string | undefined, results: DocSearchResult[]): string {
	const lines: string[] = [];

	// Add section heading if present
	if (section) {
		const noun = results.length > 1 ? 'Excerpts' : 'Excerpt';
		lines.push(`#### ${noun} from the "${escapeMarkdown(section)}" section`);
		lines.push('');
	}

	for (const result of results) {
		const cleanText = result.text
			// Strip HTML comments and tag-shaped spans only. A '<' must be followed
			// by a letter (optionally after '/' or '!') to count as a tag, so prose
			// such as "a < b and c > d" is left intact instead of being cut out.
			.replace(/<!--[\s\S]*?-->|<[\/!]?[a-zA-Z][^>]*>/g, '')
			// Collapse blank (or whitespace-only) lines
			.replace(/\n\s*\n/g, '\n')
			.trim();

		lines.push(cleanText);
		lines.push('');
	}

	return lines.join('\n');
}
169
+
170
/**
 * Builds the markdown report for a set of search hits.
 *
 * Layout: a title with the query (and product filter, if any), the total
 * hit count, one `##` section per product, one `###` heading per page
 * within it, the page's excerpts grouped by section, and a footer pointing
 * to the document fetch tool. Products and pages are ordered by descending
 * hit count; ties keep the order in which they were first seen.
 *
 * @param query The search text as supplied by the caller
 * @param results Hits returned by the search API
 * @param productFilter Product filter that was applied, if any
 * @returns The report as a single newline-joined string
 */
function formatSearchResults(query: string, results: DocSearchResult[], productFilter?: string): string {
	const lines: string[] = [];

	// Header. The filter is escaped like every other piece of supplied text in the report.
	const filterText = productFilter ? ` (filtered by product: ${escapeMarkdown(productFilter)})` : '';
	lines.push(`# Documentation Library Search Results for "${escapeMarkdown(query)}"${filterText}`);
	lines.push('');
	lines.push(`Found ${results.length} results`);
	lines.push('');

	// Compute each product's total once, then order products by it (most hits first).
	// Array.prototype.sort is stable, so equal totals keep their insertion order.
	const productSections = Array.from(groupResults(results), ([product, pages]) => ({
		product,
		pages,
		total: Array.from(pages.values()).reduce((sum, hits) => sum + hits.length, 0),
	})).sort((a, b) => b.total - a.total);

	for (const { product, pages, total } of productSections) {
		lines.push(`## Results for Product: ${escapeMarkdown(product)} (${total} results)`);
		lines.push('');

		// Order pages within the product by hit count (most hits first)
		const sortedPages = Array.from(pages).sort(([, hitsA], [, hitsB]) => hitsB.length - hitsA.length);

		for (const [url, pageResults] of sortedPages) {
			// An empty page bucket should not occur; the guard also narrows the type
			const firstResult = pageResults[0];
			if (!firstResult) continue;

			// Page header: title falls back to the page title when heading1 is empty,
			// and the count is only shown when the page has more than one hit
			const pageTitle = firstResult.heading1 || firstResult.source_page_title;
			const hitCount = pageResults.length > 1 ? ` (${pageResults.length} results)` : '';
			// `url` is the grouping key, i.e. the page URL without its anchor
			lines.push(`### Results from [${escapeMarkdown(pageTitle)}](${url})${hitCount}`);
			lines.push('');

			// Excerpts, grouped by section
			for (const [section, sectionResults] of groupBySection(pageResults)) {
				lines.push(formatSectionExcerpts(section, sectionResults));
			}
		}
	}

	// Footer: point the reader at the doc fetch tool
	lines.push('---');
	lines.push('');
	lines.push(`Use the "${DOC_FETCH_CONFIG.name}" tool to fetch a document from the library.`);

	return lines.join('\n');
}
package/src/index.ts CHANGED
@@ -13,6 +13,8 @@ export * from './space-info.js';
13
13
  export * from './space-files.js';
14
14
  export * from './user-summary.js';
15
15
  export * from './paper-summary.js';
16
+ export * from './docs-search/docs-semantic-search.js';
17
+ export * from './docs-search/doc-fetch.js';
16
18
 
17
19
  // Export tool IDs for external use - these are the canonical tool identifiers
18
20
  export * from './tool-ids.js';