@llmindset/hf-mcp 0.3.10 → 0.3.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. package/dist/create-repo.d.ts +47 -0
  2. package/dist/create-repo.d.ts.map +1 -0
  3. package/dist/create-repo.js +83 -0
  4. package/dist/create-repo.js.map +1 -0
  5. package/dist/create-repo.test.d.ts +2 -0
  6. package/dist/create-repo.test.d.ts.map +1 -0
  7. package/dist/create-repo.test.js +155 -0
  8. package/dist/create-repo.test.js.map +1 -0
  9. package/dist/dataset-detail.js +1 -1
  10. package/dist/dataset-detail.js.map +1 -1
  11. package/dist/dataset-viewer-inspect.d.ts +48 -0
  12. package/dist/dataset-viewer-inspect.d.ts.map +1 -0
  13. package/dist/dataset-viewer-inspect.js +660 -0
  14. package/dist/dataset-viewer-inspect.js.map +1 -0
  15. package/dist/dataset-viewer-inspect.test.d.ts +2 -0
  16. package/dist/dataset-viewer-inspect.test.d.ts.map +1 -0
  17. package/dist/dataset-viewer-inspect.test.js +218 -0
  18. package/dist/dataset-viewer-inspect.test.js.map +1 -0
  19. package/dist/gradio-files.d.ts +2 -2
  20. package/dist/hub-inspect.d.ts +19 -2
  21. package/dist/hub-inspect.d.ts.map +1 -1
  22. package/dist/hub-inspect.js +68 -4
  23. package/dist/hub-inspect.js.map +1 -1
  24. package/dist/hub-inspect.test.d.ts +2 -0
  25. package/dist/hub-inspect.test.d.ts.map +1 -0
  26. package/dist/hub-inspect.test.js +24 -0
  27. package/dist/hub-inspect.test.js.map +1 -0
  28. package/dist/index.browser.d.ts +5 -3
  29. package/dist/index.browser.d.ts.map +1 -1
  30. package/dist/index.browser.js +16 -1
  31. package/dist/index.browser.js.map +1 -1
  32. package/dist/index.d.ts +2 -0
  33. package/dist/index.d.ts.map +1 -1
  34. package/dist/index.js +2 -0
  35. package/dist/index.js.map +1 -1
  36. package/dist/jobs/types.d.ts +23 -23
  37. package/dist/jobs/types.d.ts.map +1 -1
  38. package/dist/jobs/types.js +7 -2
  39. package/dist/jobs/types.js.map +1 -1
  40. package/dist/model-detail.js +1 -1
  41. package/dist/model-detail.js.map +1 -1
  42. package/dist/readme-utils.d.ts +1 -1
  43. package/dist/readme-utils.d.ts.map +1 -1
  44. package/dist/readme-utils.js +2 -13
  45. package/dist/readme-utils.js.map +1 -1
  46. package/dist/repo-search.d.ts +2 -2
  47. package/dist/tool-ids.d.ts +4 -3
  48. package/dist/tool-ids.d.ts.map +1 -1
  49. package/dist/tool-ids.js +4 -1
  50. package/dist/tool-ids.js.map +1 -1
  51. package/package.json +1 -1
  52. package/src/create-repo.test.ts +182 -0
  53. package/src/create-repo.ts +103 -0
  54. package/src/dataset-detail.ts +1 -1
  55. package/src/dataset-viewer-inspect.test.ts +234 -0
  56. package/src/dataset-viewer-inspect.ts +809 -0
  57. package/src/hub-inspect.test.ts +28 -0
  58. package/src/hub-inspect.ts +88 -4
  59. package/src/index.browser.ts +18 -1
  60. package/src/index.ts +2 -0
  61. package/src/jobs/types.ts +10 -2
  62. package/src/model-detail.ts +1 -1
  63. package/src/readme-utils.ts +2 -32
  64. package/src/tool-ids.ts +4 -0
package/src/hub-inspect.test.ts ADDED
@@ -0,0 +1,28 @@
1
+ import { describe, expect, it } from 'vitest';
2
+ import { HUB_REPO_DETAILS_TOOL_CONFIG } from './hub-inspect.js';
3
+
4
+ describe('HUB_REPO_DETAILS_TOOL_CONFIG', () => {
5
+ it('defaults to overview and accepts dataset viewer operations', () => {
6
+ const parsed = HUB_REPO_DETAILS_TOOL_CONFIG.schema.parse({
7
+ repo_ids: ['rajpurkar/squad'],
8
+ repo_type: 'dataset',
9
+ operations: ['dataset_structure', 'dataset_preview'],
10
+ config: 'plain_text',
11
+ split: 'train',
12
+ offset: 0,
13
+ limit: 5,
14
+ });
15
+
16
+ expect(parsed.include_readme).toBe(false);
17
+ expect(parsed.operations).toEqual(['dataset_structure', 'dataset_preview']);
18
+ });
19
+
20
+ it('does not expose a redundant readme operation', () => {
21
+ expect(() =>
22
+ HUB_REPO_DETAILS_TOOL_CONFIG.schema.parse({
23
+ repo_ids: ['rajpurkar/squad'],
24
+ operations: ['readme'],
25
+ })
26
+ ).toThrow();
27
+ });
28
+ });
package/src/hub-inspect.ts CHANGED
@@ -4,12 +4,17 @@ import { ModelDetailTool } from './model-detail.js';
4
4
  import { DatasetDetailTool } from './dataset-detail.js';
5
5
  import { spaceInfo } from '@huggingface/hub';
6
6
  import { formatDate } from './utilities.js';
7
+ import { DatasetViewerInspector } from './dataset-viewer-inspect.js';
8
+
9
+ const HUB_INSPECT_OPERATIONS = ['overview', 'dataset_structure', 'dataset_preview'] as const;
7
10
 
8
11
  export const HUB_REPO_DETAILS_TOOL_CONFIG = {
9
12
  name: 'hub_repo_details',
10
13
  description:
11
14
  'Get details for one or more Hugging Face repos (model, dataset, or space). ' +
12
- 'Auto-detects type unless specified.',
15
+ 'Auto-detects type unless specified. For datasets, use operations: overview, dataset_structure, dataset_preview. ' +
16
+ 'Use dataset_structure first to discover configs, splits, sizes, and schema. Use dataset_preview only when ' +
17
+ 'config and split are known, unless the dataset has a single config/split.',
13
18
  schema: z.object({
14
19
  repo_ids: z
15
20
  .array(z.string().min(1))
@@ -18,6 +23,26 @@ export const HUB_REPO_DETAILS_TOOL_CONFIG = {
18
23
  .describe('Repo IDs for (models|dataset/space) - usually in author/name format (e.g. openai/gpt-oss-120b)'),
19
24
  repo_type: z.enum(['model', 'dataset', 'space']).optional().describe('Specify lookup type; otherwise auto-detects'),
20
25
  include_readme: z.boolean().default(false).describe('Include README from the repo'),
26
+ operations: z
27
+ .array(z.enum(HUB_INSPECT_OPERATIONS))
28
+ .optional()
29
+ .describe(
30
+ 'Details to return. Defaults to ["overview"]. For datasets, prefer ["overview", "dataset_structure"] first; then call ["dataset_preview"] with config and split.'
31
+ ),
32
+ config: z
33
+ .string()
34
+ .optional()
35
+ .describe(
36
+ 'Dataset Viewer config. Required for dataset_preview when the dataset has multiple config/split options. Discover via dataset_structure.'
37
+ ),
38
+ split: z
39
+ .string()
40
+ .optional()
41
+ .describe(
42
+ 'Dataset Viewer split. Required for dataset_preview when the dataset has multiple config/split options. Discover via dataset_structure.'
43
+ ),
44
+ offset: z.number().int().nonnegative().optional().describe('Row offset for dataset_preview. Defaults to 0.'),
45
+ limit: z.number().int().optional().describe('Row count for dataset_preview. Defaults to 5 and is clamped to 1-100.'),
21
46
  }),
22
47
  annotations: {
23
48
  title: 'Hub Repo Details',
@@ -32,11 +57,13 @@ export type HubInspectParams = z.infer<typeof HUB_REPO_DETAILS_TOOL_CONFIG.schem
32
57
  export class HubInspectTool {
33
58
  private readonly modelDetail: ModelDetailTool;
34
59
  private readonly datasetDetail: DatasetDetailTool;
60
+ private readonly datasetViewer: DatasetViewerInspector;
35
61
  private readonly hubUrl?: string;
36
62
 
37
63
  constructor(hfToken?: string, hubUrl?: string) {
38
64
  this.modelDetail = new ModelDetailTool(hfToken, hubUrl);
39
65
  this.datasetDetail = new DatasetDetailTool(hfToken, hubUrl);
66
+ this.datasetViewer = new DatasetViewerInspector(hfToken, { hubUrl });
40
67
  this.hubUrl = hubUrl;
41
68
  }
42
69
 
@@ -46,7 +73,7 @@ export class HubInspectTool {
46
73
 
47
74
  for (const id of params.repo_ids) {
48
75
  try {
49
- const section = await this.inspectSingle(id, params.repo_type, includeReadme);
76
+ const section = await this.inspectSingle(id, params, includeReadme);
50
77
  parts.push(section);
51
78
  successCount += 1;
52
79
  } catch (err) {
@@ -66,20 +93,30 @@ export class HubInspectTool {
66
93
 
67
94
  private async inspectSingle(
68
95
  repoId: string,
69
- type: 'model' | 'dataset' | 'space' | undefined,
96
+ params: HubInspectParams,
70
97
  includeReadme: boolean
71
98
  ): Promise<string> {
99
+ const type = params.repo_type;
100
+ const operations = normalizeOperations(params.operations);
101
+ const hasDatasetOperation = operations.some((operation) => operation === 'dataset_structure' || operation === 'dataset_preview');
102
+
72
103
  // If caller constrained the type, do only that
73
104
  if (type === 'model') {
105
+ if (hasDatasetOperation) return operationMismatch(repoId, 'model', operations);
74
106
  return (await this.modelDetail.getDetails(repoId, includeReadme)).formatted;
75
107
  }
76
108
  if (type === 'dataset') {
77
- return (await this.datasetDetail.getDetails(repoId, includeReadme)).formatted;
109
+ return await this.getDatasetDetails(repoId, params, includeReadme, operations);
78
110
  }
79
111
  if (type === 'space') {
112
+ if (hasDatasetOperation) return operationMismatch(repoId, 'space', operations);
80
113
  return await this.getSpaceDetails(repoId);
81
114
  }
82
115
 
116
+ if (hasDatasetOperation) {
117
+ return await this.getDatasetDetails(repoId, params, includeReadme, operations);
118
+ }
119
+
83
120
  // Auto-detect: attempt all three and aggregate. The same id may exist for multiple types.
84
121
  const matches: string[] = [];
85
122
 
@@ -111,6 +148,33 @@ export class HubInspectTool {
111
148
  return matches.join('\n\n---\n\n');
112
149
  }
113
150
 
151
+ private async getDatasetDetails(
152
+ repoId: string,
153
+ params: HubInspectParams,
154
+ includeReadme: boolean,
155
+ operations: HubInspectOperation[]
156
+ ): Promise<string> {
157
+ const sections: string[] = [];
158
+ if (operations.includes('overview')) {
159
+ const overview = (await this.datasetDetail.getDetails(repoId, includeReadme)).formatted;
160
+ sections.push(`${overview}\n\n${datasetDrillDownHint()}`);
161
+ }
162
+ if (operations.includes('dataset_structure')) {
163
+ sections.push(await this.datasetViewer.getStructure(repoId, { config: params.config, split: params.split }));
164
+ }
165
+ if (operations.includes('dataset_preview')) {
166
+ sections.push(
167
+ await this.datasetViewer.getPreview(repoId, {
168
+ config: params.config,
169
+ split: params.split,
170
+ offset: params.offset,
171
+ limit: params.limit,
172
+ })
173
+ );
174
+ }
175
+ return sections.join('\n\n');
176
+ }
177
+
114
178
  private async getSpaceDetails(spaceId: string): Promise<string> {
115
179
  const additionalFields = ['author', 'tags', 'runtime', 'subdomain', 'sha'] as const;
116
180
  const info = await spaceInfo<(typeof additionalFields)[number]>({
@@ -142,3 +206,23 @@ export class HubInspectTool {
142
206
  return lines.join('\n');
143
207
  }
144
208
  }
209
+
210
+ type HubInspectOperation = (typeof HUB_INSPECT_OPERATIONS)[number];
211
+
212
+ function normalizeOperations(operations: readonly HubInspectOperation[] | undefined): HubInspectOperation[] {
213
+ return operations && operations.length > 0 ? [...new Set(operations)] : ['overview'];
214
+ }
215
+
216
+ function operationMismatch(repoId: string, type: 'model' | 'space', operations: HubInspectOperation[]): string {
217
+ const requested = operations.filter((operation) => operation.startsWith('dataset_')).join(', ');
218
+ return `# ${repoId}\n\nRequested dataset operation(s) \`${requested}\`, but this repo was requested as a ${type}. Dataset Viewer operations only apply to dataset repos.`;
219
+ }
220
+
221
+ function datasetDrillDownHint(): string {
222
+ return [
223
+ '## Available deeper inspections',
224
+ 'Call `hub_repo_details` with:',
225
+ '- `operations: ["dataset_structure"]` for configs, splits, sizes, parquet exports, and schema.',
226
+ '- `operations: ["dataset_preview"]` with `config` and `split` for sample rows.',
227
+ ].join('\n');
228
+ }
package/src/index.browser.ts CHANGED
@@ -44,6 +44,19 @@ export const REPO_SEARCH_TOOL_CONFIG: BrowserToolConfig = {
44
44
  },
45
45
  };
46
46
 
47
+ export const CREATE_REPO_TOOL_CONFIG: BrowserToolConfig = {
48
+ name: 'create_repo',
49
+ description:
50
+ 'Create a Hugging Face model, dataset, Space, or bucket repository. ' +
51
+ "name must be fully qualified, for example 'username/repo-name'.",
52
+ annotations: {
53
+ title: 'Create Hugging Face Repository',
54
+ destructiveHint: false,
55
+ readOnlyHint: false,
56
+ openWorldHint: true,
57
+ },
58
+ };
59
+
47
60
  export const PAPER_SEARCH_TOOL_CONFIG: BrowserToolConfig = {
48
61
  name: 'paper_search',
49
62
  description:
@@ -62,7 +75,8 @@ export const HUB_REPO_DETAILS_TOOL_CONFIG: BrowserToolConfig = {
62
75
  name: 'hub_repo_details',
63
76
  description:
64
77
  'Get details for one or more Hugging Face repos (model, dataset, or space). ' +
65
- 'Auto-detects type unless specified.',
78
+ 'Auto-detects type unless specified. For datasets, use dataset_structure first to discover configs, splits, ' +
79
+ 'sizes, and schema. Use dataset_preview only when config and split are known, unless the dataset has a single config/split.',
66
80
  annotations: {
67
81
  title: 'Hub Repo Details',
68
82
  destructiveHint: false,
@@ -121,6 +135,7 @@ export const DOC_FETCH_CONFIG: BrowserToolConfig = {
121
135
  export const SPACE_SEARCH_TOOL_ID = SEMANTIC_SEARCH_TOOL_CONFIG.name;
122
136
  export const MODEL_SEARCH_TOOL_ID = 'model_search';
123
137
  export const REPO_SEARCH_TOOL_ID = REPO_SEARCH_TOOL_CONFIG.name;
138
+ export const CREATE_REPO_TOOL_ID = CREATE_REPO_TOOL_CONFIG.name;
124
139
  export const MODEL_DETAIL_TOOL_ID = 'model_details';
125
140
  export const PAPER_SEARCH_TOOL_ID = PAPER_SEARCH_TOOL_CONFIG.name;
126
141
  export const DATASET_SEARCH_TOOL_ID = 'dataset_search';
@@ -139,6 +154,7 @@ export const ALL_BUILTIN_TOOL_IDS = [
139
154
  SPACE_SEARCH_TOOL_ID,
140
155
  MODEL_SEARCH_TOOL_ID,
141
156
  REPO_SEARCH_TOOL_ID,
157
+ CREATE_REPO_TOOL_ID,
142
158
  MODEL_DETAIL_TOOL_ID,
143
159
  PAPER_SEARCH_TOOL_ID,
144
160
  DATASET_SEARCH_TOOL_ID,
@@ -168,6 +184,7 @@ export const TOOL_ID_GROUPS = {
168
184
  hf_api: [
169
185
  SPACE_SEARCH_TOOL_ID,
170
186
  REPO_SEARCH_TOOL_ID,
187
+ CREATE_REPO_TOOL_ID,
171
188
  PAPER_SEARCH_TOOL_ID,
172
189
  HUB_REPO_DETAILS_TOOL_ID,
173
190
  DOCS_SEMANTIC_SEARCH_TOOL_ID,
package/src/index.ts CHANGED
@@ -8,7 +8,9 @@ export * from './utilities.js';
8
8
  export * from './paper-search.js';
9
9
  export * from './dataset-search.js';
10
10
  export * from './repo-search.js';
11
+ export * from './create-repo.js';
11
12
  export * from './dataset-detail.js';
13
+ export * from './dataset-viewer-inspect.js';
12
14
  export * from './hub-inspect.js';
13
15
  export * from './duplicate-space.js';
14
16
  export * from './space-info.js';
package/src/jobs/types.ts CHANGED
@@ -1,3 +1,4 @@
1
+ import type { SpaceHardwareFlavor } from '@huggingface/hub';
1
2
  import { z } from 'zod';
2
3
 
3
4
  /**
@@ -20,8 +21,8 @@ export const GPU_FLAVORS = [
20
21
  'a10g-largex2',
21
22
  'a10g-largex4',
22
23
  'a100-large',
23
- 'h100',
24
- 'h100x8',
24
+ 'a100x4',
25
+ 'a100x8',
25
26
  ] as const;
26
27
 
27
28
  export const SPECIALIZED_FLAVORS = ['inf2x6'] as const;
@@ -30,6 +31,13 @@ export const ALL_FLAVORS = [...CPU_FLAVORS, ...GPU_FLAVORS, ...SPECIALIZED_FLAVO
30
31
 
31
32
  export type JobFlavor = (typeof ALL_FLAVORS)[number];
32
33
 
34
+ function assertExhaustiveHardwareUnion<T extends never>(_value?: T): void {
35
+ void _value;
36
+ }
37
+
38
+ assertExhaustiveHardwareUnion<Exclude<SpaceHardwareFlavor, JobFlavor>>();
39
+ assertExhaustiveHardwareUnion<Exclude<JobFlavor, SpaceHardwareFlavor>>();
40
+
33
41
  /**
34
42
  * Job status stages (from OpenAPI spec)
35
43
  */
package/src/model-detail.ts CHANGED
@@ -259,7 +259,7 @@ export class ModelDetailTool {
259
259
 
260
260
  // Fetch and append README content if requested
261
261
  if (includeReadme) {
262
- const readmeContent = await fetchReadmeContent(modelDetails.name, 'models', false);
262
+ const readmeContent = await fetchReadmeContent(modelDetails.name, 'models');
263
263
  if (readmeContent) {
264
264
  const result = formatModelDetails(modelDetails);
265
265
  result.formatted += '\n\n## README\n<modelcard-readme>\n\n' + readmeContent.trim() + '\n</modelcard-readme>';
package/src/readme-utils.ts CHANGED
@@ -5,21 +5,16 @@
5
5
  import { fetchWithProfile, NETWORK_FETCH_PROFILES } from './network/fetch-profile.js';
6
6
 
7
7
  // Maximum number of characters to include from a README
8
- const DEFAULT_MAX_README_CHARS = 10_000;
8
+ const DEFAULT_MAX_README_CHARS = 40_000;
9
9
 
10
10
  /**
11
11
  * Fetches README content from a Hugging Face repository
12
12
  *
13
13
  * @param repoName The resolved repository name (e.g., 'rajpurkar/squad', 'openai-community/gpt2')
14
14
  * @param type The repository type ('models' or 'datasets')
15
- * @param includeYaml Whether to include YAML frontmatter (default: false)
16
15
  * @returns Promise<string | null> The README content or null if not found/error
17
16
  */
18
- export async function fetchReadmeContent(
19
- repoName: string,
20
- type: 'models' | 'datasets',
21
- includeYaml: boolean = false
22
- ): Promise<string | null> {
17
+ export async function fetchReadmeContent(repoName: string, type: 'models' | 'datasets'): Promise<string | null> {
23
18
  try {
24
19
  // Construct the URL based on repository type
25
20
  const baseUrl =
@@ -39,11 +34,6 @@ export async function fetchReadmeContent(
39
34
 
40
35
  let content = await response.text();
41
36
 
42
- // If includeYaml is false, strip YAML frontmatter
43
- if (!includeYaml) {
44
- content = stripYamlFrontmatter(content);
45
- }
46
-
47
37
  // Truncate overly long READMEs to a sensible default size
48
38
  if (content.length > DEFAULT_MAX_README_CHARS) {
49
39
  const truncated = content.slice(0, DEFAULT_MAX_README_CHARS);
@@ -62,23 +52,3 @@ export async function fetchReadmeContent(
62
52
  return null;
63
53
  }
64
54
  }
65
-
66
- /**
67
- * Strips YAML frontmatter from markdown content
68
- *
69
- * @param content The full markdown content
70
- * @returns The content with YAML frontmatter removed
71
- */
72
- function stripYamlFrontmatter(content: string): string {
73
- // Match YAML frontmatter: starts with ---, ends with ---
74
- const yamlPattern = /^(\s*---[\r\n]+)([\S\s]*?)([\r\n]+---(\r\n|\n|$))/;
75
- const match = content.match(yamlPattern);
76
-
77
- if (match) {
78
- // Return everything after the closing ---
79
- return content.substring(match[0].length);
80
- }
81
-
82
- // No YAML frontmatter found, return original content
83
- return content;
84
- }
package/src/tool-ids.ts CHANGED
@@ -10,6 +10,7 @@ import {
10
10
  MODEL_DETAIL_PROMPT_CONFIG,
11
11
  PAPER_SEARCH_TOOL_CONFIG,
12
12
  REPO_SEARCH_TOOL_CONFIG,
13
+ CREATE_REPO_TOOL_CONFIG,
13
14
  DATASET_SEARCH_TOOL_CONFIG,
14
15
  DATASET_DETAIL_TOOL_CONFIG,
15
16
  DATASET_DETAIL_PROMPT_CONFIG,
@@ -30,6 +31,7 @@ import {
30
31
  export const SPACE_SEARCH_TOOL_ID = SEMANTIC_SEARCH_TOOL_CONFIG.name;
31
32
  export const MODEL_SEARCH_TOOL_ID = MODEL_SEARCH_TOOL_CONFIG.name;
32
33
  export const REPO_SEARCH_TOOL_ID = REPO_SEARCH_TOOL_CONFIG.name;
34
+ export const CREATE_REPO_TOOL_ID = CREATE_REPO_TOOL_CONFIG.name;
33
35
  export const MODEL_DETAIL_TOOL_ID = MODEL_DETAIL_TOOL_CONFIG.name;
34
36
  export const PAPER_SEARCH_TOOL_ID = PAPER_SEARCH_TOOL_CONFIG.name;
35
37
  export const DATASET_SEARCH_TOOL_ID = DATASET_SEARCH_TOOL_CONFIG.name;
@@ -53,6 +55,7 @@ export const ALL_BUILTIN_TOOL_IDS = [
53
55
  SPACE_SEARCH_TOOL_ID,
54
56
  MODEL_SEARCH_TOOL_ID,
55
57
  REPO_SEARCH_TOOL_ID,
58
+ CREATE_REPO_TOOL_ID,
56
59
  MODEL_DETAIL_TOOL_ID,
57
60
  PAPER_SEARCH_TOOL_ID,
58
61
  DATASET_SEARCH_TOOL_ID,
@@ -82,6 +85,7 @@ export const TOOL_ID_GROUPS = {
82
85
  hf_api: [
83
86
  SPACE_SEARCH_TOOL_ID,
84
87
  REPO_SEARCH_TOOL_ID,
88
+ CREATE_REPO_TOOL_ID,
85
89
  PAPER_SEARCH_TOOL_ID,
86
90
  HUB_REPO_DETAILS_TOOL_ID,
87
91
  DOCS_SEMANTIC_SEARCH_TOOL_ID,