@llmindset/hf-mcp 0.3.11 → 0.3.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. package/dist/dataset-detail.js +1 -1
  2. package/dist/dataset-detail.js.map +1 -1
  3. package/dist/dataset-viewer-inspect.d.ts +48 -0
  4. package/dist/dataset-viewer-inspect.d.ts.map +1 -0
  5. package/dist/dataset-viewer-inspect.js +660 -0
  6. package/dist/dataset-viewer-inspect.js.map +1 -0
  7. package/dist/dataset-viewer-inspect.test.d.ts +2 -0
  8. package/dist/dataset-viewer-inspect.test.d.ts.map +1 -0
  9. package/dist/dataset-viewer-inspect.test.js +218 -0
  10. package/dist/dataset-viewer-inspect.test.js.map +1 -0
  11. package/dist/gradio-files.d.ts +2 -2
  12. package/dist/hub-inspect.d.ts +17 -0
  13. package/dist/hub-inspect.d.ts.map +1 -1
  14. package/dist/hub-inspect.js +68 -4
  15. package/dist/hub-inspect.js.map +1 -1
  16. package/dist/hub-inspect.test.d.ts +2 -0
  17. package/dist/hub-inspect.test.d.ts.map +1 -0
  18. package/dist/hub-inspect.test.js +24 -0
  19. package/dist/hub-inspect.test.js.map +1 -0
  20. package/dist/index.browser.d.ts.map +1 -1
  21. package/dist/index.browser.js +2 -1
  22. package/dist/index.browser.js.map +1 -1
  23. package/dist/index.d.ts +1 -0
  24. package/dist/index.d.ts.map +1 -1
  25. package/dist/index.js +1 -0
  26. package/dist/index.js.map +1 -1
  27. package/dist/model-detail.js +1 -1
  28. package/dist/model-detail.js.map +1 -1
  29. package/dist/readme-utils.d.ts +1 -1
  30. package/dist/readme-utils.d.ts.map +1 -1
  31. package/dist/readme-utils.js +2 -13
  32. package/dist/readme-utils.js.map +1 -1
  33. package/package.json +1 -1
  34. package/src/dataset-detail.ts +1 -1
  35. package/src/dataset-viewer-inspect.test.ts +234 -0
  36. package/src/dataset-viewer-inspect.ts +809 -0
  37. package/src/hub-inspect.test.ts +28 -0
  38. package/src/hub-inspect.ts +88 -4
  39. package/src/index.browser.ts +2 -1
  40. package/src/index.ts +1 -0
  41. package/src/model-detail.ts +1 -1
  42. package/src/readme-utils.ts +2 -32
@@ -0,0 +1,28 @@
1
+ import { describe, expect, it } from 'vitest';
2
+ import { HUB_REPO_DETAILS_TOOL_CONFIG } from './hub-inspect.js';
3
+
4
+ describe('HUB_REPO_DETAILS_TOOL_CONFIG', () => {
5
+ it('defaults to overview and accepts dataset viewer operations', () => {
6
+ const parsed = HUB_REPO_DETAILS_TOOL_CONFIG.schema.parse({
7
+ repo_ids: ['rajpurkar/squad'],
8
+ repo_type: 'dataset',
9
+ operations: ['dataset_structure', 'dataset_preview'],
10
+ config: 'plain_text',
11
+ split: 'train',
12
+ offset: 0,
13
+ limit: 5,
14
+ });
15
+
16
+ expect(parsed.include_readme).toBe(false);
17
+ expect(parsed.operations).toEqual(['dataset_structure', 'dataset_preview']);
18
+ });
19
+
20
+ it('does not expose a redundant readme operation', () => {
21
+ expect(() =>
22
+ HUB_REPO_DETAILS_TOOL_CONFIG.schema.parse({
23
+ repo_ids: ['rajpurkar/squad'],
24
+ operations: ['readme'],
25
+ })
26
+ ).toThrow();
27
+ });
28
+ });
@@ -4,12 +4,17 @@ import { ModelDetailTool } from './model-detail.js';
4
4
  import { DatasetDetailTool } from './dataset-detail.js';
5
5
  import { spaceInfo } from '@huggingface/hub';
6
6
  import { formatDate } from './utilities.js';
7
+ import { DatasetViewerInspector } from './dataset-viewer-inspect.js';
8
+
9
+ const HUB_INSPECT_OPERATIONS = ['overview', 'dataset_structure', 'dataset_preview'] as const;
7
10
 
8
11
  export const HUB_REPO_DETAILS_TOOL_CONFIG = {
9
12
  name: 'hub_repo_details',
10
13
  description:
11
14
  'Get details for one or more Hugging Face repos (model, dataset, or space). ' +
12
- 'Auto-detects type unless specified.',
15
+ 'Auto-detects type unless specified. For datasets, use operations: overview, dataset_structure, dataset_preview. ' +
16
+ 'Use dataset_structure first to discover configs, splits, sizes, and schema. Use dataset_preview only when ' +
17
+ 'config and split are known, unless the dataset has a single config/split.',
13
18
  schema: z.object({
14
19
  repo_ids: z
15
20
  .array(z.string().min(1))
@@ -18,6 +23,26 @@ export const HUB_REPO_DETAILS_TOOL_CONFIG = {
18
23
  .describe('Repo IDs for (models|dataset/space) - usually in author/name format (e.g. openai/gpt-oss-120b)'),
19
24
  repo_type: z.enum(['model', 'dataset', 'space']).optional().describe('Specify lookup type; otherwise auto-detects'),
20
25
  include_readme: z.boolean().default(false).describe('Include README from the repo'),
26
+ operations: z
27
+ .array(z.enum(HUB_INSPECT_OPERATIONS))
28
+ .optional()
29
+ .describe(
30
+ 'Details to return. Defaults to ["overview"]. For datasets, prefer ["overview", "dataset_structure"] first; then call ["dataset_preview"] with config and split.'
31
+ ),
32
+ config: z
33
+ .string()
34
+ .optional()
35
+ .describe(
36
+ 'Dataset Viewer config. Required for dataset_preview when the dataset has multiple config/split options. Discover via dataset_structure.'
37
+ ),
38
+ split: z
39
+ .string()
40
+ .optional()
41
+ .describe(
42
+ 'Dataset Viewer split. Required for dataset_preview when the dataset has multiple config/split options. Discover via dataset_structure.'
43
+ ),
44
+ offset: z.number().int().nonnegative().optional().describe('Row offset for dataset_preview. Defaults to 0.'),
45
+ limit: z.number().int().optional().describe('Row count for dataset_preview. Defaults to 5 and is clamped to 1-100.'),
21
46
  }),
22
47
  annotations: {
23
48
  title: 'Hub Repo Details',
@@ -32,11 +57,13 @@ export type HubInspectParams = z.infer<typeof HUB_REPO_DETAILS_TOOL_CONFIG.schem
32
57
  export class HubInspectTool {
33
58
  private readonly modelDetail: ModelDetailTool;
34
59
  private readonly datasetDetail: DatasetDetailTool;
60
+ private readonly datasetViewer: DatasetViewerInspector;
35
61
  private readonly hubUrl?: string;
36
62
 
37
63
  constructor(hfToken?: string, hubUrl?: string) {
38
64
  this.modelDetail = new ModelDetailTool(hfToken, hubUrl);
39
65
  this.datasetDetail = new DatasetDetailTool(hfToken, hubUrl);
66
+ this.datasetViewer = new DatasetViewerInspector(hfToken, { hubUrl });
40
67
  this.hubUrl = hubUrl;
41
68
  }
42
69
 
@@ -46,7 +73,7 @@ export class HubInspectTool {
46
73
 
47
74
  for (const id of params.repo_ids) {
48
75
  try {
49
- const section = await this.inspectSingle(id, params.repo_type, includeReadme);
76
+ const section = await this.inspectSingle(id, params, includeReadme);
50
77
  parts.push(section);
51
78
  successCount += 1;
52
79
  } catch (err) {
@@ -66,20 +93,30 @@ export class HubInspectTool {
66
93
 
67
94
  private async inspectSingle(
68
95
  repoId: string,
69
- type: 'model' | 'dataset' | 'space' | undefined,
96
+ params: HubInspectParams,
70
97
  includeReadme: boolean
71
98
  ): Promise<string> {
99
+ const type = params.repo_type;
100
+ const operations = normalizeOperations(params.operations);
101
+ const hasDatasetOperation = operations.some((operation) => operation === 'dataset_structure' || operation === 'dataset_preview');
102
+
72
103
  // If caller constrained the type, do only that
73
104
  if (type === 'model') {
105
+ if (hasDatasetOperation) return operationMismatch(repoId, 'model', operations);
74
106
  return (await this.modelDetail.getDetails(repoId, includeReadme)).formatted;
75
107
  }
76
108
  if (type === 'dataset') {
77
- return (await this.datasetDetail.getDetails(repoId, includeReadme)).formatted;
109
+ return await this.getDatasetDetails(repoId, params, includeReadme, operations);
78
110
  }
79
111
  if (type === 'space') {
112
+ if (hasDatasetOperation) return operationMismatch(repoId, 'space', operations);
80
113
  return await this.getSpaceDetails(repoId);
81
114
  }
82
115
 
116
+ if (hasDatasetOperation) {
117
+ return await this.getDatasetDetails(repoId, params, includeReadme, operations);
118
+ }
119
+
83
120
  // Auto-detect: attempt all three and aggregate. The same id may exist for multiple types.
84
121
  const matches: string[] = [];
85
122
 
@@ -111,6 +148,33 @@ export class HubInspectTool {
111
148
  return matches.join('\n\n---\n\n');
112
149
  }
113
150
 
151
+ private async getDatasetDetails(
152
+ repoId: string,
153
+ params: HubInspectParams,
154
+ includeReadme: boolean,
155
+ operations: HubInspectOperation[]
156
+ ): Promise<string> {
157
+ const sections: string[] = [];
158
+ if (operations.includes('overview')) {
159
+ const overview = (await this.datasetDetail.getDetails(repoId, includeReadme)).formatted;
160
+ sections.push(`${overview}\n\n${datasetDrillDownHint()}`);
161
+ }
162
+ if (operations.includes('dataset_structure')) {
163
+ sections.push(await this.datasetViewer.getStructure(repoId, { config: params.config, split: params.split }));
164
+ }
165
+ if (operations.includes('dataset_preview')) {
166
+ sections.push(
167
+ await this.datasetViewer.getPreview(repoId, {
168
+ config: params.config,
169
+ split: params.split,
170
+ offset: params.offset,
171
+ limit: params.limit,
172
+ })
173
+ );
174
+ }
175
+ return sections.join('\n\n');
176
+ }
177
+
114
178
  private async getSpaceDetails(spaceId: string): Promise<string> {
115
179
  const additionalFields = ['author', 'tags', 'runtime', 'subdomain', 'sha'] as const;
116
180
  const info = await spaceInfo<(typeof additionalFields)[number]>({
@@ -142,3 +206,23 @@ export class HubInspectTool {
142
206
  return lines.join('\n');
143
207
  }
144
208
  }
209
+
210
+ type HubInspectOperation = (typeof HUB_INSPECT_OPERATIONS)[number];
211
+
212
+ function normalizeOperations(operations: readonly HubInspectOperation[] | undefined): HubInspectOperation[] {
213
+ return operations && operations.length > 0 ? [...new Set(operations)] : ['overview'];
214
+ }
215
+
216
+ function operationMismatch(repoId: string, type: 'model' | 'space', operations: HubInspectOperation[]): string {
217
+ const requested = operations.filter((operation) => operation.startsWith('dataset_')).join(', ');
218
+ return `# ${repoId}\n\nRequested dataset operation(s) \`${requested}\`, but this repo was requested as a ${type}. Dataset Viewer operations only apply to dataset repos.`;
219
+ }
220
+
221
+ function datasetDrillDownHint(): string {
222
+ return [
223
+ '## Available deeper inspections',
224
+ 'Call `hub_repo_details` with:',
225
+ '- `operations: ["dataset_structure"]` for configs, splits, sizes, parquet exports, and schema.',
226
+ '- `operations: ["dataset_preview"]` with `config` and `split` for sample rows.',
227
+ ].join('\n');
228
+ }
@@ -75,7 +75,8 @@ export const HUB_REPO_DETAILS_TOOL_CONFIG: BrowserToolConfig = {
75
75
  name: 'hub_repo_details',
76
76
  description:
77
77
  'Get details for one or more Hugging Face repos (model, dataset, or space). ' +
78
- 'Auto-detects type unless specified.',
78
+ 'Auto-detects type unless specified. For datasets, use dataset_structure first to discover configs, splits, ' +
79
+ 'sizes, and schema. Use dataset_preview only when config and split are known, unless the dataset has a single config/split.',
79
80
  annotations: {
80
81
  title: 'Hub Repo Details',
81
82
  destructiveHint: false,
package/src/index.ts CHANGED
@@ -10,6 +10,7 @@ export * from './dataset-search.js';
10
10
  export * from './repo-search.js';
11
11
  export * from './create-repo.js';
12
12
  export * from './dataset-detail.js';
13
+ export * from './dataset-viewer-inspect.js';
13
14
  export * from './hub-inspect.js';
14
15
  export * from './duplicate-space.js';
15
16
  export * from './space-info.js';
@@ -259,7 +259,7 @@ export class ModelDetailTool {
259
259
 
260
260
  // Fetch and append README content if requested
261
261
  if (includeReadme) {
262
- const readmeContent = await fetchReadmeContent(modelDetails.name, 'models', false);
262
+ const readmeContent = await fetchReadmeContent(modelDetails.name, 'models');
263
263
  if (readmeContent) {
264
264
  const result = formatModelDetails(modelDetails);
265
265
  result.formatted += '\n\n## README\n<modelcard-readme>\n\n' + readmeContent.trim() + '\n</modelcard-readme>';
@@ -5,21 +5,16 @@
5
5
  import { fetchWithProfile, NETWORK_FETCH_PROFILES } from './network/fetch-profile.js';
6
6
 
7
7
  // Maximum number of characters to include from a README
8
- const DEFAULT_MAX_README_CHARS = 10_000;
8
+ const DEFAULT_MAX_README_CHARS = 40_000;
9
9
 
10
10
  /**
11
11
  * Fetches README content from a Hugging Face repository
12
12
  *
13
13
  * @param repoName The resolved repository name (e.g., 'rajpurkar/squad', 'openai-community/gpt2')
14
14
  * @param type The repository type ('models' or 'datasets')
15
- * @param includeYaml Whether to include YAML frontmatter (default: false)
16
15
  * @returns Promise<string | null> The README content or null if not found/error
17
16
  */
18
- export async function fetchReadmeContent(
19
- repoName: string,
20
- type: 'models' | 'datasets',
21
- includeYaml: boolean = false
22
- ): Promise<string | null> {
17
+ export async function fetchReadmeContent(repoName: string, type: 'models' | 'datasets'): Promise<string | null> {
23
18
  try {
24
19
  // Construct the URL based on repository type
25
20
  const baseUrl =
@@ -39,11 +34,6 @@ export async function fetchReadmeContent(
39
34
 
40
35
  let content = await response.text();
41
36
 
42
- // If includeYaml is false, strip YAML frontmatter
43
- if (!includeYaml) {
44
- content = stripYamlFrontmatter(content);
45
- }
46
-
47
37
  // Truncate overly long READMEs to a sensible default size
48
38
  if (content.length > DEFAULT_MAX_README_CHARS) {
49
39
  const truncated = content.slice(0, DEFAULT_MAX_README_CHARS);
@@ -62,23 +52,3 @@ export async function fetchReadmeContent(
62
52
  return null;
63
53
  }
64
54
  }
65
-
66
- /**
67
- * Strips YAML frontmatter from markdown content
68
- *
69
- * @param content The full markdown content
70
- * @returns The content with YAML frontmatter removed
71
- */
72
- function stripYamlFrontmatter(content: string): string {
73
- // Match YAML frontmatter: starts with ---, ends with ---
74
- const yamlPattern = /^(\s*---[\r\n]+)([\S\s]*?)([\r\n]+---(\r\n|\n|$))/;
75
- const match = content.match(yamlPattern);
76
-
77
- if (match) {
78
- // Return everything after the closing ---
79
- return content.substring(match[0].length);
80
- }
81
-
82
- // No YAML frontmatter found, return original content
83
- return content;
84
- }