npm - @llmindset/hf-mcp - Versions diffs - 0.3.10 → 0.3.12 - Mend

@llmindset/hf-mcp 0.3.10 → 0.3.12

This diff shows the differences between publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (64)

package/dist/create-repo.d.ts +47 -0
package/dist/create-repo.d.ts.map +1 -0
package/dist/create-repo.js +83 -0
package/dist/create-repo.js.map +1 -0
package/dist/create-repo.test.d.ts +2 -0
package/dist/create-repo.test.d.ts.map +1 -0
package/dist/create-repo.test.js +155 -0
package/dist/create-repo.test.js.map +1 -0
package/dist/dataset-detail.js +1 -1
package/dist/dataset-detail.js.map +1 -1
package/dist/dataset-viewer-inspect.d.ts +48 -0
package/dist/dataset-viewer-inspect.d.ts.map +1 -0
package/dist/dataset-viewer-inspect.js +660 -0
package/dist/dataset-viewer-inspect.js.map +1 -0
package/dist/dataset-viewer-inspect.test.d.ts +2 -0
package/dist/dataset-viewer-inspect.test.d.ts.map +1 -0
package/dist/dataset-viewer-inspect.test.js +218 -0
package/dist/dataset-viewer-inspect.test.js.map +1 -0
package/dist/gradio-files.d.ts +2 -2
package/dist/hub-inspect.d.ts +19 -2
package/dist/hub-inspect.d.ts.map +1 -1
package/dist/hub-inspect.js +68 -4
package/dist/hub-inspect.js.map +1 -1
package/dist/hub-inspect.test.d.ts +2 -0
package/dist/hub-inspect.test.d.ts.map +1 -0
package/dist/hub-inspect.test.js +24 -0
package/dist/hub-inspect.test.js.map +1 -0
package/dist/index.browser.d.ts +5 -3
package/dist/index.browser.d.ts.map +1 -1
package/dist/index.browser.js +16 -1
package/dist/index.browser.js.map +1 -1
package/dist/index.d.ts +2 -0
package/dist/index.d.ts.map +1 -1
package/dist/index.js +2 -0
package/dist/index.js.map +1 -1
package/dist/jobs/types.d.ts +23 -23
package/dist/jobs/types.d.ts.map +1 -1
package/dist/jobs/types.js +7 -2
package/dist/jobs/types.js.map +1 -1
package/dist/model-detail.js +1 -1
package/dist/model-detail.js.map +1 -1
package/dist/readme-utils.d.ts +1 -1
package/dist/readme-utils.d.ts.map +1 -1
package/dist/readme-utils.js +2 -13
package/dist/readme-utils.js.map +1 -1
package/dist/repo-search.d.ts +2 -2
package/dist/tool-ids.d.ts +4 -3
package/dist/tool-ids.d.ts.map +1 -1
package/dist/tool-ids.js +4 -1
package/dist/tool-ids.js.map +1 -1
package/package.json +1 -1
package/src/create-repo.test.ts +182 -0
package/src/create-repo.ts +103 -0
package/src/dataset-detail.ts +1 -1
package/src/dataset-viewer-inspect.test.ts +234 -0
package/src/dataset-viewer-inspect.ts +809 -0
package/src/hub-inspect.test.ts +28 -0
package/src/hub-inspect.ts +88 -4
package/src/index.browser.ts +18 -1
package/src/index.ts +2 -0
package/src/jobs/types.ts +10 -2
package/src/model-detail.ts +1 -1
package/src/readme-utils.ts +2 -32
package/src/tool-ids.ts +4 -0

package/src/hub-inspect.test.ts ADDED Viewed

@@ -0,0 +1,28 @@
+import { describe, expect, it } from 'vitest';
+import { HUB_REPO_DETAILS_TOOL_CONFIG } from './hub-inspect.js';
+describe('HUB_REPO_DETAILS_TOOL_CONFIG', () => {
+	it('defaults to overview and accepts dataset viewer operations', () => {
+		const parsed = HUB_REPO_DETAILS_TOOL_CONFIG.schema.parse({
+			repo_ids: ['rajpurkar/squad'],
+			repo_type: 'dataset',
+			operations: ['dataset_structure', 'dataset_preview'],
+			config: 'plain_text',
+			split: 'train',
+			offset: 0,
+			limit: 5,
+		});
+		expect(parsed.include_readme).toBe(false);
+		expect(parsed.operations).toEqual(['dataset_structure', 'dataset_preview']);
+	});
+	it('does not expose a redundant readme operation', () => {
+		expect(() =>
+			HUB_REPO_DETAILS_TOOL_CONFIG.schema.parse({
+				repo_ids: ['rajpurkar/squad'],
+				operations: ['readme'],
+			})
+		).toThrow();
+	});
+});

package/src/hub-inspect.ts CHANGED Viewed

@@ -4,12 +4,17 @@ import { ModelDetailTool } from './model-detail.js';
 import { DatasetDetailTool } from './dataset-detail.js';
 import { spaceInfo } from '@huggingface/hub';
 import { formatDate } from './utilities.js';
+import { DatasetViewerInspector } from './dataset-viewer-inspect.js';
+const HUB_INSPECT_OPERATIONS = ['overview', 'dataset_structure', 'dataset_preview'] as const;
 export const HUB_REPO_DETAILS_TOOL_CONFIG = {
 	name: 'hub_repo_details',
 	description:
 		'Get details for one or more Hugging Face repos (model, dataset, or space). ' +
-		'Auto-detects type unless specified.',
+		'Auto-detects type unless specified. For datasets, use operations: overview, dataset_structure, dataset_preview. ' +
+		'Use dataset_structure first to discover configs, splits, sizes, and schema. Use dataset_preview only when ' +
+		'config and split are known, unless the dataset has a single config/split.',
 	schema: z.object({
 		repo_ids: z
 			.array(z.string().min(1))
@@ -18,6 +23,26 @@ export const HUB_REPO_DETAILS_TOOL_CONFIG = {
 			.describe('Repo IDs for (models|dataset/space) - usually in author/name format (e.g. openai/gpt-oss-120b)'),
 		repo_type: z.enum(['model', 'dataset', 'space']).optional().describe('Specify lookup type; otherwise auto-detects'),
 		include_readme: z.boolean().default(false).describe('Include README from the repo'),
+		operations: z
+			.array(z.enum(HUB_INSPECT_OPERATIONS))
+			.optional()
+			.describe(
+				'Details to return. Defaults to ["overview"]. For datasets, prefer ["overview", "dataset_structure"] first; then call ["dataset_preview"] with config and split.'
+			),
+		config: z
+			.string()
+			.optional()
+			.describe(
+				'Dataset Viewer config. Required for dataset_preview when the dataset has multiple config/split options. Discover via dataset_structure.'
+			),
+		split: z
+			.string()
+			.optional()
+			.describe(
+				'Dataset Viewer split. Required for dataset_preview when the dataset has multiple config/split options. Discover via dataset_structure.'
+			),
+		offset: z.number().int().nonnegative().optional().describe('Row offset for dataset_preview. Defaults to 0.'),
+		limit: z.number().int().optional().describe('Row count for dataset_preview. Defaults to 5 and is clamped to 1-100.'),
 	}),
 	annotations: {
 		title: 'Hub Repo Details',
@@ -32,11 +57,13 @@ export type HubInspectParams = z.infer<typeof HUB_REPO_DETAILS_TOOL_CONFIG.schem
 export class HubInspectTool {
 	private readonly modelDetail: ModelDetailTool;
 	private readonly datasetDetail: DatasetDetailTool;
+	private readonly datasetViewer: DatasetViewerInspector;
 	private readonly hubUrl?: string;
 	constructor(hfToken?: string, hubUrl?: string) {
 		this.modelDetail = new ModelDetailTool(hfToken, hubUrl);
 		this.datasetDetail = new DatasetDetailTool(hfToken, hubUrl);
+		this.datasetViewer = new DatasetViewerInspector(hfToken, { hubUrl });
 		this.hubUrl = hubUrl;
 	}
@@ -46,7 +73,7 @@ export class HubInspectTool {
 		for (const id of params.repo_ids) {
 			try {
-				const section = await this.inspectSingle(id, params.repo_type, includeReadme);
+				const section = await this.inspectSingle(id, params, includeReadme);
 				parts.push(section);
 				successCount += 1;
 			} catch (err) {
@@ -66,20 +93,30 @@ export class HubInspectTool {
 	private async inspectSingle(
 		repoId: string,
-		type: 'model' | 'dataset' | 'space' | undefined,
+		params: HubInspectParams,
 		includeReadme: boolean
 	): Promise<string> {
+		const type = params.repo_type;
+		const operations = normalizeOperations(params.operations);
+		const hasDatasetOperation = operations.some((operation) => operation === 'dataset_structure' || operation === 'dataset_preview');
 		// If caller constrained the type, do only that
 		if (type === 'model') {
+			if (hasDatasetOperation) return operationMismatch(repoId, 'model', operations);
 			return (await this.modelDetail.getDetails(repoId, includeReadme)).formatted;
 		}
 		if (type === 'dataset') {
-			return (await this.datasetDetail.getDetails(repoId, includeReadme)).formatted;
+			return await this.getDatasetDetails(repoId, params, includeReadme, operations);
 		}
 		if (type === 'space') {
+			if (hasDatasetOperation) return operationMismatch(repoId, 'space', operations);
 			return await this.getSpaceDetails(repoId);
 		}
+		if (hasDatasetOperation) {
+			return await this.getDatasetDetails(repoId, params, includeReadme, operations);
+		}
 		// Auto-detect: attempt all three and aggregate. The same id may exist for multiple types.
 		const matches: string[] = [];
@@ -111,6 +148,33 @@ export class HubInspectTool {
 		return matches.join('\n\n---\n\n');
 	}
+	private async getDatasetDetails(
+		repoId: string,
+		params: HubInspectParams,
+		includeReadme: boolean,
+		operations: HubInspectOperation[]
+	): Promise<string> {
+		const sections: string[] = [];
+		if (operations.includes('overview')) {
+			const overview = (await this.datasetDetail.getDetails(repoId, includeReadme)).formatted;
+			sections.push(`${overview}\n\n${datasetDrillDownHint()}`);
+		}
+		if (operations.includes('dataset_structure')) {
+			sections.push(await this.datasetViewer.getStructure(repoId, { config: params.config, split: params.split }));
+		}
+		if (operations.includes('dataset_preview')) {
+			sections.push(
+				await this.datasetViewer.getPreview(repoId, {
+					config: params.config,
+					split: params.split,
+					offset: params.offset,
+					limit: params.limit,
+				})
+			);
+		}
+		return sections.join('\n\n');
+	}
 	private async getSpaceDetails(spaceId: string): Promise<string> {
 		const additionalFields = ['author', 'tags', 'runtime', 'subdomain', 'sha'] as const;
 		const info = await spaceInfo<(typeof additionalFields)[number]>({
@@ -142,3 +206,23 @@ export class HubInspectTool {
 		return lines.join('\n');
 	}
 }
+type HubInspectOperation = (typeof HUB_INSPECT_OPERATIONS)[number];
+function normalizeOperations(operations: readonly HubInspectOperation[] | undefined): HubInspectOperation[] {
+	return operations && operations.length > 0 ? [...new Set(operations)] : ['overview'];
+}
+function operationMismatch(repoId: string, type: 'model' | 'space', operations: HubInspectOperation[]): string {
+	const requested = operations.filter((operation) => operation.startsWith('dataset_')).join(', ');
+	return `# ${repoId}\n\nRequested dataset operation(s) \`${requested}\`, but this repo was requested as a ${type}. Dataset Viewer operations only apply to dataset repos.`;
+}
+function datasetDrillDownHint(): string {
+	return [
+		'## Available deeper inspections',
+		'Call `hub_repo_details` with:',
+		'- `operations: ["dataset_structure"]` for configs, splits, sizes, parquet exports, and schema.',
+		'- `operations: ["dataset_preview"]` with `config` and `split` for sample rows.',
+	].join('\n');
+}

package/src/index.browser.ts CHANGED Viewed

@@ -44,6 +44,19 @@ export const REPO_SEARCH_TOOL_CONFIG: BrowserToolConfig = {
 	},
 };
+export const CREATE_REPO_TOOL_CONFIG: BrowserToolConfig = {
+	name: 'create_repo',
+	description:
+		'Create a Hugging Face model, dataset, Space, or bucket repository. ' +
+		"name must be fully qualified, for example 'username/repo-name'.",
+	annotations: {
+		title: 'Create Hugging Face Repository',
+		destructiveHint: false,
+		readOnlyHint: false,
+		openWorldHint: true,
+	},
+};
 export const PAPER_SEARCH_TOOL_CONFIG: BrowserToolConfig = {
 	name: 'paper_search',
 	description:
@@ -62,7 +75,8 @@ export const HUB_REPO_DETAILS_TOOL_CONFIG: BrowserToolConfig = {
 	name: 'hub_repo_details',
 	description:
 		'Get details for one or more Hugging Face repos (model, dataset, or space). ' +
-		'Auto-detects type unless specified.',
+		'Auto-detects type unless specified. For datasets, use dataset_structure first to discover configs, splits, ' +
+		'sizes, and schema. Use dataset_preview only when config and split are known, unless the dataset has a single config/split.',
 	annotations: {
 		title: 'Hub Repo Details',
 		destructiveHint: false,
@@ -121,6 +135,7 @@ export const DOC_FETCH_CONFIG: BrowserToolConfig = {
 export const SPACE_SEARCH_TOOL_ID = SEMANTIC_SEARCH_TOOL_CONFIG.name;
 export const MODEL_SEARCH_TOOL_ID = 'model_search';
 export const REPO_SEARCH_TOOL_ID = REPO_SEARCH_TOOL_CONFIG.name;
+export const CREATE_REPO_TOOL_ID = CREATE_REPO_TOOL_CONFIG.name;
 export const MODEL_DETAIL_TOOL_ID = 'model_details';
 export const PAPER_SEARCH_TOOL_ID = PAPER_SEARCH_TOOL_CONFIG.name;
 export const DATASET_SEARCH_TOOL_ID = 'dataset_search';
@@ -139,6 +154,7 @@ export const ALL_BUILTIN_TOOL_IDS = [
 	SPACE_SEARCH_TOOL_ID,
 	MODEL_SEARCH_TOOL_ID,
 	REPO_SEARCH_TOOL_ID,
+	CREATE_REPO_TOOL_ID,
 	MODEL_DETAIL_TOOL_ID,
 	PAPER_SEARCH_TOOL_ID,
 	DATASET_SEARCH_TOOL_ID,
@@ -168,6 +184,7 @@ export const TOOL_ID_GROUPS = {
 	hf_api: [
 		SPACE_SEARCH_TOOL_ID,
 		REPO_SEARCH_TOOL_ID,
+		CREATE_REPO_TOOL_ID,
 		PAPER_SEARCH_TOOL_ID,
 		HUB_REPO_DETAILS_TOOL_ID,
 		DOCS_SEMANTIC_SEARCH_TOOL_ID,

package/src/index.ts CHANGED Viewed

@@ -8,7 +8,9 @@ export * from './utilities.js';
 export * from './paper-search.js';
 export * from './dataset-search.js';
 export * from './repo-search.js';
+export * from './create-repo.js';
 export * from './dataset-detail.js';
+export * from './dataset-viewer-inspect.js';
 export * from './hub-inspect.js';
 export * from './duplicate-space.js';
 export * from './space-info.js';

package/src/jobs/types.ts CHANGED Viewed

@@ -1,3 +1,4 @@
+import type { SpaceHardwareFlavor } from '@huggingface/hub';
 import { z } from 'zod';
 /**
@@ -20,8 +21,8 @@ export const GPU_FLAVORS = [
 	'a10g-largex2',
 	'a10g-largex4',
 	'a100-large',
-	'h100',
-	'h100x8',
+	'a100x4',
+	'a100x8',
 ] as const;
 export const SPECIALIZED_FLAVORS = ['inf2x6'] as const;
@@ -30,6 +31,13 @@ export const ALL_FLAVORS = [...CPU_FLAVORS, ...GPU_FLAVORS, ...SPECIALIZED_FLAVO
 export type JobFlavor = (typeof ALL_FLAVORS)[number];
+function assertExhaustiveHardwareUnion<T extends never>(_value?: T): void {
+	void _value;
+}
+assertExhaustiveHardwareUnion<Exclude<SpaceHardwareFlavor, JobFlavor>>();
+assertExhaustiveHardwareUnion<Exclude<JobFlavor, SpaceHardwareFlavor>>();
 /**
  * Job status stages (from OpenAPI spec)
  */

package/src/model-detail.ts CHANGED Viewed

@@ -259,7 +259,7 @@ export class ModelDetailTool {
 			// Fetch and append README content if requested
 			if (includeReadme) {
-				const readmeContent = await fetchReadmeContent(modelDetails.name, 'models', false);
+				const readmeContent = await fetchReadmeContent(modelDetails.name, 'models');
 				if (readmeContent) {
 					const result = formatModelDetails(modelDetails);
 					result.formatted += '\n\n## README\n<modelcard-readme>\n\n' + readmeContent.trim() + '\n</modelcard-readme>';

package/src/readme-utils.ts CHANGED Viewed

@@ -5,21 +5,16 @@
 import { fetchWithProfile, NETWORK_FETCH_PROFILES } from './network/fetch-profile.js';
 // Maximum number of characters to include from a README
-const DEFAULT_MAX_README_CHARS = 10_000;
+const DEFAULT_MAX_README_CHARS = 40_000;
 /**
  * Fetches README content from a Hugging Face repository
  *
  * @param repoName The resolved repository name (e.g., 'rajpurkar/squad', 'openai-community/gpt2')
  * @param type The repository type ('models' or 'datasets')
- * @param includeYaml Whether to include YAML frontmatter (default: false)
  * @returns Promise<string | null> The README content or null if not found/error
  */
-export async function fetchReadmeContent(
-	repoName: string,
-	type: 'models' | 'datasets',
-	includeYaml: boolean = false
-): Promise<string | null> {
+export async function fetchReadmeContent(repoName: string, type: 'models' | 'datasets'): Promise<string | null> {
 	try {
 		// Construct the URL based on repository type
 		const baseUrl =
@@ -39,11 +34,6 @@ export async function fetchReadmeContent(
 		let content = await response.text();
-		// If includeYaml is false, strip YAML frontmatter
-		if (!includeYaml) {
-			content = stripYamlFrontmatter(content);
-		}
 		// Truncate overly long READMEs to a sensible default size
 		if (content.length > DEFAULT_MAX_README_CHARS) {
 			const truncated = content.slice(0, DEFAULT_MAX_README_CHARS);
@@ -62,23 +52,3 @@ export async function fetchReadmeContent(
 		return null;
 	}
 }
-/**
- * Strips YAML frontmatter from markdown content
- *
- * @param content The full markdown content
- * @returns The content with YAML frontmatter removed
- */
-function stripYamlFrontmatter(content: string): string {
-	// Match YAML frontmatter: starts with ---, ends with ---
-	const yamlPattern = /^(\s*---[\r\n]+)([\S\s]*?)([\r\n]+---(\r\n|\n|$))/;
-	const match = content.match(yamlPattern);
-	if (match) {
-		// Return everything after the closing ---
-		return content.substring(match[0].length);
-	}
-	// No YAML frontmatter found, return original content
-	return content;
-}

package/src/tool-ids.ts CHANGED Viewed

@@ -10,6 +10,7 @@ import {
 	MODEL_DETAIL_PROMPT_CONFIG,
 	PAPER_SEARCH_TOOL_CONFIG,
 	REPO_SEARCH_TOOL_CONFIG,
+	CREATE_REPO_TOOL_CONFIG,
 	DATASET_SEARCH_TOOL_CONFIG,
 	DATASET_DETAIL_TOOL_CONFIG,
 	DATASET_DETAIL_PROMPT_CONFIG,
@@ -30,6 +31,7 @@ import {
 export const SPACE_SEARCH_TOOL_ID = SEMANTIC_SEARCH_TOOL_CONFIG.name;
 export const MODEL_SEARCH_TOOL_ID = MODEL_SEARCH_TOOL_CONFIG.name;
 export const REPO_SEARCH_TOOL_ID = REPO_SEARCH_TOOL_CONFIG.name;
+export const CREATE_REPO_TOOL_ID = CREATE_REPO_TOOL_CONFIG.name;
 export const MODEL_DETAIL_TOOL_ID = MODEL_DETAIL_TOOL_CONFIG.name;
 export const PAPER_SEARCH_TOOL_ID = PAPER_SEARCH_TOOL_CONFIG.name;
 export const DATASET_SEARCH_TOOL_ID = DATASET_SEARCH_TOOL_CONFIG.name;
@@ -53,6 +55,7 @@ export const ALL_BUILTIN_TOOL_IDS = [
 	SPACE_SEARCH_TOOL_ID,
 	MODEL_SEARCH_TOOL_ID,
 	REPO_SEARCH_TOOL_ID,
+	CREATE_REPO_TOOL_ID,
 	MODEL_DETAIL_TOOL_ID,
 	PAPER_SEARCH_TOOL_ID,
 	DATASET_SEARCH_TOOL_ID,
@@ -82,6 +85,7 @@ export const TOOL_ID_GROUPS = {
 	hf_api: [
 		SPACE_SEARCH_TOOL_ID,
 		REPO_SEARCH_TOOL_ID,
+		CREATE_REPO_TOOL_ID,
 		PAPER_SEARCH_TOOL_ID,
 		HUB_REPO_DETAILS_TOOL_ID,
 		DOCS_SEMANTIC_SEARCH_TOOL_ID,