npm - @llmindset/hf-mcp - Versions diffs - 0.3.11 → 0.3.13 - Mend

@llmindset/hf-mcp 0.3.11 → 0.3.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (65) hide show

package/dist/dataset-detail.js +1 -1
package/dist/dataset-detail.js.map +1 -1
package/dist/dataset-viewer-inspect.d.ts +48 -0
package/dist/dataset-viewer-inspect.d.ts.map +1 -0
package/dist/dataset-viewer-inspect.js +660 -0
package/dist/dataset-viewer-inspect.js.map +1 -0
package/dist/dataset-viewer-inspect.test.d.ts +2 -0
package/dist/dataset-viewer-inspect.test.d.ts.map +1 -0
package/dist/dataset-viewer-inspect.test.js +218 -0
package/dist/dataset-viewer-inspect.test.js.map +1 -0
package/dist/gradio-files.d.ts +2 -2
package/dist/hub-inspect.d.ts +17 -0
package/dist/hub-inspect.d.ts.map +1 -1
package/dist/hub-inspect.js +68 -4
package/dist/hub-inspect.js.map +1 -1
package/dist/hub-inspect.test.d.ts +2 -0
package/dist/hub-inspect.test.d.ts.map +1 -0
package/dist/hub-inspect.test.js +24 -0
package/dist/hub-inspect.test.js.map +1 -0
package/dist/index.browser.d.ts.map +1 -1
package/dist/index.browser.js +2 -1
package/dist/index.browser.js.map +1 -1
package/dist/index.d.ts +1 -0
package/dist/index.d.ts.map +1 -1
package/dist/index.js +1 -0
package/dist/index.js.map +1 -1
package/dist/jobs/commands/run.d.ts.map +1 -1
package/dist/jobs/commands/run.js +7 -1
package/dist/jobs/commands/run.js.map +1 -1
package/dist/jobs/commands/scheduled.d.ts.map +1 -1
package/dist/jobs/commands/scheduled.js +2 -0
package/dist/jobs/commands/scheduled.js.map +1 -1
package/dist/jobs/commands/utils.d.ts +3 -1
package/dist/jobs/commands/utils.d.ts.map +1 -1
package/dist/jobs/commands/utils.js +76 -1
package/dist/jobs/commands/utils.js.map +1 -1
package/dist/jobs/jobs-tool.d.ts.map +1 -1
package/dist/jobs/jobs-tool.js +60 -3
package/dist/jobs/jobs-tool.js.map +1 -1
package/dist/jobs/types.d.ts +22 -0
package/dist/jobs/types.d.ts.map +1 -1
package/dist/jobs/types.js +16 -1
package/dist/jobs/types.js.map +1 -1
package/dist/model-detail.js +1 -1
package/dist/model-detail.js.map +1 -1
package/dist/readme-utils.d.ts +1 -1
package/dist/readme-utils.d.ts.map +1 -1
package/dist/readme-utils.js +2 -13
package/dist/readme-utils.js.map +1 -1
package/package.json +1 -1
package/src/dataset-detail.ts +1 -1
package/src/dataset-viewer-inspect.test.ts +234 -0
package/src/dataset-viewer-inspect.ts +809 -0
package/src/hub-inspect.test.ts +28 -0
package/src/hub-inspect.ts +88 -4
package/src/index.browser.ts +2 -1
package/src/index.ts +1 -0
package/src/jobs/commands/run.ts +7 -1
package/src/jobs/commands/scheduled.ts +2 -0
package/src/jobs/commands/utils.ts +95 -5
package/src/jobs/jobs-tool.ts +60 -3
package/src/jobs/types.ts +35 -1
package/src/model-detail.ts +1 -1
package/src/readme-utils.ts +2 -32
package/test/jobs/command-translation.spec.ts +88 -2

package/src/hub-inspect.test.ts ADDED Viewed

@@ -0,0 +1,28 @@
+import { describe, expect, it } from 'vitest';
+import { HUB_REPO_DETAILS_TOOL_CONFIG } from './hub-inspect.js';
+// Schema-only checks: every case below goes through HUB_REPO_DETAILS_TOOL_CONFIG.schema.parse
+// and never constructs the tool or touches the network.
+describe('HUB_REPO_DETAILS_TOOL_CONFIG', () => {
+	it('defaults to overview and accepts dataset viewer operations', () => {
+		// include_readme is deliberately omitted so the parsed value reflects the schema default.
+		const parsed = HUB_REPO_DETAILS_TOOL_CONFIG.schema.parse({
+			repo_ids: ['rajpurkar/squad'],
+			repo_type: 'dataset',
+			operations: ['dataset_structure', 'dataset_preview'],
+			config: 'plain_text',
+			split: 'train',
+			offset: 0,
+			limit: 5,
+		});
+		expect(parsed.include_readme).toBe(false);
+		// The operations array must round-trip unchanged (same members, same order).
+		expect(parsed.operations).toEqual(['dataset_structure', 'dataset_preview']);
+	});
+	it('does not expose a redundant readme operation', () => {
+		// 'readme' is not a member of the operations enum, so parsing has to throw.
+		expect(() =>
+			HUB_REPO_DETAILS_TOOL_CONFIG.schema.parse({
+				repo_ids: ['rajpurkar/squad'],
+				operations: ['readme'],
+			})
+		).toThrow();
+	});
+});

package/src/hub-inspect.ts CHANGED Viewed

@@ -4,12 +4,17 @@ import { ModelDetailTool } from './model-detail.js';
 import { DatasetDetailTool } from './dataset-detail.js';
 import { spaceInfo } from '@huggingface/hub';
 import { formatDate } from './utilities.js';
+import { DatasetViewerInspector } from './dataset-viewer-inspect.js';
+const HUB_INSPECT_OPERATIONS = ['overview', 'dataset_structure', 'dataset_preview'] as const;
 export const HUB_REPO_DETAILS_TOOL_CONFIG = {
 	name: 'hub_repo_details',
 	description:
 		'Get details for one or more Hugging Face repos (model, dataset, or space). ' +
-		'Auto-detects type unless specified.',
+		'Auto-detects type unless specified. For datasets, use operations: overview, dataset_structure, dataset_preview. ' +
+		'Use dataset_structure first to discover configs, splits, sizes, and schema. Use dataset_preview only when ' +
+		'config and split are known, unless the dataset has a single config/split.',
 	schema: z.object({
 		repo_ids: z
 			.array(z.string().min(1))
@@ -18,6 +23,26 @@ export const HUB_REPO_DETAILS_TOOL_CONFIG = {
 			.describe('Repo IDs for (models|dataset/space) - usually in author/name format (e.g. openai/gpt-oss-120b)'),
 		repo_type: z.enum(['model', 'dataset', 'space']).optional().describe('Specify lookup type; otherwise auto-detects'),
 		include_readme: z.boolean().default(false).describe('Include README from the repo'),
+		operations: z
+			.array(z.enum(HUB_INSPECT_OPERATIONS))
+			.optional()
+			.describe(
+				'Details to return. Defaults to ["overview"]. For datasets, prefer ["overview", "dataset_structure"] first; then call ["dataset_preview"] with config and split.'
+			),
+		config: z
+			.string()
+			.optional()
+			.describe(
+				'Dataset Viewer config. Required for dataset_preview when the dataset has multiple config/split options. Discover via dataset_structure.'
+			),
+		split: z
+			.string()
+			.optional()
+			.describe(
+				'Dataset Viewer split. Required for dataset_preview when the dataset has multiple config/split options. Discover via dataset_structure.'
+			),
+		offset: z.number().int().nonnegative().optional().describe('Row offset for dataset_preview. Defaults to 0.'),
+		limit: z.number().int().optional().describe('Row count for dataset_preview. Defaults to 5 and is clamped to 1-100.'),
 	}),
 	annotations: {
 		title: 'Hub Repo Details',
@@ -32,11 +57,13 @@ export type HubInspectParams = z.infer<typeof HUB_REPO_DETAILS_TOOL_CONFIG.schem
 export class HubInspectTool {
 	private readonly modelDetail: ModelDetailTool;
 	private readonly datasetDetail: DatasetDetailTool;
+	private readonly datasetViewer: DatasetViewerInspector;
 	private readonly hubUrl?: string;
 	constructor(hfToken?: string, hubUrl?: string) {
 		this.modelDetail = new ModelDetailTool(hfToken, hubUrl);
 		this.datasetDetail = new DatasetDetailTool(hfToken, hubUrl);
+		this.datasetViewer = new DatasetViewerInspector(hfToken, { hubUrl });
 		this.hubUrl = hubUrl;
 	}
@@ -46,7 +73,7 @@ export class HubInspectTool {
 		for (const id of params.repo_ids) {
 			try {
-				const section = await this.inspectSingle(id, params.repo_type, includeReadme);
+				const section = await this.inspectSingle(id, params, includeReadme);
 				parts.push(section);
 				successCount += 1;
 			} catch (err) {
@@ -66,20 +93,30 @@ export class HubInspectTool {
 	private async inspectSingle(
 		repoId: string,
-		type: 'model' | 'dataset' | 'space' | undefined,
+		params: HubInspectParams,
 		includeReadme: boolean
 	): Promise<string> {
+		const type = params.repo_type;
+		const operations = normalizeOperations(params.operations);
+		const hasDatasetOperation = operations.some((operation) => operation === 'dataset_structure' || operation === 'dataset_preview');
 		// If caller constrained the type, do only that
 		if (type === 'model') {
+			if (hasDatasetOperation) return operationMismatch(repoId, 'model', operations);
 			return (await this.modelDetail.getDetails(repoId, includeReadme)).formatted;
 		}
 		if (type === 'dataset') {
-			return (await this.datasetDetail.getDetails(repoId, includeReadme)).formatted;
+			return await this.getDatasetDetails(repoId, params, includeReadme, operations);
 		}
 		if (type === 'space') {
+			if (hasDatasetOperation) return operationMismatch(repoId, 'space', operations);
 			return await this.getSpaceDetails(repoId);
 		}
+		if (hasDatasetOperation) {
+			return await this.getDatasetDetails(repoId, params, includeReadme, operations);
+		}
 		// Auto-detect: attempt all three and aggregate. The same id may exist for multiple types.
 		const matches: string[] = [];
@@ -111,6 +148,33 @@ export class HubInspectTool {
 		return matches.join('\n\n---\n\n');
 	}
+	/**
+	 * Build the report for one dataset repo by running each requested operation.
+	 *
+	 * Sections are always emitted in the order overview, dataset_structure, dataset_preview,
+	 * regardless of the order of `operations`, and are joined with a blank line.
+	 * Errors from the underlying calls are not caught here.
+	 *
+	 * @param repoId Dataset repo ID passed to every underlying call.
+	 * @param params Tool parameters; `config` and `split` go to both viewer calls, `offset` and `limit` only to the preview.
+	 * @param includeReadme Forwarded to the dataset overview lookup.
+	 * @param operations Operations to run.
+	 * @returns The concatenated sections (an empty string when no branch matches).
+	 */
+	private async getDatasetDetails(
+		repoId: string,
+		params: HubInspectParams,
+		includeReadme: boolean,
+		operations: HubInspectOperation[]
+	): Promise<string> {
+		const sections: string[] = [];
+		if (operations.includes('overview')) {
+			const overview = (await this.datasetDetail.getDetails(repoId, includeReadme)).formatted;
+			// The overview carries a footer advertising the deeper dataset operations.
+			sections.push(`${overview}\n\n${datasetDrillDownHint()}`);
+		}
+		if (operations.includes('dataset_structure')) {
+			sections.push(await this.datasetViewer.getStructure(repoId, { config: params.config, split: params.split }));
+		}
+		if (operations.includes('dataset_preview')) {
+			sections.push(
+				await this.datasetViewer.getPreview(repoId, {
+					config: params.config,
+					split: params.split,
+					offset: params.offset,
+					limit: params.limit,
+				})
+			);
+		}
+		return sections.join('\n\n');
+	}
 	private async getSpaceDetails(spaceId: string): Promise<string> {
 		const additionalFields = ['author', 'tags', 'runtime', 'subdomain', 'sha'] as const;
 		const info = await spaceInfo<(typeof additionalFields)[number]>({
@@ -142,3 +206,23 @@ export class HubInspectTool {
 		return lines.join('\n');
 	}
 }
+type HubInspectOperation = (typeof HUB_INSPECT_OPERATIONS)[number];
+/**
+ * Resolve the caller-supplied operations list to the list that will actually run.
+ *
+ * @param operations Requested operations, possibly missing or empty.
+ * @returns `['overview']` when nothing was requested; otherwise the requested
+ * operations with duplicates removed, keeping first-occurrence order.
+ */
+function normalizeOperations(operations: readonly HubInspectOperation[] | undefined): HubInspectOperation[] {
+	if (!operations || operations.length === 0) {
+		return ['overview'];
+	}
+	const unique = new Set(operations);
+	return Array.from(unique);
+}
+/**
+ * Build the message returned when dataset-only operations are requested for a
+ * repo that the caller explicitly typed as a model or a space.
+ *
+ * @param repoId Repo ID used as the section heading.
+ * @param type The repo type the caller asked for.
+ * @param operations Normalized operations; only the `dataset_*` ones are listed in the message.
+ * @returns A markdown section explaining the mismatch.
+ */
+function operationMismatch(repoId: string, type: 'model' | 'space', operations: HubInspectOperation[]): string {
+	const datasetOperations: string[] = [];
+	for (const operation of operations) {
+		if (operation.startsWith('dataset_')) {
+			datasetOperations.push(operation);
+		}
+	}
+	const requested = datasetOperations.join(', ');
+	return `# ${repoId}\n\nRequested dataset operation(s) \`${requested}\`, but this repo was requested as a ${type}. Dataset Viewer operations only apply to dataset repos.`;
+}
+/**
+ * Markdown footer that tells the caller which follow-up `hub_repo_details`
+ * operations are available for a dataset.
+ *
+ * @returns Four newline-separated lines: a heading, an intro line and two bullets.
+ */
+function datasetDrillDownHint(): string {
+	const heading = '## Available deeper inspections';
+	const intro = 'Call `hub_repo_details` with:';
+	const bullets = [
+		'- `operations: ["dataset_structure"]` for configs, splits, sizes, parquet exports, and schema.',
+		'- `operations: ["dataset_preview"]` with `config` and `split` for sample rows.',
+	];
+	return [heading, intro, ...bullets].join('\n');
+}

package/src/index.browser.ts CHANGED Viewed

@@ -75,7 +75,8 @@ export const HUB_REPO_DETAILS_TOOL_CONFIG: BrowserToolConfig = {
 	name: 'hub_repo_details',
 	description:
 		'Get details for one or more Hugging Face repos (model, dataset, or space). ' +
-		'Auto-detects type unless specified.',
+		'Auto-detects type unless specified. For datasets, use dataset_structure first to discover configs, splits, ' +
+		'sizes, and schema. Use dataset_preview only when config and split are known, unless the dataset has a single config/split.',
 	annotations: {
 		title: 'Hub Repo Details',
 		destructiveHint: false,

package/src/index.ts CHANGED Viewed

@@ -10,6 +10,7 @@ export * from './dataset-search.js';
 export * from './repo-search.js';
 export * from './create-repo.js';
 export * from './dataset-detail.js';
+export * from './dataset-viewer-inspect.js';
 export * from './hub-inspect.js';
 export * from './duplicate-space.js';
 export * from './space-info.js';

package/src/jobs/commands/run.ts CHANGED Viewed

@@ -18,6 +18,7 @@ export async function runCommand(args: RunArgs, client: JobsApiClient, token?: s
 		secrets: args.secrets,
 		timeout: args.timeout,
 		hfToken: token,
+		volumes: args.volumes,
 	});
 	// Submit job
@@ -39,7 +40,11 @@ export async function runCommand(args: RunArgs, client: JobsApiClient, token?: s
 	// Not detached - fetch logs
 	const logsUrl = client.getLogsUrl(job.id, job.owner.name);
-	const logResult = await fetchJobLogs(logsUrl, { token, maxDuration: DEFAULT_LOG_WAIT_MS, maxLines: DEFAULT_MAX_LOG_LINES });
+	const logResult = await fetchJobLogs(logsUrl, {
+		token,
+		maxDuration: DEFAULT_LOG_WAIT_MS,
+		maxLines: DEFAULT_MAX_LOG_LINES,
+	});
 	let response = `Job started: ${job.id}\n\n`;
@@ -80,6 +85,7 @@ export async function uvCommand(args: UvArgs, client: JobsApiClient, token?: str
 		timeout: args.timeout,
 		detach: args.detach,
 		namespace: args.namespace,
+		volumes: args.volumes,
 	};
 	return runCommand(runArgs, client, token);

package/src/jobs/commands/scheduled.ts CHANGED Viewed

@@ -28,6 +28,7 @@ export async function scheduledRunCommand(
 		secrets: args.secrets,
 		timeout: args.timeout,
 		hfToken: token,
+		volumes: args.volumes,
 	});
 	// Create scheduled job spec
@@ -78,6 +79,7 @@ export async function scheduledUvCommand(
 		timeout: args.timeout,
 		detach: args.detach,
 		namespace: args.namespace,
+		volumes: args.volumes,
 	};
 	return scheduledRunCommand(scheduledRunArgs, client, token);

package/src/jobs/commands/utils.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import type { JobSpec } from '../types.js';
+import type { JobSpec, JobVolume, JobVolumeType } from '../types.js';
 import { parse as parseShellArgs } from 'shell-quote';
 interface EnvToken {
@@ -7,6 +7,17 @@ interface EnvToken {
 }
 const SPECIAL_PARAMS = new Set(['*', '@', '#', '?', '!', '-', '_']);
+// Scheme every volume source has to start with.
+const HF_VOLUME_PREFIX = 'hf://';
+// Format reminder appended to every invalid-volume error message.
+const VOLUME_FORMAT_HELP =
+	'Expected format: hf://[TYPE/]OWNER/NAME[/PATH]:/MOUNT_PATH[:ro|:rw], ' +
+	'e.g. hf://datasets/org/dataset:/data:ro or hf://buckets/org/bucket:/output.';
+// Plural type prefix accepted in a volume URL -> singular JobVolumeType used in the parsed volume.
+const HF_VOLUME_TYPES: Record<string, JobVolumeType> = {
+	models: 'model',
+	datasets: 'dataset',
+	spaces: 'space',
+	buckets: 'bucket',
+};
+// Singular spellings are rejected with a "must be plural" error instead of being read as an owner name.
+const SINGULAR_VOLUME_TYPES = new Set(['model', 'dataset', 'space', 'bucket']);
 function isEnvToken(entry: unknown): entry is EnvToken {
 	return Boolean(entry && typeof entry === 'object' && (entry as EnvToken).type === 'env');
@@ -50,9 +61,7 @@ export function parseTimeout(timeout: string): number {
 		if (!isNaN(seconds)) {
 			return seconds;
 		}
-		throw new Error(
-			`Invalid timeout format: "${timeout}". Use format like "5m", "2h", "30s", or plain seconds.`
-		);
+		throw new Error(`Invalid timeout format: "${timeout}". Use format like "5m", "2h", "30s", or plain seconds.`);
 	}
 	const value = parseFloat(match[1]);
@@ -93,7 +102,7 @@ export function parseCommand(command: string | string[]): { command: string[]; a
 	}
 	// Parse the command string using shell-quote for POSIX-compliant parsing
-	const parsed = parseShellArgs<EnvToken>(command, key => ({ type: 'env', key }));
+	const parsed = parseShellArgs<EnvToken>(command, (key) => ({ type: 'env', key }));
 	// Convert parsed result to string array
 	// shell-quote can return various types (strings, objects for operators, etc.)
@@ -121,6 +130,82 @@ export function parseCommand(command: string | string[]): { command: string[]; a
 	return { command: stringArgs, arguments: [] };
 }
+/**
+ * Create (but do not throw) the error used for every malformed volume spec.
+ *
+ * @param rawSpec The volume string as supplied by the caller, quoted verbatim.
+ * @param message Specific reason the spec was rejected.
+ * @returns An Error whose message is the spec, the reason and the format help text.
+ */
+function invalidVolume(rawSpec: string, message: string): Error {
+	const detail = `Invalid volume "${rawSpec}". ${message} ${VOLUME_FORMAT_HELP}`;
+	return new Error(detail);
+}
+/**
+ * Parse one `hf://` volume spec into the Jobs API volume shape.
+ *
+ * Accepted form: `hf://[TYPE/]OWNER/NAME[/PATH]:/MOUNT_PATH[:ro|:rw]`. TYPE is one of the
+ * plural keys of HF_VOLUME_TYPES; when it is omitted the volume type is `model`.
+ *
+ * @param rawSpec Volume string exactly as supplied by the caller (quoted verbatim in errors).
+ * @returns The parsed volume; `readOnly` and `path` are only set when present in the spec.
+ * @throws Error when the `hf://` prefix, the mount path, the type prefix or OWNER/NAME is
+ * missing or malformed.
+ */
+function parseVolume(rawSpec: string): JobVolume {
+	let spec = rawSpec;
+	let readOnly: boolean | undefined;
+	// Peel off the optional access-mode suffix first so it cannot be mistaken for part of the mount path.
+	if (spec.endsWith(':ro')) {
+		readOnly = true;
+		spec = spec.slice(0, -3);
+	} else if (spec.endsWith(':rw')) {
+		readOnly = false;
+		spec = spec.slice(0, -3);
+	}
+	if (!spec.startsWith(HF_VOLUME_PREFIX)) {
+		throw invalidVolume(rawSpec, `Volume source must start with "${HF_VOLUME_PREFIX}".`);
+	}
+	const body = spec.slice(HF_VOLUME_PREFIX.length);
+	// The mount path is absolute, so the last ":/" separates source from mount path.
+	const mountSeparator = body.lastIndexOf(':/');
+	if (mountSeparator === -1) {
+		throw invalidVolume(rawSpec, 'Missing mount path.');
+	}
+	const sourcePart = body.slice(0, mountSeparator);
+	const mountPath = body.slice(mountSeparator + 1);
+	if (!sourcePart) {
+		throw invalidVolume(rawSpec, 'Missing Hub source before mount path.');
+	}
+	if (!mountPath.startsWith('/') || mountPath === '/') {
+		throw invalidVolume(rawSpec, `Mount path must be a non-empty absolute path, got "${mountPath}".`);
+	}
+	const segments = sourcePart.split('/');
+	const firstSegment = segments[0];
+	if (!firstSegment) {
+		throw invalidVolume(rawSpec, 'Missing Hub source type or owner.');
+	}
+	if (SINGULAR_VOLUME_TYPES.has(firstSegment)) {
+		throw invalidVolume(rawSpec, `Type prefix must be plural, got "${firstSegment}/".`);
+	}
+	// Own-property check: a plain index lookup would also resolve inherited keys such as
+	// "constructor" or "toString", so an owner with such a name would be misread as a type
+	// prefix and yield a non-string `type` or a spurious OWNER/NAME error.
+	const explicitType = Object.prototype.hasOwnProperty.call(HF_VOLUME_TYPES, firstSegment)
+		? HF_VOLUME_TYPES[firstSegment]
+		: undefined;
+	const type = explicitType ?? 'model';
+	const locationSegments = explicitType ? segments.slice(1) : segments;
+	if (locationSegments.length < 2 || !locationSegments[0] || !locationSegments[1]) {
+		throw invalidVolume(rawSpec, 'Hub source must include OWNER/NAME.');
+	}
+	const source = `${locationSegments[0]}/${locationSegments[1]}`;
+	// Anything after OWNER/NAME is a sub-path inside the source; empty means "whole source".
+	const path = locationSegments.slice(2).join('/') || undefined;
+	const volume: JobVolume = { type, source, mountPath };
+	// Optional fields are left off entirely when not specified.
+	if (readOnly !== undefined) {
+		volume.readOnly = readOnly;
+	}
+	if (path) {
+		volume.path = path;
+	}
+	return volume;
+}
+/**
+ * Convert a list of hf:// volume mount strings into Jobs API volume objects.
+ *
+ * @param volumes Volume specs; may be omitted or empty.
+ * @returns One parsed volume per input string, or `undefined` when there is nothing to mount.
+ * Any error thrown while parsing an entry propagates to the caller.
+ */
+export function parseVolumes(volumes?: string[]): JobVolume[] | undefined {
+	const hasVolumes = Boolean(volumes && volumes.length > 0);
+	return hasVolumes && volumes ? volumes.map(parseVolume) : undefined;
+}
 /**
  * Replace HF token placeholder with actual token if available
  */
@@ -162,6 +247,7 @@ export function createJobSpec(args: {
 	secrets?: Record<string, string>;
 	timeout?: string;
 	hfToken?: string;
+	volumes?: string[];
 }): JobSpec {
 	// Validate required fields
 	if (!args.image) {
@@ -176,6 +262,7 @@ export function createJobSpec(args: {
 	const timeoutSeconds = args.timeout ? parseTimeout(args.timeout) : undefined;
 	const environment = transformEnvMap(args.env, args.hfToken) || {};
 	const secrets = transformEnvMap(args.secrets, args.hfToken) || {};
+	const volumes = parseVolumes(args.volumes);
 	const spec: JobSpec = {
 		...imageSource,
@@ -186,6 +273,9 @@ export function createJobSpec(args: {
 		secrets,
 		timeoutSeconds,
 	};
+	if (volumes) {
+		spec.volumes = volumes;
+	}
 	return spec;
 }

package/src/jobs/jobs-tool.ts CHANGED Viewed

@@ -337,6 +337,42 @@ Manage compute jobs on Hugging Face infrastructure.
 ${renderExampleSection('Run a simple job', 'run')}${renderExampleSection('Run a Python script with UV', 'uv')}
+## Deep Hub Dataset/Repo Analysis
+Use Jobs for deep analysis prompts involving Hugging Face datasets, models, Spaces, repos, traces, or large Hub files—especially when the user asks to "analyze", "find trends", "process all rows/files", "run a complete analysis", "take your time", or "install/use Python libraries".
+Recommended workflow:
+1. Inspect the repo with \`hub_repo_details\` for schema, splits, and parquet URLs.
+2. Run \`operation: "uv"\` with a self-contained Python script; do not call \`{"operation": "uv"}\` by itself except to request help.
+3. Always put third-party packages in \`with_deps\`; do not assume packages like \`pandas\`, \`polars\`, \`pyarrow\`, \`datasets\`, or \`huggingface_hub\` are installed. Prefer \`with_deps\` over relying on inline PEP 723 script metadata.
+4. Prefer converted parquet URLs for Hub datasets when available; they are often more reliable for mixed JSONL/session repos than \`datasets.load_dataset(...)\`.
+5. Print the final report at the end of the job. If the initial response only shows installation logs or partial output, call \`logs\` with the exact returned job ID and a larger \`tail\`, e.g. \`{"tail": 500}\`.
+6. Jobs do not automatically inherit the MCP server's Hugging Face token inside the container. For private/gated data or uploads, pass \`secrets: { "HF_TOKEN": "$HF_TOKEN" }\`.
+Example:
+\`\`\`json
+{
+  "operation": "uv",
+  "args": {
+    "with_deps": ["polars", "pyarrow", "huggingface_hub"],
+    "timeout": "60m",
+    "flavor": "cpu-upgrade",
+    "script": "import polars as pl\\nurl = 'PARQUET_URL_FROM_HUB_REPO_DETAILS'\\ndf = pl.read_parquet(url)\\nprint(df.shape)\\nprint(df.head())"
+  }
+}
+\`\`\`
+If output is incomplete, fetch more logs:
+\`\`\`json
+{
+  "operation": "logs",
+  "args": {
+    "job_id": "JOB_ID_FROM_RUN_RESPONSE",
+    "tail": 500
+  }
+}
+\`\`\`
 ## Hardware Flavors
 ${HARDWARE_FLAVORS_SECTION}
@@ -357,6 +393,28 @@ ${HARDWARE_FLAVORS_SECTION}
 - Include newline characters directly in the argument (e.g., \`"first line\\nsecond line"\`)
 - UV inline scripts are automatically base64-decoded inside the container; just send the raw script text
+## Volumes
+Attach Hub repositories or buckets into the job container with \`hf://\` volume URLs.
+Format: \`hf://[TYPE/]OWNER/NAME[/PATH]:/MOUNT_PATH[:ro|:rw]\`
+- \`TYPE\` is one of \`models\`, \`datasets\`, \`spaces\`, or \`buckets\`; omitted type defaults to models.
+- \`OWNER/NAME\` source IDs are required.
+- \`:ro\` and \`:rw\` are optional; backend defaults are preserved when omitted.
+Example:
+\`\`\`json
+{
+  "operation": "run",
+  "args": {
+    "image": "python:3.12",
+    "command": ["python", "-c", "import os; print(os.listdir('/data'))"],
+    "volumes": ["hf://datasets/org/dataset:/data:ro"]
+  }
+}
+\`\`\`
 ### Show command-specific help
 Call this tool with:
 \`\`\`json
@@ -378,9 +436,8 @@ Call this tool with:
 export const HF_JOBS_TOOL_CONFIG = {
 	name: 'hf_jobs',
 	description:
-		'Manage Hugging Face CPU/GPU compute jobs. Run commands in Docker containers, ' +
-		'execute Python scripts with UV. List, schedule and monitor jobs/logs. ' +
-		'Call this tool with no operation for full usage instructions and examples. ',
+		'Remote compute for Hugging Face workflows. Run Python/UV or Docker jobs to deeply analyze Hub datasets, repos, traces, models, and large files; compute trends/statistics; run batch inference/evaluation; or perform long-running work with installed libraries. ' +
+		'Use for dataset/repo analysis prompts when local chat inspection is insufficient. Includes submit, logs, inspect, cancel, schedule, and volume mounting.',
 	schema: z.object({
 		operation: z
 			.enum(OPERATION_NAMES)

package/src/jobs/types.ts CHANGED Viewed

@@ -60,6 +60,20 @@ export interface JobOwner {
 	type: 'user' | 'org';
 }
+/**
+ * Hugging Face Hub volume mounted into a Job container.
+ */
+export type JobVolumeType = 'bucket' | 'model' | 'dataset' | 'space';
+export interface JobVolume {
+	/** Kind of Hub resource being mounted. */
+	type: JobVolumeType;
+	/** Hub identifier of the resource; the hf:// parser produces it as `OWNER/NAME`. */
+	source: string;
+	/** Absolute path inside the container where the volume is mounted. */
+	mountPath: string;
+	/** NOTE(review): presumably a Hub revision (branch/tag/commit); the hf:// parser never sets it — confirm against the Jobs API. */
+	revision?: string;
+	/** Set from an explicit `:ro` / `:rw` suffix; left unset when the spec has neither. */
+	readOnly?: boolean;
+	/** Sub-path within the source to mount; unset when the spec names only OWNER/NAME. */
+	path?: string;
+}
 /**
  * Job information from API
  * Based on OpenAPI schema
@@ -97,6 +111,7 @@ export interface JobSpec {
 	secrets?: Record<string, string>;
 	flavor: string;
 	timeoutSeconds?: number;
+	volumes?: JobVolume[];
 }
 /**
@@ -164,6 +179,14 @@ export const runArgsSchema = commonArgsSchema.extend({
 		.optional()
 		.describe('Secrets as key-value pairs. Use HF_TOKEN=$HF_TOKEN to include your token'),
 	timeout: z.string().optional().describe('Max duration (e.g., "5m", "2h", "30s"). Default: 30m').default('30m'),
+	volumes: z
+		.array(z.string())
+		.optional()
+		.describe(
+			'Volume mounts using hf:// URLs. Format: hf://TYPE/OWNER/NAME[/PATH]:/MOUNT_PATH[:ro|:rw]. ' +
+				'TYPE is models, datasets, spaces, or buckets. ' +
+				'Examples: ["hf://datasets/org/ds:/data:ro", "hf://buckets/org/b:/output"].'
+		),
 	detach: z
 		.boolean()
 		.optional()
@@ -187,7 +210,18 @@ export const uvArgsSchema = commonArgsSchema.extend({
 		.optional()
 		.describe('Secrets as key-value pairs. Use HF_TOKEN=$HF_TOKEN to include your token'),
 	timeout: z.string().optional().default('30m').describe('Max duration'),
-	detach: z.boolean().optional().default(false).describe('If true, return immediately with job ID. If false (default), tail logs for up to 10 seconds.'),
+	volumes: z
+		.array(z.string())
+		.optional()
+		.describe(
+			'Volume mounts using hf:// URLs. Format: hf://TYPE/OWNER/NAME[/PATH]:/MOUNT_PATH[:ro|:rw]. ' +
+				'TYPE is models, datasets, spaces, or buckets.'
+		),
+	detach: z
+		.boolean()
+		.optional()
+		.default(false)
+		.describe('If true, return immediately with job ID. If false (default), tail logs for up to 10 seconds.'),
 });
 // PS command args

package/src/model-detail.ts CHANGED Viewed

@@ -259,7 +259,7 @@ export class ModelDetailTool {
 			// Fetch and append README content if requested
 			if (includeReadme) {
-				const readmeContent = await fetchReadmeContent(modelDetails.name, 'models', false);
+				const readmeContent = await fetchReadmeContent(modelDetails.name, 'models');
 				if (readmeContent) {
 					const result = formatModelDetails(modelDetails);
 					result.formatted += '\n\n## README\n<modelcard-readme>\n\n' + readmeContent.trim() + '\n</modelcard-readme>';

package/src/readme-utils.ts CHANGED Viewed

@@ -5,21 +5,16 @@
 import { fetchWithProfile, NETWORK_FETCH_PROFILES } from './network/fetch-profile.js';
 // Maximum number of characters to include from a README
-const DEFAULT_MAX_README_CHARS = 10_000;
+const DEFAULT_MAX_README_CHARS = 40_000;
 /**
  * Fetches README content from a Hugging Face repository
  *
  * @param repoName The resolved repository name (e.g., 'rajpurkar/squad', 'openai-community/gpt2')
  * @param type The repository type ('models' or 'datasets')
- * @param includeYaml Whether to include YAML frontmatter (default: false)
  * @returns Promise<string | null> The README content or null if not found/error
  */
-export async function fetchReadmeContent(
-	repoName: string,
-	type: 'models' | 'datasets',
-	includeYaml: boolean = false
-): Promise<string | null> {
+export async function fetchReadmeContent(repoName: string, type: 'models' | 'datasets'): Promise<string | null> {
 	try {
 		// Construct the URL based on repository type
 		const baseUrl =
@@ -39,11 +34,6 @@ export async function fetchReadmeContent(
 		let content = await response.text();
-		// If includeYaml is false, strip YAML frontmatter
-		if (!includeYaml) {
-			content = stripYamlFrontmatter(content);
-		}
 		// Truncate overly long READMEs to a sensible default size
 		if (content.length > DEFAULT_MAX_README_CHARS) {
 			const truncated = content.slice(0, DEFAULT_MAX_README_CHARS);
@@ -62,23 +52,3 @@ export async function fetchReadmeContent(
 		return null;
 	}
 }
-/**
- * Strips YAML frontmatter from markdown content
- *
- * @param content The full markdown content
- * @returns The content with YAML frontmatter removed
- */
-function stripYamlFrontmatter(content: string): string {
-	// Match YAML frontmatter: starts with ---, ends with ---
-	const yamlPattern = /^(\s*---[\r\n]+)([\S\s]*?)([\r\n]+---(\r\n|\n|$))/;
-	const match = content.match(yamlPattern);
-	if (match) {
-		// Return everything after the closing ---
-		return content.substring(match[0].length);
-	}
-	// No YAML frontmatter found, return original content
-	return content;
-}