npm - @llmindset/hf-mcp - Versions diffs - 0.1.16 - Mend

@llmindset/hf-mcp 0.1.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (93) hide show

package/LICENSE +21 -0
package/dist/dataset-detail.d.ts +26 -0
package/dist/dataset-detail.d.ts.map +1 -0
package/dist/dataset-detail.js +157 -0
package/dist/dataset-detail.js.map +1 -0
package/dist/dataset-search.d.ts +62 -0
package/dist/dataset-search.d.ts.map +1 -0
package/dist/dataset-search.js +158 -0
package/dist/dataset-search.js.map +1 -0
package/dist/duplicate-space.d.ts +75 -0
package/dist/duplicate-space.d.ts.map +1 -0
package/dist/duplicate-space.js +189 -0
package/dist/duplicate-space.js.map +1 -0
package/dist/error-messages.d.ts +4 -0
package/dist/error-messages.d.ts.map +1 -0
package/dist/error-messages.js +30 -0
package/dist/error-messages.js.map +1 -0
package/dist/hf-api-call.d.ts +18 -0
package/dist/hf-api-call.d.ts.map +1 -0
package/dist/hf-api-call.js +105 -0
package/dist/hf-api-call.js.map +1 -0
package/dist/index.d.ts +16 -0
package/dist/index.d.ts.map +1 -0
package/dist/index.js +16 -0
package/dist/index.js.map +1 -0
package/dist/model-detail.d.ts +26 -0
package/dist/model-detail.d.ts.map +1 -0
package/dist/model-detail.js +224 -0
package/dist/model-detail.js.map +1 -0
package/dist/model-search.d.ts +64 -0
package/dist/model-search.d.ts.map +1 -0
package/dist/model-search.js +161 -0
package/dist/model-search.js.map +1 -0
package/dist/paper-search.d.ts +58 -0
package/dist/paper-search.d.ts.map +1 -0
package/dist/paper-search.js +114 -0
package/dist/paper-search.js.map +1 -0
package/dist/paper-summary.d.ts +35 -0
package/dist/paper-summary.d.ts.map +1 -0
package/dist/paper-summary.js +187 -0
package/dist/paper-summary.js.map +1 -0
package/dist/space-files.d.ts +44 -0
package/dist/space-files.d.ts.map +1 -0
package/dist/space-files.js +242 -0
package/dist/space-files.js.map +1 -0
package/dist/space-info.d.ts +56 -0
package/dist/space-info.d.ts.map +1 -0
package/dist/space-info.js +135 -0
package/dist/space-info.js.map +1 -0
package/dist/space-search.d.ts +71 -0
package/dist/space-search.d.ts.map +1 -0
package/dist/space-search.js +95 -0
package/dist/space-search.js.map +1 -0
package/dist/tool-ids.d.ts +23 -0
package/dist/tool-ids.d.ts.map +1 -0
package/dist/tool-ids.js +55 -0
package/dist/tool-ids.js.map +1 -0
package/dist/user-summary.d.ts +56 -0
package/dist/user-summary.d.ts.map +1 -0
package/dist/user-summary.js +271 -0
package/dist/user-summary.js.map +1 -0
package/dist/utilities.d.ts +8 -0
package/dist/utilities.d.ts.map +1 -0
package/dist/utilities.js +53 -0
package/dist/utilities.js.map +1 -0
package/eslint.config.js +43 -0
package/package.json +47 -0
package/src/dataset-detail.ts +257 -0
package/src/dataset-search.ts +237 -0
package/src/duplicate-space.ts +263 -0
package/src/error-messages.ts +57 -0
package/src/hf-api-call.ts +182 -0
package/src/index.ts +18 -0
package/src/model-detail.ts +359 -0
package/src/model-search.ts +231 -0
package/src/paper-search.ts +188 -0
package/src/paper-summary.ts +303 -0
package/src/space-files.ts +325 -0
package/src/space-info.ts +190 -0
package/src/space-search.ts +177 -0
package/src/tool-ids.ts +84 -0
package/src/user-summary.ts +421 -0
package/src/utilities.ts +64 -0
package/test/duplicate-space.spec.ts +41 -0
package/test/fixtures/paper_result_kazakh.json +854 -0
package/test/fixtures/space-result.json +263 -0
package/test/paper-search.spec.ts +57 -0
package/test/paper-summary.spec.ts +113 -0
package/test/space-files.spec.ts +232 -0
package/test/space-search.spec.ts +29 -0
package/test/user-summary.spec.ts +131 -0
package/tsconfig.json +31 -0
package/vitest.config.ts +11 -0

package/src/paper-search.ts ADDED Viewed

@@ -0,0 +1,188 @@
+import { z } from 'zod';
+import { HfApiCall } from './hf-api-call.js';
+import { formatUnknownDate } from './utilities.js';
+// https://github.com/huggingface/huggingface_hub/blob/a26b93e8ba0b51ce76ce5c2044896587c47c6b60/src/huggingface_hub/hf_api.py#L1481-L1542
+// Raw JSON response for https://hf.co/api/papers/search?q=llama%203%20herd (query "llama 3 herd") is ~50,000 tokens
+// Raw JSON response for https://hf.co/api/papers/search?q=kazakh returns ~9 papers
+// Return papers as delimited markdown (or simplified JSON)
+// ---
+//
+// can we link to Collections, Datasets, Models, Spaces?
+// Create a schema validator for search parameters
+// 80 papers in full mode is ~ 35,000 tokens
+// 105 papers in summary mode is ~ 23094 tokens
+// 105 papers in full mode is ~ 45797 tokens
+/** Number of authors `authors()` lists by default before collapsing the rest into "and N more." */
+export const DEFAULT_AUTHORS_TO_SHOW = 8;
+/** Limit `PaperSearchTool.search` falls back to when no limit is passed (the schema below defaults to 12). */
+const RESULTS_TO_RETURN = 10;
+/**
+ * Tool definition for paper search: tool name, model-facing description,
+ * zod input schema and hint annotations.
+ */
+export const PAPER_SEARCH_TOOL_CONFIG = {
+	name: 'paper_search',
+	description:
+		'Find Machine Learning research papers on the Hugging Face hub. ' +
+		"Include 'Link to paper' when presenting the results. " +
+		'Consider whether tabulating results matches user intent.',
+	schema: z.object({
+		query: z
+			.string()
+			// The lower bound is three characters, so the message says exactly that.
+			.min(3, 'Query must be at least 3 characters')
+			.max(200, 'Query too long')
+			.describe('Semantic Search query'),
+		results_limit: z
+			.number()
+			// The value is used as a slice length, so only positive whole numbers make sense.
+			.int('results_limit must be a whole number')
+			.min(1, 'results_limit must be at least 1')
+			.optional()
+			.default(12)
+			.describe('Number of results to return'),
+		concise_only: z
+			.boolean()
+			.optional()
+			.default(false)
+			.describe(
+				'Return a 2 sentence summary of the abstract. Use for broad search terms which may return a lot of results. Check with User if unsure.'
+			),
+	}),
+	annotations: {
+		title: 'Paper Search',
+		destructiveHint: false,
+		readOnlyHint: true,
+		openWorldHint: true,
+	},
+} as const;
+/** One entry of a paper's author list. */
+export interface Author {
+	// Display name; the authors() formatter prints 'Unknown' when it is missing.
+	name?: string;
+	// When present, `user.user` is used to build a https://hf.co/<user> profile link.
+	user?: {
+		user: string;
+	};
+}
+/** Paper fields read by the search result formatter. */
+interface Paper {
+	// Identifier used in the https://hf.co/papers/<id> link and as the title fallback.
+	id: string;
+	authors?: Author[];
+	// Passed to published() for display.
+	publishedAt?: string;
+	title?: string;
+	// Rendered under the '### Abstract' heading.
+	summary?: string;
+	// Only displayed when greater than zero.
+	upvotes?: number;
+	// Joined with ', ' on the '**AI Keywords**' line.
+	ai_keywords?: string[];
+	// Rendered under '### AI Generated Summary' in concise mode or when `summary` is empty.
+	ai_summary?: string;
+}
+/** One element of the array returned by the papers search call. */
+export interface PaperSearchResult {
+	paper: Paper;
+	// Only displayed when greater than zero.
+	numComments?: number;
+	// Selects the "authors are participating" wording of the discussion line.
+	isAuthorParticipating?: boolean;
+}
+// Define input types for paper search
+interface PaperSearchParams {
+	// The search query handed to callApi().
+	q: string;
+}
+/**
+ * Paper search against the Hugging Face papers search endpoint,
+ * returning the hits as a markdown report.
+ */
+export class PaperSearchTool extends HfApiCall<PaperSearchParams, PaperSearchResult[]> {
+	/**
+	 * @param hfToken Optional Hugging Face token for API access
+	 * @param apiUrl Search endpoint URL; defaults to the public papers search API
+	 */
+	constructor(hfToken?: string, apiUrl = 'https://huggingface.co/api/papers/search') {
+		super(apiUrl, hfToken);
+	}
+	/**
+	 * Runs a paper search and formats the hits.
+	 * @param query Search query string (e.g. "llama", "attention")
+	 * @param limit Maximum number of papers included in the report
+	 * @param conciseOnly When true, the AI generated summary is shown instead of the abstract
+	 * @returns Markdown report, or a short message when the query is empty or nothing matched
+	 * @throws Error prefixed with "Failed to search for papers: " when the lookup throws an Error
+	 */
+	async search(query: string, limit: number = RESULTS_TO_RETURN, conciseOnly: boolean = false): Promise<string> {
+		try {
+			if (!query) {
+				return 'No query';
+			}
+			const matches = await this.callApi<PaperSearchResult[]>({ q: query });
+			const total = matches.length;
+			if (total === 0) {
+				return `No papers found for query '${query}'`;
+			}
+			// The report covers only the first `limit` hits but still states the overall match count.
+			const page = matches.slice(0, limit);
+			return formatSearchResults(query, page, total, conciseOnly);
+		} catch (error) {
+			// Non-Error throwables are passed through untouched.
+			if (!(error instanceof Error)) {
+				throw error;
+			}
+			throw new Error(`Failed to search for papers: ${error.message}`);
+		}
+	}
+}
+/**
+ * Turns a publication date into a display sentence.
+ * @param publishedDate Date string to format; `undefined` is passed on as an empty string
+ * @returns "Published on <date>" when formatUnknownDate yields a value, otherwise a fallback notice
+ */
+export function published(publishedDate: string | undefined): string {
+	const formatted = formatUnknownDate(publishedDate ?? '');
+	if (!formatted) {
+		return 'Publication date not available';
+	}
+	return `Published on ${formatted}`;
+}
+/**
+ * Renders search hits as a markdown report: an intro line, then one `---`-separated
+ * section per paper (title, date, authors, summary, keywords, engagement, link).
+ * @param query The query that produced the hits (echoed in the intro line)
+ * @param papers The hits to render
+ * @param totalCount Number of hits before truncation, used to word the intro line
+ * @param conciseOnly When true, show the AI generated summary instead of the abstract
+ * @returns The report as a single newline-joined string
+ */
+function formatSearchResults(
+	query: string,
+	papers: PaperSearchResult[],
+	totalCount: number,
+	conciseOnly: boolean = false
+): string {
+	const r: string[] = [];
+	const showingText =
+		papers.length < totalCount
+			? `${totalCount} papers matched the query '${query}'. Here are the first ${papers.length} results.`
+			: `All ${papers.length} papers that matched the query '${query}'`;
+	r.push(showingText);
+	for (const result of papers) {
+		const paper = result.paper;
+		r.push('');
+		r.push('---');
+		r.push('');
+		r.push(`## ${paper.title ?? `Paper ID ${paper.id}`}`);
+		r.push('');
+		// published() already returns the whole "Published on …" sentence, so it must not be prefixed again.
+		r.push(paper.publishedAt ? published(paper.publishedAt) : 'Publication date not available');
+		r.push(authors(paper.authors));
+		r.push('');
+		// Handle concise_only option: use ai_summary when enabled, or fall back to ai_summary if summary is blank
+		const useAiSummary = conciseOnly || !paper.summary;
+		const summaryText = useAiSummary ? paper.ai_summary : paper.summary;
+		r.push(useAiSummary ? '### AI Generated Summary' : '### Abstract');
+		r.push('');
+		r.push(summaryText ?? 'No summary available');
+		r.push('');
+		// An empty keyword list is treated like a missing one instead of printing a bare label.
+		r.push(paper.ai_keywords?.length ? `**AI Keywords**: ${paper.ai_keywords.join(', ')}` : '');
+		const upvotes = paper.upvotes && paper.upvotes > 0 ? `Upvoted ${paper.upvotes} times` : '';
+		if (result.numComments && result.numComments > 0) {
+			const discussion = result.isAuthorParticipating
+				? `The authors are participating in a discussion with ${result.numComments} comments.`
+				: `There is a community discussion with ${result.numComments} comments.`;
+			// Only join with ". " when there is an upvote sentence to join to.
+			r.push(upvotes ? `${upvotes}. ${discussion}` : discussion);
+		} else if (upvotes) {
+			r.push(upvotes);
+		}
+		r.push(`**Link to paper:** [https://hf.co/papers/${paper.id}](https://hf.co/papers/${paper.id})`);
+	}
+	r.push('');
+	r.push('---');
+	return r.join('\n');
+}
+/**
+ * Formats an author list as a single markdown line.
+ * @param authors Author entries; a missing or empty list yields a "Not available" line
+ * @param authorsToShow How many authors to list before summarising the rest as "and N more."
+ * @returns A line starting with "**Authors:**"
+ */
+export function authors(authors: Author[] | undefined, authorsToShow: number = DEFAULT_AUTHORS_TO_SHOW): string {
+	if (!authors || authors.length === 0) {
+		return '**Authors:** Not available';
+	}
+	const entries: string[] = authors.slice(0, authorsToShow).map((author) => {
+		const displayName = author.name ?? 'Unknown';
+		const handle = author.user?.user;
+		// A profile link is appended only when the author entry carries a user handle.
+		return handle ? `${displayName} ([${handle}](https://hf.co/${handle}))` : displayName;
+	});
+	const hiddenCount = authors.length - authorsToShow;
+	if (hiddenCount > 0) {
+		entries.push(`and ${hiddenCount} more.`);
+	}
+	return `**Authors:** ${entries.join(', ')}`;
+}

package/src/paper-summary.ts ADDED Viewed

@@ -0,0 +1,303 @@
+import { z } from 'zod';
+import { HfApiCall, HfApiError } from './hf-api-call.js';
+import { formatDate, formatNumber, escapeMarkdown } from './utilities.js';
+import { ModelSearchTool } from './model-search.js';
+import { DatasetSearchTool } from './dataset-search.js';
+import { SpaceSearchTool } from './space-search.js';
+import { authors, type Author } from './paper-search.js';
+/**
+ * Prompt definition for the paper summary: display name, description and zod argument schema.
+ */
+export const PAPER_SUMMARY_PROMPT_CONFIG = {
+	name: 'Paper Summary',
+	description:
+		'Generate a comprehensive summary of an arXiv paper including its details and related models, datasets, and spaces on Hugging Face. ' +
+		'Accepts various formats: "2502.16161", "arxiv:2502.16161", "https://arxiv.org/abs/2502.16161", or Hugging Face paper URLs.',
+	schema: z.object({
+		paper_id: z
+			.string()
+			.min(1, 'Paper ID is required')
+			.max(60, 'Paper ID must be at most 60 characters')
+			// Exactly one describe(): a later describe() call replaces the earlier description,
+			// and the stated maximum has to agree with max(60) above.
+			.describe(
+				'arXiv paper ID in various formats (e.g., "2502.16161", "arxiv:2502.16161", or full URL). Maximum length is 60 characters.'
+			),
+	}),
+} as const;
+// Define parameter types
+/** Arguments accepted by the paper summary prompt, inferred from its zod schema. */
+export type PaperSummaryParams = z.infer<typeof PAPER_SUMMARY_PROMPT_CONFIG.schema>;
+// Paper API response interface
+/** Fields of the paper detail response that the summary formatter reads. */
+interface PaperDetails {
+	// Used to build the hf.co/papers/<id> and arxiv.org/abs/<id> links.
+	id: string;
+	// Rendered as the top-level heading ('Untitled' when empty).
+	title: string;
+	authors?: Author[];
+	// Passed to formatDate() for the "Published" line.
+	publishedAt: string;
+	summary?: string; // This is the abstract field in the API
+	// Only displayed when greater than zero.
+	upvotes?: number;
+	// Only displayed when greater than zero.
+	comments?: number;
+	// Not read by the code in this file.
+	pageUrl?: string;
+}
+/**
+ * Validates user input and extracts the arXiv ID it refers to.
+ *
+ * Accepted forms: a bare ID ("2502.16161"), an "arxiv:" / "arxiv." prefixed ID,
+ * an arxiv.org `/abs/<id>` URL, or a huggingface.co / hf.co `/papers/<id>` URL.
+ * URLs may omit the scheme and must not carry a query string or fragment.
+ *
+ * @param input - The user input (arXiv ID or URL)
+ * @returns The extracted arXiv ID in format "YYMM.NNNNN"
+ * @throws Error if the input is empty, is not a recognised format, or is a URL
+ *         from another host / with a non-http(s) scheme
+ */
+export function extractArxivIdFromInput(input: string): string {
+	const trimmed = input.trim();
+	if (!trimmed) {
+		throw new Error('Paper ID is required');
+	}
+	// Pattern for a valid arXiv ID: YYMM.NNNNN (e.g., 2502.16161)
+	const arxivPattern = /^\d{4}\.\d{4,5}$/;
+	if (arxivPattern.test(trimmed)) {
+		return trimmed;
+	}
+	// URL-looking input: scheme, www prefix, or a bare domain with TLD followed by "/" or end of input.
+	const urlPattern = /^(https?:\/\/|www\.)|^[a-zA-Z0-9-]+\.[a-zA-Z]{2,}(\/|$)/;
+	if (urlPattern.test(trimmed) || trimmed.includes('://')) {
+		// Detect an explicit scheme case-insensitively; a plain startsWith('http') check
+		// misreads inputs such as "HTTPS://…" or host names that merely begin with "http".
+		const hasScheme = /^[a-z][a-z0-9+.-]*:\/\//i.test(trimmed);
+		let url: URL;
+		try {
+			url = new URL(hasScheme ? trimmed : `https://${trimmed}`);
+		} catch {
+			throw new Error('Invalid URL format');
+		}
+		if (url.protocol !== 'http:' && url.protocol !== 'https:') {
+			throw new Error('Invalid URL format');
+		}
+		// Check for query parameters or fragments
+		if (url.search || url.hash) {
+			throw new Error('URL must contain only the paper ID path');
+		}
+		// Only accept specific domains
+		const allowedHosts = ['arxiv.org', 'www.arxiv.org', 'huggingface.co', 'hf.co'];
+		if (!allowedHosts.includes(url.hostname)) {
+			throw new Error(`URL must be from arxiv.org, huggingface.co, or hf.co. Got: ${url.hostname}`);
+		}
+		const isArxivHost = url.hostname === 'arxiv.org' || url.hostname === 'www.arxiv.org';
+		// The (?!\d) guard stops a longer number from being silently cut down to a
+		// different, valid-looking ID (e.g. ".161619" must not become ".16161").
+		// A version suffix such as "v2" is still tolerated after the ID.
+		const idInPath = isArxivHost ? /\/abs\/(\d{4}\.\d{4,5})(?!\d)/ : /\/papers\/(\d{4}\.\d{4,5})(?!\d)/;
+		const match = idInPath.exec(url.pathname);
+		if (match?.[1]) {
+			return match[1];
+		}
+		throw new Error(
+			isArxivHost
+				? 'arXiv URL must be in format: arxiv.org/abs/YYMM.NNNNN'
+				: 'Hugging Face URL must be in format: hf.co/papers/YYMM.NNNNN'
+		);
+	}
+	// Handle the "arxiv:" prefix and its "arxiv." typo variant in one place.
+	const prefix = /^arxiv([:.])/i.exec(trimmed);
+	if (prefix) {
+		// Tolerate whitespace between the prefix and the ID ("arxiv: 2502.16161").
+		const id = trimmed.slice(prefix[0].length).trim();
+		if (arxivPattern.test(id)) {
+			return id;
+		}
+		throw new Error(`Invalid arXiv ID format after "arxiv${prefix[1]}" prefix`);
+	}
+	// If we get here, it's not a recognized format
+	throw new Error(
+		`Invalid arXiv ID format: "${trimmed}". Expected formats: "2502.16161", "arxiv:2502.16161", or paper URL`
+	);
+}
+/**
+ * Builds a markdown briefing for one paper: its details from the papers API
+ * followed by any models, datasets and spaces that reference it.
+ */
+export class PaperSummaryPrompt extends HfApiCall<Record<string, string>, PaperDetails> {
+	/**
+	 * @param hfToken Optional Hugging Face token for API access
+	 */
+	constructor(hfToken?: string) {
+		super('https://huggingface.co/api/papers', hfToken);
+	}
+	/**
+	 * Generate a comprehensive paper summary.
+	 * @param params Prompt arguments; `paper_id` may be an ID, a prefixed ID or a URL
+	 * @returns Markdown text, or an apology sentence when the paper lookup fails with HfApiError status 404
+	 * @throws Error prefixed with "Failed to generate paper summary: " for invalid input or any other Error
+	 */
+	async generateSummary(params: PaperSummaryParams): Promise<string> {
+		try {
+			// Extract and validate arXiv ID
+			const arxivId = extractArxivIdFromInput(params.paper_id);
+			let paperDetails: PaperDetails;
+			try {
+				paperDetails = await this.getPaperDetails(arxivId);
+			} catch (error) {
+				// An unknown paper is an expected outcome, not a failure.
+				if (error instanceof HfApiError && error.status === 404) {
+					return "I'm sorry, paper not found.";
+				}
+				throw error;
+			}
+			const sections: string[] = [this.formatPaperDetails(paperDetails)];
+			const relatedResources = await this.getRelatedResources(arxivId);
+			// Related sections are appended in a fixed order and only when something was found.
+			for (const section of [relatedResources.models, relatedResources.datasets, relatedResources.spaces]) {
+				if (section) {
+					sections.push(section);
+				}
+			}
+			// Add reminder about tags
+			sections.push(
+				'\n**Note:** Tags and paper references on Hugging Face are not always complete or up-to-date. ' +
+					'-- validate information if necessary'
+			);
+			// Add final instruction
+			sections.push('\nPlease provide a summary of this paper and any associated resources.');
+			return sections.join('\n\n');
+		} catch (error) {
+			if (error instanceof Error) {
+				throw new Error(`Failed to generate paper summary: ${error.message}`);
+			}
+			throw error;
+		}
+	}
+	/**
+	 * Get paper details from HF API by requesting `<apiUrl>/<arxivId>`.
+	 */
+	private async getPaperDetails(arxivId: string): Promise<PaperDetails> {
+		const url = new URL(`${this.apiUrl}/${arxivId}`);
+		return this.fetchFromApi<PaperDetails>(url);
+	}
+	/**
+	 * Format paper details as markdown: title heading, authors, publication date,
+	 * optional engagement counts, links and (when present) the abstract.
+	 */
+	private formatPaperDetails(paper: PaperDetails): string {
+		const lines: string[] = [];
+		// Title as main heading
+		lines.push(`# ${escapeMarkdown(paper.title || 'Untitled')}`);
+		lines.push('');
+		// Authors - use the existing authors formatting function
+		lines.push(authors(paper.authors));
+		lines.push(`**Published:** ${formatDate(paper.publishedAt)}`);
+		// Engagement metrics - only show if they exist and are > 0
+		if (paper.upvotes && paper.upvotes > 0) {
+			lines.push(`**Upvotes:** ${formatNumber(paper.upvotes)}`);
+		}
+		if (paper.comments && paper.comments > 0) {
+			lines.push(`**Comments:** ${formatNumber(paper.comments)}`);
+		}
+		lines.push('');
+		lines.push('**Links:**');
+		lines.push(`- [Hugging Face Paper Page](https://hf.co/papers/${paper.id})`);
+		lines.push(`- [arXiv Page](https://arxiv.org/abs/${paper.id})`);
+		if (paper.summary) {
+			lines.push('');
+			lines.push('## Abstract');
+			lines.push('');
+			lines.push(paper.summary);
+		}
+		return lines.join('\n');
+	}
+	/**
+	 * Search for related resources (models, datasets, spaces) tagged with this paper.
+	 *
+	 * The three lookups are independent, so they are started together and awaited
+	 * with Promise.all. A failing lookup is logged with console.warn and its
+	 * section is left out; it never fails the whole summary.
+	 *
+	 * @returns An object holding a markdown section per resource kind that produced results
+	 */
+	private async getRelatedResources(arxivId: string): Promise<{ models?: string; datasets?: string; spaces?: string }> {
+		const filter = `arxiv:${arxivId}`;
+		// Runs one lookup; yields its titled section, or undefined when it found nothing or failed.
+		const lookup = async (
+			kind: string,
+			heading: string,
+			emptyMarker: string,
+			run: () => Promise<string | undefined>
+		): Promise<string | undefined> => {
+			try {
+				const listing = await run();
+				// The search tools report "nothing found" as text, so that marker is filtered out here.
+				if (listing && !listing.includes(emptyMarker)) {
+					return `## ${heading}\n\n${listing}`;
+				}
+			} catch (error) {
+				console.warn(`Failed to fetch related ${kind} for paper ${arxivId}:`, error);
+			}
+			return undefined;
+		};
+		const [models, datasets, spaces] = await Promise.all([
+			lookup('models', 'Related Models', 'No models found', () =>
+				new ModelSearchTool(this.hfToken).searchWithFilter(filter, 25)
+			),
+			lookup('datasets', 'Related Datasets', 'No datasets found', () =>
+				new DatasetSearchTool(this.hfToken).searchWithFilter(filter, 25)
+			),
+			lookup('spaces', 'Related Spaces', 'No matching Hugging Face Spaces found', () =>
+				new SpaceSearchTool(this.hfToken).searchWithFilter(filter, 25, 2)
+			),
+		]);
+		// Keys are only set for sections that exist, so an empty object means "nothing related".
+		const results: { models?: string; datasets?: string; spaces?: string } = {};
+		if (models) results.models = models;
+		if (datasets) results.datasets = datasets;
+		if (spaces) results.spaces = spaces;
+		return results;
+	}
+}