npm - @youdotcom-oss/mcp - Versions diffs - 1.4.1 → 1.5.1 - Mend

@youdotcom-oss/mcp 1.4.1 → 1.5.1

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/README.md +4 -2
package/bin/stdio.js +186 -109
package/dist/contents/contents.schemas.d.ts +21 -5
package/dist/contents/contents.utils.d.ts +3 -3
package/package.json +2 -2
package/src/contents/contents.schemas.ts +25 -8
package/src/contents/contents.utils.ts +78 -20
package/src/contents/register-contents-tool.ts +7 -3
package/src/contents/tests/contents.utils.spec.ts +15 -16
package/src/search/tests/search.utils.spec.ts +0 -1
package/src/tests/tool.spec.ts +9 -9

package/dist/contents/contents.schemas.d.ts CHANGED Viewed

@@ -1,14 +1,20 @@
 import * as z from 'zod';
 /**
  * Input schema for the you-contents tool
- * Accepts an array of URLs and optional format
+ * Accepts an array of URLs, optional formats array (or legacy format string), and optional crawl timeout
  */
 export declare const ContentsQuerySchema: z.ZodObject<{
     urls: z.ZodArray<z.ZodString>;
-    format: z.ZodDefault<z.ZodOptional<z.ZodEnum<{
+    formats: z.ZodOptional<z.ZodArray<z.ZodEnum<{
         markdown: "markdown";
         html: "html";
+        metadata: "metadata";
     }>>>;
+    format: z.ZodOptional<z.ZodEnum<{
+        markdown: "markdown";
+        html: "html";
+    }>>;
+    crawl_timeout: z.ZodOptional<z.ZodNumber>;
 }, z.core.$strip>;
 export type ContentsQuery = z.infer<typeof ContentsQuerySchema>;
 /**
@@ -20,6 +26,11 @@ export declare const ContentsApiResponseSchema: z.ZodArray<z.ZodObject<{
     title: z.ZodOptional<z.ZodString>;
     html: z.ZodOptional<z.ZodString>;
     markdown: z.ZodOptional<z.ZodString>;
+    metadata: z.ZodOptional<z.ZodObject<{
+        jsonld: z.ZodOptional<z.ZodArray<z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
+        opengraph: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
+        twitter: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
+    }, z.core.$strip>>;
 }, z.core.$strip>>;
 export type ContentsApiResponse = z.infer<typeof ContentsApiResponseSchema>;
 /**
@@ -28,12 +39,17 @@ export type ContentsApiResponse = z.infer<typeof ContentsApiResponseSchema>;
  */
 export declare const ContentsStructuredContentSchema: z.ZodObject<{
     count: z.ZodNumber;
-    format: z.ZodString;
+    formats: z.ZodArray<z.ZodString>;
     items: z.ZodArray<z.ZodObject<{
         url: z.ZodString;
         title: z.ZodOptional<z.ZodString>;
-        content: z.ZodString;
-        contentLength: z.ZodNumber;
+        markdown: z.ZodOptional<z.ZodString>;
+        html: z.ZodOptional<z.ZodString>;
+        metadata: z.ZodOptional<z.ZodObject<{
+            jsonld: z.ZodOptional<z.ZodArray<z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
+            opengraph: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
+            twitter: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
+        }, z.core.$strip>>;
     }, z.core.$strip>>;
 }, z.core.$strip>;
 export type ContentsStructuredContent = z.infer<typeof ContentsStructuredContentSchema>;

package/dist/contents/contents.utils.d.ts CHANGED Viewed

@@ -7,7 +7,7 @@ import { type ContentsApiResponse, type ContentsQuery, type ContentsStructuredCo
  * @param getUserAgent - Function to get User-Agent string
  * @returns Parsed and validated API response
  */
-export declare const fetchContents: ({ contentsQuery: { urls, format }, YDC_API_KEY, getUserAgent, }: {
+export declare const fetchContents: ({ contentsQuery: { urls, formats, format, crawl_timeout }, YDC_API_KEY, getUserAgent, }: {
     contentsQuery: ContentsQuery;
     YDC_API_KEY?: string;
     getUserAgent: () => string;
@@ -16,10 +16,10 @@ export declare const fetchContents: ({ contentsQuery: { urls, format }, YDC_API_
  * Format contents API response for MCP output
  * Returns full content in both text and structured formats
  * @param response - Validated API response
- * @param format - Format used for extraction
+ * @param formats - Formats used for extraction
  * @returns Formatted response with content and structuredContent
  */
-export declare const formatContentsResponse: (response: ContentsApiResponse, format: string) => {
+export declare const formatContentsResponse: (response: ContentsApiResponse, formats: string[]) => {
     content: Array<{
         type: "text";
         text: string;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@youdotcom-oss/mcp",
-  "version": "1.4.1",
+  "version": "1.5.1",
   "description": "You.com API Model Context Protocol Server",
   "license": "MIT",
   "engines": {
@@ -71,7 +71,7 @@
     "zod": "^4.3.5",
     "@hono/mcp": "^0.2.3",
     "@modelcontextprotocol/sdk": "^1.25.2",
-    "hono": "^4.11.3"
+    "hono": "^4.11.4"
   },
   "devDependencies": {
     "@modelcontextprotocol/inspector": "0.18.0"

package/src/contents/contents.schemas.ts CHANGED Viewed

@@ -2,18 +2,19 @@ import * as z from 'zod';
 /**
  * Input schema for the you-contents tool
- * Accepts an array of URLs and optional format
+ * Accepts an array of URLs, optional formats array (or legacy format string), and optional crawl timeout
  */
 export const ContentsQuerySchema = z.object({
   urls: z
     .array(z.string().url())
     .min(1)
     .describe('Array of webpage URLs to extract content from (e.g., ["https://example.com"])'),
-  format: z
-    .enum(['markdown', 'html'])
+  formats: z
+    .array(z.enum(['markdown', 'html', 'metadata']))
     .optional()
-    .default('markdown')
-    .describe('Output format: markdown (text) or html (layout)'),
+    .describe('Output formats: array of "markdown" (text), "html" (layout), or "metadata" (structured data)'),
+  format: z.enum(['markdown', 'html']).optional().describe('(Deprecated) Output format - use formats array instead'),
+  crawl_timeout: z.number().min(1).max(60).optional().describe('Optional timeout in seconds (1-60) for page crawling'),
 });
 export type ContentsQuery = z.infer<typeof ContentsQuerySchema>;
@@ -26,6 +27,14 @@ const ContentsItemSchema = z.object({
   title: z.string().optional().describe('Title'),
   html: z.string().optional().describe('HTML content'),
   markdown: z.string().optional().describe('Markdown content'),
+  metadata: z
+    .object({
+      jsonld: z.array(z.record(z.string(), z.unknown())).optional().describe('JSON-LD structured data (Schema.org)'),
+      opengraph: z.record(z.string(), z.string()).optional().describe('OpenGraph meta tags'),
+      twitter: z.record(z.string(), z.string()).optional().describe('Twitter Card metadata'),
+    })
+    .optional()
+    .describe('Structured metadata when available'),
 });
 /**
@@ -42,14 +51,22 @@ export type ContentsApiResponse = z.infer<typeof ContentsApiResponseSchema>;
  */
 export const ContentsStructuredContentSchema = z.object({
   count: z.number().describe('URLs processed'),
-  format: z.string().describe('Content format'),
+  formats: z.array(z.string()).describe('Content formats requested'),
   items: z
     .array(
       z.object({
         url: z.string().describe('URL'),
         title: z.string().optional().describe('Title'),
-        content: z.string().describe('Extracted content'),
-        contentLength: z.number().describe('Content length'),
+        markdown: z.string().optional().describe('Markdown content'),
+        html: z.string().optional().describe('HTML content'),
+        metadata: z
+          .object({
+            jsonld: z.array(z.record(z.string(), z.unknown())).optional(),
+            opengraph: z.record(z.string(), z.string()).optional(),
+            twitter: z.record(z.string(), z.string()).optional(),
+          })
+          .optional()
+          .describe('Structured metadata'),
       }),
     )
     .describe('Extracted items'),

package/src/contents/contents.utils.ts CHANGED Viewed

@@ -16,7 +16,7 @@ import {
  * @returns Parsed and validated API response
  */
 export const fetchContents = async ({
-  contentsQuery: { urls, format = 'markdown' },
+  contentsQuery: { urls, formats, format, crawl_timeout },
   YDC_API_KEY = process.env.YDC_API_KEY,
   getUserAgent,
 }: {
@@ -28,6 +28,23 @@ export const fetchContents = async ({
     throw new Error('YDC_API_KEY is required for Contents API');
   }
+  // Handle backward compatibility: prefer formats array, fallback to format string, default to ['markdown']
+  const requestFormats = formats || (format ? [format] : ['markdown']);
+  // Build request body
+  const requestBody: {
+    urls: string[];
+    formats: string[];
+    crawl_timeout?: number;
+  } = {
+    urls,
+    formats: requestFormats,
+  };
+  if (crawl_timeout !== undefined) {
+    requestBody.crawl_timeout = crawl_timeout;
+  }
   // Make single API call with all URLs
   const options = {
     method: 'POST',
@@ -36,10 +53,7 @@ export const fetchContents = async ({
       'Content-Type': 'application/json',
       'User-Agent': getUserAgent(),
     }),
-    body: JSON.stringify({
-      urls,
-      format,
-    }),
+    body: JSON.stringify(requestBody),
   };
   const response = await fetch(CONTENTS_API_URL, options);
@@ -91,40 +105,84 @@ export const fetchContents = async ({
  * Format contents API response for MCP output
  * Returns full content in both text and structured formats
  * @param response - Validated API response
- * @param format - Format used for extraction
+ * @param formats - Formats used for extraction
  * @returns Formatted response with content and structuredContent
  */
 export const formatContentsResponse = (
   response: ContentsApiResponse,
-  format: string,
+  formats: string[],
 ): {
   content: Array<{ type: 'text'; text: string }>;
   structuredContent: ContentsStructuredContent;
 } => {
   // Build text content with full extracted content
   const textParts: string[] = [`Successfully extracted content from ${response.length} URL(s):\n`];
+  textParts.push(`Formats: ${formats.join(', ')}\n`);
   const items: ContentsStructuredContent['items'] = [];
   for (const item of response) {
-    const contentField = format === 'html' ? item.html : item.markdown;
-    const content = contentField || '';
-    // Add full content for this item
-    textParts.push(`\n## ${item.title}`);
-    textParts.push(`URL: ${item.url}`);
-    textParts.push(`Format: ${format}`);
-    textParts.push(`Content Length: ${content.length} characters\n`);
+    // Add header for this item
+    textParts.push(`\n## ${item.title || 'Untitled'}`);
+    textParts.push(`URL: ${item.url}\n`);
     textParts.push('---\n');
-    textParts.push(content);
+    // Add content based on requested formats
+    if (formats.includes('markdown') && item.markdown) {
+      textParts.push('\n### Markdown Content\n');
+      textParts.push(item.markdown);
+      textParts.push('\n');
+    }
+    if (formats.includes('html') && item.html) {
+      textParts.push('\n### HTML Content\n');
+      textParts.push(`Length: ${item.html.length} characters\n`);
+      textParts.push(item.html.substring(0, 500));
+      if (item.html.length > 500) {
+        textParts.push('...\n(truncated for display)');
+      }
+      textParts.push('\n');
+    }
+    if (formats.includes('metadata') && item.metadata) {
+      textParts.push('\n### Metadata\n');
+      if (item.metadata.jsonld && item.metadata.jsonld.length > 0) {
+        textParts.push('\n**JSON-LD:**\n');
+        const jsonldStr = JSON.stringify(item.metadata.jsonld, null, 2);
+        if (jsonldStr.length > 2000) {
+          textParts.push(jsonldStr.substring(0, 2000));
+          textParts.push('\n...(truncated for display, see structuredContent for full data)');
+        } else {
+          textParts.push(jsonldStr);
+        }
+        textParts.push('\n');
+      }
+      if (item.metadata.opengraph) {
+        textParts.push('\n**OpenGraph:**\n');
+        for (const [key, value] of Object.entries(item.metadata.opengraph)) {
+          textParts.push(`- ${key}: ${value}\n`);
+        }
+      }
+      if (item.metadata.twitter) {
+        textParts.push('\n**Twitter:**\n');
+        for (const [key, value] of Object.entries(item.metadata.twitter)) {
+          textParts.push(`- ${key}: ${value}\n`);
+        }
+      }
+    }
     textParts.push('\n---\n');
-    // Add to structured content with full content
+    // Add to structured content
     items.push({
       url: item.url,
       title: item.title,
-      content,
-      contentLength: content.length,
+      markdown: item.markdown,
+      html: item.html,
+      metadata: item.metadata,
     });
   }
@@ -137,7 +195,7 @@ export const formatContentsResponse = (
     ],
     structuredContent: {
       count: response.length,
-      format,
+      formats,
       items,
     },
   };

package/src/contents/register-contents-tool.ts CHANGED Viewed

@@ -32,12 +32,16 @@ export const registerContentsTool = ({
       try {
         // Validate and parse input
         const contentsQuery = ContentsQuerySchema.parse(toolInput);
-        const { urls, format = 'markdown' } = contentsQuery;
+        const { urls, formats, format, crawl_timeout } = contentsQuery;
+        // Handle backward compatibility: prefer formats array, fallback to format string, default to ['markdown']
+        const requestFormats = formats || (format ? [format] : ['markdown']);
         // Log the request
+        const timeoutInfo = crawl_timeout ? ` with timeout: ${crawl_timeout}s` : '';
         await logger({
           level: 'info',
-          data: `Contents API call initiated for ${urls.length} URL(s) with format: ${format}`,
+          data: `Contents API call initiated for ${urls.length} URL(s) with formats: ${requestFormats.join(', ')}${timeoutInfo}`,
         });
         // Fetch contents from API
@@ -48,7 +52,7 @@ export const registerContentsTool = ({
         });
         // Format response with full content
-        const { content, structuredContent } = formatContentsResponse(response, format);
+        const { content, structuredContent } = formatContentsResponse(response, requestFormats);
         // Log success
         await logger({

package/src/contents/tests/contents.utils.spec.ts CHANGED Viewed

@@ -88,7 +88,7 @@ describe('formatContentsResponse', () => {
       },
     ];
-    const result = formatContentsResponse(mockResponse, 'markdown');
+    const result = formatContentsResponse(mockResponse, ['markdown']);
     expect(result).toHaveProperty('content');
     expect(result).toHaveProperty('structuredContent');
@@ -99,12 +99,13 @@ describe('formatContentsResponse', () => {
     const text = result.content[0]?.text;
     expect(text).toContain('Example Page');
     expect(text).toContain('https://example.com');
-    expect(text).toContain('Format: markdown');
+    expect(text).toContain('Formats: markdown');
     expect(text).toContain('# Hello');
     expect(text).toContain('This is a test page with some content.');
     expect(result.structuredContent).toHaveProperty('count', 1);
-    expect(result.structuredContent).toHaveProperty('format', 'markdown');
+    expect(result.structuredContent).toHaveProperty('formats');
+    expect(result.structuredContent.formats).toEqual(['markdown']);
     expect(result.structuredContent.items).toHaveLength(1);
     const item = result.structuredContent.items[0];
@@ -112,8 +113,7 @@ describe('formatContentsResponse', () => {
     expect(item).toHaveProperty('url', 'https://example.com');
     expect(item).toHaveProperty('title', 'Example Page');
-    expect(item).toHaveProperty('content', '# Hello\n\nThis is a test page with some content.');
-    expect(item?.contentLength).toBe('# Hello\n\nThis is a test page with some content.'.length);
+    expect(item).toHaveProperty('markdown', '# Hello\n\nThis is a test page with some content.');
   });
   test('formats multiple items correctly', () => {
@@ -130,7 +130,7 @@ describe('formatContentsResponse', () => {
       },
     ];
-    const result = formatContentsResponse(mockResponse, 'markdown');
+    const result = formatContentsResponse(mockResponse, ['markdown']);
     expect(result.structuredContent.count).toBe(2);
     expect(result.structuredContent.items).toHaveLength(2);
@@ -151,11 +151,11 @@ describe('formatContentsResponse', () => {
       },
     ];
-    const result = formatContentsResponse(mockResponse, 'html');
+    const result = formatContentsResponse(mockResponse, ['html']);
-    expect(result.structuredContent.format).toBe('html');
+    expect(result.structuredContent.formats).toEqual(['html']);
     const text = result.content[0]?.text;
-    expect(text).toContain('Format: html');
+    expect(text).toContain('Formats: html');
     expect(text).toContain('<html>');
   });
@@ -169,16 +169,15 @@ describe('formatContentsResponse', () => {
       },
     ];
-    const result = formatContentsResponse(mockResponse, 'markdown');
+    const result = formatContentsResponse(mockResponse, ['markdown']);
     const text = result.content[0]?.text;
     // Full content should be included (not truncated)
     expect(text).toContain(longContent);
-    // Structured content should have full content and correct length
+    // Structured content should have full markdown content
     const item = result.structuredContent.items[0];
-    expect(item?.content).toBe(longContent);
-    expect(item?.contentLength).toBe(1000);
+    expect(item?.markdown).toBe(longContent);
   });
   test('handles empty content gracefully', () => {
@@ -190,11 +189,11 @@ describe('formatContentsResponse', () => {
       },
     ];
-    const result = formatContentsResponse(mockResponse, 'markdown');
+    const result = formatContentsResponse(mockResponse, ['markdown']);
-    expect(result.structuredContent.items[0]?.contentLength).toBe(0);
+    expect(result.structuredContent.items[0]?.markdown).toBe('');
     const text = result.content[0]?.text;
     expect(text).toContain('Empty Page');
-    expect(text).toContain('Content Length: 0 characters');
+    // Empty content should still be handled gracefully
   });
 });

package/src/search/tests/search.utils.spec.ts CHANGED Viewed

@@ -68,7 +68,6 @@ describe('fetchSearchResults', () => {
       expect(Array.isArray(webResult?.snippets)).toBe(true);
       // Test that news results have required properties
-      // biome-ignore lint/style/noNonNullAssertion: Test
       // const newsResult = result.results.news![0];
       // expect(newsResult).toHaveProperty('url');
       // expect(newsResult).toHaveProperty('title');

package/src/tests/tool.spec.ts CHANGED Viewed

@@ -500,11 +500,12 @@ describe('registerContentsTool', () => {
       const text = content[0]?.text;
       expect(text).toContain('Successfully extracted content');
       expect(text).toContain('https://documentation.you.com/developer-resources/mcp-server');
-      expect(text).toContain('Format: markdown');
+      expect(text).toContain('Formats: markdown');
       const structuredContent = result.structuredContent as ContentsStructuredContent;
       expect(structuredContent).toHaveProperty('count', 1);
-      expect(structuredContent).toHaveProperty('format', 'markdown');
+      expect(structuredContent).toHaveProperty('formats');
+      expect(structuredContent.formats).toEqual(['markdown']);
       expect(structuredContent).toHaveProperty('items');
       expect(structuredContent.items).toHaveLength(1);
@@ -512,10 +513,9 @@ describe('registerContentsTool', () => {
       expect(item).toBeDefined();
       expect(item).toHaveProperty('url', 'https://documentation.you.com/developer-resources/mcp-server');
-      expect(item).toHaveProperty('content');
-      expect(item).toHaveProperty('contentLength');
-      expect(typeof item?.content).toBe('string');
-      expect(item?.content.length).toBeGreaterThan(0);
+      expect(item).toHaveProperty('markdown');
+      expect(typeof item?.markdown).toBe('string');
+      expect(item?.markdown?.length).toBeGreaterThan(0);
     },
     { retry: 2 },
   );
@@ -557,11 +557,11 @@ describe('registerContentsTool', () => {
       });
       const structuredContent = result.structuredContent as ContentsStructuredContent;
-      expect(structuredContent.format).toBe('html');
+      expect(structuredContent.formats).toEqual(['html']);
       const content = result.content as { type: string; text: string }[];
       const text = content[0]?.text;
-      expect(text).toContain('Format: html');
+      expect(text).toContain('Formats: html');
     },
     { retry: 2 },
   );
@@ -577,7 +577,7 @@ describe('registerContentsTool', () => {
       });
       const structuredContent = result.structuredContent as ContentsStructuredContent;
-      expect(structuredContent.format).toBe('markdown');
+      expect(structuredContent.formats).toEqual(['markdown']);
     },
     { retry: 2 },
   );