@youdotcom-oss/mcp 1.4.1 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,14 +1,20 @@
1
1
  import * as z from 'zod';
2
2
  /**
3
3
  * Input schema for the you-contents tool
4
- * Accepts an array of URLs and optional format
4
+ * Accepts an array of URLs, optional formats array (or legacy format string), and optional crawl timeout
5
5
  */
6
6
  export declare const ContentsQuerySchema: z.ZodObject<{
7
7
  urls: z.ZodArray<z.ZodString>;
8
- format: z.ZodDefault<z.ZodOptional<z.ZodEnum<{
8
+ formats: z.ZodOptional<z.ZodArray<z.ZodEnum<{
9
9
  markdown: "markdown";
10
10
  html: "html";
11
+ metadata: "metadata";
11
12
  }>>>;
13
+ format: z.ZodOptional<z.ZodEnum<{
14
+ markdown: "markdown";
15
+ html: "html";
16
+ }>>;
17
+ crawl_timeout: z.ZodOptional<z.ZodNumber>;
12
18
  }, z.core.$strip>;
13
19
  export type ContentsQuery = z.infer<typeof ContentsQuerySchema>;
14
20
  /**
@@ -20,6 +26,11 @@ export declare const ContentsApiResponseSchema: z.ZodArray<z.ZodObject<{
20
26
  title: z.ZodOptional<z.ZodString>;
21
27
  html: z.ZodOptional<z.ZodString>;
22
28
  markdown: z.ZodOptional<z.ZodString>;
29
+ metadata: z.ZodOptional<z.ZodObject<{
30
+ jsonld: z.ZodOptional<z.ZodArray<z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
31
+ opengraph: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
32
+ twitter: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
33
+ }, z.core.$strip>>;
23
34
  }, z.core.$strip>>;
24
35
  export type ContentsApiResponse = z.infer<typeof ContentsApiResponseSchema>;
25
36
  /**
@@ -28,12 +39,17 @@ export type ContentsApiResponse = z.infer<typeof ContentsApiResponseSchema>;
28
39
  */
29
40
  export declare const ContentsStructuredContentSchema: z.ZodObject<{
30
41
  count: z.ZodNumber;
31
- format: z.ZodString;
42
+ formats: z.ZodArray<z.ZodString>;
32
43
  items: z.ZodArray<z.ZodObject<{
33
44
  url: z.ZodString;
34
45
  title: z.ZodOptional<z.ZodString>;
35
- content: z.ZodString;
36
- contentLength: z.ZodNumber;
46
+ markdown: z.ZodOptional<z.ZodString>;
47
+ html: z.ZodOptional<z.ZodString>;
48
+ metadata: z.ZodOptional<z.ZodObject<{
49
+ jsonld: z.ZodOptional<z.ZodArray<z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
50
+ opengraph: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
51
+ twitter: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
52
+ }, z.core.$strip>>;
37
53
  }, z.core.$strip>>;
38
54
  }, z.core.$strip>;
39
55
  export type ContentsStructuredContent = z.infer<typeof ContentsStructuredContentSchema>;
@@ -7,7 +7,7 @@ import { type ContentsApiResponse, type ContentsQuery, type ContentsStructuredCo
7
7
  * @param getUserAgent - Function to get User-Agent string
8
8
  * @returns Parsed and validated API response
9
9
  */
10
- export declare const fetchContents: ({ contentsQuery: { urls, format }, YDC_API_KEY, getUserAgent, }: {
10
+ export declare const fetchContents: ({ contentsQuery: { urls, formats, format, crawl_timeout }, YDC_API_KEY, getUserAgent, }: {
11
11
  contentsQuery: ContentsQuery;
12
12
  YDC_API_KEY?: string;
13
13
  getUserAgent: () => string;
@@ -16,10 +16,10 @@ export declare const fetchContents: ({ contentsQuery: { urls, format }, YDC_API_
16
16
  * Format contents API response for MCP output
17
17
  * Returns full content in both text and structured formats
18
18
  * @param response - Validated API response
19
- * @param format - Format used for extraction
19
+ * @param formats - Formats used for extraction
20
20
  * @returns Formatted response with content and structuredContent
21
21
  */
22
- export declare const formatContentsResponse: (response: ContentsApiResponse, format: string) => {
22
+ export declare const formatContentsResponse: (response: ContentsApiResponse, formats: string[]) => {
23
23
  content: Array<{
24
24
  type: "text";
25
25
  text: string;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@youdotcom-oss/mcp",
3
- "version": "1.4.1",
3
+ "version": "1.5.1",
4
4
  "description": "You.com API Model Context Protocol Server",
5
5
  "license": "MIT",
6
6
  "engines": {
@@ -71,7 +71,7 @@
71
71
  "zod": "^4.3.5",
72
72
  "@hono/mcp": "^0.2.3",
73
73
  "@modelcontextprotocol/sdk": "^1.25.2",
74
- "hono": "^4.11.3"
74
+ "hono": "^4.11.4"
75
75
  },
76
76
  "devDependencies": {
77
77
  "@modelcontextprotocol/inspector": "0.18.0"
@@ -2,18 +2,19 @@ import * as z from 'zod';
2
2
 
3
3
  /**
4
4
  * Input schema for the you-contents tool
5
- * Accepts an array of URLs and optional format
5
+ * Accepts an array of URLs, optional formats array (or legacy format string), and optional crawl timeout
6
6
  */
7
7
  export const ContentsQuerySchema = z.object({
8
8
  urls: z
9
9
  .array(z.string().url())
10
10
  .min(1)
11
11
  .describe('Array of webpage URLs to extract content from (e.g., ["https://example.com"])'),
12
- format: z
13
- .enum(['markdown', 'html'])
12
+ formats: z
13
+ .array(z.enum(['markdown', 'html', 'metadata']))
14
14
  .optional()
15
- .default('markdown')
16
- .describe('Output format: markdown (text) or html (layout)'),
15
+ .describe('Output formats: array of "markdown" (text), "html" (layout), or "metadata" (structured data)'),
16
+ format: z.enum(['markdown', 'html']).optional().describe('(Deprecated) Output format - use formats array instead'),
17
+ crawl_timeout: z.number().min(1).max(60).optional().describe('Optional timeout in seconds (1-60) for page crawling'),
17
18
  });
18
19
 
19
20
  export type ContentsQuery = z.infer<typeof ContentsQuerySchema>;
@@ -26,6 +27,14 @@ const ContentsItemSchema = z.object({
26
27
  title: z.string().optional().describe('Title'),
27
28
  html: z.string().optional().describe('HTML content'),
28
29
  markdown: z.string().optional().describe('Markdown content'),
30
+ metadata: z
31
+ .object({
32
+ jsonld: z.array(z.record(z.string(), z.unknown())).optional().describe('JSON-LD structured data (Schema.org)'),
33
+ opengraph: z.record(z.string(), z.string()).optional().describe('OpenGraph meta tags'),
34
+ twitter: z.record(z.string(), z.string()).optional().describe('Twitter Card metadata'),
35
+ })
36
+ .optional()
37
+ .describe('Structured metadata when available'),
29
38
  });
30
39
 
31
40
  /**
@@ -42,14 +51,22 @@ export type ContentsApiResponse = z.infer<typeof ContentsApiResponseSchema>;
42
51
  */
43
52
  export const ContentsStructuredContentSchema = z.object({
44
53
  count: z.number().describe('URLs processed'),
45
- format: z.string().describe('Content format'),
54
+ formats: z.array(z.string()).describe('Content formats requested'),
46
55
  items: z
47
56
  .array(
48
57
  z.object({
49
58
  url: z.string().describe('URL'),
50
59
  title: z.string().optional().describe('Title'),
51
- content: z.string().describe('Extracted content'),
52
- contentLength: z.number().describe('Content length'),
60
+ markdown: z.string().optional().describe('Markdown content'),
61
+ html: z.string().optional().describe('HTML content'),
62
+ metadata: z
63
+ .object({
64
+ jsonld: z.array(z.record(z.string(), z.unknown())).optional(),
65
+ opengraph: z.record(z.string(), z.string()).optional(),
66
+ twitter: z.record(z.string(), z.string()).optional(),
67
+ })
68
+ .optional()
69
+ .describe('Structured metadata'),
53
70
  }),
54
71
  )
55
72
  .describe('Extracted items'),
@@ -16,7 +16,7 @@ import {
16
16
  * @returns Parsed and validated API response
17
17
  */
18
18
  export const fetchContents = async ({
19
- contentsQuery: { urls, format = 'markdown' },
19
+ contentsQuery: { urls, formats, format, crawl_timeout },
20
20
  YDC_API_KEY = process.env.YDC_API_KEY,
21
21
  getUserAgent,
22
22
  }: {
@@ -28,6 +28,23 @@ export const fetchContents = async ({
28
28
  throw new Error('YDC_API_KEY is required for Contents API');
29
29
  }
30
30
 
31
+ // Handle backward compatibility: prefer formats array, fallback to format string, default to ['markdown']
32
+ const requestFormats = formats || (format ? [format] : ['markdown']);
33
+
34
+ // Build request body
35
+ const requestBody: {
36
+ urls: string[];
37
+ formats: string[];
38
+ crawl_timeout?: number;
39
+ } = {
40
+ urls,
41
+ formats: requestFormats,
42
+ };
43
+
44
+ if (crawl_timeout !== undefined) {
45
+ requestBody.crawl_timeout = crawl_timeout;
46
+ }
47
+
31
48
  // Make single API call with all URLs
32
49
  const options = {
33
50
  method: 'POST',
@@ -36,10 +53,7 @@ export const fetchContents = async ({
36
53
  'Content-Type': 'application/json',
37
54
  'User-Agent': getUserAgent(),
38
55
  }),
39
- body: JSON.stringify({
40
- urls,
41
- format,
42
- }),
56
+ body: JSON.stringify(requestBody),
43
57
  };
44
58
 
45
59
  const response = await fetch(CONTENTS_API_URL, options);
@@ -91,40 +105,84 @@ export const fetchContents = async ({
91
105
  * Format contents API response for MCP output
92
106
  * Returns full content in both text and structured formats
93
107
  * @param response - Validated API response
94
- * @param format - Format used for extraction
108
+ * @param formats - Formats used for extraction
95
109
  * @returns Formatted response with content and structuredContent
96
110
  */
97
111
  export const formatContentsResponse = (
98
112
  response: ContentsApiResponse,
99
- format: string,
113
+ formats: string[],
100
114
  ): {
101
115
  content: Array<{ type: 'text'; text: string }>;
102
116
  structuredContent: ContentsStructuredContent;
103
117
  } => {
104
118
  // Build text content with full extracted content
105
119
  const textParts: string[] = [`Successfully extracted content from ${response.length} URL(s):\n`];
120
+ textParts.push(`Formats: ${formats.join(', ')}\n`);
106
121
 
107
122
  const items: ContentsStructuredContent['items'] = [];
108
123
 
109
124
  for (const item of response) {
110
- const contentField = format === 'html' ? item.html : item.markdown;
111
- const content = contentField || '';
112
-
113
- // Add full content for this item
114
- textParts.push(`\n## ${item.title}`);
115
- textParts.push(`URL: ${item.url}`);
116
- textParts.push(`Format: ${format}`);
117
- textParts.push(`Content Length: ${content.length} characters\n`);
125
+ // Add header for this item
126
+ textParts.push(`\n## ${item.title || 'Untitled'}`);
127
+ textParts.push(`URL: ${item.url}\n`);
118
128
  textParts.push('---\n');
119
- textParts.push(content);
129
+
130
+ // Add content based on requested formats
131
+ if (formats.includes('markdown') && item.markdown) {
132
+ textParts.push('\n### Markdown Content\n');
133
+ textParts.push(item.markdown);
134
+ textParts.push('\n');
135
+ }
136
+
137
+ if (formats.includes('html') && item.html) {
138
+ textParts.push('\n### HTML Content\n');
139
+ textParts.push(`Length: ${item.html.length} characters\n`);
140
+ textParts.push(item.html.substring(0, 500));
141
+ if (item.html.length > 500) {
142
+ textParts.push('...\n(truncated for display)');
143
+ }
144
+ textParts.push('\n');
145
+ }
146
+
147
+ if (formats.includes('metadata') && item.metadata) {
148
+ textParts.push('\n### Metadata\n');
149
+
150
+ if (item.metadata.jsonld && item.metadata.jsonld.length > 0) {
151
+ textParts.push('\n**JSON-LD:**\n');
152
+ const jsonldStr = JSON.stringify(item.metadata.jsonld, null, 2);
153
+ if (jsonldStr.length > 2000) {
154
+ textParts.push(jsonldStr.substring(0, 2000));
155
+ textParts.push('\n...(truncated for display, see structuredContent for full data)');
156
+ } else {
157
+ textParts.push(jsonldStr);
158
+ }
159
+ textParts.push('\n');
160
+ }
161
+
162
+ if (item.metadata.opengraph) {
163
+ textParts.push('\n**OpenGraph:**\n');
164
+ for (const [key, value] of Object.entries(item.metadata.opengraph)) {
165
+ textParts.push(`- ${key}: ${value}\n`);
166
+ }
167
+ }
168
+
169
+ if (item.metadata.twitter) {
170
+ textParts.push('\n**Twitter:**\n');
171
+ for (const [key, value] of Object.entries(item.metadata.twitter)) {
172
+ textParts.push(`- ${key}: ${value}\n`);
173
+ }
174
+ }
175
+ }
176
+
120
177
  textParts.push('\n---\n');
121
178
 
122
- // Add to structured content with full content
179
+ // Add to structured content
123
180
  items.push({
124
181
  url: item.url,
125
182
  title: item.title,
126
- content,
127
- contentLength: content.length,
183
+ markdown: item.markdown,
184
+ html: item.html,
185
+ metadata: item.metadata,
128
186
  });
129
187
  }
130
188
 
@@ -137,7 +195,7 @@ export const formatContentsResponse = (
137
195
  ],
138
196
  structuredContent: {
139
197
  count: response.length,
140
- format,
198
+ formats,
141
199
  items,
142
200
  },
143
201
  };
@@ -32,12 +32,16 @@ export const registerContentsTool = ({
32
32
  try {
33
33
  // Validate and parse input
34
34
  const contentsQuery = ContentsQuerySchema.parse(toolInput);
35
- const { urls, format = 'markdown' } = contentsQuery;
35
+ const { urls, formats, format, crawl_timeout } = contentsQuery;
36
+
37
+ // Handle backward compatibility: prefer formats array, fallback to format string, default to ['markdown']
38
+ const requestFormats = formats || (format ? [format] : ['markdown']);
36
39
 
37
40
  // Log the request
41
+ const timeoutInfo = crawl_timeout ? ` with timeout: ${crawl_timeout}s` : '';
38
42
  await logger({
39
43
  level: 'info',
40
- data: `Contents API call initiated for ${urls.length} URL(s) with format: ${format}`,
44
+ data: `Contents API call initiated for ${urls.length} URL(s) with formats: ${requestFormats.join(', ')}${timeoutInfo}`,
41
45
  });
42
46
 
43
47
  // Fetch contents from API
@@ -48,7 +52,7 @@ export const registerContentsTool = ({
48
52
  });
49
53
 
50
54
  // Format response with full content
51
- const { content, structuredContent } = formatContentsResponse(response, format);
55
+ const { content, structuredContent } = formatContentsResponse(response, requestFormats);
52
56
 
53
57
  // Log success
54
58
  await logger({
@@ -88,7 +88,7 @@ describe('formatContentsResponse', () => {
88
88
  },
89
89
  ];
90
90
 
91
- const result = formatContentsResponse(mockResponse, 'markdown');
91
+ const result = formatContentsResponse(mockResponse, ['markdown']);
92
92
 
93
93
  expect(result).toHaveProperty('content');
94
94
  expect(result).toHaveProperty('structuredContent');
@@ -99,12 +99,13 @@ describe('formatContentsResponse', () => {
99
99
  const text = result.content[0]?.text;
100
100
  expect(text).toContain('Example Page');
101
101
  expect(text).toContain('https://example.com');
102
- expect(text).toContain('Format: markdown');
102
+ expect(text).toContain('Formats: markdown');
103
103
  expect(text).toContain('# Hello');
104
104
  expect(text).toContain('This is a test page with some content.');
105
105
 
106
106
  expect(result.structuredContent).toHaveProperty('count', 1);
107
- expect(result.structuredContent).toHaveProperty('format', 'markdown');
107
+ expect(result.structuredContent).toHaveProperty('formats');
108
+ expect(result.structuredContent.formats).toEqual(['markdown']);
108
109
  expect(result.structuredContent.items).toHaveLength(1);
109
110
 
110
111
  const item = result.structuredContent.items[0];
@@ -112,8 +113,7 @@ describe('formatContentsResponse', () => {
112
113
 
113
114
  expect(item).toHaveProperty('url', 'https://example.com');
114
115
  expect(item).toHaveProperty('title', 'Example Page');
115
- expect(item).toHaveProperty('content', '# Hello\n\nThis is a test page with some content.');
116
- expect(item?.contentLength).toBe('# Hello\n\nThis is a test page with some content.'.length);
116
+ expect(item).toHaveProperty('markdown', '# Hello\n\nThis is a test page with some content.');
117
117
  });
118
118
 
119
119
  test('formats multiple items correctly', () => {
@@ -130,7 +130,7 @@ describe('formatContentsResponse', () => {
130
130
  },
131
131
  ];
132
132
 
133
- const result = formatContentsResponse(mockResponse, 'markdown');
133
+ const result = formatContentsResponse(mockResponse, ['markdown']);
134
134
 
135
135
  expect(result.structuredContent.count).toBe(2);
136
136
  expect(result.structuredContent.items).toHaveLength(2);
@@ -151,11 +151,11 @@ describe('formatContentsResponse', () => {
151
151
  },
152
152
  ];
153
153
 
154
- const result = formatContentsResponse(mockResponse, 'html');
154
+ const result = formatContentsResponse(mockResponse, ['html']);
155
155
 
156
- expect(result.structuredContent.format).toBe('html');
156
+ expect(result.structuredContent.formats).toEqual(['html']);
157
157
  const text = result.content[0]?.text;
158
- expect(text).toContain('Format: html');
158
+ expect(text).toContain('Formats: html');
159
159
  expect(text).toContain('<html>');
160
160
  });
161
161
 
@@ -169,16 +169,15 @@ describe('formatContentsResponse', () => {
169
169
  },
170
170
  ];
171
171
 
172
- const result = formatContentsResponse(mockResponse, 'markdown');
172
+ const result = formatContentsResponse(mockResponse, ['markdown']);
173
173
 
174
174
  const text = result.content[0]?.text;
175
175
  // Full content should be included (not truncated)
176
176
  expect(text).toContain(longContent);
177
177
 
178
- // Structured content should have full content and correct length
178
+ // Structured content should have full markdown content
179
179
  const item = result.structuredContent.items[0];
180
- expect(item?.content).toBe(longContent);
181
- expect(item?.contentLength).toBe(1000);
180
+ expect(item?.markdown).toBe(longContent);
182
181
  });
183
182
 
184
183
  test('handles empty content gracefully', () => {
@@ -190,11 +189,11 @@ describe('formatContentsResponse', () => {
190
189
  },
191
190
  ];
192
191
 
193
- const result = formatContentsResponse(mockResponse, 'markdown');
192
+ const result = formatContentsResponse(mockResponse, ['markdown']);
194
193
 
195
- expect(result.structuredContent.items[0]?.contentLength).toBe(0);
194
+ expect(result.structuredContent.items[0]?.markdown).toBe('');
196
195
  const text = result.content[0]?.text;
197
196
  expect(text).toContain('Empty Page');
198
- expect(text).toContain('Content Length: 0 characters');
197
+ // Empty content should still be handled gracefully
199
198
  });
200
199
  });
@@ -68,7 +68,6 @@ describe('fetchSearchResults', () => {
68
68
  expect(Array.isArray(webResult?.snippets)).toBe(true);
69
69
 
70
70
  // Test that news results have required properties
71
- // biome-ignore lint/style/noNonNullAssertion: Test
72
71
  // const newsResult = result.results.news![0];
73
72
  // expect(newsResult).toHaveProperty('url');
74
73
  // expect(newsResult).toHaveProperty('title');
@@ -500,11 +500,12 @@ describe('registerContentsTool', () => {
500
500
  const text = content[0]?.text;
501
501
  expect(text).toContain('Successfully extracted content');
502
502
  expect(text).toContain('https://documentation.you.com/developer-resources/mcp-server');
503
- expect(text).toContain('Format: markdown');
503
+ expect(text).toContain('Formats: markdown');
504
504
 
505
505
  const structuredContent = result.structuredContent as ContentsStructuredContent;
506
506
  expect(structuredContent).toHaveProperty('count', 1);
507
- expect(structuredContent).toHaveProperty('format', 'markdown');
507
+ expect(structuredContent).toHaveProperty('formats');
508
+ expect(structuredContent.formats).toEqual(['markdown']);
508
509
  expect(structuredContent).toHaveProperty('items');
509
510
  expect(structuredContent.items).toHaveLength(1);
510
511
 
@@ -512,10 +513,9 @@ describe('registerContentsTool', () => {
512
513
  expect(item).toBeDefined();
513
514
 
514
515
  expect(item).toHaveProperty('url', 'https://documentation.you.com/developer-resources/mcp-server');
515
- expect(item).toHaveProperty('content');
516
- expect(item).toHaveProperty('contentLength');
517
- expect(typeof item?.content).toBe('string');
518
- expect(item?.content.length).toBeGreaterThan(0);
516
+ expect(item).toHaveProperty('markdown');
517
+ expect(typeof item?.markdown).toBe('string');
518
+ expect(item?.markdown?.length).toBeGreaterThan(0);
519
519
  },
520
520
  { retry: 2 },
521
521
  );
@@ -557,11 +557,11 @@ describe('registerContentsTool', () => {
557
557
  });
558
558
 
559
559
  const structuredContent = result.structuredContent as ContentsStructuredContent;
560
- expect(structuredContent.format).toBe('html');
560
+ expect(structuredContent.formats).toEqual(['html']);
561
561
 
562
562
  const content = result.content as { type: string; text: string }[];
563
563
  const text = content[0]?.text;
564
- expect(text).toContain('Format: html');
564
+ expect(text).toContain('Formats: html');
565
565
  },
566
566
  { retry: 2 },
567
567
  );
@@ -577,7 +577,7 @@ describe('registerContentsTool', () => {
577
577
  });
578
578
 
579
579
  const structuredContent = result.structuredContent as ContentsStructuredContent;
580
- expect(structuredContent.format).toBe('markdown');
580
+ expect(structuredContent.formats).toEqual(['markdown']);
581
581
  },
582
582
  { retry: 2 },
583
583
  );