@youdotcom-oss/mcp 3.1.0 → 3.2.0-next.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/stdio.js +20005 -6467
- package/package.json +12 -6
- package/src/contents/contents.schemas.ts +30 -0
- package/src/contents/contents.utils.ts +85 -0
- package/src/contents/register-contents-tool.ts +93 -0
- package/src/contents/tests/contents.utils.spec.ts +123 -0
- package/src/get-mcp-server.ts +17 -0
- package/src/main.ts +8 -0
- package/src/research/register-research-tool.ts +69 -0
- package/src/research/research.schemas.ts +19 -0
- package/src/research/research.utils.ts +30 -0
- package/src/research/tests/research.utils.spec.ts +114 -0
- package/src/search/register-search-tool.ts +87 -0
- package/src/search/search.schemas.ts +38 -0
- package/src/search/search.utils.ts +70 -0
- package/src/search/tests/search.utils.spec.ts +156 -0
- package/src/shared/format-search-results-text.ts +49 -0
- package/src/shared/get-logger.ts +19 -0
- package/src/shared/tests/shared.utils.spec.ts +160 -0
- package/src/shared/use-client-version.ts +21 -0
- package/src/stdio-server.ts +24 -0
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@youdotcom-oss/mcp",
|
|
3
|
-
"version": "3.
|
|
4
|
-
"description": "You.com MCP
|
|
3
|
+
"version": "3.2.0-next.1",
|
|
4
|
+
"description": "You.com MCP server — web search, AI research, and content extraction via You.com APIs",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"engines": {
|
|
7
7
|
"node": ">=18",
|
|
@@ -21,25 +21,29 @@
|
|
|
21
21
|
"mcp",
|
|
22
22
|
"web search",
|
|
23
23
|
"model context protocol",
|
|
24
|
-
"
|
|
24
|
+
"ai research"
|
|
25
25
|
],
|
|
26
26
|
"bin": "bin/stdio.js",
|
|
27
27
|
"type": "module",
|
|
28
|
+
"exports": {
|
|
29
|
+
".": "./src/main.ts"
|
|
30
|
+
},
|
|
28
31
|
"files": [
|
|
29
32
|
"bin/stdio.js",
|
|
33
|
+
"src",
|
|
30
34
|
"server.json"
|
|
31
35
|
],
|
|
32
36
|
"publishConfig": {
|
|
33
37
|
"access": "public"
|
|
34
38
|
},
|
|
35
39
|
"scripts": {
|
|
36
|
-
"build": "bun build ./src/stdio-
|
|
40
|
+
"build": "bun build ./src/stdio-server.ts --outfile ./bin/stdio.js --target=node",
|
|
37
41
|
"check": "bun run check:biome && bun run check:types && bun run check:package",
|
|
38
42
|
"check:biome": "biome check",
|
|
39
43
|
"check:package": "format-package --check",
|
|
40
44
|
"check:types": "tsc --noEmit",
|
|
41
45
|
"check:write": "biome check --write && bun run format:package",
|
|
42
|
-
"dev": "bun src/stdio-
|
|
46
|
+
"dev": "bun src/stdio-server.ts",
|
|
43
47
|
"format:package": "format-package --write",
|
|
44
48
|
"inspector": "test -n \"$YDC_API_KEY\" || (echo 'YDC_API_KEY is not set' && exit 1); mcp-inspector --config mcp-inspector.json -e YDC_API_KEY=$YDC_API_KEY",
|
|
45
49
|
"prepublishOnly": "bun run build",
|
|
@@ -48,7 +52,9 @@
|
|
|
48
52
|
},
|
|
49
53
|
"mcpName": "io.github.youdotcom-oss/mcp",
|
|
50
54
|
"dependencies": {
|
|
51
|
-
"@modelcontextprotocol/sdk": "^1.
|
|
55
|
+
"@modelcontextprotocol/sdk": "^1.28.0",
|
|
56
|
+
"@youdotcom-oss/api": "0.5.1",
|
|
57
|
+
"zod": "^4.3.6"
|
|
52
58
|
},
|
|
53
59
|
"devDependencies": {
|
|
54
60
|
"@modelcontextprotocol/inspector": "0.21.1"
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import * as z from 'zod'
|
|
2
|
+
|
|
3
|
+
/**
 * Page-level metadata for one extracted item.
 * `favicon_url` is required; `site_name` may be omitted or null.
 */
const ContentsItemMetadataSchema = z.object({
  favicon_url: z.string().describe('Favicon URL'),
  site_name: z.string().optional().nullable().describe('Site name'),
})

/**
 * One extracted page: its URL plus whichever of title / markdown / html / metadata
 * are present for that page.
 */
const ContentsItemSchema = z.object({
  url: z.string().describe('URL'),
  title: z.string().optional().describe('Title'),
  markdown: z.string().optional().describe('Markdown content'),
  html: z.string().optional().describe('HTML content'),
  metadata: ContentsItemMetadataSchema.optional().nullable().describe('Page metadata'),
})

/**
 * Schema of the `structuredContent` payload returned by the contents tool:
 * how many URLs were processed, which formats were requested, and the
 * extracted items themselves.
 */
export const ContentsStructuredContentSchema = z.object({
  count: z.number().describe('URLs processed'),
  formats: z.array(z.string()).describe('Content formats requested'),
  items: z.array(ContentsItemSchema).describe('Extracted items'),
})

/** Static type inferred from {@link ContentsStructuredContentSchema}. */
export type ContentsStructuredContent = z.infer<typeof ContentsStructuredContentSchema>
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
import type { ContentsApiResponse } from '@youdotcom-oss/api'
|
|
2
|
+
import type { ContentsStructuredContent } from './contents.schemas.ts'
|
|
3
|
+
|
|
4
|
+
/** Maximum number of UTF-16 code units of an item's HTML echoed into the text preview. */
const HTML_PREVIEW_LENGTH = 500

/**
 * Return at most `limit` leading UTF-16 code units of `html`.
 *
 * When the cut would fall between the two halves of a surrogate pair, the
 * dangling high surrogate is dropped so the preview never ends in a lone
 * surrogate (which would be emitted as a replacement character downstream).
 */
const buildHtmlPreview = (html: string, limit: number): string => {
  if (html.length <= limit) return html

  const lastKeptUnit = html.charCodeAt(limit - 1)
  const splitsSurrogatePair = lastKeptUnit >= 0xd800 && lastKeptUnit <= 0xdbff
  return html.substring(0, splitsSurrogatePair ? limit - 1 : limit)
}

/**
 * Format a contents API response for MCP output.
 *
 * The text output lists every item with its title (or `Untitled`), URL and the
 * sections matching the requested formats: full markdown, a preview of the HTML
 * limited to `HTML_PREVIEW_LENGTH` code units, and site name / favicon metadata.
 * The structured output carries the untruncated fields of every item.
 *
 * @param response - Validated API response (one entry per URL)
 * @param formats - Formats used for extraction; only `markdown`, `html` and
 *   `metadata` produce text sections
 * @returns `content` with a single text entry, and `structuredContent` with
 *   `count`, `formats` and the per-URL `items`
 */
export const formatContentsResponse = (
  response: ContentsApiResponse,
  formats: string[],
): {
  content: Array<{ type: 'text'; text: string }>
  structuredContent: ContentsStructuredContent
} => {
  // The requested formats are identical for every item, so resolve them once.
  const showMarkdown = formats.includes('markdown')
  const showHtml = formats.includes('html')
  const showMetadata = formats.includes('metadata')

  const textParts: string[] = [
    `Successfully extracted content from ${response.length} URL(s):\n`,
    `Formats: ${formats.join(', ')}\n`,
  ]
  const items: ContentsStructuredContent['items'] = []

  for (const item of response) {
    // Per-item header
    textParts.push(`\n## ${item.title || 'Untitled'}`, `URL: ${item.url}\n`, '---\n')

    if (showMarkdown && item.markdown) {
      textParts.push('\n### Markdown Content\n', item.markdown, '\n')
    }

    if (showHtml && item.html) {
      // Text output is a brief preview only — full HTML is in structuredContent.items[].html
      textParts.push(
        '\n### HTML Content\n',
        `Length: ${item.html.length} characters\n`,
        buildHtmlPreview(item.html, HTML_PREVIEW_LENGTH),
      )
      if (item.html.length > HTML_PREVIEW_LENGTH) {
        textParts.push('...\n(truncated for display — full HTML available in structuredContent)')
      }
      textParts.push('\n')
    }

    if (showMetadata && item.metadata) {
      textParts.push('\n### Metadata\n')

      if (item.metadata.site_name) {
        textParts.push(`**Site Name:** ${item.metadata.site_name}\n`)
      }

      if (item.metadata.favicon_url) {
        textParts.push(`**Favicon:** ${item.metadata.favicon_url}\n`)
      }
    }

    textParts.push('\n---\n')

    // Structured output keeps every field in full; null values are normalized to undefined.
    items.push({
      url: item.url,
      title: item.title ?? undefined,
      markdown: item.markdown ?? undefined,
      html: item.html ?? undefined,
      metadata: item.metadata ?? undefined,
    })
  }

  return {
    content: [
      {
        type: 'text',
        text: textParts.join('\n'),
      },
    ],
    structuredContent: {
      count: response.length,
      formats,
      items,
    },
  }
}
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'
|
|
2
|
+
import { ContentsQuerySchema, fetchContents, generateErrorReportLink } from '@youdotcom-oss/api'
|
|
3
|
+
import { getLogger } from '../shared/get-logger.ts'
|
|
4
|
+
import { ContentsStructuredContentSchema } from './contents.schemas.ts'
|
|
5
|
+
import { formatContentsResponse } from './contents.utils.ts'
|
|
6
|
+
|
|
7
|
+
/**
 * Registers the `you-contents` tool on the given MCP server.
 *
 * The handler logs the request, fetches page contents through `fetchContents`,
 * formats the response with `formatContentsResponse`, and returns both the text
 * and structured results. Any thrown error is logged together with an
 * error-report link and returned as an `isError` result instead of propagating.
 *
 * @param mcp - MCP server to register the tool on
 * @param YDC_API_KEY - Optional API key forwarded to `fetchContents`
 * @param getUserAgent - Supplies the client user agent for API calls and error reports
 */
export const registerContentsTool = ({
  mcp,
  YDC_API_KEY,
  getUserAgent,
}: {
  mcp: McpServer
  YDC_API_KEY?: string
  getUserAgent: () => string
}) => {
  const toolName = 'you-contents'

  mcp.registerTool(
    toolName,
    {
      title: 'Extract Web Page Contents',
      description: 'Extract page content in markdown or HTML',
      inputSchema: ContentsQuerySchema.shape,
      outputSchema: ContentsStructuredContentSchema.shape,
    },
    async (contentsQuery, { sendNotification }) => {
      const logger = getLogger(sendNotification)

      try {
        const { urls, formats, format, crawl_timeout } = contentsQuery

        // Backward compatibility: the `formats` array wins, then the legacy
        // single `format` string, and finally the markdown default.
        const requestFormats = formats ? formats : format ? [format] : ['markdown']

        let requestSummary = `Contents API call initiated for ${urls.length} URL(s) with formats: ${requestFormats.join(', ')}`
        if (crawl_timeout) {
          requestSummary += ` with timeout: ${crawl_timeout}s`
        }
        await logger({ level: 'info', data: requestSummary })

        const response = await fetchContents({ contentsQuery, YDC_API_KEY, getUserAgent })
        const formatted = formatContentsResponse(response, requestFormats)

        await logger({
          level: 'info',
          data: `Contents API call successful: extracted ${response.length} page(s)`,
        })

        return {
          content: formatted.content,
          structuredContent: formatted.structuredContent,
        }
      } catch (err: unknown) {
        // Non-Error throwables are stringified so a message is always available.
        const errorMessage = err instanceof Error ? err.message : String(err)
        const reportLink = generateErrorReportLink({
          errorMessage,
          tool: toolName,
          clientInfo: getUserAgent(),
        })

        await logger({
          level: 'error',
          data: `Contents API call failed: ${errorMessage}\n\nReport this issue: ${reportLink}`,
        })

        const errorContent = [
          {
            type: 'text' as const,
            text: `Error extracting contents: ${errorMessage}`,
          },
        ]
        return {
          content: errorContent,
          structuredContent: undefined,
          isError: true,
        }
      }
    },
  )
}
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
import { describe, expect, test } from 'bun:test'
|
|
2
|
+
import type { ContentsApiResponse } from '@youdotcom-oss/api'
|
|
3
|
+
import { formatContentsResponse } from '../contents.utils.ts'
|
|
4
|
+
|
|
5
|
+
// Unit tests for formatContentsResponse: text rendering per requested format and
// the shape of the structured content returned alongside it.
describe('formatContentsResponse', () => {
  test('formats single markdown content correctly', () => {
    const mockResponse: ContentsApiResponse = [
      {
        url: 'https://example.com',
        title: 'Example Page',
        markdown: '# Hello\n\nThis is a test page with some content.',
      },
    ]

    const result = formatContentsResponse(mockResponse, ['markdown'])

    expect(result).toHaveProperty('content')
    expect(result).toHaveProperty('structuredContent')
    expect(Array.isArray(result.content)).toBe(true)
    expect(result.content[0]).toHaveProperty('type', 'text')
    expect(result.content[0]).toHaveProperty('text')

    const text = result.content[0]?.text
    expect(text).toContain('Example Page')
    expect(text).toContain('https://example.com')
    expect(text).toContain('Formats: markdown')
    expect(text).toContain('# Hello')
    expect(text).toContain('This is a test page with some content.')

    expect(result.structuredContent).toHaveProperty('count', 1)
    expect(result.structuredContent).toHaveProperty('formats')
    expect(result.structuredContent.formats).toEqual(['markdown'])
    expect(result.structuredContent.items).toHaveLength(1)

    const item = result.structuredContent.items[0]
    expect(item).toBeDefined()

    expect(item).toHaveProperty('url', 'https://example.com')
    expect(item).toHaveProperty('title', 'Example Page')
    expect(item).toHaveProperty('markdown', '# Hello\n\nThis is a test page with some content.')
  })

  test('formats multiple items correctly', () => {
    const mockResponse: ContentsApiResponse = [
      {
        url: 'https://example1.com',
        title: 'Page 1',
        markdown: 'Content 1',
      },
      {
        url: 'https://example2.com',
        title: 'Page 2',
        markdown: 'Content 2',
      },
    ]

    const result = formatContentsResponse(mockResponse, ['markdown'])

    expect(result.structuredContent.count).toBe(2)
    expect(result.structuredContent.items).toHaveLength(2)

    const text = result.content[0]?.text
    expect(text).toContain('Page 1')
    expect(text).toContain('Page 2')
    expect(text).toContain('https://example1.com')
    expect(text).toContain('https://example2.com')
  })

  test('handles html format', () => {
    const mockResponse: ContentsApiResponse = [
      {
        url: 'https://example.com',
        title: 'HTML Page',
        html: '<html><body><h1>Hello</h1></body></html>',
      },
    ]

    const result = formatContentsResponse(mockResponse, ['html'])

    expect(result.structuredContent.formats).toEqual(['html'])
    const text = result.content[0]?.text
    expect(text).toContain('Formats: html')
    expect(text).toContain('<html>')
    // Short HTML fits in the preview, so no truncation notice is expected.
    expect(text).not.toContain('(truncated for display')
  })

  test('truncates long html in text but keeps full html in structuredContent', () => {
    const longHtml = `<div>${'x'.repeat(1000)}</div>`
    const mockResponse: ContentsApiResponse = [
      {
        url: 'https://example.com',
        title: 'Long HTML Page',
        html: longHtml,
      },
    ]

    const result = formatContentsResponse(mockResponse, ['html'])

    const text = result.content[0]?.text
    // Text output carries only a preview plus the original length and a notice
    expect(text).toContain(`Length: ${longHtml.length} characters`)
    expect(text).toContain('(truncated for display')
    expect(text).not.toContain(longHtml)

    // Structured content keeps the untruncated HTML
    expect(result.structuredContent.items[0]?.html).toBe(longHtml)
  })

  test('includes full content for long text', () => {
    const longContent = 'a'.repeat(1000)
    const mockResponse: ContentsApiResponse = [
      {
        url: 'https://example.com',
        title: 'Long Page',
        markdown: longContent,
      },
    ]

    const result = formatContentsResponse(mockResponse, ['markdown'])

    const text = result.content[0]?.text
    // Full content should be included (not truncated)
    expect(text).toContain(longContent)

    // Structured content should have full markdown content
    const item = result.structuredContent.items[0]
    expect(item?.markdown).toBe(longContent)
  })

  test('handles empty content gracefully', () => {
    const mockResponse: ContentsApiResponse = [
      {
        url: 'https://example.com',
        title: 'Empty Page',
        markdown: '',
      },
    ]

    const result = formatContentsResponse(mockResponse, ['markdown'])

    expect(result.structuredContent.items[0]?.markdown).toBe('')
    const text = result.content[0]?.text
    expect(text).toContain('Empty Page')
    // Empty markdown produces no markdown section in the text output
    expect(text).not.toContain('### Markdown Content')
  })
})
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'
|
|
2
|
+
import packageJson from '../package.json' with { type: 'json' }
|
|
3
|
+
|
|
4
|
+
/**
 * Creates a new `McpServer` named `You.com`, versioned from this package's
 * `package.json`, with the logging and tools (`listChanged`) capabilities
 * declared and usage instructions for clients attached.
 */
export const getMcpServer = () => {
  const serverInfo = {
    name: 'You.com',
    version: packageJson.version,
  }

  const serverOptions = {
    capabilities: {
      logging: {},
      tools: { listChanged: true },
    },
    instructions: `Use this server to search the web, get AI-powered answers with web context, and extract content from web pages using You.com. The you-contents tool extracts page content and returns it in markdown or HTML format. Use HTML format for layout preservation, interactive content, and visual fidelity; use markdown for text extraction and simpler consumption.`,
  }

  return new McpServer(serverInfo, serverOptions)
}
|
package/src/main.ts
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
export type { ContentsStructuredContent } from './contents/contents.schemas.ts'
|
|
2
|
+
export { registerContentsTool } from './contents/register-contents-tool.ts'
|
|
3
|
+
export { getMcpServer } from './get-mcp-server.ts'
|
|
4
|
+
export { registerResearchTool } from './research/register-research-tool.ts'
|
|
5
|
+
export type { ResearchStructuredContent } from './research/research.schemas.ts'
|
|
6
|
+
export { registerSearchTool } from './search/register-search-tool.ts'
|
|
7
|
+
export type { SearchStructuredContent } from './search/search.schemas.ts'
|
|
8
|
+
export { useGetClientVersion } from './shared/use-client-version.ts'
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'
|
|
2
|
+
import { callResearch, generateErrorReportLink, ResearchQuerySchema } from '@youdotcom-oss/api'
|
|
3
|
+
import { getLogger } from '../shared/get-logger.ts'
|
|
4
|
+
import { ResearchStructuredContentSchema } from './research.schemas.ts'
|
|
5
|
+
import { formatResearchResults } from './research.utils.ts'
|
|
6
|
+
|
|
7
|
+
/**
 * Registers the `you-research` tool on the given MCP server.
 *
 * The handler calls `callResearch`, logs how many sources came back, and
 * returns the result formatted by `formatResearchResults`. Any thrown error is
 * logged together with an error-report link and returned as an `isError`
 * result instead of propagating.
 *
 * @param mcp - MCP server to register the tool on
 * @param YDC_API_KEY - Optional API key forwarded to `callResearch`
 * @param getUserAgent - Supplies the client user agent for API calls and error reports
 */
export const registerResearchTool = ({
  mcp,
  YDC_API_KEY,
  getUserAgent,
}: {
  mcp: McpServer
  YDC_API_KEY?: string
  getUserAgent: () => string
}) => {
  const toolName = 'you-research'

  mcp.registerTool(
    toolName,
    {
      title: 'Research',
      description:
        'Research a topic with comprehensive answers and cited sources. Configurable effort levels (lite, standard, deep, exhaustive).',
      inputSchema: ResearchQuerySchema.shape,
      outputSchema: ResearchStructuredContentSchema.shape,
    },
    async (researchQuery, { sendNotification }) => {
      const logger = getLogger(sendNotification)

      try {
        const response = await callResearch({ researchQuery, YDC_API_KEY, getUserAgent })

        const sourceCount = response.output.sources.length
        // Only the first 100 characters of the query are logged.
        const queryPreview = researchQuery.input.substring(0, 100)
        await logger({
          level: 'info',
          data: `Research for "${queryPreview}" complete: ${sourceCount} source(s)`,
        })

        const formatted = formatResearchResults(response)
        return {
          content: formatted.content,
          structuredContent: formatted.structuredContent,
        }
      } catch (err: unknown) {
        // Non-Error throwables are stringified so a message is always available.
        const errorMessage = err instanceof Error ? err.message : String(err)
        const reportLink = generateErrorReportLink({
          errorMessage,
          tool: toolName,
          clientInfo: getUserAgent(),
        })

        await logger({
          level: 'error',
          data: `Research API call failed: ${errorMessage}\n\nReport this issue: ${reportLink}`,
        })

        const errorContent = [
          {
            type: 'text' as const,
            text: `Error: ${errorMessage}`,
          },
        ]
        return {
          content: errorContent,
          structuredContent: undefined,
          isError: true,
        }
      }
    },
  )
}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import * as z from 'zod'
|
|
2
|
+
|
|
3
|
+
/**
 * Summary of a single source cited by a research answer: its URL, an optional
 * title, and how many excerpts were taken from it.
 */
const ResearchSourceSchema = z.object({
  url: z.string().describe('Source URL'),
  title: z.string().optional().describe('Source title'),
  snippetCount: z.number().describe('Number of excerpts from this source'),
})

/**
 * Minimal schema for the research tool's `structuredContent`.
 * It deliberately carries only the content type and source summaries to avoid
 * duplicating the payload; the full research content is in the text content field.
 */
export const ResearchStructuredContentSchema = z.object({
  contentType: z.string().describe('Format of the content field'),
  sourceCount: z.number().describe('Number of sources used'),
  sources: z.array(ResearchSourceSchema).describe('Sources used in the research answer'),
})

/** Static type inferred from {@link ResearchStructuredContentSchema}. */
export type ResearchStructuredContent = z.infer<typeof ResearchStructuredContentSchema>
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import type { ResearchResponse } from '@youdotcom-oss/api'
|
|
2
|
+
import { formatResearchResponse } from '@youdotcom-oss/api'
|
|
3
|
+
import type { ResearchStructuredContent } from './research.schemas.ts'
|
|
4
|
+
|
|
5
|
+
/**
 * Format a research API response for MCP output.
 *
 * The text content is produced by `formatResearchResponse`; the structured
 * content is a compact summary holding the content type, the number of
 * sources, and per-source URL / title / snippet count.
 *
 * @param response - Research API response
 * @returns `content` with a single text entry, and the summary `structuredContent`
 */
export const formatResearchResults = (
  response: ResearchResponse,
): {
  content: Array<{ type: 'text'; text: string }>
  structuredContent: ResearchStructuredContent
} => {
  const text = formatResearchResponse(response)
  const { content_type: contentType, sources } = response.output

  // A source without a snippets list counts as zero excerpts.
  const sourceSummaries = sources.map(({ url, title, snippets }) => ({
    url,
    title,
    snippetCount: snippets?.length ?? 0,
  }))

  return {
    content: [{ type: 'text', text }],
    structuredContent: {
      contentType,
      sourceCount: sources.length,
      sources: sourceSummaries,
    },
  }
}
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
import { describe, expect, test } from 'bun:test'
|
|
2
|
+
import type { ResearchResponse } from '@youdotcom-oss/api'
|
|
3
|
+
import { formatResearchResults } from '../research.utils.ts'
|
|
4
|
+
|
|
5
|
+
// Unit tests for formatResearchResults: text passthrough and the compact
// source summary exposed as structured content.
describe('formatResearchResults', () => {
  // Builds a text-type research response around the given answer and sources.
  const buildResponse = (
    content: string,
    sources: ResearchResponse['output']['sources'],
  ): ResearchResponse => ({
    output: {
      content,
      content_type: 'text',
      sources,
    },
  })

  test('formats research response with sources correctly', () => {
    const mockResponse = buildResponse(
      '# Research Answer\n\nThis is a comprehensive answer about the topic.',
      [
        {
          url: 'https://example.com/source1',
          title: 'Source One',
          snippets: ['First snippet', 'Second snippet'],
        },
        {
          url: 'https://example.com/source2',
          title: 'Source Two',
          snippets: ['Another snippet'],
        },
      ],
    )

    const result = formatResearchResults(mockResponse)

    expect(result).toHaveProperty('content')
    expect(result).toHaveProperty('structuredContent')
    expect(Array.isArray(result.content)).toBe(true)

    const [firstContent] = result.content
    expect(firstContent).toHaveProperty('type', 'text')
    expect(firstContent).toHaveProperty('text')

    const text = firstContent?.text
    expect(text).toContain('Research Answer')
    expect(text).toContain('Source One')
    expect(text).toContain('https://example.com/source1')

    const { contentType, sourceCount, sources } = result.structuredContent
    expect(contentType).toBe('text')
    expect(sourceCount).toBe(2)
    expect(sources).toHaveLength(2)
    expect(sources[0]).toMatchObject({
      url: 'https://example.com/source1',
      title: 'Source One',
      snippetCount: 2,
    })
    expect(sources[1]).toMatchObject({
      url: 'https://example.com/source2',
      title: 'Source Two',
      snippetCount: 1,
    })
  })

  test('handles source with undefined title', () => {
    const mockResponse = buildResponse('Answer text', [
      {
        url: 'https://example.com/no-title',
        snippets: ['A snippet'],
      },
    ])

    const { structuredContent } = formatResearchResults(mockResponse)

    expect(structuredContent.sourceCount).toBe(1)
    expect(structuredContent.sources[0]).toMatchObject({
      url: 'https://example.com/no-title',
      title: undefined,
      snippetCount: 1,
    })
  })

  test('handles source with empty snippets array', () => {
    const mockResponse = buildResponse('Answer with no-snippet source', [
      {
        url: 'https://example.com/empty-snippets',
        title: 'Empty Snippets Source',
        snippets: [],
      },
    ])

    const { structuredContent } = formatResearchResults(mockResponse)

    expect(structuredContent.sourceCount).toBe(1)
    expect(structuredContent.sources[0]?.snippetCount).toBe(0)
  })

  test('handles response with zero sources', () => {
    const mockResponse = buildResponse('An answer with no cited sources.', [])

    const { content, structuredContent } = formatResearchResults(mockResponse)

    expect(structuredContent.sourceCount).toBe(0)
    expect(structuredContent.sources).toHaveLength(0)
    expect(content[0]?.text).toContain('An answer with no cited sources.')
  })
})
|