@youdotcom-oss/mcp 3.2.2 → 3.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/stdio.js +70 -165
- package/package.json +4 -3
- package/server.json +3 -3
- package/src/contents/contents.utils.ts +8 -34
- package/src/contents/register-contents-tool.ts +15 -13
- package/src/contents/tests/contents.utils.spec.ts +9 -33
- package/src/main.ts +0 -3
- package/src/research/register-research-tool.ts +5 -6
- package/src/research/research.utils.ts +6 -24
- package/src/research/tests/research.utils.spec.ts +5 -68
- package/src/search/register-search-tool.ts +13 -14
- package/src/search/search.utils.ts +7 -50
- package/src/search/tests/register-search-tool.spec.ts +119 -0
- package/src/search/tests/search.utils.spec.ts +104 -68
- package/src/shared/format-search-results-text.ts +20 -0
- package/src/shared/tests/format-search-results-text.spec.ts +95 -0
- package/src/contents/contents.schemas.ts +0 -30
- package/src/research/research.schemas.ts +0 -19
- package/src/search/search.schemas.ts +0 -38
|
@@ -27,31 +27,13 @@ describe('formatSearchResults', () => {
|
|
|
27
27
|
|
|
28
28
|
const result = formatSearchResults(mockResponse)
|
|
29
29
|
|
|
30
|
-
expect(result).
|
|
31
|
-
expect(result).toHaveProperty('
|
|
32
|
-
expect(result).toHaveProperty('
|
|
33
|
-
expect(
|
|
34
|
-
expect(result
|
|
35
|
-
expect(result
|
|
36
|
-
expect(result
|
|
37
|
-
expect(result.content[0]?.text).toContain('Test Title')
|
|
38
|
-
// URL and page_age should be in text content
|
|
39
|
-
expect(result.content[0]?.text).toContain('URL: https://example.com')
|
|
40
|
-
expect(result.content[0]?.text).toContain('Published: 2023-01-01T00:00:00')
|
|
41
|
-
expect(result.structuredContent).toHaveProperty('resultCounts')
|
|
42
|
-
expect(result.structuredContent.resultCounts).toHaveProperty('web', 1)
|
|
43
|
-
expect(result.structuredContent.resultCounts).toHaveProperty('news', 0)
|
|
44
|
-
expect(result.structuredContent.resultCounts).toHaveProperty('total', 1)
|
|
45
|
-
// All fields should be in structuredContent.results
|
|
46
|
-
expect(result.structuredContent).toHaveProperty('results')
|
|
47
|
-
expect(result.structuredContent.results?.web).toBeDefined()
|
|
48
|
-
expect(result.structuredContent.results?.web?.length).toBe(1)
|
|
49
|
-
expect(result.structuredContent.results?.web?.[0]).toMatchObject({
|
|
50
|
-
url: 'https://example.com',
|
|
51
|
-
title: 'Test Title',
|
|
52
|
-
page_age: '2023-01-01T00:00:00',
|
|
53
|
-
})
|
|
54
|
-
expect(result.fullResponse).toBe(mockResponse)
|
|
30
|
+
expect(Array.isArray(result)).toBe(true)
|
|
31
|
+
expect(result[0]).toHaveProperty('type', 'text')
|
|
32
|
+
expect(result[0]).toHaveProperty('text')
|
|
33
|
+
expect(result[0]?.text).toContain('WEB RESULTS:')
|
|
34
|
+
expect(result[0]?.text).toContain('Test Title')
|
|
35
|
+
expect(result[0]?.text).toContain('URL: https://example.com')
|
|
36
|
+
expect(result[0]?.text).toContain('Published: 2023-01-01T00:00:00')
|
|
55
37
|
})
|
|
56
38
|
|
|
57
39
|
test('formats news results correctly', () => {
|
|
@@ -76,24 +58,11 @@ describe('formatSearchResults', () => {
|
|
|
76
58
|
|
|
77
59
|
const result = formatSearchResults(mockResponse)
|
|
78
60
|
|
|
79
|
-
expect(result
|
|
80
|
-
expect(result
|
|
81
|
-
expect(result
|
|
82
|
-
|
|
83
|
-
expect(result
|
|
84
|
-
expect(result.structuredContent).toHaveProperty('resultCounts')
|
|
85
|
-
expect(result.structuredContent.resultCounts).toHaveProperty('web', 0)
|
|
86
|
-
expect(result.structuredContent.resultCounts).toHaveProperty('news', 1)
|
|
87
|
-
expect(result.structuredContent.resultCounts).toHaveProperty('total', 1)
|
|
88
|
-
// All fields should be in structuredContent.results
|
|
89
|
-
expect(result.structuredContent).toHaveProperty('results')
|
|
90
|
-
expect(result.structuredContent.results?.news).toBeDefined()
|
|
91
|
-
expect(result.structuredContent.results?.news?.length).toBe(1)
|
|
92
|
-
expect(result.structuredContent.results?.news?.[0]).toMatchObject({
|
|
93
|
-
url: 'https://news.com/article',
|
|
94
|
-
title: 'News Title',
|
|
95
|
-
page_age: '2023-01-01T00:00:00',
|
|
96
|
-
})
|
|
61
|
+
expect(result[0]?.text).toContain('NEWS RESULTS:')
|
|
62
|
+
expect(result[0]?.text).toContain('News Title')
|
|
63
|
+
expect(result[0]?.text).toContain('Published: 2023-01-01T00:00:00')
|
|
64
|
+
expect(result[0]?.text).toContain('URL: https://news.com/article')
|
|
65
|
+
expect(result[0]?.text).toContain('Description: News description')
|
|
97
66
|
})
|
|
98
67
|
|
|
99
68
|
test('formats both web and news results', () => {
|
|
@@ -127,30 +96,97 @@ describe('formatSearchResults', () => {
|
|
|
127
96
|
|
|
128
97
|
const result = formatSearchResults(mockResponse)
|
|
129
98
|
|
|
130
|
-
expect(result
|
|
131
|
-
expect(result
|
|
132
|
-
expect(result
|
|
133
|
-
|
|
134
|
-
expect(result
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
99
|
+
expect(result[0]?.text).toContain('WEB RESULTS:')
|
|
100
|
+
expect(result[0]?.text).toContain('NEWS RESULTS:')
|
|
101
|
+
expect(result[0]?.text).toContain(`=${'='.repeat(49)}`)
|
|
102
|
+
expect(result[0]?.text).toContain('URL: https://web.com')
|
|
103
|
+
expect(result[0]?.text).toContain('URL: https://news.com/article')
|
|
104
|
+
})
|
|
105
|
+
|
|
106
|
+
test('includes page content indicator when livecrawl returns contents', () => {
|
|
107
|
+
const mockResponse: SearchResponse = {
|
|
108
|
+
results: {
|
|
109
|
+
web: [
|
|
110
|
+
{
|
|
111
|
+
url: 'https://example.com',
|
|
112
|
+
title: 'Livecrawl Title',
|
|
113
|
+
description: 'A page with content',
|
|
114
|
+
snippets: ['snippet'],
|
|
115
|
+
page_age: '2023-01-01T00:00:00',
|
|
116
|
+
authors: [],
|
|
117
|
+
contents: {
|
|
118
|
+
markdown: 'Full page content in markdown format.',
|
|
119
|
+
html: '<p>Full page content in HTML format.</p>',
|
|
120
|
+
},
|
|
121
|
+
},
|
|
122
|
+
],
|
|
123
|
+
news: [],
|
|
124
|
+
},
|
|
125
|
+
metadata: {
|
|
126
|
+
search_uuid: 'test-uuid',
|
|
127
|
+
query: 'livecrawl test',
|
|
128
|
+
latency: 0.5,
|
|
129
|
+
},
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
const result = formatSearchResults(mockResponse)
|
|
133
|
+
|
|
134
|
+
expect(result[0]?.text).toContain('Page content available:')
|
|
135
|
+
expect(result[0]?.text).toContain('chars (markdown)')
|
|
136
|
+
expect(result[0]?.text).toContain('chars (html)')
|
|
137
|
+
})
|
|
138
|
+
|
|
139
|
+
test('omits content indicator when livecrawl contents absent', () => {
|
|
140
|
+
const mockResponse: SearchResponse = {
|
|
141
|
+
results: {
|
|
142
|
+
web: [
|
|
143
|
+
{
|
|
144
|
+
url: 'https://example.com',
|
|
145
|
+
title: 'No Content',
|
|
146
|
+
description: 'A page without livecrawl',
|
|
147
|
+
snippets: ['snippet'],
|
|
148
|
+
},
|
|
149
|
+
],
|
|
150
|
+
news: [],
|
|
151
|
+
},
|
|
152
|
+
metadata: {
|
|
153
|
+
search_uuid: 'test-uuid',
|
|
154
|
+
query: 'test',
|
|
155
|
+
latency: 0.1,
|
|
156
|
+
},
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
const result = formatSearchResults(mockResponse)
|
|
160
|
+
|
|
161
|
+
expect(result[0]?.text).not.toContain('Page content available:')
|
|
162
|
+
})
|
|
163
|
+
|
|
164
|
+
test('includes content indicator for news results with livecrawl', () => {
|
|
165
|
+
const mockResponse: SearchResponse = {
|
|
166
|
+
results: {
|
|
167
|
+
web: [],
|
|
168
|
+
news: [
|
|
169
|
+
{
|
|
170
|
+
title: 'News with Content',
|
|
171
|
+
description: 'Breaking news',
|
|
172
|
+
page_age: '2023-01-01T00:00:00',
|
|
173
|
+
url: 'https://news.com/article',
|
|
174
|
+
contents: {
|
|
175
|
+
markdown: 'Full news article content in markdown.',
|
|
176
|
+
},
|
|
177
|
+
},
|
|
178
|
+
],
|
|
179
|
+
},
|
|
180
|
+
metadata: {
|
|
181
|
+
search_uuid: 'test-uuid',
|
|
182
|
+
query: 'news livecrawl test',
|
|
183
|
+
latency: 0.4,
|
|
184
|
+
},
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
const result = formatSearchResults(mockResponse)
|
|
188
|
+
|
|
189
|
+
expect(result[0]?.text).toContain('Page content available:')
|
|
190
|
+
expect(result[0]?.text).toContain('chars (markdown)')
|
|
155
191
|
})
|
|
156
192
|
})
|
|
@@ -9,8 +9,14 @@ type GenericSearchResult = {
|
|
|
9
9
|
snippet?: string
|
|
10
10
|
snippets?: string[]
|
|
11
11
|
page_age?: string
|
|
12
|
+
contents?: { html?: string; markdown?: string }
|
|
12
13
|
}
|
|
13
14
|
|
|
15
|
+
/**
|
|
16
|
+
* Format a character count with locale-aware number formatting
|
|
17
|
+
*/
|
|
18
|
+
const formatCharCount = (count: number): string => count.toLocaleString()
|
|
19
|
+
|
|
14
20
|
/**
|
|
15
21
|
* Format array of search results into display text
|
|
16
22
|
* Used by search result formatting
|
|
@@ -43,6 +49,20 @@ export const formatSearchResultsText = (results: GenericSearchResult[]): string
|
|
|
43
49
|
parts.push(`Snippet: ${result.snippet}`)
|
|
44
50
|
}
|
|
45
51
|
|
|
52
|
+
// Add contents indicator if livecrawl returned page content
|
|
53
|
+
if (result.contents) {
|
|
54
|
+
const formats: string[] = []
|
|
55
|
+
if (result.contents.markdown) {
|
|
56
|
+
formats.push(`${formatCharCount(result.contents.markdown.length)} chars (markdown)`)
|
|
57
|
+
}
|
|
58
|
+
if (result.contents.html) {
|
|
59
|
+
formats.push(`${formatCharCount(result.contents.html.length)} chars (html)`)
|
|
60
|
+
}
|
|
61
|
+
if (formats.length > 0) {
|
|
62
|
+
parts.push(`Page content available: ${formats.join(', ')}`)
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
|
|
46
66
|
return parts.join('\n')
|
|
47
67
|
})
|
|
48
68
|
.join('\n\n')
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
import { describe, expect, test } from 'bun:test'
|
|
2
|
+
import { formatSearchResultsText } from '../format-search-results-text.ts'
|
|
3
|
+
|
|
4
|
+
describe('formatSearchResultsText', () => {
|
|
5
|
+
test('formats basic search results with title and URL', () => {
|
|
6
|
+
const result = formatSearchResultsText([{ url: 'https://example.com', title: 'Test' }])
|
|
7
|
+
|
|
8
|
+
expect(result).toContain('Title: Test')
|
|
9
|
+
expect(result).toContain('URL: https://example.com')
|
|
10
|
+
})
|
|
11
|
+
|
|
12
|
+
test('includes page_age when present', () => {
|
|
13
|
+
const result = formatSearchResultsText([{ url: 'https://example.com', title: 'Test', page_age: '2023-01-01' }])
|
|
14
|
+
|
|
15
|
+
expect(result).toContain('Published: 2023-01-01')
|
|
16
|
+
})
|
|
17
|
+
|
|
18
|
+
test('includes description when present', () => {
|
|
19
|
+
const result = formatSearchResultsText([
|
|
20
|
+
{ url: 'https://example.com', title: 'Test', description: 'A description' },
|
|
21
|
+
])
|
|
22
|
+
|
|
23
|
+
expect(result).toContain('Description: A description')
|
|
24
|
+
})
|
|
25
|
+
|
|
26
|
+
test('includes snippets array when present', () => {
|
|
27
|
+
const result = formatSearchResultsText([{ url: 'https://example.com', title: 'Test', snippets: ['one', 'two'] }])
|
|
28
|
+
|
|
29
|
+
expect(result).toContain('Snippets:')
|
|
30
|
+
expect(result).toContain('- one')
|
|
31
|
+
expect(result).toContain('- two')
|
|
32
|
+
})
|
|
33
|
+
|
|
34
|
+
test('includes single snippet when present', () => {
|
|
35
|
+
const result = formatSearchResultsText([{ url: 'https://example.com', title: 'Test', snippet: 'a snippet' }])
|
|
36
|
+
|
|
37
|
+
expect(result).toContain('Snippet: a snippet')
|
|
38
|
+
})
|
|
39
|
+
|
|
40
|
+
test('formats multiple results with separator', () => {
|
|
41
|
+
const result = formatSearchResultsText([
|
|
42
|
+
{ url: 'https://a.com', title: 'A' },
|
|
43
|
+
{ url: 'https://b.com', title: 'B' },
|
|
44
|
+
])
|
|
45
|
+
|
|
46
|
+
expect(result).toContain('Title: A')
|
|
47
|
+
expect(result).toContain('Title: B')
|
|
48
|
+
expect(result).toContain('\n\n')
|
|
49
|
+
})
|
|
50
|
+
|
|
51
|
+
test('handles empty results array', () => {
|
|
52
|
+
const result = formatSearchResultsText([])
|
|
53
|
+
|
|
54
|
+
expect(result).toBe('')
|
|
55
|
+
})
|
|
56
|
+
|
|
57
|
+
test('includes contents indicator when markdown content is present', () => {
|
|
58
|
+
const result = formatSearchResultsText([
|
|
59
|
+
{
|
|
60
|
+
url: 'https://example.com',
|
|
61
|
+
title: 'Test',
|
|
62
|
+
contents: { markdown: 'A'.repeat(4523) },
|
|
63
|
+
},
|
|
64
|
+
])
|
|
65
|
+
|
|
66
|
+
expect(result).toContain('Page content available:')
|
|
67
|
+
expect(result).toContain('4,523 chars (markdown)')
|
|
68
|
+
})
|
|
69
|
+
|
|
70
|
+
test('includes contents indicator for both markdown and html', () => {
|
|
71
|
+
const result = formatSearchResultsText([
|
|
72
|
+
{
|
|
73
|
+
url: 'https://example.com',
|
|
74
|
+
title: 'Test',
|
|
75
|
+
contents: { markdown: 'markdown content', html: '<p>html content</p>' },
|
|
76
|
+
},
|
|
77
|
+
])
|
|
78
|
+
|
|
79
|
+
expect(result).toContain('Page content available:')
|
|
80
|
+
expect(result).toContain('chars (markdown)')
|
|
81
|
+
expect(result).toContain('chars (html)')
|
|
82
|
+
})
|
|
83
|
+
|
|
84
|
+
test('omits contents indicator when contents object has no content', () => {
|
|
85
|
+
const result = formatSearchResultsText([{ url: 'https://example.com', title: 'Test', contents: {} }])
|
|
86
|
+
|
|
87
|
+
expect(result).not.toContain('Page content available:')
|
|
88
|
+
})
|
|
89
|
+
|
|
90
|
+
test('omits contents indicator when contents is not present', () => {
|
|
91
|
+
const result = formatSearchResultsText([{ url: 'https://example.com', title: 'Test' }])
|
|
92
|
+
|
|
93
|
+
expect(result).not.toContain('Page content available:')
|
|
94
|
+
})
|
|
95
|
+
})
|
|
@@ -1,30 +0,0 @@
|
|
|
1
|
-
import * as z from 'zod'
|
|
2
|
-
|
|
3
|
-
/**
|
|
4
|
-
* Structured content schema for MCP response
|
|
5
|
-
* Includes full content and metadata for each URL
|
|
6
|
-
*/
|
|
7
|
-
export const ContentsStructuredContentSchema = z.object({
|
|
8
|
-
count: z.number().describe('URLs processed'),
|
|
9
|
-
formats: z.array(z.string()).describe('Content formats requested'),
|
|
10
|
-
items: z
|
|
11
|
-
.array(
|
|
12
|
-
z.object({
|
|
13
|
-
url: z.string().describe('URL'),
|
|
14
|
-
title: z.string().optional().describe('Title'),
|
|
15
|
-
markdown: z.string().optional().describe('Markdown content'),
|
|
16
|
-
html: z.string().optional().describe('HTML content'),
|
|
17
|
-
metadata: z
|
|
18
|
-
.object({
|
|
19
|
-
favicon_url: z.string().describe('Favicon URL'),
|
|
20
|
-
site_name: z.string().optional().nullable().describe('Site name'),
|
|
21
|
-
})
|
|
22
|
-
.optional()
|
|
23
|
-
.nullable()
|
|
24
|
-
.describe('Page metadata'),
|
|
25
|
-
}),
|
|
26
|
-
)
|
|
27
|
-
.describe('Extracted items'),
|
|
28
|
-
})
|
|
29
|
-
|
|
30
|
-
export type ContentsStructuredContent = z.infer<typeof ContentsStructuredContentSchema>
|
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
import * as z from 'zod'
|
|
2
|
-
|
|
3
|
-
// Minimal schema for structuredContent (reduces payload duplication)
|
|
4
|
-
// Full research content is in the text content field
|
|
5
|
-
export const ResearchStructuredContentSchema = z.object({
|
|
6
|
-
contentType: z.string().describe('Format of the content field'),
|
|
7
|
-
sourceCount: z.number().describe('Number of sources used'),
|
|
8
|
-
sources: z
|
|
9
|
-
.array(
|
|
10
|
-
z.object({
|
|
11
|
-
url: z.string().describe('Source URL'),
|
|
12
|
-
title: z.string().optional().describe('Source title'),
|
|
13
|
-
snippetCount: z.number().describe('Number of excerpts from this source'),
|
|
14
|
-
}),
|
|
15
|
-
)
|
|
16
|
-
.describe('Sources used in the research answer'),
|
|
17
|
-
})
|
|
18
|
-
|
|
19
|
-
export type ResearchStructuredContent = z.infer<typeof ResearchStructuredContentSchema>
|
|
@@ -1,38 +0,0 @@
|
|
|
1
|
-
import * as z from 'zod'
|
|
2
|
-
|
|
3
|
-
// Minimal schema for structuredContent (reduces payload duplication)
|
|
4
|
-
// Excludes metadata (query, search_uuid, latency) as these are not actionable by LLM
|
|
5
|
-
export const SearchStructuredContentSchema = z.object({
|
|
6
|
-
resultCounts: z.object({
|
|
7
|
-
web: z.number().describe('Web results'),
|
|
8
|
-
news: z.number().describe('News results'),
|
|
9
|
-
total: z.number().describe('Total results'),
|
|
10
|
-
}),
|
|
11
|
-
results: z
|
|
12
|
-
.object({
|
|
13
|
-
web: z
|
|
14
|
-
.array(
|
|
15
|
-
z.object({
|
|
16
|
-
url: z.string().describe('URL'),
|
|
17
|
-
title: z.string().describe('Title'),
|
|
18
|
-
page_age: z.string().optional().describe('Publication timestamp'),
|
|
19
|
-
}),
|
|
20
|
-
)
|
|
21
|
-
.optional()
|
|
22
|
-
.describe('Web results'),
|
|
23
|
-
news: z
|
|
24
|
-
.array(
|
|
25
|
-
z.object({
|
|
26
|
-
url: z.string().describe('URL'),
|
|
27
|
-
title: z.string().describe('Title'),
|
|
28
|
-
page_age: z.string().describe('Publication timestamp'),
|
|
29
|
-
}),
|
|
30
|
-
)
|
|
31
|
-
.optional()
|
|
32
|
-
.describe('News results'),
|
|
33
|
-
})
|
|
34
|
-
.optional()
|
|
35
|
-
.describe('Search results'),
|
|
36
|
-
})
|
|
37
|
-
|
|
38
|
-
export type SearchStructuredContent = z.infer<typeof SearchStructuredContentSchema>
|