@youdotcom-oss/mcp 3.2.1 → 3.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/stdio.js CHANGED
@@ -12572,7 +12572,7 @@ var ResearchQuerySchema = object({
12572
12572
  var ResearchSourceSchema = object({
12573
12573
  url: string2().describe("Source webpage URL"),
12574
12574
  title: string2().optional().describe("Source webpage title"),
12575
- snippets: array(string2()).describe("Relevant excerpts from the source page used in generating the answer")
12575
+ snippets: array(string2()).optional().describe("Relevant excerpts from the source page used in generating the answer")
12576
12576
  });
12577
12577
  var ResearchOutputSchema = object({
12578
12578
  content: string2().describe("Comprehensive response with inline citations, formatted in Markdown"),
@@ -12634,7 +12634,7 @@ var formatResearchResponse = (response) => {
12634
12634
  `);
12635
12635
  parts.push(`**URL:** ${source.url}
12636
12636
  `);
12637
- if (source.snippets.length > 0) {
12637
+ if (source.snippets?.length) {
12638
12638
  parts.push(`
12639
12639
  **Key Excerpts:**
12640
12640
  `);
@@ -12747,8 +12747,17 @@ var SearchQuerySchema = object({
12747
12747
  ]).optional().describe("Country code"),
12748
12748
  safesearch: _enum(["off", "moderate", "strict"]).optional().describe("Filter level"),
12749
12749
  livecrawl: _enum(["web", "news", "all"]).optional().describe("Live-crawl sections for full content"),
12750
- livecrawl_formats: _enum(["html", "markdown"]).optional().describe("Format for crawled content")
12750
+ livecrawl_formats: array(_enum(["html", "markdown"])).optional().describe("Formats for crawled content"),
12751
+ language: LanguageSchema.optional().describe("Language code (BCP 47 format)"),
12752
+ include_domains: array(string2()).max(500).optional().describe("Domains to include in results (up to 500)"),
12753
+ exclude_domains: array(string2()).max(500).optional().describe("Domains to exclude from results (up to 500)"),
12754
+ crawl_timeout: number2().int().min(1).max(60).optional().describe("Crawl timeout in seconds (1-60)")
12751
12755
  });
12756
+ var validateSearchQuery = (searchQuery) => {
12757
+ if (searchQuery.include_domains && searchQuery.exclude_domains) {
12758
+ throw new Error("Cannot combine include_domains and exclude_domains");
12759
+ }
12760
+ };
12752
12761
  var WebResultSchema = object({
12753
12762
  url: string2().describe("URL"),
12754
12763
  title: string2().describe("Title"),
@@ -12807,23 +12816,18 @@ var fetchSearchResults = async ({
12807
12816
  getUserAgent,
12808
12817
  customHeaders
12809
12818
  }) => {
12810
- const url = new URL(SEARCH_API_URL);
12811
- const searchParams = new URLSearchParams;
12812
- for (const [name, value] of Object.entries(searchQuery)) {
12813
- if (value !== undefined && value !== null) {
12814
- searchParams.append(name, `${value}`);
12815
- }
12816
- }
12817
- url.search = searchParams.toString();
12819
+ validateSearchQuery(searchQuery);
12818
12820
  const options = {
12819
- method: "GET",
12821
+ method: "POST",
12820
12822
  headers: new Headers({
12821
12823
  ...customHeaders,
12822
12824
  "X-API-Key": YDC_API_KEY || "",
12825
+ "Content-Type": "application/json",
12823
12826
  "User-Agent": getUserAgent()
12824
- })
12827
+ }),
12828
+ body: JSON.stringify(searchQuery)
12825
12829
  };
12826
- const response = await fetch(url, options);
12830
+ const response = await fetch(SEARCH_API_URL, options);
12827
12831
  if (!response.ok) {
12828
12832
  const errorCode = response.status;
12829
12833
  if (errorCode === 429) {
@@ -20460,7 +20464,7 @@ var EMPTY_COMPLETION_RESULT = {
20460
20464
  // package.json
20461
20465
  var package_default = {
20462
20466
  name: "@youdotcom-oss/mcp",
20463
- version: "3.2.1",
20467
+ version: "3.2.3",
20464
20468
  description: "You.com MCP server — web search, AI research, and content extraction via You.com APIs",
20465
20469
  license: "MIT",
20466
20470
  engines: {
@@ -20513,7 +20517,7 @@ var package_default = {
20513
20517
  mcpName: "io.github.youdotcom-oss/mcp",
20514
20518
  dependencies: {
20515
20519
  "@modelcontextprotocol/sdk": "^1.28.0",
20516
- "@youdotcom-oss/api": "0.5.1",
20520
+ "@youdotcom-oss/api": "0.5.2",
20517
20521
  zod: "^4.3.6"
20518
20522
  },
20519
20523
  devDependencies: {
@@ -20630,17 +20634,27 @@ var SearchStructuredContentSchema = object({
20630
20634
  web: array(object({
20631
20635
  url: string2().describe("URL"),
20632
20636
  title: string2().describe("Title"),
20633
- page_age: string2().optional().describe("Publication timestamp")
20637
+ page_age: string2().optional().describe("Publication timestamp"),
20638
+ snippets: array(string2()).optional().describe("Content snippets"),
20639
+ contents: object({
20640
+ html: string2().optional().describe("Full HTML content"),
20641
+ markdown: string2().optional().describe("Full Markdown content")
20642
+ }).optional().describe("Livecrawled page content")
20634
20643
  })).optional().describe("Web results"),
20635
20644
  news: array(object({
20636
20645
  url: string2().describe("URL"),
20637
20646
  title: string2().describe("Title"),
20638
- page_age: string2().describe("Publication timestamp")
20647
+ page_age: string2().describe("Publication timestamp"),
20648
+ contents: object({
20649
+ html: string2().optional().describe("Full HTML content"),
20650
+ markdown: string2().optional().describe("Full Markdown content")
20651
+ }).optional().describe("Livecrawled page content")
20639
20652
  })).optional().describe("News results")
20640
20653
  }).optional().describe("Search results")
20641
20654
  });
20642
20655
 
20643
20656
  // src/shared/format-search-results-text.ts
20657
+ var formatCharCount = (count) => count.toLocaleString();
20644
20658
  var formatSearchResultsText = (results) => {
20645
20659
  return results.map((result) => {
20646
20660
  const parts = [`Title: ${result.title}`];
@@ -20658,6 +20672,18 @@ var formatSearchResultsText = (results) => {
20658
20672
  } else if (result.snippet) {
20659
20673
  parts.push(`Snippet: ${result.snippet}`);
20660
20674
  }
20675
+ if (result.contents) {
20676
+ const formats = [];
20677
+ if (result.contents.markdown) {
20678
+ formats.push(`${formatCharCount(result.contents.markdown.length)} chars (markdown)`);
20679
+ }
20680
+ if (result.contents.html) {
20681
+ formats.push(`${formatCharCount(result.contents.html.length)} chars (html)`);
20682
+ }
20683
+ if (formats.length > 0) {
20684
+ parts.push(`Page content available: ${formats.join(", ")}`);
20685
+ }
20686
+ }
20661
20687
  return parts.join(`
20662
20688
  `);
20663
20689
  }).join(`
@@ -20675,14 +20701,7 @@ var formatSearchResults = (response) => {
20675
20701
  ${webResults}`;
20676
20702
  }
20677
20703
  if (response.results.news?.length) {
20678
- const newsResults = response.results.news.map((article) => `Title: ${article.title}
20679
- URL: ${article.url}
20680
- Description: ${article.description}
20681
- Published: ${article.page_age}`).join(`
20682
-
20683
- ---
20684
-
20685
- `);
20704
+ const newsResults = formatSearchResultsText(response.results.news);
20686
20705
  if (formattedResults) {
20687
20706
  formattedResults += `
20688
20707
 
@@ -20703,15 +20722,24 @@ ${newsResults}`;
20703
20722
  };
20704
20723
  if (result.page_age)
20705
20724
  item.page_age = result.page_age;
20725
+ if (result.snippets?.length)
20726
+ item.snippets = result.snippets;
20727
+ if (result.contents)
20728
+ item.contents = result.contents ?? undefined;
20706
20729
  return item;
20707
20730
  });
20708
20731
  }
20709
20732
  if (response.results.news?.length) {
20710
- structuredResults.news = response.results.news.map((article) => ({
20711
- url: article.url,
20712
- title: article.title,
20713
- page_age: article.page_age
20714
- }));
20733
+ structuredResults.news = response.results.news.map((article) => {
20734
+ const item = {
20735
+ url: article.url,
20736
+ title: article.title,
20737
+ page_age: article.page_age
20738
+ };
20739
+ if (article.contents)
20740
+ item.contents = article.contents ?? undefined;
20741
+ return item;
20742
+ });
20715
20743
  }
20716
20744
  return {
20717
20745
  content: [
@@ -20742,7 +20770,7 @@ var registerSearchTool = ({
20742
20770
  }) => {
20743
20771
  mcp.registerTool("you-search", {
20744
20772
  title: "Web Search",
20745
- description: "Web and news search via You.com",
20773
+ description: "Web and news search via You.com. Supports domain filtering, language selection, livecrawl for full page content, and date freshness controls.",
20746
20774
  inputSchema: SearchQuerySchema.shape,
20747
20775
  outputSchema: SearchStructuredContentSchema.shape
20748
20776
  }, async (searchQuery, { sendNotification }) => {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@youdotcom-oss/mcp",
3
- "version": "3.2.1",
3
+ "version": "3.2.3",
4
4
  "description": "You.com MCP server — web search, AI research, and content extraction via You.com APIs",
5
5
  "license": "MIT",
6
6
  "engines": {
@@ -53,7 +53,7 @@
53
53
  "mcpName": "io.github.youdotcom-oss/mcp",
54
54
  "dependencies": {
55
55
  "@modelcontextprotocol/sdk": "^1.28.0",
56
- "@youdotcom-oss/api": "0.5.1",
56
+ "@youdotcom-oss/api": "0.5.2",
57
57
  "zod": "^4.3.6"
58
58
  },
59
59
  "devDependencies": {
package/server.json CHANGED
@@ -39,7 +39,7 @@
39
39
  "description": "Remote MCP server URL (defaults to https://api.you.com/mcp)",
40
40
  "isRequired": false,
41
41
  "isSecret": false,
42
- "format": "uri"
42
+ "format": "string"
43
43
  }
44
44
  ]
45
45
  }
@@ -96,6 +96,20 @@ describe('formatResearchResults', () => {
96
96
  expect(result.structuredContent.sources[0]?.snippetCount).toBe(0)
97
97
  })
98
98
 
99
+ test('handles source with undefined snippets', () => {
100
+ const mockResponse: ResearchResponse = {
101
+ output: {
102
+ content: 'Answer',
103
+ content_type: 'text',
104
+ sources: [{ url: 'https://example.com/no-snippets', title: 'No Snippets' }],
105
+ },
106
+ }
107
+
108
+ const result = formatResearchResults(mockResponse)
109
+
110
+ expect(result.structuredContent.sources[0]?.snippetCount).toBe(0)
111
+ })
112
+
99
113
  test('handles response with zero sources', () => {
100
114
  const mockResponse: ResearchResponse = {
101
115
  output: {
@@ -17,7 +17,8 @@ export const registerSearchTool = ({
17
17
  'you-search',
18
18
  {
19
19
  title: 'Web Search',
20
- description: 'Web and news search via You.com',
20
+ description:
21
+ 'Web and news search via You.com. Supports domain filtering, language selection, livecrawl for full page content, and date freshness controls.',
21
22
  inputSchema: SearchQuerySchema.shape,
22
23
  outputSchema: SearchStructuredContentSchema.shape,
23
24
  },
@@ -16,6 +16,14 @@ export const SearchStructuredContentSchema = z.object({
16
16
  url: z.string().describe('URL'),
17
17
  title: z.string().describe('Title'),
18
18
  page_age: z.string().optional().describe('Publication timestamp'),
19
+ snippets: z.array(z.string()).optional().describe('Content snippets'),
20
+ contents: z
21
+ .object({
22
+ html: z.string().optional().describe('Full HTML content'),
23
+ markdown: z.string().optional().describe('Full Markdown content'),
24
+ })
25
+ .optional()
26
+ .describe('Livecrawled page content'),
19
27
  }),
20
28
  )
21
29
  .optional()
@@ -26,6 +34,13 @@ export const SearchStructuredContentSchema = z.object({
26
34
  url: z.string().describe('URL'),
27
35
  title: z.string().describe('Title'),
28
36
  page_age: z.string().describe('Publication timestamp'),
37
+ contents: z
38
+ .object({
39
+ html: z.string().optional().describe('Full HTML content'),
40
+ markdown: z.string().optional().describe('Full Markdown content'),
41
+ })
42
+ .optional()
43
+ .describe('Livecrawled page content'),
29
44
  }),
30
45
  )
31
46
  .optional()
@@ -1,4 +1,4 @@
1
- import type { NewsResult, SearchResponse } from '@youdotcom-oss/api'
1
+ import type { SearchResponse } from '@youdotcom-oss/api'
2
2
  import { formatSearchResultsText } from '../shared/format-search-results-text.ts'
3
3
 
4
4
  export const formatSearchResults = (response: SearchResponse) => {
@@ -10,14 +10,9 @@ export const formatSearchResults = (response: SearchResponse) => {
10
10
  formattedResults += `WEB RESULTS:\n\n${webResults}`
11
11
  }
12
12
 
13
- // Format news results
13
+ // Format news results using shared utility (consistent with web formatting)
14
14
  if (response.results.news?.length) {
15
- const newsResults = response.results.news
16
- .map(
17
- (article: NewsResult) =>
18
- `Title: ${article.title}\nURL: ${article.url}\nDescription: ${article.description}\nPublished: ${article.page_age}`,
19
- )
20
- .join('\n\n---\n\n')
15
+ const newsResults = formatSearchResultsText(response.results.news)
21
16
 
22
17
  if (formattedResults) {
23
18
  formattedResults += `\n\n${'='.repeat(50)}\n\n`
@@ -27,27 +22,45 @@ export const formatSearchResults = (response: SearchResponse) => {
27
22
 
28
23
  // Extract fields for structuredContent
29
24
  const structuredResults: {
30
- web?: Array<{ url: string; title: string; page_age?: string }>
31
- news?: Array<{ url: string; title: string; page_age: string }>
25
+ web?: Array<{
26
+ url: string
27
+ title: string
28
+ page_age?: string
29
+ snippets?: string[]
30
+ contents?: { html?: string; markdown?: string }
31
+ }>
32
+ news?: Array<{ url: string; title: string; page_age: string; contents?: { html?: string; markdown?: string } }>
32
33
  } = {}
33
34
 
34
35
  if (response.results.web?.length) {
35
36
  structuredResults.web = response.results.web.map((result) => {
36
- const item: { url: string; title: string; page_age?: string } = {
37
+ const item: {
38
+ url: string
39
+ title: string
40
+ page_age?: string
41
+ snippets?: string[]
42
+ contents?: { html?: string; markdown?: string }
43
+ } = {
37
44
  url: result.url,
38
45
  title: result.title,
39
46
  }
40
47
  if (result.page_age) item.page_age = result.page_age
48
+ if (result.snippets?.length) item.snippets = result.snippets
49
+ if (result.contents) item.contents = result.contents ?? undefined
41
50
  return item
42
51
  })
43
52
  }
44
53
 
45
54
  if (response.results.news?.length) {
46
- structuredResults.news = response.results.news.map((article) => ({
47
- url: article.url,
48
- title: article.title,
49
- page_age: article.page_age,
50
- }))
55
+ structuredResults.news = response.results.news.map((article) => {
56
+ const item: { url: string; title: string; page_age: string; contents?: { html?: string; markdown?: string } } = {
57
+ url: article.url,
58
+ title: article.title,
59
+ page_age: article.page_age,
60
+ }
61
+ if (article.contents) item.contents = article.contents ?? undefined
62
+ return item
63
+ })
51
64
  }
52
65
 
53
66
  return {
@@ -0,0 +1,123 @@
1
+ import { afterEach, beforeEach, describe, expect, spyOn, test } from 'bun:test'
2
+ import { Client } from '@modelcontextprotocol/sdk/client/index.js'
3
+ import { InMemoryTransport } from '@modelcontextprotocol/sdk/inMemory.js'
4
+ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'
5
+ import type { SearchResponse } from '@youdotcom-oss/api'
6
+ import * as api from '@youdotcom-oss/api'
7
+ import { registerSearchTool } from '../register-search-tool.ts'
8
+
9
+ const emptyResponse: SearchResponse = {
10
+ results: { web: [], news: [] },
11
+ metadata: { search_uuid: 'test', query: 'test', latency: 0 },
12
+ }
13
+
14
+ const oneResultResponse: SearchResponse = {
15
+ results: {
16
+ web: [
17
+ {
18
+ url: 'https://example.com',
19
+ title: 'Example',
20
+ description: 'A test result',
21
+ snippets: ['snippet'],
22
+ page_age: '2025-01-01T00:00:00',
23
+ authors: [],
24
+ },
25
+ ],
26
+ news: [],
27
+ },
28
+ metadata: { search_uuid: 'test', query: 'test', latency: 0.1 },
29
+ }
30
+
31
+ let mockFetchResponse: SearchResponse | Error = emptyResponse
32
+ let fetchSearchResultsSpy: ReturnType<typeof spyOn<typeof api, 'fetchSearchResults'>> | undefined
33
+ let generateErrorReportLinkSpy: ReturnType<typeof spyOn<typeof api, 'generateErrorReportLink'>> | undefined
34
+
35
+ type Cleanup = () => Promise<void>
36
+
37
+ const setupMcpClient = async (): Promise<{ client: Client; cleanup: Cleanup }> => {
38
+ const server = new McpServer({ name: 'test', version: '0.0.0' }, { capabilities: { logging: {}, tools: {} } })
39
+ registerSearchTool({
40
+ mcp: server,
41
+ YDC_API_KEY: 'test-key',
42
+ getUserAgent: () => 'test-agent',
43
+ })
44
+
45
+ const [clientTransport, serverTransport] = InMemoryTransport.createLinkedPair()
46
+ await server.connect(serverTransport)
47
+
48
+ const client = new Client({ name: 'test-client', version: '0.0.0' })
49
+ await client.connect(clientTransport)
50
+
51
+ const cleanup = async () => {
52
+ await client.close()
53
+ await server.close()
54
+ }
55
+
56
+ return { client, cleanup }
57
+ }
58
+
59
+ describe('registerSearchTool', () => {
60
+ let cleanup: Cleanup | undefined
61
+
62
+ beforeEach(() => {
63
+ mockFetchResponse = emptyResponse
64
+ fetchSearchResultsSpy = spyOn(api, 'fetchSearchResults').mockImplementation(async () => {
65
+ if (mockFetchResponse instanceof Error) throw mockFetchResponse
66
+ return mockFetchResponse
67
+ })
68
+ generateErrorReportLinkSpy = spyOn(api, 'generateErrorReportLink').mockImplementation(
69
+ () => 'https://example.com/report',
70
+ )
71
+ })
72
+
73
+ afterEach(async () => {
74
+ if (cleanup) {
75
+ await cleanup()
76
+ cleanup = undefined
77
+ }
78
+ fetchSearchResultsSpy?.mockRestore()
79
+ fetchSearchResultsSpy = undefined
80
+ generateErrorReportLinkSpy?.mockRestore()
81
+ generateErrorReportLinkSpy = undefined
82
+ })
83
+
84
+ test('handles empty search results gracefully', async () => {
85
+ const result = await setupMcpClient()
86
+ cleanup = result.cleanup
87
+
88
+ const toolResult = await result.client.callTool({ name: 'you-search', arguments: { query: 'nonexistent' } })
89
+
90
+ expect(toolResult.content).toEqual([{ type: 'text', text: 'No results found.' }])
91
+ expect(toolResult.structuredContent).toEqual({
92
+ resultCounts: { web: 0, news: 0, total: 0 },
93
+ })
94
+ })
95
+
96
+ test('returns formatted results for successful search', async () => {
97
+ mockFetchResponse = oneResultResponse
98
+ const result = await setupMcpClient()
99
+ cleanup = result.cleanup
100
+
101
+ const toolResult = await result.client.callTool({ name: 'you-search', arguments: { query: 'example' } })
102
+
103
+ const text = (toolResult.content as Array<{ type: string; text: string }>)[0]?.text
104
+ expect(text).toContain('Example')
105
+ expect(text).toContain('https://example.com')
106
+
107
+ const structured = toolResult.structuredContent as Record<string, unknown>
108
+ expect(structured).toHaveProperty('resultCounts')
109
+ expect((structured as { resultCounts: { total: number } }).resultCounts.total).toBe(1)
110
+ })
111
+
112
+ test('returns error when API call fails', async () => {
113
+ mockFetchResponse = new Error('API rate limit exceeded')
114
+ const result = await setupMcpClient()
115
+ cleanup = result.cleanup
116
+
117
+ const toolResult = await result.client.callTool({ name: 'you-search', arguments: { query: 'test' } })
118
+
119
+ expect(toolResult.isError).toBe(true)
120
+ const text = (toolResult.content as Array<{ type: string; text: string }>)[0]?.text
121
+ expect(text).toContain('API rate limit exceeded')
122
+ })
123
+ })
@@ -50,6 +50,7 @@ describe('formatSearchResults', () => {
50
50
  url: 'https://example.com',
51
51
  title: 'Test Title',
52
52
  page_age: '2023-01-01T00:00:00',
53
+ snippets: ['snippet 1', 'snippet 2'],
53
54
  })
54
55
  expect(result.fullResponse).toBe(mockResponse)
55
56
  })
@@ -79,8 +80,9 @@ describe('formatSearchResults', () => {
79
80
  expect(result.content[0]?.text).toContain('NEWS RESULTS:')
80
81
  expect(result.content[0]?.text).toContain('News Title')
81
82
  expect(result.content[0]?.text).toContain('Published: 2023-01-01T00:00:00')
82
- // URL should be in text content
83
+ // URL and Description should be in text content (routed through formatSearchResultsText)
83
84
  expect(result.content[0]?.text).toContain('URL: https://news.com/article')
85
+ expect(result.content[0]?.text).toContain('Description: News description')
84
86
  expect(result.structuredContent).toHaveProperty('resultCounts')
85
87
  expect(result.structuredContent.resultCounts).toHaveProperty('web', 0)
86
88
  expect(result.structuredContent.resultCounts).toHaveProperty('news', 1)
@@ -146,6 +148,7 @@ describe('formatSearchResults', () => {
146
148
  url: 'https://web.com',
147
149
  title: 'Web Title',
148
150
  page_age: '2023-01-01T00:00:00',
151
+ snippets: ['web snippet'],
149
152
  })
150
153
  expect(result.structuredContent.results?.news?.[0]).toMatchObject({
151
154
  url: 'https://news.com/article',
@@ -153,4 +156,136 @@ describe('formatSearchResults', () => {
153
156
  page_age: '2023-01-01T00:00:00',
154
157
  })
155
158
  })
159
+
160
+ test('includes contents in structuredContent and text indicator when livecrawl returns page content', () => {
161
+ const mockResponse: SearchResponse = {
162
+ results: {
163
+ web: [
164
+ {
165
+ url: 'https://example.com',
166
+ title: 'Livecrawl Title',
167
+ description: 'A page with content',
168
+ snippets: ['snippet'],
169
+ page_age: '2023-01-01T00:00:00',
170
+ authors: [],
171
+ contents: {
172
+ markdown: 'Full page content in markdown format.',
173
+ html: '<p>Full page content in HTML format.</p>',
174
+ },
175
+ },
176
+ ],
177
+ news: [],
178
+ },
179
+ metadata: {
180
+ search_uuid: 'test-uuid',
181
+ query: 'livecrawl test',
182
+ latency: 0.5,
183
+ },
184
+ }
185
+
186
+ const result = formatSearchResults(mockResponse)
187
+
188
+ // Text content should include the contents indicator
189
+ expect(result.content[0]?.text).toContain('Page content available:')
190
+ expect(result.content[0]?.text).toContain('chars (markdown)')
191
+ expect(result.content[0]?.text).toContain('chars (html)')
192
+
193
+ // structuredContent should include contents
194
+ expect(result.structuredContent.results?.web?.[0]).toMatchObject({
195
+ url: 'https://example.com',
196
+ title: 'Livecrawl Title',
197
+ contents: {
198
+ markdown: 'Full page content in markdown format.',
199
+ html: '<p>Full page content in HTML format.</p>',
200
+ },
201
+ })
202
+ })
203
+
204
+ test('omits contents when not present in response', () => {
205
+ const mockResponse: SearchResponse = {
206
+ results: {
207
+ web: [
208
+ {
209
+ url: 'https://example.com',
210
+ title: 'No Content',
211
+ description: 'A page without livecrawl',
212
+ snippets: ['snippet'],
213
+ },
214
+ ],
215
+ news: [],
216
+ },
217
+ metadata: {
218
+ search_uuid: 'test-uuid',
219
+ query: 'test',
220
+ latency: 0.1,
221
+ },
222
+ }
223
+
224
+ const result = formatSearchResults(mockResponse)
225
+
226
+ expect(result.content[0]?.text).not.toContain('Page content available:')
227
+ expect(result.structuredContent.results?.web?.[0]?.contents).toBeUndefined()
228
+ })
229
+
230
+ test('includes contents indicator for news results with livecrawl', () => {
231
+ const mockResponse: SearchResponse = {
232
+ results: {
233
+ web: [],
234
+ news: [
235
+ {
236
+ title: 'News with Content',
237
+ description: 'Breaking news',
238
+ page_age: '2023-01-01T00:00:00',
239
+ url: 'https://news.com/article',
240
+ contents: {
241
+ markdown: 'Full news article content in markdown.',
242
+ },
243
+ },
244
+ ],
245
+ },
246
+ metadata: {
247
+ search_uuid: 'test-uuid',
248
+ query: 'news livecrawl test',
249
+ latency: 0.4,
250
+ },
251
+ }
252
+
253
+ const result = formatSearchResults(mockResponse)
254
+
255
+ // Text content should include the contents indicator for news too
256
+ expect(result.content[0]?.text).toContain('Page content available:')
257
+ expect(result.content[0]?.text).toContain('chars (markdown)')
258
+
259
+ // structuredContent should include contents for news
260
+ expect(result.structuredContent.results?.news?.[0]).toMatchObject({
261
+ url: 'https://news.com/article',
262
+ title: 'News with Content',
263
+ contents: { markdown: 'Full news article content in markdown.' },
264
+ })
265
+ })
266
+
267
+ test('includes snippets in structuredContent for web results', () => {
268
+ const mockResponse: SearchResponse = {
269
+ results: {
270
+ web: [
271
+ {
272
+ url: 'https://example.com',
273
+ title: 'With Snippets',
274
+ description: 'Has snippets',
275
+ snippets: ['first snippet', 'second snippet'],
276
+ },
277
+ ],
278
+ news: [],
279
+ },
280
+ metadata: {
281
+ search_uuid: 'test-uuid',
282
+ query: 'test',
283
+ latency: 0.1,
284
+ },
285
+ }
286
+
287
+ const result = formatSearchResults(mockResponse)
288
+
289
+ expect(result.structuredContent.results?.web?.[0]?.snippets).toEqual(['first snippet', 'second snippet'])
290
+ })
156
291
  })
@@ -9,8 +9,14 @@ type GenericSearchResult = {
9
9
  snippet?: string
10
10
  snippets?: string[]
11
11
  page_age?: string
12
+ contents?: { html?: string; markdown?: string }
12
13
  }
13
14
 
15
+ /**
16
+ * Format a character count with locale-aware number formatting
17
+ */
18
+ const formatCharCount = (count: number): string => count.toLocaleString()
19
+
14
20
  /**
15
21
  * Format array of search results into display text
16
22
  * Used by search result formatting
@@ -43,6 +49,20 @@ export const formatSearchResultsText = (results: GenericSearchResult[]): string
43
49
  parts.push(`Snippet: ${result.snippet}`)
44
50
  }
45
51
 
52
+ // Add contents indicator if livecrawl returned page content
53
+ if (result.contents) {
54
+ const formats: string[] = []
55
+ if (result.contents.markdown) {
56
+ formats.push(`${formatCharCount(result.contents.markdown.length)} chars (markdown)`)
57
+ }
58
+ if (result.contents.html) {
59
+ formats.push(`${formatCharCount(result.contents.html.length)} chars (html)`)
60
+ }
61
+ if (formats.length > 0) {
62
+ parts.push(`Page content available: ${formats.join(', ')}`)
63
+ }
64
+ }
65
+
46
66
  return parts.join('\n')
47
67
  })
48
68
  .join('\n\n')
@@ -0,0 +1,95 @@
1
+ import { describe, expect, test } from 'bun:test'
2
+ import { formatSearchResultsText } from '../format-search-results-text.ts'
3
+
4
+ describe('formatSearchResultsText', () => {
5
+ test('formats basic search results with title and URL', () => {
6
+ const result = formatSearchResultsText([{ url: 'https://example.com', title: 'Test' }])
7
+
8
+ expect(result).toContain('Title: Test')
9
+ expect(result).toContain('URL: https://example.com')
10
+ })
11
+
12
+ test('includes page_age when present', () => {
13
+ const result = formatSearchResultsText([{ url: 'https://example.com', title: 'Test', page_age: '2023-01-01' }])
14
+
15
+ expect(result).toContain('Published: 2023-01-01')
16
+ })
17
+
18
+ test('includes description when present', () => {
19
+ const result = formatSearchResultsText([
20
+ { url: 'https://example.com', title: 'Test', description: 'A description' },
21
+ ])
22
+
23
+ expect(result).toContain('Description: A description')
24
+ })
25
+
26
+ test('includes snippets array when present', () => {
27
+ const result = formatSearchResultsText([{ url: 'https://example.com', title: 'Test', snippets: ['one', 'two'] }])
28
+
29
+ expect(result).toContain('Snippets:')
30
+ expect(result).toContain('- one')
31
+ expect(result).toContain('- two')
32
+ })
33
+
34
+ test('includes single snippet when present', () => {
35
+ const result = formatSearchResultsText([{ url: 'https://example.com', title: 'Test', snippet: 'a snippet' }])
36
+
37
+ expect(result).toContain('Snippet: a snippet')
38
+ })
39
+
40
+ test('formats multiple results with separator', () => {
41
+ const result = formatSearchResultsText([
42
+ { url: 'https://a.com', title: 'A' },
43
+ { url: 'https://b.com', title: 'B' },
44
+ ])
45
+
46
+ expect(result).toContain('Title: A')
47
+ expect(result).toContain('Title: B')
48
+ expect(result).toContain('\n\n')
49
+ })
50
+
51
+ test('handles empty results array', () => {
52
+ const result = formatSearchResultsText([])
53
+
54
+ expect(result).toBe('')
55
+ })
56
+
57
+ test('includes contents indicator when markdown content is present', () => {
58
+ const result = formatSearchResultsText([
59
+ {
60
+ url: 'https://example.com',
61
+ title: 'Test',
62
+ contents: { markdown: 'A'.repeat(4523) },
63
+ },
64
+ ])
65
+
66
+ expect(result).toContain('Page content available:')
67
+ expect(result).toContain('4,523 chars (markdown)')
68
+ })
69
+
70
+ test('includes contents indicator for both markdown and html', () => {
71
+ const result = formatSearchResultsText([
72
+ {
73
+ url: 'https://example.com',
74
+ title: 'Test',
75
+ contents: { markdown: 'markdown content', html: '<p>html content</p>' },
76
+ },
77
+ ])
78
+
79
+ expect(result).toContain('Page content available:')
80
+ expect(result).toContain('chars (markdown)')
81
+ expect(result).toContain('chars (html)')
82
+ })
83
+
84
+ test('omits contents indicator when contents object has no content', () => {
85
+ const result = formatSearchResultsText([{ url: 'https://example.com', title: 'Test', contents: {} }])
86
+
87
+ expect(result).not.toContain('Page content available:')
88
+ })
89
+
90
+ test('omits contents indicator when contents is not present', () => {
91
+ const result = formatSearchResultsText([{ url: 'https://example.com', title: 'Test' }])
92
+
93
+ expect(result).not.toContain('Page content available:')
94
+ })
95
+ })