@librechat/agents 2.4.31 → 2.4.33
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/events.cjs +3 -3
- package/dist/cjs/events.cjs.map +1 -1
- package/dist/cjs/graphs/Graph.cjs +2 -1
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/main.cjs +5 -2
- package/dist/cjs/main.cjs.map +1 -1
- package/dist/cjs/messages/ids.cjs +23 -0
- package/dist/cjs/messages/ids.cjs.map +1 -0
- package/dist/cjs/splitStream.cjs +2 -1
- package/dist/cjs/splitStream.cjs.map +1 -1
- package/dist/cjs/stream.cjs +87 -154
- package/dist/cjs/stream.cjs.map +1 -1
- package/dist/cjs/tools/ToolNode.cjs +14 -3
- package/dist/cjs/tools/ToolNode.cjs.map +1 -1
- package/dist/cjs/tools/handlers.cjs +144 -0
- package/dist/cjs/tools/handlers.cjs.map +1 -0
- package/dist/cjs/tools/search/content.cjs +140 -0
- package/dist/cjs/tools/search/content.cjs.map +1 -0
- package/dist/cjs/tools/search/firecrawl.cjs +23 -41
- package/dist/cjs/tools/search/firecrawl.cjs.map +1 -1
- package/dist/cjs/tools/search/format.cjs +161 -74
- package/dist/cjs/tools/search/format.cjs.map +1 -1
- package/dist/cjs/tools/search/highlights.cjs +64 -12
- package/dist/cjs/tools/search/highlights.cjs.map +1 -1
- package/dist/cjs/tools/search/rerankers.cjs +43 -36
- package/dist/cjs/tools/search/rerankers.cjs.map +1 -1
- package/dist/cjs/tools/search/schema.cjs +70 -0
- package/dist/cjs/tools/search/schema.cjs.map +1 -0
- package/dist/cjs/tools/search/search.cjs +150 -69
- package/dist/cjs/tools/search/search.cjs.map +1 -1
- package/dist/cjs/tools/search/tool.cjs +247 -58
- package/dist/cjs/tools/search/tool.cjs.map +1 -1
- package/dist/cjs/tools/search/utils.cjs +66 -0
- package/dist/cjs/tools/search/utils.cjs.map +1 -0
- package/dist/esm/events.mjs +1 -1
- package/dist/esm/events.mjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +2 -1
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/main.mjs +3 -1
- package/dist/esm/main.mjs.map +1 -1
- package/dist/esm/messages/ids.mjs +21 -0
- package/dist/esm/messages/ids.mjs.map +1 -0
- package/dist/esm/splitStream.mjs +2 -1
- package/dist/esm/splitStream.mjs.map +1 -1
- package/dist/esm/stream.mjs +87 -152
- package/dist/esm/stream.mjs.map +1 -1
- package/dist/esm/tools/ToolNode.mjs +14 -3
- package/dist/esm/tools/ToolNode.mjs.map +1 -1
- package/dist/esm/tools/handlers.mjs +141 -0
- package/dist/esm/tools/handlers.mjs.map +1 -0
- package/dist/esm/tools/search/content.mjs +119 -0
- package/dist/esm/tools/search/content.mjs.map +1 -0
- package/dist/esm/tools/search/firecrawl.mjs +24 -41
- package/dist/esm/tools/search/firecrawl.mjs.map +1 -1
- package/dist/esm/tools/search/format.mjs +161 -74
- package/dist/esm/tools/search/format.mjs.map +1 -1
- package/dist/esm/tools/search/highlights.mjs +64 -12
- package/dist/esm/tools/search/highlights.mjs.map +1 -1
- package/dist/esm/tools/search/rerankers.mjs +43 -36
- package/dist/esm/tools/search/rerankers.mjs.map +1 -1
- package/dist/esm/tools/search/schema.mjs +61 -0
- package/dist/esm/tools/search/schema.mjs.map +1 -0
- package/dist/esm/tools/search/search.mjs +150 -69
- package/dist/esm/tools/search/search.mjs.map +1 -1
- package/dist/esm/tools/search/tool.mjs +246 -57
- package/dist/esm/tools/search/tool.mjs.map +1 -1
- package/dist/esm/tools/search/utils.mjs +61 -0
- package/dist/esm/tools/search/utils.mjs.map +1 -0
- package/dist/types/graphs/Graph.d.ts +1 -1
- package/dist/types/index.d.ts +1 -0
- package/dist/types/messages/ids.d.ts +3 -0
- package/dist/types/messages/index.d.ts +1 -0
- package/dist/types/stream.d.ts +0 -8
- package/dist/types/tools/ToolNode.d.ts +6 -0
- package/dist/types/tools/example.d.ts +23 -3
- package/dist/types/tools/handlers.d.ts +8 -0
- package/dist/types/tools/search/content.d.ts +4 -0
- package/dist/types/tools/search/firecrawl.d.ts +7 -86
- package/dist/types/tools/search/format.d.ts +4 -1
- package/dist/types/tools/search/highlights.d.ts +1 -1
- package/dist/types/tools/search/rerankers.d.ts +8 -4
- package/dist/types/tools/search/schema.d.ts +16 -0
- package/dist/types/tools/search/search.d.ts +2 -2
- package/dist/types/tools/search/test.d.ts +1 -0
- package/dist/types/tools/search/tool.d.ts +25 -4
- package/dist/types/tools/search/types.d.ts +443 -53
- package/dist/types/tools/search/utils.d.ts +10 -0
- package/package.json +9 -7
- package/src/events.ts +49 -15
- package/src/graphs/Graph.ts +6 -2
- package/src/index.ts +1 -0
- package/src/messages/ids.ts +26 -0
- package/src/messages/index.ts +1 -0
- package/src/scripts/search.ts +8 -3
- package/src/splitStream.test.ts +132 -71
- package/src/splitStream.ts +2 -1
- package/src/stream.ts +94 -183
- package/src/tools/ToolNode.ts +37 -14
- package/src/tools/handlers.ts +167 -0
- package/src/tools/search/content.test.ts +173 -0
- package/src/tools/search/content.ts +147 -0
- package/src/tools/search/firecrawl.ts +36 -148
- package/src/tools/search/format.ts +205 -74
- package/src/tools/search/highlights.ts +99 -16
- package/src/tools/search/output.md +2775 -0
- package/src/tools/search/rerankers.ts +57 -36
- package/src/tools/search/schema.ts +63 -0
- package/src/tools/search/search.ts +230 -117
- package/src/tools/search/test.html +884 -0
- package/src/tools/search/test.md +643 -0
- package/src/tools/search/test.ts +159 -0
- package/src/tools/search/tool.ts +363 -87
- package/src/tools/search/types.ts +503 -61
- package/src/tools/search/utils.ts +79 -0
- package/src/utils/llmConfig.ts +1 -1
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
/* eslint-disable no-console */
|
|
2
|
+
// processWikipedia.ts
|
|
3
|
+
import * as fs from 'fs';
|
|
4
|
+
import * as path from 'path';
|
|
5
|
+
import { processContent } from './content';
|
|
6
|
+
|
|
7
|
+
/**
 * Runs the Wikipedia fixture pair through `processContent` and writes a
 * referenced Markdown version of the article.
 *
 * Reads `test.html` and `test.md` (resolved against the current working
 * directory), appends the appendix built by `generateReferenceAppendix`, and
 * writes the combined text to `output.md`. Progress, timing, reference counts
 * and a ten-line preview are printed to the console.
 *
 * Any error (including a missing input file) is logged and the process exits
 * with status 1.
 */
async function processWikipediaArticle(): Promise<void> {
  try {
    console.log('Starting Wikipedia article processing...');

    // Define file paths - adapt these to your specific file locations
    const paths = {
      html: path.resolve('./test.html'),
      markdown: path.resolve('./test.md'),
      output: path.resolve('./output.md'),
    };

    // Fail early, with a specific message, when either input is missing
    if (!fs.existsSync(paths.html)) {
      throw new Error(`Wikipedia HTML file not found at ${paths.html}`);
    }
    if (!fs.existsSync(paths.markdown)) {
      throw new Error(`Wikipedia Markdown file not found at ${paths.markdown}`);
    }

    console.log('Reading Wikipedia article files...');
    const htmlSource = fs.readFileSync(paths.html, 'utf-8');
    const markdownSource = fs.readFileSync(paths.markdown, 'utf-8');

    // The first <h1> is used purely as a label in the log output
    const heading = htmlSource.match(/<h1[^>]*>([^<]+)<\/h1>/i);
    const articleTitle = heading ? heading[1].trim() : 'Wikipedia article';

    console.log(`Processing "${articleTitle}"...`);

    // Only the processContent call itself is timed
    const startedAt = process.hrtime();
    const result = processContent(htmlSource, markdownSource);
    const [seconds, nanoseconds] = process.hrtime(startedAt);
    const timeInMs = seconds * 1000 + nanoseconds / 1000000;

    // Processed article followed by the reference appendix
    const appendix = generateReferenceAppendix(result);
    fs.writeFileSync(paths.output, result.markdown + appendix);

    // Processing statistics, printed one console line per entry
    const linkCount = result.links.length;
    const imageCount = result.images.length;
    const videoCount = result.videos.length;
    const wideRule = '-'.repeat(60);
    const summary = [
      '\nWikipedia article processing complete! ✓',
      wideRule,
      `Article: ${articleTitle}`,
      `Processing time: ${timeInMs.toFixed(2)}ms`,
      'Media references replaced:',
      ` - Links: ${linkCount}`,
      ` - Images: ${imageCount}`,
      ` - Videos: ${videoCount}`,
      ` - Total: ${linkCount + imageCount + videoCount}`,
      `Output saved to: ${paths.output}`,
      wideRule,
    ];
    for (const line of summary) {
      console.log(line);
    }

    // Preview: the first ten lines of the transformed Markdown
    const narrowRule = '-'.repeat(30);
    const preview = result.markdown.split('\n').slice(0, 10).join('\n');
    console.log('\nSample of transformed content:');
    console.log(narrowRule);
    console.log(preview);
    console.log(narrowRule);
    console.log('... (continued in output file)');
  } catch (error) {
    console.error('Error processing Wikipedia article:', error);
    process.exit(1);
  }
}
|
|
86
|
+
|
|
87
|
+
/**
 * Builds the Markdown "References" appendix for a processed article.
 *
 * The appendix starts with a horizontal rule and a `# References` heading,
 * followed by one section per non-empty category (`## Links`, `## Images`,
 * `## Videos`) and a `## Summary` section that is always emitted. Entries are
 * numbered from 1 within their category (`**link#1**`, `**image#1**`, ...).
 *
 * @param result - Reference lists; only `originalUrl`, `text` (links) and
 *   `title` (images/videos) are read.
 * @returns The appendix text, beginning with two newlines so it can be
 *   concatenated directly onto the article body.
 */
function generateReferenceAppendix(result: {
  links: Array<{ originalUrl: string; title?: string; text?: string }>;
  images: Array<{ originalUrl: string; title?: string }>;
  videos: Array<{ originalUrl: string; title?: string }>;
}): string {
  // Link text longer than this (in code points) is shortened to fit
  const maxLinkTextLength = 50;
  const ellipsis = '...';

  /** Renders one titled media section (images or videos). */
  const renderMediaSection = (
    heading: string,
    label: string,
    items: Array<{ originalUrl: string; title?: string }>
  ): string => {
    if (items.length === 0) {
      return '';
    }
    let section = `## ${heading}\n\n`;
    items.forEach((item, index) => {
      const title = item.title != null ? item.title.trim() : '';
      const displayTitle = title ? ` - ${title}` : '';
      section += `**${label}#${index + 1}**: ${item.originalUrl}${displayTitle}\n\n`;
    });
    return section;
  };

  let appendix = '\n\n' + '---'.repeat(10) + '\n\n';
  appendix += '# References\n\n';

  if (result.links.length > 0) {
    appendix += '## Links\n\n';
    result.links.forEach((link, index) => {
      let displayText = '';
      const trimmed = link.text != null ? link.text.trim() : '';
      if (trimmed) {
        // Measure and cut by code point, not UTF-16 code unit, so a character
        // outside the BMP (e.g. an emoji) is never split into a lone surrogate.
        const codePoints = Array.from(trimmed);
        const cleanText =
          codePoints.length > maxLinkTextLength
            ? codePoints
              .slice(0, maxLinkTextLength - ellipsis.length)
              .join('') + ellipsis
            : trimmed;
        displayText = ` - "${cleanText}"`;
      }

      appendix += `**link#${index + 1}**: ${link.originalUrl}${displayText}\n\n`;
    });
  }

  appendix += renderMediaSection('Images', 'image', result.images);
  appendix += renderMediaSection('Videos', 'video', result.videos);

  // Category breakdown; emitted even when every list is empty
  const totalRefs =
    result.links.length + result.images.length + result.videos.length;

  appendix += '## Summary\n\n';
  appendix += `Total references: **${totalRefs}**\n\n`;
  appendix += `- Links: ${result.links.length}\n`;
  appendix += `- Images: ${result.images.length}\n`;
  appendix += `- Videos: ${result.videos.length}\n`;

  return appendix;
}
|
|
150
|
+
|
|
151
|
+
// Script entry point: run the processor and treat any rejection that escapes
// it as fatal (logged, then exit status 1).
processWikipediaArticle().catch((error: unknown) => {
  console.error('Unhandled error:', error);
  process.exit(1);
});
|
package/src/tools/search/tool.ts
CHANGED
|
@@ -1,25 +1,339 @@
|
|
|
1
|
-
/* eslint-disable no-console */
|
|
2
1
|
import { z } from 'zod';
|
|
3
2
|
import { tool, DynamicStructuredTool } from '@langchain/core/tools';
|
|
3
|
+
import type { RunnableConfig } from '@langchain/core/runnables';
|
|
4
4
|
import type * as t from './types';
|
|
5
|
+
import {
|
|
6
|
+
DATE_RANGE,
|
|
7
|
+
querySchema,
|
|
8
|
+
dateSchema,
|
|
9
|
+
countrySchema,
|
|
10
|
+
imagesSchema,
|
|
11
|
+
videosSchema,
|
|
12
|
+
newsSchema,
|
|
13
|
+
} from './schema';
|
|
5
14
|
import { createSearchAPI, createSourceProcessor } from './search';
|
|
6
15
|
import { createFirecrawlScraper } from './firecrawl';
|
|
7
16
|
import { expandHighlights } from './highlights';
|
|
8
17
|
import { formatResultsForLLM } from './format';
|
|
18
|
+
import { createDefaultLogger } from './utils';
|
|
9
19
|
import { createReranker } from './rerankers';
|
|
10
20
|
import { Constants } from '@/common';
|
|
11
21
|
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
22
|
+
/**
 * Runs the main search plus any requested image/video/news searches
 * concurrently and folds everything into a single result.
 *
 * - The main search is not guarded: if it rejects, or resolves with
 *   `success: false`, this function rejects.
 * - Each optional search is guarded: a rejection is logged through `logger`
 *   and replaced by a `{ success: false, error }` result, which is then
 *   ignored during merging.
 * - News items (from the main result and from the dedicated news search) are
 *   appended to `topStories`; the main result's own `news` list is removed
 *   after it has been converted.
 *
 * @returns `{ success: true, data }` where `data` is the merged result set.
 */
async function executeParallelSearches({
  searchAPI,
  query,
  date,
  country,
  safeSearch,
  images,
  videos,
  news,
  logger,
}: {
  searchAPI: ReturnType<typeof createSearchAPI>;
  query: string;
  date?: DATE_RANGE;
  country?: string;
  safeSearch: t.SearchToolConfig['safeSearch'];
  images: boolean;
  videos: boolean;
  news: boolean;
  logger: t.Logger;
}): Promise<t.SearchResult> {
  // Options shared by every request
  const baseOptions = { query, date, country, safeSearch };

  // Optional searches, in the order their tasks are queued
  const optionalSearches = [
    { type: 'images', label: 'Images', enabled: images },
    { type: 'videos', label: 'Videos', enabled: videos },
    { type: 'news', label: 'News', enabled: news },
  ] as const;

  // The main search always occupies slot 0
  const pending: Promise<t.SearchResult>[] = [searchAPI.getSources(baseOptions)];

  for (const { type, label, enabled } of optionalSearches) {
    if (!enabled) {
      continue;
    }
    pending.push(
      searchAPI
        .getSources({ ...baseOptions, type })
        .catch((error) => {
          logger.error(`Error fetching ${type}:`, error);
          return {
            success: false,
            error: `${label} search failed: ${error instanceof Error ? error.message : String(error)}`,
          };
        })
    );
  }

  const [mainResult, ...extraResults] = await Promise.all(pending);
  if (!mainResult.success) {
    throw new Error(mainResult.error ?? 'Search failed');
  }

  // Shallow copy so the main result object itself is left untouched
  const mergedResults = { ...mainResult.data };

  // News returned by the main search becomes top stories (entries without a
  // link are dropped), and the original `news` key is removed.
  const mainNews = mergedResults.news;
  if (mainNews !== undefined && mainNews.length > 0) {
    const convertedStories = mainNews
      .filter((item) => item.link !== undefined && item.link !== '')
      .map((item) => ({
        title: item.title ?? '',
        link: item.link ?? '',
        source: item.source ?? '',
        date: item.date ?? '',
        imageUrl: item.imageUrl ?? '',
        processed: false,
      }));
    mergedResults.topStories = [
      ...(mergedResults.topStories ?? []),
      ...convertedStories,
    ];
    delete mergedResults.news;
  }

  for (const extra of extraResults) {
    // Failed or empty optional searches contribute nothing
    if (!extra.success || extra.data === undefined) {
      continue;
    }
    const {
      images: extraImages,
      videos: extraVideos,
      news: extraNews,
    } = extra.data;

    if (extraImages !== undefined && extraImages.length > 0) {
      mergedResults.images = [...(mergedResults.images ?? []), ...extraImages];
    }
    if (extraVideos !== undefined && extraVideos.length > 0) {
      mergedResults.videos = [...(mergedResults.videos ?? []), ...extraVideos];
    }
    if (extraNews !== undefined && extraNews.length > 0) {
      const extraStories = extraNews.map((item) => ({
        ...item,
        link: item.link ?? '',
      }));
      mergedResults.topStories = [
        ...(mergedResults.topStories ?? []),
        ...extraStories,
      ];
    }
  }

  return { success: true, data: mergedResults };
}
|
|
175
|
+
|
|
176
|
+
/**
 * Creates the search function used by the tool.
 *
 * The returned function runs the parallel searches, reports the merged raw
 * result through the per-call `onSearchResults` callback (when one is given),
 * passes it to `sourceProcessor.processSources`, and returns the output of
 * `expandHighlights`.
 *
 * It does not reject on failure: any error in that pipeline is logged via
 * `logger` and an empty result carrying the error message is returned.
 */
function createSearchProcessor({
  searchAPI,
  safeSearch,
  sourceProcessor,
  onGetHighlights,
  logger,
}: {
  safeSearch: t.SearchToolConfig['safeSearch'];
  searchAPI: ReturnType<typeof createSearchAPI>;
  sourceProcessor: ReturnType<typeof createSourceProcessor>;
  onGetHighlights: t.SearchToolConfig['onGetHighlights'];
  logger: t.Logger;
}) {
  /** Empty result set returned in place of a thrown error. */
  const emptyResultFor = (failure: unknown): t.SearchResultData => ({
    organic: [],
    topStories: [],
    images: [],
    videos: [],
    news: [],
    relatedSearches: [],
    error: failure instanceof Error ? failure.message : String(failure),
  });

  return async ({
    query,
    date,
    country,
    proMode = true,
    maxSources = 5,
    onSearchResults,
    images = false,
    videos = false,
    news = false,
  }: {
    query: string;
    country?: string;
    date?: DATE_RANGE;
    proMode?: boolean;
    maxSources?: number;
    onSearchResults: t.SearchToolConfig['onSearchResults'];
    images?: boolean;
    videos?: boolean;
    news?: boolean;
  }): Promise<t.SearchResultData> => {
    try {
      const merged = await executeParallelSearches({
        searchAPI,
        query,
        date,
        country,
        safeSearch,
        images,
        videos,
        news,
        logger,
      });

      // Report the raw merged result before any source processing
      onSearchResults?.(merged);

      const processed = await sourceProcessor.processSources({
        query,
        news,
        result: merged,
        proMode,
        onGetHighlights,
        numElements: maxSources,
      });

      return expandHighlights(processed);
    } catch (error) {
      logger.error('Error in search:', error);
      return emptyResultFor(error);
    }
  };
}
|
|
250
|
+
|
|
251
|
+
/**
 * Binds `runnableConfig` to the user-supplied `onSearchResults` callback.
 *
 * The returned function forwards the results together with the bound config,
 * and does nothing when no callback was supplied.
 */
function createOnSearchResults({
  runnableConfig,
  onSearchResults,
}: {
  runnableConfig: RunnableConfig;
  onSearchResults: t.SearchToolConfig['onSearchResults'];
}) {
  return (results: t.SearchResult): void => {
    onSearchResults?.(results, runnableConfig);
  };
}
|
|
265
|
+
|
|
266
|
+
/**
 * Wraps the search function in a structured tool named `Constants.WEB_SEARCH`.
 *
 * On each call the tool runs `search` with the validated parameters, formats
 * the result with `formatResultsForLLM`, and returns a pair: the formatted
 * output and an artifact object keyed by `Constants.WEB_SEARCH` holding the
 * turn, the search result and the references.
 *
 * @param schema - Schema used to validate the tool's input.
 * @param search - Search function built by `createSearchProcessor`.
 * @param onSearchResults - Optional callback; it is given the per-call
 *   runnable config alongside the results.
 */
function createTool({
  schema,
  search,
  onSearchResults: _onSearchResults,
}: {
  schema: t.SearchToolSchema;
  search: ReturnType<typeof createSearchProcessor>;
  onSearchResults: t.SearchToolConfig['onSearchResults'];
}): DynamicStructuredTool<typeof schema> {
  // Model-facing usage and citation instructions
  const description = `Real-time search. Results have required citation anchors.

Note: Use ONCE per reply unless instructed otherwise.

Anchors:
- \\ue202turnXtypeY
- X = turn idx, type = 'search' | 'news' | 'image' | 'ref', Y = item idx

Special Markers:
- \\ue203...\\ue204 — highlight start/end of cited text (for Standalone or Group citations)
- \\ue200...\\ue201 — group block (e.g. \\ue200\\ue202turn0search1\\ue202turn0news2\\ue201)

**CITE EVERY NON-OBVIOUS FACT/QUOTE:**
Use anchor marker(s) immediately after the statement:
- Standalone: "Pure functions produce same output. \\ue202turn0search0"
- Standalone (multiple): "Today's News \\ue202turn0search0\\ue202turn0news0"
- Highlight: "\\ue203Highlight text.\\ue204\\ue202turn0news1"
- Group: "Sources. \\ue200\\ue202turn0search0\\ue202turn0news1\\ue201"
- Group Highlight: "\\ue203Highlight for group.\\ue204 \\ue200\\ue202turn0search0\\ue202turn0news1\\ue201"
- Image: "See photo \\ue202turn0image0."

**NEVER use markdown links, [1], or footnotes. CITE ONLY with anchors provided.**
`.trim();

  return tool<typeof schema>(
    async (params, runnableConfig) => {
      const { query, date, country: rawCountry, images, videos, news } = params;
      // Anything other than a non-empty string is treated as "no country"
      const country =
        typeof rawCountry === 'string' && rawCountry !== ''
          ? rawCountry
          : undefined;

      const searchResult = await search({
        query,
        date,
        country,
        images,
        videos,
        news,
        onSearchResults: createOnSearchResults({
          runnableConfig,
          onSearchResults: _onSearchResults,
        }),
      });

      // Turn index defaults to 0 when the config carries no tool call turn
      const turn = runnableConfig.toolCall?.turn ?? 0;
      const { output, references } = formatResultsForLLM(turn, searchResult);
      const data: t.SearchResultData = { turn, ...searchResult, references };
      return [output, { [Constants.WEB_SEARCH]: data }];
    },
    {
      name: Constants.WEB_SEARCH,
      description,
      schema,
      responseFormat: Constants.CONTENT_AND_ARTIFACT,
    }
  );
}
|
|
326
|
+
|
|
327
|
+
/**
|
|
328
|
+
* Creates a search tool with a schema that dynamically includes the country field
|
|
329
|
+
* only when the searchProvider is 'serper'.
|
|
330
|
+
*
|
|
331
|
+
* @param config - The search tool configuration
|
|
332
|
+
* @returns A DynamicStructuredTool with a schema that depends on the searchProvider
|
|
333
|
+
*/
|
|
20
334
|
export const createSearchTool = (
|
|
21
335
|
config: t.SearchToolConfig = {}
|
|
22
|
-
): DynamicStructuredTool<typeof
|
|
336
|
+
): DynamicStructuredTool<typeof toolSchema> => {
|
|
23
337
|
const {
|
|
24
338
|
searchProvider = 'serper',
|
|
25
339
|
serperApiKey,
|
|
@@ -29,14 +343,39 @@ export const createSearchTool = (
|
|
|
29
343
|
topResults = 5,
|
|
30
344
|
strategies = ['no_extraction'],
|
|
31
345
|
filterContent = true,
|
|
346
|
+
safeSearch = 1,
|
|
32
347
|
firecrawlApiKey,
|
|
33
348
|
firecrawlApiUrl,
|
|
34
349
|
firecrawlFormats = ['markdown', 'html'],
|
|
35
350
|
jinaApiKey,
|
|
36
351
|
cohereApiKey,
|
|
37
352
|
onSearchResults: _onSearchResults,
|
|
353
|
+
onGetHighlights,
|
|
38
354
|
} = config;
|
|
39
355
|
|
|
356
|
+
const logger = config.logger || createDefaultLogger();
|
|
357
|
+
|
|
358
|
+
const schemaObject: {
|
|
359
|
+
query: z.ZodString;
|
|
360
|
+
date: z.ZodOptional<z.ZodNativeEnum<typeof DATE_RANGE>>;
|
|
361
|
+
country?: z.ZodOptional<z.ZodString>;
|
|
362
|
+
images: z.ZodOptional<z.ZodBoolean>;
|
|
363
|
+
videos: z.ZodOptional<z.ZodBoolean>;
|
|
364
|
+
news: z.ZodOptional<z.ZodBoolean>;
|
|
365
|
+
} = {
|
|
366
|
+
query: querySchema,
|
|
367
|
+
date: dateSchema,
|
|
368
|
+
images: imagesSchema,
|
|
369
|
+
videos: videosSchema,
|
|
370
|
+
news: newsSchema,
|
|
371
|
+
};
|
|
372
|
+
|
|
373
|
+
if (searchProvider === 'serper') {
|
|
374
|
+
schemaObject.country = countrySchema;
|
|
375
|
+
}
|
|
376
|
+
|
|
377
|
+
const toolSchema = z.object(schemaObject);
|
|
378
|
+
|
|
40
379
|
const searchAPI = createSearchAPI({
|
|
41
380
|
searchProvider,
|
|
42
381
|
serperApiKey,
|
|
@@ -54,10 +393,11 @@ export const createSearchTool = (
|
|
|
54
393
|
rerankerType,
|
|
55
394
|
jinaApiKey,
|
|
56
395
|
cohereApiKey,
|
|
396
|
+
logger,
|
|
57
397
|
});
|
|
58
398
|
|
|
59
399
|
if (!selectedReranker) {
|
|
60
|
-
|
|
400
|
+
logger.warn('No reranker selected. Using default ranking.');
|
|
61
401
|
}
|
|
62
402
|
|
|
63
403
|
const sourceProcessor = createSourceProcessor(
|
|
@@ -66,86 +406,22 @@ export const createSearchTool = (
|
|
|
66
406
|
topResults,
|
|
67
407
|
strategies,
|
|
68
408
|
filterContent,
|
|
409
|
+
logger,
|
|
69
410
|
},
|
|
70
411
|
firecrawlScraper
|
|
71
412
|
);
|
|
72
413
|
|
|
73
|
-
const search =
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
proMode?: boolean;
|
|
81
|
-
maxSources?: number;
|
|
82
|
-
onSearchResults?: (sources: t.SearchResult) => void;
|
|
83
|
-
}): Promise<t.SearchResultData> => {
|
|
84
|
-
try {
|
|
85
|
-
const sources = await searchAPI.getSources(query);
|
|
86
|
-
onSearchResults?.(sources);
|
|
87
|
-
|
|
88
|
-
if (!sources.success) {
|
|
89
|
-
throw new Error(sources.error ?? 'Search failed');
|
|
90
|
-
}
|
|
91
|
-
|
|
92
|
-
const processedSources = await sourceProcessor.processSources(
|
|
93
|
-
sources,
|
|
94
|
-
maxSources,
|
|
95
|
-
query,
|
|
96
|
-
proMode
|
|
97
|
-
);
|
|
98
|
-
return expandHighlights(processedSources);
|
|
99
|
-
} catch (error) {
|
|
100
|
-
console.error('Error in search:', error);
|
|
101
|
-
return {
|
|
102
|
-
organic: [],
|
|
103
|
-
topStories: [],
|
|
104
|
-
images: [],
|
|
105
|
-
relatedSearches: [],
|
|
106
|
-
error: error instanceof Error ? error.message : String(error),
|
|
107
|
-
};
|
|
108
|
-
}
|
|
109
|
-
};
|
|
110
|
-
|
|
111
|
-
return tool<typeof SearchToolSchema>(
|
|
112
|
-
async ({ query }, runnableConfig) => {
|
|
113
|
-
const searchResult = await search({
|
|
114
|
-
query,
|
|
115
|
-
onSearchResults: _onSearchResults
|
|
116
|
-
? (result): void => {
|
|
117
|
-
_onSearchResults(result, runnableConfig);
|
|
118
|
-
}
|
|
119
|
-
: undefined,
|
|
120
|
-
});
|
|
121
|
-
const output = formatResultsForLLM(searchResult);
|
|
122
|
-
return [output, searchResult];
|
|
123
|
-
},
|
|
124
|
-
{
|
|
125
|
-
name: Constants.WEB_SEARCH,
|
|
126
|
-
description: `
|
|
127
|
-
Real-time search. Results have required unique citation anchors.
|
|
128
|
-
|
|
129
|
-
Anchors:
|
|
130
|
-
- \\ue202turn0searchN (web), \\ue202turn0newsN (news), \\ue202turn0imageN (image)
|
|
131
|
-
|
|
132
|
-
Special Markers:
|
|
133
|
-
- \\ue203...\\ue204 — mark start/end of cited span
|
|
134
|
-
- \\ue200...\\ue201 — composite/group block (e.g. \\ue200cite\\ue202turn0search1\\ue202turn0news2\\ue201)
|
|
135
|
-
- \\ue206 — marks grouped/summary citation areas
|
|
136
|
-
|
|
137
|
-
**CITE EVERY NON-OBVIOUS FACT/QUOTE:**
|
|
138
|
-
Insert the anchor marker(s) immediately after the statement:
|
|
139
|
-
- "Pure functions produce same output \\ue202turn0search0."
|
|
140
|
-
- Multiple: "Benefits \\ue202turn0search0\\ue202turn0news0."
|
|
141
|
-
- Span: \\ue203Key: first-class functions\\ue204\\ue202turn0news1
|
|
142
|
-
- Group: "Functional languages."\\ue206 or \\ue200cite\\ue202turn0search0\\ue202turn0news1\\ue201
|
|
143
|
-
- Image: "See photo \\ue202turn0image0."
|
|
414
|
+
const search = createSearchProcessor({
|
|
415
|
+
searchAPI,
|
|
416
|
+
safeSearch,
|
|
417
|
+
sourceProcessor,
|
|
418
|
+
onGetHighlights,
|
|
419
|
+
logger,
|
|
420
|
+
});
|
|
144
421
|
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
);
|
|
422
|
+
return createTool({
|
|
423
|
+
search,
|
|
424
|
+
schema: toolSchema,
|
|
425
|
+
onSearchResults: _onSearchResults,
|
|
426
|
+
});
|
|
151
427
|
};
|