@librechat/agents 2.4.316 → 2.4.318
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/tools/search/content.cjs +140 -0
- package/dist/cjs/tools/search/content.cjs.map +1 -0
- package/dist/cjs/tools/search/firecrawl.cjs +17 -37
- package/dist/cjs/tools/search/firecrawl.cjs.map +1 -1
- package/dist/cjs/tools/search/format.cjs +79 -29
- package/dist/cjs/tools/search/format.cjs.map +1 -1
- package/dist/cjs/tools/search/highlights.cjs +64 -13
- package/dist/cjs/tools/search/highlights.cjs.map +1 -1
- package/dist/cjs/tools/search/search.cjs +13 -15
- package/dist/cjs/tools/search/search.cjs.map +1 -1
- package/dist/cjs/tools/search/tool.cjs +44 -12
- package/dist/cjs/tools/search/tool.cjs.map +1 -1
- package/dist/cjs/tools/search/utils.cjs +35 -0
- package/dist/cjs/tools/search/utils.cjs.map +1 -0
- package/dist/esm/tools/search/content.mjs +119 -0
- package/dist/esm/tools/search/content.mjs.map +1 -0
- package/dist/esm/tools/search/firecrawl.mjs +18 -37
- package/dist/esm/tools/search/firecrawl.mjs.map +1 -1
- package/dist/esm/tools/search/format.mjs +79 -29
- package/dist/esm/tools/search/format.mjs.map +1 -1
- package/dist/esm/tools/search/highlights.mjs +64 -13
- package/dist/esm/tools/search/highlights.mjs.map +1 -1
- package/dist/esm/tools/search/search.mjs +12 -14
- package/dist/esm/tools/search/search.mjs.map +1 -1
- package/dist/esm/tools/search/tool.mjs +44 -12
- package/dist/esm/tools/search/tool.mjs.map +1 -1
- package/dist/esm/tools/search/utils.mjs +32 -0
- package/dist/esm/tools/search/utils.mjs.map +1 -0
- package/dist/types/tools/search/content.d.ts +4 -0
- package/dist/types/tools/search/firecrawl.d.ts +6 -86
- package/dist/types/tools/search/format.d.ts +4 -1
- package/dist/types/tools/search/highlights.d.ts +1 -1
- package/dist/types/tools/search/search.d.ts +1 -1
- package/dist/types/tools/search/test.d.ts +1 -0
- package/dist/types/tools/search/tool.d.ts +12 -4
- package/dist/types/tools/search/types.d.ts +380 -46
- package/dist/types/tools/search/utils.d.ts +3 -0
- package/package.json +3 -2
- package/src/scripts/search.ts +5 -3
- package/src/tools/search/content.test.ts +173 -0
- package/src/tools/search/content.ts +147 -0
- package/src/tools/search/firecrawl.ts +27 -144
- package/src/tools/search/format.ts +89 -31
- package/src/tools/search/highlights.ts +99 -17
- package/src/tools/search/output.md +2775 -0
- package/src/tools/search/search.ts +42 -54
- package/src/tools/search/test.html +884 -0
- package/src/tools/search/test.md +643 -0
- package/src/tools/search/test.ts +159 -0
- package/src/tools/search/tool.ts +54 -15
- package/src/tools/search/types.ts +430 -52
- package/src/tools/search/utils.ts +43 -0
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
/* eslint-disable no-console */
|
|
2
|
+
// test.ts — manual script: runs processContent over a saved Wikipedia article (formerly processWikipedia.ts)
|
|
3
|
+
import * as fs from 'fs';
|
|
4
|
+
import * as path from 'path';
|
|
5
|
+
import { processContent } from './content';
|
|
6
|
+
|
|
7
|
+
/**
 * Runs a saved Wikipedia article (HTML plus its Markdown rendering) through
 * `processContent`, appends a reference appendix, writes the combined text to
 * `output.md`, and prints timing and reference statistics.
 *
 * All three paths are relative and therefore resolved against the current
 * working directory. Any failure is logged and the process exits with code 1,
 * so the returned promise does not reject.
 */
async function processWikipediaArticle(): Promise<void> {
  try {
    console.log('Starting Wikipedia article processing...');

    // Input/output locations - adapt these to your specific file locations
    const htmlPath = path.resolve('./test.html');
    const markdownPath = path.resolve('./test.md');
    const outputPath = path.resolve('./output.md');

    // Fail early, with a message naming the missing input
    const htmlMissing = !fs.existsSync(htmlPath);
    if (htmlMissing) {
      throw new Error(`Wikipedia HTML file not found at ${htmlPath}`);
    }
    const markdownMissing = !fs.existsSync(markdownPath);
    if (markdownMissing) {
      throw new Error(`Wikipedia Markdown file not found at ${markdownPath}`);
    }

    console.log('Reading Wikipedia article files...');
    const html = fs.readFileSync(htmlPath, 'utf-8');
    const markdown = fs.readFileSync(markdownPath, 'utf-8');

    // The first <h1> with plain-text content is used as the title, for logging only
    const headingMatch = /<h1[^>]*>([^<]+)<\/h1>/i.exec(html);
    let articleTitle = 'Wikipedia article';
    if (headingMatch) {
      articleTitle = headingMatch[1].trim();
    }

    console.log(`Processing "${articleTitle}"...`);

    // Time only the processContent call
    const startedAt = process.hrtime();
    const result = processContent(html, markdown);
    const [elapsedSeconds, elapsedNanos] = process.hrtime(startedAt);
    const timeInMs = elapsedSeconds * 1000 + elapsedNanos / 1000000;

    // Processed article followed by the reference appendix
    const completeOutput = result.markdown + generateReferenceAppendix(result);
    fs.writeFileSync(outputPath, completeOutput);

    // Processing statistics
    const wideRule = '-'.repeat(60);
    const referenceTotal =
      result.links.length + result.images.length + result.videos.length;

    console.log('\nWikipedia article processing complete! ✓');
    console.log(wideRule);
    console.log(`Article: ${articleTitle}`);
    console.log(`Processing time: ${timeInMs.toFixed(2)}ms`);
    console.log('Media references replaced:');
    console.log(` - Links: ${result.links.length}`);
    console.log(` - Images: ${result.images.length}`);
    console.log(` - Videos: ${result.videos.length}`);
    console.log(` - Total: ${referenceTotal}`);
    console.log(`Output saved to: ${outputPath}`);
    console.log(wideRule);

    // Preview: the first ten lines of the transformed Markdown
    const narrowRule = '-'.repeat(30);
    const preview = result.markdown.split('\n').slice(0, 10).join('\n');

    console.log('\nSample of transformed content:');
    console.log(narrowRule);
    console.log(preview);
    console.log(narrowRule);
    console.log('... (continued in output file)');
  } catch (error) {
    console.error('Error processing Wikipedia article:', error);
    process.exit(1);
  }
}
|
|
86
|
+
|
|
87
|
+
/**
 * Generate a comprehensive reference appendix with all media links.
 *
 * The appendix is Markdown made of: a separator rule, a `# References`
 * heading, one numbered section per non-empty category (links, images,
 * videos) and a `## Summary` section with per-category counts.
 *
 * @param result - Collected references. Only `originalUrl`, `title` (images
 *   and videos) and `text` (links) are read.
 * @returns The appendix text. It starts with a blank line so it can be
 *   concatenated directly onto the article body.
 */
function generateReferenceAppendix(result: {
  links: Array<{ originalUrl: string; title?: string; text?: string }>;
  images: Array<{ originalUrl: string; title?: string }>;
  videos: Array<{ originalUrl: string; title?: string }>;
}): string {
  const MAX_LINK_TEXT_LENGTH = 50;
  const ELLIPSIS = '...';

  /** Shortens over-long link text to at most 50 UTF-16 units, ellipsis included. */
  const shorten = (text: string): string => {
    if (text.length <= MAX_LINK_TEXT_LENGTH) {
      return text;
    }
    let cut = MAX_LINK_TEXT_LENGTH - ELLIPSIS.length;
    // If the last kept unit is a high surrogate, the cut would split an astral
    // character (e.g. an emoji) and leave a lone surrogate in the output.
    const lastKept = text.charCodeAt(cut - 1);
    if (lastKept >= 0xd800 && lastKept <= 0xdbff) {
      cut -= 1;
    }
    return text.substring(0, cut) + ELLIPSIS;
  };

  /** Quoted, trimmed and shortened link text; empty when there is no visible text. */
  const linkSuffix = (text?: string): string => {
    const trimmed = text?.trim();
    return trimmed != null && trimmed !== '' ? ` - "${shorten(trimmed)}"` : '';
  };

  /** Trimmed media title; empty when there is no visible title. */
  const titleSuffix = (title?: string): string => {
    const trimmed = title?.trim();
    return trimmed != null && trimmed !== '' ? ` - ${trimmed}` : '';
  };

  /** Renders one category as a heading plus 1-based numbered entries; empty categories render nothing. */
  const renderSection = <T extends { originalUrl: string }>(
    heading: string,
    label: string,
    items: readonly T[],
    suffixOf: (item: T) => string
  ): string => {
    if (items.length === 0) {
      return '';
    }
    const entries = items.map(
      (item, index) =>
        `**${label}#${index + 1}**: ${item.originalUrl}${suffixOf(item)}\n\n`
    );
    return `## ${heading}\n\n` + entries.join('');
  };

  const { links, images, videos } = result;
  const totalRefs = links.length + images.length + videos.length;

  return (
    '\n\n' +
    '---'.repeat(10) +
    '\n\n' +
    '# References\n\n' +
    renderSection('Links', 'link', links, (link) => linkSuffix(link.text)) +
    renderSection('Images', 'image', images, (image) => titleSuffix(image.title)) +
    renderSection('Videos', 'video', videos, (video) => titleSuffix(video.title)) +
    // Category breakdown showing what types of references were found
    '## Summary\n\n' +
    `Total references: **${totalRefs}**\n\n` +
    `- Links: ${links.length}\n` +
    `- Images: ${images.length}\n` +
    `- Videos: ${videos.length}\n`
  );
}
|
|
150
|
+
|
|
151
|
+
// Script entry point: start processing and treat any rejection that escapes
// processWikipediaArticle as fatal (log it, then exit with code 1).
processWikipediaArticle().catch((error: unknown): void => {
  console.error('Unhandled error:', error);
  process.exit(1);
});
|
package/src/tools/search/tool.ts
CHANGED
|
@@ -9,9 +9,7 @@ import { formatResultsForLLM } from './format';
|
|
|
9
9
|
import { createReranker } from './rerankers';
|
|
10
10
|
import { Constants } from '@/common';
|
|
11
11
|
|
|
12
|
-
const
|
|
13
|
-
query: z.string().describe(
|
|
14
|
-
`
|
|
12
|
+
const DEFAULT_QUERY_DESCRIPTION = `
|
|
15
13
|
GUIDELINES:
|
|
16
14
|
- Start broad, then narrow: Begin with key concepts, then refine with specifics
|
|
17
15
|
- Think like sources: Use terminology experts would use in the field
|
|
@@ -29,10 +27,24 @@ TECHNIQUES (combine for power searches):
|
|
|
29
27
|
- SPECIFIC QUESTIONS: Use who/what/when/where/why/how
|
|
30
28
|
- DOMAIN TERMS: Include technical terminology for specialized topics
|
|
31
29
|
- CONCISE TERMS: Prioritize keywords over sentences
|
|
32
|
-
`.trim()
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
30
|
+
`.trim();
|
|
31
|
+
|
|
32
|
+
/**
 * Description attached to the optional `country` field of the search tool
 * schema (the field is only added when the search provider is 'serper').
 */
const DEFAULT_COUNTRY_DESCRIPTION = [
  'Country code to localize search results.',
  'Use standard 2-letter country codes: "us", "uk", "ca", "de", "fr", "jp", "br", etc.',
  'Provide this when the search should return results specific to a particular country.',
  'Examples:',
  '- "us" for United States (default)',
  '- "de" for Germany',
  '- "in" for India',
].join('\n');
|
|
40
|
+
|
|
41
|
+
/**
|
|
42
|
+
* Creates a search tool with a schema that dynamically includes the country field
|
|
43
|
+
* only when the searchProvider is 'serper'.
|
|
44
|
+
*
|
|
45
|
+
* @param config - The search tool configuration
|
|
46
|
+
* @returns A DynamicStructuredTool with a schema that depends on the searchProvider
|
|
47
|
+
*/
|
|
36
48
|
export const createSearchTool = (
|
|
37
49
|
config: t.SearchToolConfig = {}
|
|
38
50
|
): DynamicStructuredTool<typeof SearchToolSchema> => {
|
|
@@ -53,6 +65,23 @@ export const createSearchTool = (
|
|
|
53
65
|
onSearchResults: _onSearchResults,
|
|
54
66
|
} = config;
|
|
55
67
|
|
|
68
|
+
const querySchema = z.string().describe(DEFAULT_QUERY_DESCRIPTION);
|
|
69
|
+
const schemaObject: {
|
|
70
|
+
query: z.ZodString;
|
|
71
|
+
country?: z.ZodOptional<z.ZodString>;
|
|
72
|
+
} = {
|
|
73
|
+
query: querySchema,
|
|
74
|
+
};
|
|
75
|
+
|
|
76
|
+
if (searchProvider === 'serper') {
|
|
77
|
+
schemaObject.country = z
|
|
78
|
+
.string()
|
|
79
|
+
.optional()
|
|
80
|
+
.describe(DEFAULT_COUNTRY_DESCRIPTION);
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
const SearchToolSchema = z.object(schemaObject);
|
|
84
|
+
|
|
56
85
|
const searchAPI = createSearchAPI({
|
|
57
86
|
searchProvider,
|
|
58
87
|
serperApiKey,
|
|
@@ -88,17 +117,19 @@ export const createSearchTool = (
|
|
|
88
117
|
|
|
89
118
|
const search = async ({
|
|
90
119
|
query,
|
|
120
|
+
country,
|
|
91
121
|
proMode = true,
|
|
92
122
|
maxSources = 5,
|
|
93
123
|
onSearchResults,
|
|
94
124
|
}: {
|
|
95
125
|
query: string;
|
|
96
|
-
|
|
126
|
+
country?: string;
|
|
97
127
|
maxSources?: number;
|
|
128
|
+
proMode?: boolean;
|
|
98
129
|
onSearchResults?: (sources: t.SearchResult) => void;
|
|
99
130
|
}): Promise<t.SearchResultData> => {
|
|
100
131
|
try {
|
|
101
|
-
const sources = await searchAPI.getSources(query);
|
|
132
|
+
const sources = await searchAPI.getSources({ query, country });
|
|
102
133
|
onSearchResults?.(sources);
|
|
103
134
|
|
|
104
135
|
if (!sources.success) {
|
|
@@ -125,9 +156,12 @@ export const createSearchTool = (
|
|
|
125
156
|
};
|
|
126
157
|
|
|
127
158
|
return tool<typeof SearchToolSchema>(
|
|
128
|
-
async (
|
|
159
|
+
async (params, runnableConfig) => {
|
|
160
|
+
const { query, country: _c } = params;
|
|
161
|
+
const country = typeof _c === 'string' && _c ? _c : undefined;
|
|
129
162
|
const searchResult = await search({
|
|
130
163
|
query,
|
|
164
|
+
country,
|
|
131
165
|
onSearchResults: _onSearchResults
|
|
132
166
|
? (result): void => {
|
|
133
167
|
_onSearchResults(result, runnableConfig);
|
|
@@ -135,17 +169,22 @@ export const createSearchTool = (
|
|
|
135
169
|
: undefined,
|
|
136
170
|
});
|
|
137
171
|
const turn = runnableConfig.toolCall?.turn ?? 0;
|
|
138
|
-
const output = formatResultsForLLM(turn, searchResult);
|
|
139
|
-
return [
|
|
172
|
+
const { output, references } = formatResultsForLLM(turn, searchResult);
|
|
173
|
+
return [
|
|
174
|
+
output,
|
|
175
|
+
{ [Constants.WEB_SEARCH]: { turn, ...searchResult, references } },
|
|
176
|
+
];
|
|
140
177
|
},
|
|
141
178
|
{
|
|
142
179
|
name: Constants.WEB_SEARCH,
|
|
143
180
|
description: `
|
|
144
|
-
Real-time search. Results have required
|
|
181
|
+
Real-time search. Results have required citation anchors.
|
|
182
|
+
|
|
183
|
+
Note: Use ONCE per reply unless instructed otherwise.
|
|
145
184
|
|
|
146
185
|
Anchors:
|
|
147
|
-
- \\
|
|
148
|
-
- X = turn, Y = item
|
|
186
|
+
- \\ue202turnXtypeY
|
|
187
|
+
- X = turn idx, type = 'search' | 'news' | 'image' | 'ref', Y = item idx
|
|
149
188
|
|
|
150
189
|
Special Markers:
|
|
151
190
|
- \\ue203...\\ue204 — highlight start/end of cited text (for Standalone or Group citations)
|