@librechat/agents 2.4.320 → 2.4.322
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/tools/search/firecrawl.cjs +6 -4
- package/dist/cjs/tools/search/firecrawl.cjs.map +1 -1
- package/dist/cjs/tools/search/format.cjs +117 -80
- package/dist/cjs/tools/search/format.cjs.map +1 -1
- package/dist/cjs/tools/search/rerankers.cjs +43 -36
- package/dist/cjs/tools/search/rerankers.cjs.map +1 -1
- package/dist/cjs/tools/search/schema.cjs +70 -0
- package/dist/cjs/tools/search/schema.cjs.map +1 -0
- package/dist/cjs/tools/search/search.cjs +125 -52
- package/dist/cjs/tools/search/search.cjs.map +1 -1
- package/dist/cjs/tools/search/tool.cjs +162 -47
- package/dist/cjs/tools/search/tool.cjs.map +1 -1
- package/dist/cjs/tools/search/utils.cjs +34 -5
- package/dist/cjs/tools/search/utils.cjs.map +1 -1
- package/dist/esm/tools/search/firecrawl.mjs +6 -4
- package/dist/esm/tools/search/firecrawl.mjs.map +1 -1
- package/dist/esm/tools/search/format.mjs +118 -81
- package/dist/esm/tools/search/format.mjs.map +1 -1
- package/dist/esm/tools/search/rerankers.mjs +43 -36
- package/dist/esm/tools/search/rerankers.mjs.map +1 -1
- package/dist/esm/tools/search/schema.mjs +61 -0
- package/dist/esm/tools/search/schema.mjs.map +1 -0
- package/dist/esm/tools/search/search.mjs +126 -53
- package/dist/esm/tools/search/search.mjs.map +1 -1
- package/dist/esm/tools/search/tool.mjs +161 -46
- package/dist/esm/tools/search/tool.mjs.map +1 -1
- package/dist/esm/tools/search/utils.mjs +33 -6
- package/dist/esm/tools/search/utils.mjs.map +1 -1
- package/dist/types/tools/search/firecrawl.d.ts +1 -0
- package/dist/types/tools/search/rerankers.d.ts +8 -4
- package/dist/types/tools/search/schema.d.ts +16 -0
- package/dist/types/tools/search/tool.d.ts +13 -0
- package/dist/types/tools/search/types.d.ts +36 -0
- package/dist/types/tools/search/utils.d.ts +9 -2
- package/package.json +3 -2
- package/src/scripts/search.ts +3 -0
- package/src/tools/search/firecrawl.ts +9 -4
- package/src/tools/search/format.ts +157 -87
- package/src/tools/search/rerankers.ts +57 -36
- package/src/tools/search/schema.ts +63 -0
- package/src/tools/search/search.ts +165 -52
- package/src/tools/search/tool.ts +217 -44
- package/src/tools/search/types.ts +37 -0
- package/src/tools/search/utils.ts +37 -5
- package/src/utils/llmConfig.ts +1 -1
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@librechat/agents",
|
|
3
|
-
"version": "2.4.320",
|
|
3
|
+
"version": "2.4.322",
|
|
4
4
|
"main": "./dist/cjs/main.cjs",
|
|
5
5
|
"module": "./dist/esm/main.mjs",
|
|
6
6
|
"types": "./dist/types/index.d.ts",
|
|
@@ -122,7 +122,8 @@
|
|
|
122
122
|
"tsc-alias": "^1.8.10",
|
|
123
123
|
"tsconfig-paths": "^4.2.0",
|
|
124
124
|
"tslib": "^2.6.3",
|
|
125
|
-
"typescript": "^5.5.3"
|
|
125
|
+
"typescript": "^5.5.3",
|
|
126
|
+
"winston": "^3.17.0"
|
|
126
127
|
},
|
|
127
128
|
"lint-staged": {
|
|
128
129
|
"*.{js,ts}": [
|
package/src/scripts/search.ts
CHANGED
|
@@ -112,6 +112,9 @@ async function testStandardStreaming(): Promise<void> {
|
|
|
112
112
|
// const userMessage = 'Are massage guns good?';
|
|
113
113
|
// const userMessage = 'What is functional programming?';
|
|
114
114
|
const userMessage = "Get me today's trending news.";
|
|
115
|
+
// const userMessage = "search recent italy earthquake volcano activity";
|
|
116
|
+
// const userMessage =
|
|
117
|
+
// "use 'Trump' as the exact search query and tell me what you find.";
|
|
115
118
|
|
|
116
119
|
conversationHistory.push(new HumanMessage(userMessage));
|
|
117
120
|
|
|
package/src/tools/search/firecrawl.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
/* eslint-disable no-console */
|
|
2
1
|
import axios from 'axios';
|
|
3
2
|
import { processContent } from './content';
|
|
4
3
|
import type * as t from './types';
|
|
4
|
+
import { createDefaultLogger } from './utils';
|
|
5
5
|
|
|
6
6
|
/**
|
|
7
7
|
* Firecrawl scraper implementation
|
|
@@ -12,6 +12,7 @@ export class FirecrawlScraper {
|
|
|
12
12
|
private apiUrl: string;
|
|
13
13
|
private defaultFormats: string[];
|
|
14
14
|
private timeout: number;
|
|
15
|
+
private logger: t.Logger;
|
|
15
16
|
|
|
16
17
|
constructor(config: t.FirecrawlScraperConfig = {}) {
|
|
17
18
|
this.apiKey = config.apiKey ?? process.env.FIRECRAWL_API_KEY ?? '';
|
|
@@ -25,11 +26,15 @@ export class FirecrawlScraper {
|
|
|
25
26
|
this.defaultFormats = config.formats ?? ['markdown', 'html'];
|
|
26
27
|
this.timeout = config.timeout ?? 15000;
|
|
27
28
|
|
|
29
|
+
this.logger = config.logger || createDefaultLogger();
|
|
30
|
+
|
|
28
31
|
if (!this.apiKey) {
|
|
29
|
-
|
|
32
|
+
this.logger.warn('FIRECRAWL_API_KEY is not set. Scraping will not work.');
|
|
30
33
|
}
|
|
31
34
|
|
|
32
|
-
|
|
35
|
+
this.logger.debug(
|
|
36
|
+
`Firecrawl scraper initialized with API URL: ${this.apiUrl}`
|
|
37
|
+
);
|
|
33
38
|
}
|
|
34
39
|
|
|
35
40
|
/**
|
|
@@ -107,7 +112,7 @@ export class FirecrawlScraper {
|
|
|
107
112
|
);
|
|
108
113
|
return [markdown, rest];
|
|
109
114
|
} catch (error) {
|
|
110
|
-
|
|
115
|
+
this.logger.error('Error processing content:', error);
|
|
111
116
|
return [response.data.markdown, undefined];
|
|
112
117
|
}
|
|
113
118
|
} else if (response.data.markdown != null) {
|
|
package/src/tools/search/format.ts
CHANGED
|
@@ -1,107 +1,172 @@
|
|
|
1
1
|
import type * as t from './types';
|
|
2
|
-
import { getDomainName } from './utils';
|
|
2
|
+
import { getDomainName, fileExtRegex } from './utils';
|
|
3
|
+
|
|
4
|
+
function addHighlightSection(): string[] {
|
|
5
|
+
return ['\n## Highlights', ''];
|
|
6
|
+
}
|
|
7
|
+
|
|
8
|
+
// Helper function to format a source (organic or top story)
|
|
9
|
+
function formatSource(
|
|
10
|
+
source: t.ValidSource,
|
|
11
|
+
index: number,
|
|
12
|
+
turn: number,
|
|
13
|
+
sourceType: 'search' | 'news',
|
|
14
|
+
references: t.ResultReference[]
|
|
15
|
+
): string {
|
|
16
|
+
/** Array of all lines to include in the output */
|
|
17
|
+
const outputLines: string[] = [];
|
|
18
|
+
|
|
19
|
+
// Add the title
|
|
20
|
+
outputLines.push(
|
|
21
|
+
`# ${sourceType.charAt(0).toUpperCase() + sourceType.slice(1)} ${index}: ${source.title != null && source.title ? `"${source.title}"` : '(no title)'}`
|
|
22
|
+
);
|
|
23
|
+
outputLines.push(`\nAnchor: \\ue202turn${turn}${sourceType}${index}`);
|
|
24
|
+
outputLines.push(`URL: ${source.link}`);
|
|
25
|
+
|
|
26
|
+
// Add optional fields
|
|
27
|
+
if ('snippet' in source && source.snippet != null) {
|
|
28
|
+
outputLines.push(`Summary: ${source.snippet}`);
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
if (source.date != null) {
|
|
32
|
+
outputLines.push(`Date: ${source.date}`);
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
if (source.attribution != null) {
|
|
36
|
+
outputLines.push(`Source: ${source.attribution}`);
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
// Add highlight section or empty line
|
|
40
|
+
if ((source.highlights?.length ?? 0) > 0) {
|
|
41
|
+
outputLines.push(...addHighlightSection());
|
|
42
|
+
} else {
|
|
43
|
+
outputLines.push('');
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
// Process highlights if they exist
|
|
47
|
+
(source.highlights ?? [])
|
|
48
|
+
.filter((h) => h.text.trim().length > 0)
|
|
49
|
+
.forEach((h, hIndex) => {
|
|
50
|
+
outputLines.push(
|
|
51
|
+
`### Highlight ${hIndex + 1} [Relevance: ${h.score.toFixed(2)}]`
|
|
52
|
+
);
|
|
53
|
+
outputLines.push('');
|
|
54
|
+
outputLines.push('```text');
|
|
55
|
+
outputLines.push(h.text.trim());
|
|
56
|
+
outputLines.push('```');
|
|
57
|
+
outputLines.push('');
|
|
58
|
+
|
|
59
|
+
if (h.references != null && h.references.length) {
|
|
60
|
+
let hasHeader = false;
|
|
61
|
+
const refLines: string[] = [];
|
|
62
|
+
|
|
63
|
+
for (let j = 0; j < h.references.length; j++) {
|
|
64
|
+
const ref = h.references[j];
|
|
65
|
+
if (ref.reference.originalUrl.includes('mailto:')) {
|
|
66
|
+
continue;
|
|
67
|
+
}
|
|
68
|
+
references.push({
|
|
69
|
+
type: ref.type,
|
|
70
|
+
link: ref.reference.originalUrl,
|
|
71
|
+
attribution: getDomainName(ref.reference.originalUrl),
|
|
72
|
+
title: (
|
|
73
|
+
((ref.reference.title ?? '') || ref.reference.text) ??
|
|
74
|
+
''
|
|
75
|
+
).split('\n')[0],
|
|
76
|
+
});
|
|
77
|
+
|
|
78
|
+
if (ref.type !== 'link') {
|
|
79
|
+
continue;
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
if (fileExtRegex.test(ref.reference.originalUrl)) {
|
|
83
|
+
continue;
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
if (!hasHeader) {
|
|
87
|
+
refLines.push('Core References:');
|
|
88
|
+
hasHeader = true;
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
refLines.push(
|
|
92
|
+
`- ${ref.type}#${ref.originalIndex + 1}: ${ref.reference.originalUrl}`
|
|
93
|
+
);
|
|
94
|
+
refLines.push(
|
|
95
|
+
`\t- Anchor: \\ue202turn${turn}ref${references.length - 1}`
|
|
96
|
+
);
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
if (hasHeader) {
|
|
100
|
+
outputLines.push(...refLines);
|
|
101
|
+
outputLines.push('');
|
|
102
|
+
}
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
if (hIndex < (source.highlights?.length ?? 0) - 1) {
|
|
106
|
+
outputLines.push('---');
|
|
107
|
+
outputLines.push('');
|
|
108
|
+
}
|
|
109
|
+
});
|
|
110
|
+
|
|
111
|
+
outputLines.push('');
|
|
112
|
+
return outputLines.join('\n');
|
|
113
|
+
}
|
|
3
114
|
|
|
4
115
|
export function formatResultsForLLM(
|
|
5
116
|
turn: number,
|
|
6
117
|
results: t.SearchResultData
|
|
7
118
|
): { output: string; references: t.ResultReference[] } {
|
|
8
|
-
|
|
119
|
+
/** Array to collect all output lines */
|
|
120
|
+
const outputLines: string[] = [];
|
|
9
121
|
|
|
10
122
|
const addSection = (title: string): void => {
|
|
11
|
-
|
|
123
|
+
outputLines.push('');
|
|
124
|
+
outputLines.push(`=== ${title} ===`);
|
|
125
|
+
outputLines.push('');
|
|
12
126
|
};
|
|
13
127
|
|
|
14
128
|
const references: t.ResultReference[] = [];
|
|
129
|
+
|
|
15
130
|
// Organic (web) results
|
|
16
131
|
if (results.organic?.length != null && results.organic.length > 0) {
|
|
17
132
|
addSection(`Web Results, Turn ${turn}`);
|
|
18
133
|
for (let i = 0; i < results.organic.length; i++) {
|
|
19
134
|
const r = results.organic[i];
|
|
20
|
-
|
|
21
|
-
`# Source ${i}: "${r.title ?? '(no title)'}"`,
|
|
22
|
-
`Anchor: \\ue202turn${turn}search${i}`,
|
|
23
|
-
`URL: ${r.link}`,
|
|
24
|
-
r.snippet != null ? `Summary: ${r.snippet}` : '',
|
|
25
|
-
r.date != null ? `Date: ${r.date}` : '',
|
|
26
|
-
r.attribution != null ? `Source: ${r.attribution}` : '',
|
|
27
|
-
'',
|
|
28
|
-
'\n## Highlights\n\n',
|
|
29
|
-
'',
|
|
30
|
-
'',
|
|
31
|
-
]
|
|
32
|
-
.filter(Boolean)
|
|
33
|
-
.join('\n');
|
|
34
|
-
|
|
35
|
-
(r.highlights ?? [])
|
|
36
|
-
.filter((h) => h.text.trim().length > 0)
|
|
37
|
-
.forEach((h, hIndex) => {
|
|
38
|
-
output += `### Highlight ${hIndex + 1} [Relevance: ${h.score.toFixed(2)}]\n\n`;
|
|
39
|
-
output += '```text\n' + h.text.trim() + '\n```\n\n';
|
|
40
|
-
|
|
41
|
-
if (h.references != null && h.references.length) {
|
|
42
|
-
output += 'Core References:\n';
|
|
43
|
-
output += h.references
|
|
44
|
-
.map((ref) => {
|
|
45
|
-
references.push({
|
|
46
|
-
link: ref.reference.originalUrl,
|
|
47
|
-
attribution: getDomainName(ref.reference.originalUrl),
|
|
48
|
-
title: (
|
|
49
|
-
((ref.reference.title ?? '') || ref.reference.text) ??
|
|
50
|
-
''
|
|
51
|
-
).split('\n')[0],
|
|
52
|
-
});
|
|
53
|
-
return `- ${ref.type}#${ref.originalIndex + 1}: ${ref.reference.originalUrl}\n\t- Anchor: \\ue202turn${turn}ref${references.length - 1}`;
|
|
54
|
-
})
|
|
55
|
-
.join('\n');
|
|
56
|
-
output += '\n\n';
|
|
57
|
-
}
|
|
58
|
-
|
|
59
|
-
if (hIndex < (r.highlights?.length ?? 0) - 1) {
|
|
60
|
-
output += '---\n\n';
|
|
61
|
-
}
|
|
62
|
-
});
|
|
63
|
-
|
|
135
|
+
outputLines.push(formatSource(r, i, turn, 'search', references));
|
|
64
136
|
delete results.organic[i].highlights;
|
|
65
|
-
output += '\n';
|
|
66
137
|
}
|
|
67
138
|
}
|
|
68
139
|
|
|
69
|
-
//
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
// r.attribution != null ? `Source: ${r.attribution}` : '',
|
|
82
|
-
// ''
|
|
83
|
-
// ].filter(Boolean).join('\n');
|
|
84
|
-
// });
|
|
85
|
-
// }
|
|
140
|
+
// Top stories (news)
|
|
141
|
+
const topStories = results.topStories ?? [];
|
|
142
|
+
if (topStories.length) {
|
|
143
|
+
addSection('News Results');
|
|
144
|
+
for (let i = 0; i < topStories.length; i++) {
|
|
145
|
+
const r = topStories[i];
|
|
146
|
+
outputLines.push(formatSource(r, i, turn, 'news', references));
|
|
147
|
+
if (results.topStories?.[i]?.highlights) {
|
|
148
|
+
delete results.topStories[i].highlights;
|
|
149
|
+
}
|
|
150
|
+
}
|
|
151
|
+
}
|
|
86
152
|
|
|
87
153
|
// // Images
|
|
88
154
|
// const images = results.images ?? [];
|
|
89
155
|
// if (images.length) {
|
|
90
156
|
// addSection('Image Results');
|
|
91
|
-
// images.
|
|
92
|
-
//
|
|
93
|
-
//
|
|
94
|
-
//
|
|
95
|
-
//
|
|
96
|
-
//
|
|
97
|
-
//
|
|
98
|
-
// });
|
|
157
|
+
// const imageLines = images.map((img, i) => [
|
|
158
|
+
// `Anchor: \ue202turn0image${i}`,
|
|
159
|
+
// `Title: ${img.title ?? '(no title)'}`,
|
|
160
|
+
// `Image URL: ${img.imageUrl}`,
|
|
161
|
+
// ''
|
|
162
|
+
// ].join('\n'));
|
|
163
|
+
// outputLines.push(imageLines.join('\n'));
|
|
99
164
|
// }
|
|
100
165
|
|
|
101
166
|
// Knowledge Graph
|
|
102
167
|
if (results.knowledgeGraph != null) {
|
|
103
168
|
addSection('Knowledge Graph');
|
|
104
|
-
|
|
169
|
+
const kgLines = [
|
|
105
170
|
`**Title:** ${results.knowledgeGraph.title ?? '(no title)'}`,
|
|
106
171
|
results.knowledgeGraph.type != null
|
|
107
172
|
? `**Type:** ${results.knowledgeGraph.type}`
|
|
@@ -129,15 +194,15 @@ export function formatResultsForLLM(
|
|
|
129
194
|
)}\n\`\`\``
|
|
130
195
|
: '',
|
|
131
196
|
'',
|
|
132
|
-
]
|
|
133
|
-
|
|
134
|
-
|
|
197
|
+
].filter(Boolean);
|
|
198
|
+
|
|
199
|
+
outputLines.push(kgLines.join('\n\n'));
|
|
135
200
|
}
|
|
136
201
|
|
|
137
202
|
// Answer Box
|
|
138
203
|
if (results.answerBox != null) {
|
|
139
204
|
addSection('Answer Box');
|
|
140
|
-
|
|
205
|
+
const abLines = [
|
|
141
206
|
results.answerBox.title != null
|
|
142
207
|
? `**Title:** ${results.answerBox.title}`
|
|
143
208
|
: '',
|
|
@@ -153,30 +218,35 @@ export function formatResultsForLLM(
|
|
|
153
218
|
? `**Link:** ${results.answerBox.link}`
|
|
154
219
|
: '',
|
|
155
220
|
'',
|
|
156
|
-
]
|
|
157
|
-
|
|
158
|
-
|
|
221
|
+
].filter(Boolean);
|
|
222
|
+
|
|
223
|
+
outputLines.push(abLines.join('\n\n'));
|
|
159
224
|
}
|
|
160
225
|
|
|
161
226
|
// People also ask
|
|
162
227
|
const peopleAlsoAsk = results.peopleAlsoAsk ?? [];
|
|
163
228
|
if (peopleAlsoAsk.length) {
|
|
164
229
|
addSection('People Also Ask');
|
|
230
|
+
|
|
231
|
+
const paaLines: string[] = [];
|
|
165
232
|
peopleAlsoAsk.forEach((p, i) => {
|
|
166
|
-
|
|
233
|
+
const questionLines = [
|
|
167
234
|
`### Question ${i + 1}:`,
|
|
168
235
|
`"${p.question}"`,
|
|
169
|
-
`${p.snippet != null && p.snippet ? `Snippet: ${p.snippet}
|
|
236
|
+
`${p.snippet != null && p.snippet ? `Snippet: ${p.snippet}` : ''}`,
|
|
170
237
|
`${p.title != null && p.title ? `Title: ${p.title}` : ''}`,
|
|
171
238
|
`${p.link != null && p.link ? `Link: ${p.link}` : ''}`,
|
|
172
239
|
'',
|
|
173
|
-
]
|
|
174
|
-
|
|
175
|
-
|
|
240
|
+
].filter(Boolean);
|
|
241
|
+
|
|
242
|
+
paaLines.push(questionLines.join('\n\n'));
|
|
176
243
|
});
|
|
244
|
+
|
|
245
|
+
outputLines.push(paaLines.join(''));
|
|
177
246
|
}
|
|
247
|
+
|
|
178
248
|
return {
|
|
179
|
-
output:
|
|
249
|
+
output: outputLines.join('\n').trim(),
|
|
180
250
|
references,
|
|
181
251
|
};
|
|
182
252
|
}
|
|
package/src/tools/search/rerankers.ts
CHANGED
|
@@ -1,12 +1,14 @@
|
|
|
1
|
-
/* eslint-disable no-console */
|
|
2
1
|
import axios from 'axios';
|
|
3
2
|
import type * as t from './types';
|
|
3
|
+
import { createDefaultLogger } from './utils';
|
|
4
4
|
|
|
5
5
|
export abstract class BaseReranker {
|
|
6
6
|
protected apiKey: string | undefined;
|
|
7
|
+
protected logger: t.Logger;
|
|
7
8
|
|
|
8
|
-
constructor() {
|
|
9
|
+
constructor(logger?: t.Logger) {
|
|
9
10
|
// Each specific reranker will set its API key
|
|
11
|
+
this.logger = logger || createDefaultLogger();
|
|
10
12
|
}
|
|
11
13
|
|
|
12
14
|
abstract rerank(
|
|
@@ -25,16 +27,22 @@ export abstract class BaseReranker {
|
|
|
25
27
|
}
|
|
26
28
|
|
|
27
29
|
protected logDocumentSamples(documents: string[]): void {
|
|
28
|
-
|
|
30
|
+
this.logger.debug('Sample documents being sent to API:');
|
|
29
31
|
for (let i = 0; i < Math.min(3, documents.length); i++) {
|
|
30
|
-
|
|
32
|
+
this.logger.debug(`Document ${i}: ${documents[i].substring(0, 100)}...`);
|
|
31
33
|
}
|
|
32
34
|
}
|
|
33
35
|
}
|
|
34
36
|
|
|
35
37
|
export class JinaReranker extends BaseReranker {
|
|
36
|
-
constructor({
|
|
37
|
-
|
|
38
|
+
constructor({
|
|
39
|
+
apiKey = process.env.JINA_API_KEY,
|
|
40
|
+
logger,
|
|
41
|
+
}: {
|
|
42
|
+
apiKey?: string;
|
|
43
|
+
logger?: t.Logger;
|
|
44
|
+
}) {
|
|
45
|
+
super(logger);
|
|
38
46
|
this.apiKey = apiKey;
|
|
39
47
|
}
|
|
40
48
|
|
|
@@ -43,11 +51,11 @@ export class JinaReranker extends BaseReranker {
|
|
|
43
51
|
documents: string[],
|
|
44
52
|
topK: number = 5
|
|
45
53
|
): Promise<t.Highlight[]> {
|
|
46
|
-
|
|
54
|
+
this.logger.debug(`Reranking ${documents.length} documents with Jina`);
|
|
47
55
|
|
|
48
56
|
try {
|
|
49
57
|
if (this.apiKey == null || this.apiKey === '') {
|
|
50
|
-
|
|
58
|
+
this.logger.warn('JINA_API_KEY is not set. Using default ranking.');
|
|
51
59
|
return this.getDefaultRanking(documents, topK);
|
|
52
60
|
}
|
|
53
61
|
|
|
@@ -73,14 +81,14 @@ export class JinaReranker extends BaseReranker {
|
|
|
73
81
|
);
|
|
74
82
|
|
|
75
83
|
// Log the response data structure
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
84
|
+
this.logger.debug('Jina API response structure:');
|
|
85
|
+
this.logger.debug('Model:', response.data?.model);
|
|
86
|
+
this.logger.debug('Usage:', response.data?.usage);
|
|
87
|
+
this.logger.debug('Results count:', response.data?.results.length);
|
|
80
88
|
|
|
81
89
|
// Log a sample of the results
|
|
82
90
|
if ((response.data?.results.length ?? 0) > 0) {
|
|
83
|
-
|
|
91
|
+
this.logger.debug(
|
|
84
92
|
'Sample result:',
|
|
85
93
|
JSON.stringify(response.data?.results[0], null, 2)
|
|
86
94
|
);
|
|
@@ -108,13 +116,13 @@ export class JinaReranker extends BaseReranker {
|
|
|
108
116
|
return { text, score };
|
|
109
117
|
});
|
|
110
118
|
} else {
|
|
111
|
-
|
|
119
|
+
this.logger.warn(
|
|
112
120
|
'Unexpected response format from Jina API. Using default ranking.'
|
|
113
121
|
);
|
|
114
122
|
return this.getDefaultRanking(documents, topK);
|
|
115
123
|
}
|
|
116
124
|
} catch (error) {
|
|
117
|
-
|
|
125
|
+
this.logger.error('Error using Jina reranker:', error);
|
|
118
126
|
// Fallback to default ranking on error
|
|
119
127
|
return this.getDefaultRanking(documents, topK);
|
|
120
128
|
}
|
|
@@ -122,8 +130,14 @@ export class JinaReranker extends BaseReranker {
|
|
|
122
130
|
}
|
|
123
131
|
|
|
124
132
|
export class CohereReranker extends BaseReranker {
|
|
125
|
-
constructor({
|
|
126
|
-
|
|
133
|
+
constructor({
|
|
134
|
+
apiKey = process.env.COHERE_API_KEY,
|
|
135
|
+
logger,
|
|
136
|
+
}: {
|
|
137
|
+
apiKey?: string;
|
|
138
|
+
logger?: t.Logger;
|
|
139
|
+
}) {
|
|
140
|
+
super(logger);
|
|
127
141
|
this.apiKey = apiKey;
|
|
128
142
|
}
|
|
129
143
|
|
|
@@ -132,11 +146,11 @@ export class CohereReranker extends BaseReranker {
|
|
|
132
146
|
documents: string[],
|
|
133
147
|
topK: number = 5
|
|
134
148
|
): Promise<t.Highlight[]> {
|
|
135
|
-
|
|
149
|
+
this.logger.debug(`Reranking ${documents.length} documents with Cohere`);
|
|
136
150
|
|
|
137
151
|
try {
|
|
138
152
|
if (this.apiKey == null || this.apiKey === '') {
|
|
139
|
-
|
|
153
|
+
this.logger.warn('COHERE_API_KEY is not set. Using default ranking.');
|
|
140
154
|
return this.getDefaultRanking(documents, topK);
|
|
141
155
|
}
|
|
142
156
|
|
|
@@ -161,14 +175,14 @@ export class CohereReranker extends BaseReranker {
|
|
|
161
175
|
);
|
|
162
176
|
|
|
163
177
|
// Log the response data structure
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
178
|
+
this.logger.debug('Cohere API response structure:');
|
|
179
|
+
this.logger.debug('ID:', response.data?.id);
|
|
180
|
+
this.logger.debug('Meta:', response.data?.meta);
|
|
181
|
+
this.logger.debug('Results count:', response.data?.results.length);
|
|
168
182
|
|
|
169
183
|
// Log a sample of the results
|
|
170
184
|
if ((response.data?.results.length ?? 0) > 0) {
|
|
171
|
-
|
|
185
|
+
this.logger.debug(
|
|
172
186
|
'Sample result:',
|
|
173
187
|
JSON.stringify(response.data?.results[0], null, 2)
|
|
174
188
|
);
|
|
@@ -182,13 +196,13 @@ export class CohereReranker extends BaseReranker {
|
|
|
182
196
|
return { text, score };
|
|
183
197
|
});
|
|
184
198
|
} else {
|
|
185
|
-
|
|
199
|
+
this.logger.warn(
|
|
186
200
|
'Unexpected response format from Cohere API. Using default ranking.'
|
|
187
201
|
);
|
|
188
202
|
return this.getDefaultRanking(documents, topK);
|
|
189
203
|
}
|
|
190
204
|
} catch (error) {
|
|
191
|
-
|
|
205
|
+
this.logger.error('Error using Cohere reranker:', error);
|
|
192
206
|
// Fallback to default ranking on error
|
|
193
207
|
return this.getDefaultRanking(documents, topK);
|
|
194
208
|
}
|
|
@@ -196,8 +210,8 @@ export class CohereReranker extends BaseReranker {
|
|
|
196
210
|
}
|
|
197
211
|
|
|
198
212
|
export class InfinityReranker extends BaseReranker {
|
|
199
|
-
constructor() {
|
|
200
|
-
super();
|
|
213
|
+
constructor(logger?: t.Logger) {
|
|
214
|
+
super(logger);
|
|
201
215
|
// No API key needed for the placeholder implementation
|
|
202
216
|
}
|
|
203
217
|
|
|
@@ -206,7 +220,7 @@ export class InfinityReranker extends BaseReranker {
|
|
|
206
220
|
documents: string[],
|
|
207
221
|
topK: number = 5
|
|
208
222
|
): Promise<t.Highlight[]> {
|
|
209
|
-
|
|
223
|
+
this.logger.debug(
|
|
210
224
|
`Reranking ${documents.length} documents with Infinity (placeholder)`
|
|
211
225
|
);
|
|
212
226
|
// This would be replaced with actual Infinity reranker implementation
|
|
@@ -221,24 +235,31 @@ export const createReranker = (config: {
|
|
|
221
235
|
rerankerType: t.RerankerType;
|
|
222
236
|
jinaApiKey?: string;
|
|
223
237
|
cohereApiKey?: string;
|
|
238
|
+
logger?: t.Logger;
|
|
224
239
|
}): BaseReranker | undefined => {
|
|
225
|
-
const { rerankerType, jinaApiKey, cohereApiKey } = config;
|
|
240
|
+
const { rerankerType, jinaApiKey, cohereApiKey, logger } = config;
|
|
241
|
+
|
|
242
|
+
// Create a default logger if none is provided
|
|
243
|
+
const defaultLogger = logger || createDefaultLogger();
|
|
226
244
|
|
|
227
245
|
switch (rerankerType.toLowerCase()) {
|
|
228
246
|
case 'jina':
|
|
229
|
-
return new JinaReranker({ apiKey: jinaApiKey });
|
|
247
|
+
return new JinaReranker({ apiKey: jinaApiKey, logger: defaultLogger });
|
|
230
248
|
case 'cohere':
|
|
231
|
-
return new CohereReranker({
|
|
249
|
+
return new CohereReranker({
|
|
250
|
+
apiKey: cohereApiKey,
|
|
251
|
+
logger: defaultLogger,
|
|
252
|
+
});
|
|
232
253
|
case 'infinity':
|
|
233
|
-
return new InfinityReranker();
|
|
254
|
+
return new InfinityReranker(defaultLogger);
|
|
234
255
|
case 'none':
|
|
235
|
-
|
|
256
|
+
defaultLogger.debug('Skipping reranking as reranker is set to "none"');
|
|
236
257
|
return undefined;
|
|
237
258
|
default:
|
|
238
|
-
|
|
259
|
+
defaultLogger.warn(
|
|
239
260
|
`Unknown reranker type: ${rerankerType}. Defaulting to InfinityReranker.`
|
|
240
261
|
);
|
|
241
|
-
return new JinaReranker({ apiKey: jinaApiKey });
|
|
262
|
+
return new JinaReranker({ apiKey: jinaApiKey, logger: defaultLogger });
|
|
242
263
|
}
|
|
243
264
|
};
|
|
244
265
|
|
|
package/src/tools/search/schema.ts
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
|
|
3
|
+
export enum DATE_RANGE {
|
|
4
|
+
PAST_HOUR = 'h',
|
|
5
|
+
PAST_24_HOURS = 'd',
|
|
6
|
+
PAST_WEEK = 'w',
|
|
7
|
+
PAST_MONTH = 'm',
|
|
8
|
+
PAST_YEAR = 'y',
|
|
9
|
+
}
|
|
10
|
+
|
|
11
|
+
export const DEFAULT_QUERY_DESCRIPTION = `
|
|
12
|
+
GUIDELINES:
|
|
13
|
+
- Start broad, then narrow: Begin with key concepts, then refine with specifics
|
|
14
|
+
- Think like sources: Use terminology experts would use in the field
|
|
15
|
+
- Consider perspective: Frame queries from different viewpoints for better results
|
|
16
|
+
- Quality over quantity: A precise 3-4 word query often beats lengthy sentences
|
|
17
|
+
|
|
18
|
+
TECHNIQUES (combine for power searches):
|
|
19
|
+
- EXACT PHRASES: Use quotes ("climate change report")
|
|
20
|
+
- EXCLUDE TERMS: Use minus to remove unwanted results (-wikipedia)
|
|
21
|
+
- SITE-SPECIFIC: Restrict to websites (site:edu research)
|
|
22
|
+
- FILETYPE: Find specific documents (filetype:pdf study)
|
|
23
|
+
- OR OPERATOR: Find alternatives (electric OR hybrid cars)
|
|
24
|
+
- DATE RANGE: Recent information (data after:2020)
|
|
25
|
+
- WILDCARDS: Use * for unknown terms (how to * bread)
|
|
26
|
+
- SPECIFIC QUESTIONS: Use who/what/when/where/why/how
|
|
27
|
+
- DOMAIN TERMS: Include technical terminology for specialized topics
|
|
28
|
+
- CONCISE TERMS: Prioritize keywords over sentences
|
|
29
|
+
`.trim();
|
|
30
|
+
|
|
31
|
+
export const DEFAULT_COUNTRY_DESCRIPTION =
|
|
32
|
+
`Country code to localize search results.
|
|
33
|
+
Use standard 2-letter country codes: "us", "uk", "ca", "de", "fr", "jp", "br", etc.
|
|
34
|
+
Provide this when the search should return results specific to a particular country.
|
|
35
|
+
Examples:
|
|
36
|
+
- "us" for United States (default)
|
|
37
|
+
- "de" for Germany
|
|
38
|
+
- "in" for India
|
|
39
|
+
`.trim();
|
|
40
|
+
|
|
41
|
+
export const querySchema = z.string().describe(DEFAULT_QUERY_DESCRIPTION);
|
|
42
|
+
export const dateSchema = z
|
|
43
|
+
.nativeEnum(DATE_RANGE)
|
|
44
|
+
.optional()
|
|
45
|
+
.describe('Date range for search results.');
|
|
46
|
+
export const countrySchema = z
|
|
47
|
+
.string()
|
|
48
|
+
.optional()
|
|
49
|
+
.describe(DEFAULT_COUNTRY_DESCRIPTION);
|
|
50
|
+
export const imagesSchema = z
|
|
51
|
+
.boolean()
|
|
52
|
+
.optional()
|
|
53
|
+
.describe('Whether to also run an image search.');
|
|
54
|
+
|
|
55
|
+
export const videosSchema = z
|
|
56
|
+
.boolean()
|
|
57
|
+
.optional()
|
|
58
|
+
.describe('Whether to also run a video search.');
|
|
59
|
+
|
|
60
|
+
export const newsSchema = z
|
|
61
|
+
.boolean()
|
|
62
|
+
.optional()
|
|
63
|
+
.describe('Whether to also run a news search.');
|