@librechat/agents 3.0.0-rc9 → 3.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/agents/AgentContext.cjs +6 -2
- package/dist/cjs/agents/AgentContext.cjs.map +1 -1
- package/dist/cjs/graphs/Graph.cjs +23 -2
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/graphs/MultiAgentGraph.cjs +5 -5
- package/dist/cjs/graphs/MultiAgentGraph.cjs.map +1 -1
- package/dist/cjs/instrumentation.cjs +21 -0
- package/dist/cjs/instrumentation.cjs.map +1 -0
- package/dist/cjs/llm/anthropic/index.cjs +21 -2
- package/dist/cjs/llm/anthropic/index.cjs.map +1 -1
- package/dist/cjs/llm/google/index.cjs +3 -0
- package/dist/cjs/llm/google/index.cjs.map +1 -1
- package/dist/cjs/llm/google/utils/common.cjs +13 -0
- package/dist/cjs/llm/google/utils/common.cjs.map +1 -1
- package/dist/cjs/llm/ollama/index.cjs +3 -0
- package/dist/cjs/llm/ollama/index.cjs.map +1 -1
- package/dist/cjs/llm/openai/index.cjs +18 -3
- package/dist/cjs/llm/openai/index.cjs.map +1 -1
- package/dist/cjs/llm/openai/utils/index.cjs +6 -1
- package/dist/cjs/llm/openai/utils/index.cjs.map +1 -1
- package/dist/cjs/llm/openrouter/index.cjs +5 -1
- package/dist/cjs/llm/openrouter/index.cjs.map +1 -1
- package/dist/cjs/llm/vertexai/index.cjs +1 -1
- package/dist/cjs/llm/vertexai/index.cjs.map +1 -1
- package/dist/cjs/main.cjs +8 -2
- package/dist/cjs/main.cjs.map +1 -1
- package/dist/cjs/messages/cache.cjs +49 -0
- package/dist/cjs/messages/cache.cjs.map +1 -0
- package/dist/cjs/messages/content.cjs +53 -0
- package/dist/cjs/messages/content.cjs.map +1 -0
- package/dist/cjs/messages/core.cjs +5 -1
- package/dist/cjs/messages/core.cjs.map +1 -1
- package/dist/cjs/messages/format.cjs +50 -59
- package/dist/cjs/messages/format.cjs.map +1 -1
- package/dist/cjs/messages/prune.cjs +28 -0
- package/dist/cjs/messages/prune.cjs.map +1 -1
- package/dist/cjs/run.cjs +57 -5
- package/dist/cjs/run.cjs.map +1 -1
- package/dist/cjs/stream.cjs +7 -0
- package/dist/cjs/stream.cjs.map +1 -1
- package/dist/cjs/tools/ToolNode.cjs +2 -0
- package/dist/cjs/tools/ToolNode.cjs.map +1 -1
- package/dist/cjs/tools/search/firecrawl.cjs +3 -1
- package/dist/cjs/tools/search/firecrawl.cjs.map +1 -1
- package/dist/cjs/tools/search/rerankers.cjs +8 -6
- package/dist/cjs/tools/search/rerankers.cjs.map +1 -1
- package/dist/cjs/tools/search/search.cjs +5 -5
- package/dist/cjs/tools/search/search.cjs.map +1 -1
- package/dist/cjs/tools/search/serper-scraper.cjs +132 -0
- package/dist/cjs/tools/search/serper-scraper.cjs.map +1 -0
- package/dist/cjs/tools/search/tool.cjs +46 -9
- package/dist/cjs/tools/search/tool.cjs.map +1 -1
- package/dist/cjs/utils/handlers.cjs +70 -0
- package/dist/cjs/utils/handlers.cjs.map +1 -0
- package/dist/cjs/utils/misc.cjs +8 -1
- package/dist/cjs/utils/misc.cjs.map +1 -1
- package/dist/cjs/utils/title.cjs +54 -25
- package/dist/cjs/utils/title.cjs.map +1 -1
- package/dist/esm/agents/AgentContext.mjs +6 -2
- package/dist/esm/agents/AgentContext.mjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +23 -2
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/graphs/MultiAgentGraph.mjs +5 -5
- package/dist/esm/graphs/MultiAgentGraph.mjs.map +1 -1
- package/dist/esm/instrumentation.mjs +19 -0
- package/dist/esm/instrumentation.mjs.map +1 -0
- package/dist/esm/llm/anthropic/index.mjs +21 -2
- package/dist/esm/llm/anthropic/index.mjs.map +1 -1
- package/dist/esm/llm/google/index.mjs +3 -0
- package/dist/esm/llm/google/index.mjs.map +1 -1
- package/dist/esm/llm/google/utils/common.mjs +13 -0
- package/dist/esm/llm/google/utils/common.mjs.map +1 -1
- package/dist/esm/llm/ollama/index.mjs +3 -0
- package/dist/esm/llm/ollama/index.mjs.map +1 -1
- package/dist/esm/llm/openai/index.mjs +18 -3
- package/dist/esm/llm/openai/index.mjs.map +1 -1
- package/dist/esm/llm/openai/utils/index.mjs +6 -1
- package/dist/esm/llm/openai/utils/index.mjs.map +1 -1
- package/dist/esm/llm/openrouter/index.mjs +5 -1
- package/dist/esm/llm/openrouter/index.mjs.map +1 -1
- package/dist/esm/llm/vertexai/index.mjs +1 -1
- package/dist/esm/llm/vertexai/index.mjs.map +1 -1
- package/dist/esm/main.mjs +5 -2
- package/dist/esm/main.mjs.map +1 -1
- package/dist/esm/messages/cache.mjs +47 -0
- package/dist/esm/messages/cache.mjs.map +1 -0
- package/dist/esm/messages/content.mjs +51 -0
- package/dist/esm/messages/content.mjs.map +1 -0
- package/dist/esm/messages/core.mjs +5 -1
- package/dist/esm/messages/core.mjs.map +1 -1
- package/dist/esm/messages/format.mjs +50 -58
- package/dist/esm/messages/format.mjs.map +1 -1
- package/dist/esm/messages/prune.mjs +28 -0
- package/dist/esm/messages/prune.mjs.map +1 -1
- package/dist/esm/run.mjs +57 -5
- package/dist/esm/run.mjs.map +1 -1
- package/dist/esm/stream.mjs +7 -0
- package/dist/esm/stream.mjs.map +1 -1
- package/dist/esm/tools/ToolNode.mjs +2 -0
- package/dist/esm/tools/ToolNode.mjs.map +1 -1
- package/dist/esm/tools/search/firecrawl.mjs +3 -1
- package/dist/esm/tools/search/firecrawl.mjs.map +1 -1
- package/dist/esm/tools/search/rerankers.mjs +8 -6
- package/dist/esm/tools/search/rerankers.mjs.map +1 -1
- package/dist/esm/tools/search/search.mjs +5 -5
- package/dist/esm/tools/search/search.mjs.map +1 -1
- package/dist/esm/tools/search/serper-scraper.mjs +129 -0
- package/dist/esm/tools/search/serper-scraper.mjs.map +1 -0
- package/dist/esm/tools/search/tool.mjs +46 -9
- package/dist/esm/tools/search/tool.mjs.map +1 -1
- package/dist/esm/utils/handlers.mjs +68 -0
- package/dist/esm/utils/handlers.mjs.map +1 -0
- package/dist/esm/utils/misc.mjs +8 -2
- package/dist/esm/utils/misc.mjs.map +1 -1
- package/dist/esm/utils/title.mjs +54 -25
- package/dist/esm/utils/title.mjs.map +1 -1
- package/dist/types/agents/AgentContext.d.ts +4 -1
- package/dist/types/instrumentation.d.ts +1 -0
- package/dist/types/llm/anthropic/index.d.ts +3 -0
- package/dist/types/llm/google/index.d.ts +1 -0
- package/dist/types/llm/ollama/index.d.ts +1 -0
- package/dist/types/llm/openai/index.d.ts +4 -0
- package/dist/types/llm/openrouter/index.d.ts +4 -2
- package/dist/types/llm/vertexai/index.d.ts +1 -1
- package/dist/types/messages/cache.d.ts +8 -0
- package/dist/types/messages/content.d.ts +7 -0
- package/dist/types/messages/format.d.ts +22 -25
- package/dist/types/messages/index.d.ts +2 -0
- package/dist/types/run.d.ts +2 -1
- package/dist/types/tools/search/firecrawl.d.ts +2 -1
- package/dist/types/tools/search/rerankers.d.ts +4 -1
- package/dist/types/tools/search/search.d.ts +1 -2
- package/dist/types/tools/search/serper-scraper.d.ts +59 -0
- package/dist/types/tools/search/tool.d.ts +25 -4
- package/dist/types/tools/search/types.d.ts +31 -1
- package/dist/types/types/graph.d.ts +3 -1
- package/dist/types/types/messages.d.ts +4 -0
- package/dist/types/utils/handlers.d.ts +34 -0
- package/dist/types/utils/index.d.ts +1 -0
- package/dist/types/utils/misc.d.ts +1 -0
- package/package.json +7 -3
- package/src/agents/AgentContext.ts +8 -0
- package/src/graphs/Graph.ts +31 -2
- package/src/graphs/MultiAgentGraph.ts +5 -5
- package/src/instrumentation.ts +22 -0
- package/src/llm/anthropic/index.ts +23 -2
- package/src/llm/anthropic/llm.spec.ts +1 -1
- package/src/llm/google/index.ts +4 -0
- package/src/llm/google/utils/common.ts +14 -0
- package/src/llm/ollama/index.ts +3 -0
- package/src/llm/openai/index.ts +17 -4
- package/src/llm/openai/utils/index.ts +7 -1
- package/src/llm/openrouter/index.ts +15 -6
- package/src/llm/vertexai/index.ts +2 -2
- package/src/messages/cache.test.ts +262 -0
- package/src/messages/cache.ts +56 -0
- package/src/messages/content.test.ts +362 -0
- package/src/messages/content.ts +63 -0
- package/src/messages/core.ts +5 -2
- package/src/messages/format.ts +65 -71
- package/src/messages/formatMessage.test.ts +418 -2
- package/src/messages/index.ts +2 -0
- package/src/messages/prune.ts +51 -0
- package/src/run.ts +82 -10
- package/src/scripts/ant_web_search.ts +1 -1
- package/src/scripts/handoff-test.ts +1 -1
- package/src/scripts/multi-agent-chain.ts +4 -4
- package/src/scripts/multi-agent-conditional.ts +4 -4
- package/src/scripts/multi-agent-document-review-chain.ts +4 -4
- package/src/scripts/multi-agent-parallel.ts +10 -8
- package/src/scripts/multi-agent-sequence.ts +3 -3
- package/src/scripts/multi-agent-supervisor.ts +5 -3
- package/src/scripts/multi-agent-test.ts +2 -2
- package/src/scripts/search.ts +5 -1
- package/src/scripts/simple.ts +8 -0
- package/src/scripts/test-custom-prompt-key.ts +4 -4
- package/src/scripts/test-handoff-input.ts +3 -3
- package/src/scripts/test-multi-agent-list-handoff.ts +2 -2
- package/src/scripts/tools.ts +4 -1
- package/src/specs/agent-handoffs.test.ts +889 -0
- package/src/stream.ts +9 -2
- package/src/tools/search/firecrawl.ts +5 -2
- package/src/tools/search/jina-reranker.test.ts +126 -0
- package/src/tools/search/rerankers.ts +11 -5
- package/src/tools/search/search.ts +6 -8
- package/src/tools/search/serper-scraper.ts +155 -0
- package/src/tools/search/tool.ts +49 -8
- package/src/tools/search/types.ts +46 -0
- package/src/types/graph.ts +6 -1
- package/src/types/messages.ts +4 -0
- package/src/utils/handlers.ts +107 -0
- package/src/utils/index.ts +2 -1
- package/src/utils/llmConfig.ts +35 -1
- package/src/utils/misc.ts +33 -21
- package/src/utils/title.ts +80 -40
package/src/stream.ts
CHANGED
|
@@ -398,9 +398,13 @@ export function createContentAggregator(): t.ContentAggregatorResult {
|
|
|
398
398
|
|
|
399
399
|
const updateContent = (
|
|
400
400
|
index: number,
|
|
401
|
-
contentPart
|
|
401
|
+
contentPart?: t.MessageContentComplex,
|
|
402
402
|
finalUpdate = false
|
|
403
403
|
): void => {
|
|
404
|
+
if (!contentPart) {
|
|
405
|
+
console.warn('No content part found in \'updateContent\'');
|
|
406
|
+
return;
|
|
407
|
+
}
|
|
404
408
|
const partType = contentPart.type ?? '';
|
|
405
409
|
if (!partType) {
|
|
406
410
|
console.warn('No content type found in content part');
|
|
@@ -578,7 +582,10 @@ export function createContentAggregator(): t.ContentAggregatorResult {
|
|
|
578
582
|
event === GraphEvents.ON_AGENT_UPDATE &&
|
|
579
583
|
(data as t.AgentUpdate | undefined)?.agent_update
|
|
580
584
|
) {
|
|
581
|
-
const contentPart = data as t.AgentUpdate;
|
|
585
|
+
const contentPart = data as t.AgentUpdate | undefined;
|
|
586
|
+
if (!contentPart) {
|
|
587
|
+
return;
|
|
588
|
+
}
|
|
582
589
|
updateContent(contentPart.agent_update.index, contentPart);
|
|
583
590
|
} else if (event === GraphEvents.ON_REASONING_DELTA) {
|
|
584
591
|
const reasoningDelta = data as t.ReasoningDeltaEvent;
|
|
@@ -7,9 +7,10 @@ import { createDefaultLogger } from './utils';
|
|
|
7
7
|
* Firecrawl scraper implementation
|
|
8
8
|
* Uses the Firecrawl API to scrape web pages
|
|
9
9
|
*/
|
|
10
|
-
export class FirecrawlScraper {
|
|
10
|
+
export class FirecrawlScraper implements t.BaseScraper {
|
|
11
11
|
private apiKey: string;
|
|
12
12
|
private apiUrl: string;
|
|
13
|
+
private version: string;
|
|
13
14
|
private defaultFormats: string[];
|
|
14
15
|
private timeout: number;
|
|
15
16
|
private logger: t.Logger;
|
|
@@ -32,11 +33,13 @@ export class FirecrawlScraper {
|
|
|
32
33
|
constructor(config: t.FirecrawlScraperConfig = {}) {
|
|
33
34
|
this.apiKey = config.apiKey ?? process.env.FIRECRAWL_API_KEY ?? '';
|
|
34
35
|
|
|
36
|
+
this.version = config.version ?? 'v2';
|
|
37
|
+
|
|
35
38
|
const baseUrl =
|
|
36
39
|
config.apiUrl ??
|
|
37
40
|
process.env.FIRECRAWL_BASE_URL ??
|
|
38
41
|
'https://api.firecrawl.dev';
|
|
39
|
-
this.apiUrl = `${baseUrl.replace(/\/+$/, '')}/
|
|
42
|
+
this.apiUrl = `${baseUrl.replace(/\/+$/, '')}/${this.version}/scrape`;
|
|
40
43
|
|
|
41
44
|
this.defaultFormats = config.formats ?? ['markdown', 'rawHtml'];
|
|
42
45
|
this.timeout = config.timeout ?? 7500;
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
import { JinaReranker } from './rerankers';
|
|
2
|
+
import { createDefaultLogger } from './utils';
|
|
3
|
+
|
|
4
|
+
describe('JinaReranker', () => {
|
|
5
|
+
const mockLogger = createDefaultLogger();
|
|
6
|
+
|
|
7
|
+
describe('constructor', () => {
|
|
8
|
+
it('should use default API URL when no apiUrl is provided', () => {
|
|
9
|
+
const reranker = new JinaReranker({
|
|
10
|
+
apiKey: 'test-key',
|
|
11
|
+
logger: mockLogger,
|
|
12
|
+
});
|
|
13
|
+
|
|
14
|
+
// Access private property for testing
|
|
15
|
+
const apiUrl = (reranker as any).apiUrl;
|
|
16
|
+
expect(apiUrl).toBe('https://api.jina.ai/v1/rerank');
|
|
17
|
+
});
|
|
18
|
+
|
|
19
|
+
it('should use custom API URL when provided', () => {
|
|
20
|
+
const customUrl = 'https://custom-jina-endpoint.com/v1/rerank';
|
|
21
|
+
const reranker = new JinaReranker({
|
|
22
|
+
apiKey: 'test-key',
|
|
23
|
+
apiUrl: customUrl,
|
|
24
|
+
logger: mockLogger,
|
|
25
|
+
});
|
|
26
|
+
|
|
27
|
+
const apiUrl = (reranker as any).apiUrl;
|
|
28
|
+
expect(apiUrl).toBe(customUrl);
|
|
29
|
+
});
|
|
30
|
+
|
|
31
|
+
it('should use environment variable JINA_API_URL when available', () => {
|
|
32
|
+
const originalEnv = process.env.JINA_API_URL;
|
|
33
|
+
process.env.JINA_API_URL = 'https://env-jina-endpoint.com/v1/rerank';
|
|
34
|
+
|
|
35
|
+
const reranker = new JinaReranker({
|
|
36
|
+
apiKey: 'test-key',
|
|
37
|
+
logger: mockLogger,
|
|
38
|
+
});
|
|
39
|
+
|
|
40
|
+
const apiUrl = (reranker as any).apiUrl;
|
|
41
|
+
expect(apiUrl).toBe('https://env-jina-endpoint.com/v1/rerank');
|
|
42
|
+
|
|
43
|
+
// Restore original environment
|
|
44
|
+
if (originalEnv) {
|
|
45
|
+
process.env.JINA_API_URL = originalEnv;
|
|
46
|
+
} else {
|
|
47
|
+
delete process.env.JINA_API_URL;
|
|
48
|
+
}
|
|
49
|
+
});
|
|
50
|
+
|
|
51
|
+
it('should prioritize explicit apiUrl over environment variable', () => {
|
|
52
|
+
const originalEnv = process.env.JINA_API_URL;
|
|
53
|
+
process.env.JINA_API_URL = 'https://env-jina-endpoint.com/v1/rerank';
|
|
54
|
+
|
|
55
|
+
const customUrl = 'https://explicit-jina-endpoint.com/v1/rerank';
|
|
56
|
+
const reranker = new JinaReranker({
|
|
57
|
+
apiKey: 'test-key',
|
|
58
|
+
apiUrl: customUrl,
|
|
59
|
+
logger: mockLogger,
|
|
60
|
+
});
|
|
61
|
+
|
|
62
|
+
const apiUrl = (reranker as any).apiUrl;
|
|
63
|
+
expect(apiUrl).toBe(customUrl);
|
|
64
|
+
|
|
65
|
+
// Restore original environment
|
|
66
|
+
if (originalEnv) {
|
|
67
|
+
process.env.JINA_API_URL = originalEnv;
|
|
68
|
+
} else {
|
|
69
|
+
delete process.env.JINA_API_URL;
|
|
70
|
+
}
|
|
71
|
+
});
|
|
72
|
+
});
|
|
73
|
+
|
|
74
|
+
describe('rerank method', () => {
|
|
75
|
+
it('should log the API URL being used', async () => {
|
|
76
|
+
const customUrl = 'https://test-jina-endpoint.com/v1/rerank';
|
|
77
|
+
const reranker = new JinaReranker({
|
|
78
|
+
apiKey: 'test-key',
|
|
79
|
+
apiUrl: customUrl,
|
|
80
|
+
logger: mockLogger,
|
|
81
|
+
});
|
|
82
|
+
|
|
83
|
+
const logSpy = jest.spyOn(mockLogger, 'debug');
|
|
84
|
+
|
|
85
|
+
try {
|
|
86
|
+
await reranker.rerank('test query', ['document1', 'document2'], 2);
|
|
87
|
+
} catch (error) {
|
|
88
|
+
// Expected to fail due to missing API key, but we can check the log
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
expect(logSpy).toHaveBeenCalledWith(
|
|
92
|
+
expect.stringContaining(`Reranking 2 chunks with Jina using API URL: ${customUrl}`)
|
|
93
|
+
);
|
|
94
|
+
|
|
95
|
+
logSpy.mockRestore();
|
|
96
|
+
});
|
|
97
|
+
});
|
|
98
|
+
});
|
|
99
|
+
|
|
100
|
+
describe('createReranker', () => {
|
|
101
|
+
const { createReranker } = require('./rerankers');
|
|
102
|
+
|
|
103
|
+
it('should create JinaReranker with jinaApiUrl when provided', () => {
|
|
104
|
+
const customUrl = 'https://custom-jina-endpoint.com/v1/rerank';
|
|
105
|
+
const reranker = createReranker({
|
|
106
|
+
rerankerType: 'jina',
|
|
107
|
+
jinaApiKey: 'test-key',
|
|
108
|
+
jinaApiUrl: customUrl,
|
|
109
|
+
});
|
|
110
|
+
|
|
111
|
+
expect(reranker).toBeInstanceOf(JinaReranker);
|
|
112
|
+
const apiUrl = (reranker as any).apiUrl;
|
|
113
|
+
expect(apiUrl).toBe(customUrl);
|
|
114
|
+
});
|
|
115
|
+
|
|
116
|
+
it('should create JinaReranker with default URL when jinaApiUrl is not provided', () => {
|
|
117
|
+
const reranker = createReranker({
|
|
118
|
+
rerankerType: 'jina',
|
|
119
|
+
jinaApiKey: 'test-key',
|
|
120
|
+
});
|
|
121
|
+
|
|
122
|
+
expect(reranker).toBeInstanceOf(JinaReranker);
|
|
123
|
+
const apiUrl = (reranker as any).apiUrl;
|
|
124
|
+
expect(apiUrl).toBe('https://api.jina.ai/v1/rerank');
|
|
125
|
+
});
|
|
126
|
+
});
|
|
@@ -28,15 +28,20 @@ export abstract class BaseReranker {
|
|
|
28
28
|
}
|
|
29
29
|
|
|
30
30
|
export class JinaReranker extends BaseReranker {
|
|
31
|
+
private apiUrl: string;
|
|
32
|
+
|
|
31
33
|
constructor({
|
|
32
34
|
apiKey = process.env.JINA_API_KEY,
|
|
35
|
+
apiUrl = process.env.JINA_API_URL || 'https://api.jina.ai/v1/rerank',
|
|
33
36
|
logger,
|
|
34
37
|
}: {
|
|
35
38
|
apiKey?: string;
|
|
39
|
+
apiUrl?: string;
|
|
36
40
|
logger?: t.Logger;
|
|
37
41
|
}) {
|
|
38
42
|
super(logger);
|
|
39
43
|
this.apiKey = apiKey;
|
|
44
|
+
this.apiUrl = apiUrl;
|
|
40
45
|
}
|
|
41
46
|
|
|
42
47
|
async rerank(
|
|
@@ -44,7 +49,7 @@ export class JinaReranker extends BaseReranker {
|
|
|
44
49
|
documents: string[],
|
|
45
50
|
topK: number = 5
|
|
46
51
|
): Promise<t.Highlight[]> {
|
|
47
|
-
this.logger.debug(`Reranking ${documents.length} chunks with Jina`);
|
|
52
|
+
this.logger.debug(`Reranking ${documents.length} chunks with Jina using API URL: ${this.apiUrl}`);
|
|
48
53
|
|
|
49
54
|
try {
|
|
50
55
|
if (this.apiKey == null || this.apiKey === '') {
|
|
@@ -61,7 +66,7 @@ export class JinaReranker extends BaseReranker {
|
|
|
61
66
|
};
|
|
62
67
|
|
|
63
68
|
const response = await axios.post<t.JinaRerankerResponse | undefined>(
|
|
64
|
-
|
|
69
|
+
this.apiUrl,
|
|
65
70
|
requestData,
|
|
66
71
|
{
|
|
67
72
|
headers: {
|
|
@@ -201,17 +206,18 @@ export class InfinityReranker extends BaseReranker {
|
|
|
201
206
|
export const createReranker = (config: {
|
|
202
207
|
rerankerType: t.RerankerType;
|
|
203
208
|
jinaApiKey?: string;
|
|
209
|
+
jinaApiUrl?: string;
|
|
204
210
|
cohereApiKey?: string;
|
|
205
211
|
logger?: t.Logger;
|
|
206
212
|
}): BaseReranker | undefined => {
|
|
207
|
-
const { rerankerType, jinaApiKey, cohereApiKey, logger } = config;
|
|
213
|
+
const { rerankerType, jinaApiKey, jinaApiUrl, cohereApiKey, logger } = config;
|
|
208
214
|
|
|
209
215
|
// Create a default logger if none is provided
|
|
210
216
|
const defaultLogger = logger || createDefaultLogger();
|
|
211
217
|
|
|
212
218
|
switch (rerankerType.toLowerCase()) {
|
|
213
219
|
case 'jina':
|
|
214
|
-
return new JinaReranker({ apiKey: jinaApiKey, logger: defaultLogger });
|
|
220
|
+
return new JinaReranker({ apiKey: jinaApiKey, apiUrl: jinaApiUrl, logger: defaultLogger });
|
|
215
221
|
case 'cohere':
|
|
216
222
|
return new CohereReranker({
|
|
217
223
|
apiKey: cohereApiKey,
|
|
@@ -226,7 +232,7 @@ export const createReranker = (config: {
|
|
|
226
232
|
defaultLogger.warn(
|
|
227
233
|
`Unknown reranker type: ${rerankerType}. Defaulting to InfinityReranker.`
|
|
228
234
|
);
|
|
229
|
-
return new JinaReranker({ apiKey: jinaApiKey, logger: defaultLogger });
|
|
235
|
+
return new JinaReranker({ apiKey: jinaApiKey, apiUrl: jinaApiUrl, logger: defaultLogger });
|
|
230
236
|
}
|
|
231
237
|
};
|
|
232
238
|
|
|
@@ -2,7 +2,6 @@ import axios from 'axios';
|
|
|
2
2
|
import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
|
|
3
3
|
import type * as t from './types';
|
|
4
4
|
import { getAttribution, createDefaultLogger } from './utils';
|
|
5
|
-
import { FirecrawlScraper } from './firecrawl';
|
|
6
5
|
import { BaseReranker } from './rerankers';
|
|
7
6
|
|
|
8
7
|
const chunker = {
|
|
@@ -434,7 +433,7 @@ export const createSearchAPI = (
|
|
|
434
433
|
|
|
435
434
|
export const createSourceProcessor = (
|
|
436
435
|
config: t.ProcessSourcesConfig = {},
|
|
437
|
-
scraperInstance?: FirecrawlScraper
|
|
436
|
+
scraperInstance?: t.BaseScraper
|
|
438
437
|
): {
|
|
439
438
|
processSources: (
|
|
440
439
|
fields: t.ProcessSourcesFields
|
|
@@ -442,7 +441,7 @@ export const createSourceProcessor = (
|
|
|
442
441
|
topResults: number;
|
|
443
442
|
} => {
|
|
444
443
|
if (!scraperInstance) {
|
|
445
|
-
throw new Error('Firecrawl scraper instance is required');
|
|
444
|
+
throw new Error('Scraper instance is required');
|
|
446
445
|
}
|
|
447
446
|
const {
|
|
448
447
|
topResults = 5,
|
|
@@ -453,7 +452,7 @@ export const createSourceProcessor = (
|
|
|
453
452
|
} = config;
|
|
454
453
|
|
|
455
454
|
const logger_ = logger || createDefaultLogger();
|
|
456
|
-
const firecrawlScraper = scraperInstance;
|
|
455
|
+
const scraper = scraperInstance;
|
|
457
456
|
|
|
458
457
|
const webScraper = {
|
|
459
458
|
scrapeMany: async ({
|
|
@@ -465,12 +464,12 @@ export const createSourceProcessor = (
|
|
|
465
464
|
links: string[];
|
|
466
465
|
onGetHighlights: t.SearchToolConfig['onGetHighlights'];
|
|
467
466
|
}): Promise<Array<t.ScrapeResult>> => {
|
|
468
|
-
logger_.debug(`Scraping ${links.length} links with Firecrawl`);
|
|
467
|
+
logger_.debug(`Scraping ${links.length} links`);
|
|
469
468
|
const promises: Array<Promise<t.ScrapeResult>> = [];
|
|
470
469
|
try {
|
|
471
470
|
for (let i = 0; i < links.length; i++) {
|
|
472
471
|
const currentLink = links[i];
|
|
473
|
-
const promise: Promise<t.ScrapeResult> = firecrawlScraper
|
|
472
|
+
const promise: Promise<t.ScrapeResult> = scraper
|
|
474
473
|
.scrapeUrl(currentLink, {})
|
|
475
474
|
.then(([url, response]) => {
|
|
476
475
|
const attribution = getAttribution(
|
|
@@ -479,8 +478,7 @@ export const createSourceProcessor = (
|
|
|
479
478
|
logger_
|
|
480
479
|
);
|
|
481
480
|
if (response.success && response.data) {
|
|
482
|
-
const [content, references] =
|
|
483
|
-
firecrawlScraper.extractContent(response);
|
|
481
|
+
const [content, references] = scraper.extractContent(response);
|
|
484
482
|
return {
|
|
485
483
|
url,
|
|
486
484
|
references,
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
import axios from 'axios';
|
|
2
|
+
import type * as t from './types';
|
|
3
|
+
import { createDefaultLogger } from './utils';
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* Serper scraper implementation
|
|
7
|
+
* Uses the Serper Scrape API (https://scrape.serper.dev) to scrape web pages
|
|
8
|
+
*
|
|
9
|
+
* Features:
|
|
10
|
+
* - Simple API with single endpoint
|
|
11
|
+
* - Returns both text and markdown content
|
|
12
|
+
* - Includes metadata from scraped pages
|
|
13
|
+
* - Credits-based pricing model
|
|
14
|
+
*
|
|
15
|
+
* @example
|
|
16
|
+
* ```typescript
|
|
17
|
+
* const scraper = createSerperScraper({
|
|
18
|
+
* apiKey: 'your-serper-api-key',
|
|
19
|
+
* includeMarkdown: true,
|
|
20
|
+
* timeout: 10000
|
|
21
|
+
* });
|
|
22
|
+
*
|
|
23
|
+
* const [url, response] = await scraper.scrapeUrl('https://example.com');
|
|
24
|
+
* if (response.success) {
|
|
25
|
+
* const [content] = scraper.extractContent(response);
|
|
26
|
+
* console.log(content);
|
|
27
|
+
* }
|
|
28
|
+
* ```
|
|
29
|
+
*/
|
|
30
|
+
export class SerperScraper implements t.BaseScraper {
|
|
31
|
+
private apiKey: string;
|
|
32
|
+
private apiUrl: string;
|
|
33
|
+
private timeout: number;
|
|
34
|
+
private logger: t.Logger;
|
|
35
|
+
private includeMarkdown: boolean;
|
|
36
|
+
|
|
37
|
+
constructor(config: t.SerperScraperConfig = {}) {
|
|
38
|
+
this.apiKey = config.apiKey ?? process.env.SERPER_API_KEY ?? '';
|
|
39
|
+
|
|
40
|
+
this.apiUrl =
|
|
41
|
+
config.apiUrl ??
|
|
42
|
+
process.env.SERPER_SCRAPE_URL ??
|
|
43
|
+
'https://scrape.serper.dev';
|
|
44
|
+
|
|
45
|
+
this.timeout = config.timeout ?? 7500;
|
|
46
|
+
this.includeMarkdown = config.includeMarkdown ?? true;
|
|
47
|
+
|
|
48
|
+
this.logger = config.logger || createDefaultLogger();
|
|
49
|
+
|
|
50
|
+
if (!this.apiKey) {
|
|
51
|
+
this.logger.warn('SERPER_API_KEY is not set. Scraping will not work.');
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
this.logger.debug(
|
|
55
|
+
`Serper scraper initialized with API URL: ${this.apiUrl}`
|
|
56
|
+
);
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
/**
|
|
60
|
+
* Scrape a single URL
|
|
61
|
+
* @param url URL to scrape
|
|
62
|
+
* @param options Scrape options
|
|
63
|
+
* @returns Scrape response
|
|
64
|
+
*/
|
|
65
|
+
async scrapeUrl(
|
|
66
|
+
url: string,
|
|
67
|
+
options: t.SerperScrapeOptions = {}
|
|
68
|
+
): Promise<[string, t.SerperScrapeResponse]> {
|
|
69
|
+
if (!this.apiKey) {
|
|
70
|
+
return [
|
|
71
|
+
url,
|
|
72
|
+
{
|
|
73
|
+
success: false,
|
|
74
|
+
error: 'SERPER_API_KEY is not set',
|
|
75
|
+
},
|
|
76
|
+
];
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
try {
|
|
80
|
+
const payload = {
|
|
81
|
+
url,
|
|
82
|
+
includeMarkdown: options.includeMarkdown ?? this.includeMarkdown,
|
|
83
|
+
};
|
|
84
|
+
|
|
85
|
+
const response = await axios.post(this.apiUrl, payload, {
|
|
86
|
+
headers: {
|
|
87
|
+
'X-API-KEY': this.apiKey,
|
|
88
|
+
'Content-Type': 'application/json',
|
|
89
|
+
},
|
|
90
|
+
timeout: options.timeout ?? this.timeout,
|
|
91
|
+
});
|
|
92
|
+
|
|
93
|
+
return [url, { success: true, data: response.data }];
|
|
94
|
+
} catch (error) {
|
|
95
|
+
const errorMessage =
|
|
96
|
+
error instanceof Error ? error.message : String(error);
|
|
97
|
+
return [
|
|
98
|
+
url,
|
|
99
|
+
{
|
|
100
|
+
success: false,
|
|
101
|
+
error: `Serper Scrape API request failed: ${errorMessage}`,
|
|
102
|
+
},
|
|
103
|
+
];
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
/**
|
|
108
|
+
* Extract content from scrape response
|
|
109
|
+
* @param response Scrape response
|
|
110
|
+
* @returns Extracted content or empty string if not available
|
|
111
|
+
*/
|
|
112
|
+
extractContent(
|
|
113
|
+
response: t.SerperScrapeResponse
|
|
114
|
+
): [string, undefined | t.References] {
|
|
115
|
+
if (!response.success || !response.data) {
|
|
116
|
+
return ['', undefined];
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
if (response.data.markdown != null) {
|
|
120
|
+
return [response.data.markdown, undefined];
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
if (response.data.text != null) {
|
|
124
|
+
return [response.data.text, undefined];
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
return ['', undefined];
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
/**
|
|
131
|
+
* Extract metadata from scrape response
|
|
132
|
+
* @param response Scrape response
|
|
133
|
+
* @returns Metadata object
|
|
134
|
+
*/
|
|
135
|
+
extractMetadata(
|
|
136
|
+
response: t.SerperScrapeResponse
|
|
137
|
+
): Record<string, string | number | boolean | null | undefined> {
|
|
138
|
+
if (!response.success || !response.data || !response.data.metadata) {
|
|
139
|
+
return {};
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
return response.data.metadata;
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
/**
|
|
147
|
+
* Create a Serper scraper instance
|
|
148
|
+
* @param config Scraper configuration
|
|
149
|
+
* @returns Serper scraper instance
|
|
150
|
+
*/
|
|
151
|
+
export const createSerperScraper = (
|
|
152
|
+
config: t.SerperScraperConfig = {}
|
|
153
|
+
): SerperScraper => {
|
|
154
|
+
return new SerperScraper(config);
|
|
155
|
+
};
|
package/src/tools/search/tool.ts
CHANGED
|
@@ -12,6 +12,7 @@ import {
|
|
|
12
12
|
newsSchema,
|
|
13
13
|
} from './schema';
|
|
14
14
|
import { createSearchAPI, createSourceProcessor } from './search';
|
|
15
|
+
import { createSerperScraper } from './serper-scraper';
|
|
15
16
|
import { createFirecrawlScraper } from './firecrawl';
|
|
16
17
|
import { expandHighlights } from './highlights';
|
|
17
18
|
import { formatResultsForLLM } from './format';
|
|
@@ -328,6 +329,27 @@ Use anchor marker(s) immediately after the statement:
|
|
|
328
329
|
* Creates a search tool with a schema that dynamically includes the country field
|
|
329
330
|
* only when the searchProvider is 'serper'.
|
|
330
331
|
*
|
|
332
|
+
* Supports multiple scraper providers:
|
|
333
|
+
* - Firecrawl (default): Full-featured web scraping with multiple formats
|
|
334
|
+
* - Serper: Lightweight scraping using Serper's scrape API
|
|
335
|
+
*
|
|
336
|
+
* @example
|
|
337
|
+
* ```typescript
|
|
338
|
+
* // Using Firecrawl scraper (default)
|
|
339
|
+
* const searchTool = createSearchTool({
|
|
340
|
+
* searchProvider: 'serper',
|
|
341
|
+
* scraperProvider: 'firecrawl',
|
|
342
|
+
* firecrawlApiKey: 'your-firecrawl-key'
|
|
343
|
+
* });
|
|
344
|
+
*
|
|
345
|
+
* // Using Serper scraper
|
|
346
|
+
* const searchTool = createSearchTool({
|
|
347
|
+
* searchProvider: 'serper',
|
|
348
|
+
* scraperProvider: 'serper',
|
|
349
|
+
* serperApiKey: 'your-serper-key'
|
|
350
|
+
* });
|
|
351
|
+
* ```
|
|
352
|
+
*
|
|
331
353
|
* @param config - The search tool configuration
|
|
332
354
|
* @returns A DynamicStructuredTool with a schema that depends on the searchProvider
|
|
333
355
|
*/
|
|
@@ -344,11 +366,15 @@ export const createSearchTool = (
|
|
|
344
366
|
strategies = ['no_extraction'],
|
|
345
367
|
filterContent = true,
|
|
346
368
|
safeSearch = 1,
|
|
369
|
+
scraperProvider = 'firecrawl',
|
|
347
370
|
firecrawlApiKey,
|
|
348
371
|
firecrawlApiUrl,
|
|
372
|
+
firecrawlVersion,
|
|
349
373
|
firecrawlOptions,
|
|
374
|
+
serperScraperOptions,
|
|
350
375
|
scraperTimeout,
|
|
351
376
|
jinaApiKey,
|
|
377
|
+
jinaApiUrl,
|
|
352
378
|
cohereApiKey,
|
|
353
379
|
onSearchResults: _onSearchResults,
|
|
354
380
|
onGetHighlights,
|
|
@@ -384,17 +410,32 @@ export const createSearchTool = (
|
|
|
384
410
|
searxngApiKey,
|
|
385
411
|
});
|
|
386
412
|
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
413
|
+
/** Create scraper based on scraperProvider */
|
|
414
|
+
let scraperInstance: t.BaseScraper;
|
|
415
|
+
|
|
416
|
+
if (scraperProvider === 'serper') {
|
|
417
|
+
scraperInstance = createSerperScraper({
|
|
418
|
+
...serperScraperOptions,
|
|
419
|
+
apiKey: serperApiKey,
|
|
420
|
+
timeout: scraperTimeout ?? serperScraperOptions?.timeout,
|
|
421
|
+
logger,
|
|
422
|
+
});
|
|
423
|
+
} else {
|
|
424
|
+
scraperInstance = createFirecrawlScraper({
|
|
425
|
+
...firecrawlOptions,
|
|
426
|
+
apiKey: firecrawlApiKey ?? process.env.FIRECRAWL_API_KEY,
|
|
427
|
+
apiUrl: firecrawlApiUrl,
|
|
428
|
+
version: firecrawlVersion,
|
|
429
|
+
timeout: scraperTimeout ?? firecrawlOptions?.timeout,
|
|
430
|
+
formats: firecrawlOptions?.formats ?? ['markdown', 'rawHtml'],
|
|
431
|
+
logger,
|
|
432
|
+
});
|
|
433
|
+
}
|
|
394
434
|
|
|
395
435
|
const selectedReranker = createReranker({
|
|
396
436
|
rerankerType,
|
|
397
437
|
jinaApiKey,
|
|
438
|
+
jinaApiUrl,
|
|
398
439
|
cohereApiKey,
|
|
399
440
|
logger,
|
|
400
441
|
});
|
|
@@ -411,7 +452,7 @@ export const createSearchTool = (
|
|
|
411
452
|
filterContent,
|
|
412
453
|
logger,
|
|
413
454
|
},
|
|
414
|
-
|
|
455
|
+
scraperInstance
|
|
415
456
|
);
|
|
416
457
|
|
|
417
458
|
const search = createSearchProcessor({
|
|
@@ -5,6 +5,7 @@ import type { BaseReranker } from './rerankers';
|
|
|
5
5
|
import { DATE_RANGE } from './schema';
|
|
6
6
|
|
|
7
7
|
export type SearchProvider = 'serper' | 'searxng';
|
|
8
|
+
export type ScraperProvider = 'firecrawl' | 'serper';
|
|
8
9
|
export type RerankerType = 'infinity' | 'jina' | 'cohere' | 'none';
|
|
9
10
|
|
|
10
11
|
export interface Highlight {
|
|
@@ -94,9 +95,18 @@ export interface ProcessSourcesConfig {
|
|
|
94
95
|
export interface FirecrawlConfig {
|
|
95
96
|
firecrawlApiKey?: string;
|
|
96
97
|
firecrawlApiUrl?: string;
|
|
98
|
+
firecrawlVersion?: string;
|
|
97
99
|
firecrawlOptions?: FirecrawlScraperConfig;
|
|
98
100
|
}
|
|
99
101
|
|
|
102
|
+
export interface SerperScraperConfig {
|
|
103
|
+
apiKey?: string;
|
|
104
|
+
apiUrl?: string;
|
|
105
|
+
timeout?: number;
|
|
106
|
+
logger?: Logger;
|
|
107
|
+
includeMarkdown?: boolean;
|
|
108
|
+
}
|
|
109
|
+
|
|
100
110
|
export interface ScraperContentResult {
|
|
101
111
|
content: string;
|
|
102
112
|
}
|
|
@@ -148,9 +158,12 @@ export interface SearchToolConfig
|
|
|
148
158
|
logger?: Logger;
|
|
149
159
|
safeSearch?: SafeSearchLevel;
|
|
150
160
|
jinaApiKey?: string;
|
|
161
|
+
jinaApiUrl?: string;
|
|
151
162
|
cohereApiKey?: string;
|
|
152
163
|
rerankerType?: RerankerType;
|
|
164
|
+
scraperProvider?: ScraperProvider;
|
|
153
165
|
scraperTimeout?: number;
|
|
166
|
+
serperScraperOptions?: SerperScraperConfig;
|
|
154
167
|
onSearchResults?: (
|
|
155
168
|
results: SearchResult,
|
|
156
169
|
runnableConfig?: RunnableConfig
|
|
@@ -169,9 +182,30 @@ export type UsedReferences = {
|
|
|
169
182
|
reference: MediaReference;
|
|
170
183
|
}[];
|
|
171
184
|
|
|
185
|
+
/** Base Scraper Interface */
|
|
186
|
+
export interface BaseScraper {
|
|
187
|
+
scrapeUrl(
|
|
188
|
+
url: string,
|
|
189
|
+
options?: unknown
|
|
190
|
+
): Promise<[string, FirecrawlScrapeResponse | SerperScrapeResponse]>;
|
|
191
|
+
extractContent(
|
|
192
|
+
response: FirecrawlScrapeResponse | SerperScrapeResponse
|
|
193
|
+
): [string, undefined | References];
|
|
194
|
+
extractMetadata(
|
|
195
|
+
response: FirecrawlScrapeResponse | SerperScrapeResponse
|
|
196
|
+
):
|
|
197
|
+
| ScrapeMetadata
|
|
198
|
+
| Record<string, string | number | boolean | null | undefined>;
|
|
199
|
+
}
|
|
200
|
+
|
|
172
201
|
/** Firecrawl */
|
|
173
202
|
export type FirecrawlScrapeOptions = Omit<
|
|
174
203
|
FirecrawlScraperConfig,
|
|
204
|
+
'apiKey' | 'apiUrl' | 'version' | 'logger'
|
|
205
|
+
>;
|
|
206
|
+
|
|
207
|
+
export type SerperScrapeOptions = Omit<
|
|
208
|
+
SerperScraperConfig,
|
|
175
209
|
'apiKey' | 'apiUrl' | 'logger'
|
|
176
210
|
>;
|
|
177
211
|
|
|
@@ -250,9 +284,21 @@ export interface FirecrawlScrapeResponse {
|
|
|
250
284
|
error?: string;
|
|
251
285
|
}
|
|
252
286
|
|
|
287
|
+
export interface SerperScrapeResponse {
|
|
288
|
+
success: boolean;
|
|
289
|
+
data?: {
|
|
290
|
+
text?: string;
|
|
291
|
+
markdown?: string;
|
|
292
|
+
metadata?: Record<string, string | number | boolean | null | undefined>;
|
|
293
|
+
credits?: number;
|
|
294
|
+
};
|
|
295
|
+
error?: string;
|
|
296
|
+
}
|
|
297
|
+
|
|
253
298
|
export interface FirecrawlScraperConfig {
|
|
254
299
|
apiKey?: string;
|
|
255
300
|
apiUrl?: string;
|
|
301
|
+
version?: string;
|
|
256
302
|
formats?: string[];
|
|
257
303
|
timeout?: number;
|
|
258
304
|
logger?: Logger;
|
package/src/types/graph.ts
CHANGED
|
@@ -334,7 +334,10 @@ export type GraphEdge = {
|
|
|
334
334
|
*/
|
|
335
335
|
prompt?:
|
|
336
336
|
| string
|
|
337
|
-
| ((
|
|
337
|
+
| ((
|
|
338
|
+
messages: BaseMessage[],
|
|
339
|
+
runStartIndex: number
|
|
340
|
+
) => string | Promise<string> | undefined);
|
|
338
341
|
/**
|
|
339
342
|
* When true, excludes messages from startIndex when adding prompt.
|
|
340
343
|
* Automatically set to true when {results} variable is used in prompt.
|
|
@@ -364,4 +367,6 @@ export interface AgentInputs {
|
|
|
364
367
|
clientOptions?: ClientOptions;
|
|
365
368
|
additional_instructions?: string;
|
|
366
369
|
reasoningKey?: 'reasoning_content' | 'reasoning';
|
|
370
|
+
/** Format content blocks as strings (for legacy compatibility i.e. Ollama/Azure Serverless) */
|
|
371
|
+
useLegacyContent?: boolean;
|
|
367
372
|
}
|