@librechat/agents 2.4.82 → 2.4.84
- package/dist/cjs/graphs/Graph.cjs +3 -0
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/tools/search/firecrawl.cjs +3 -1
- package/dist/cjs/tools/search/firecrawl.cjs.map +1 -1
- package/dist/cjs/tools/search/search.cjs +5 -5
- package/dist/cjs/tools/search/search.cjs.map +1 -1
- package/dist/cjs/tools/search/serper-scraper.cjs +132 -0
- package/dist/cjs/tools/search/serper-scraper.cjs.map +1 -0
- package/dist/cjs/tools/search/tool.cjs +45 -9
- package/dist/cjs/tools/search/tool.cjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +3 -0
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/tools/search/firecrawl.mjs +3 -1
- package/dist/esm/tools/search/firecrawl.mjs.map +1 -1
- package/dist/esm/tools/search/search.mjs +5 -5
- package/dist/esm/tools/search/search.mjs.map +1 -1
- package/dist/esm/tools/search/serper-scraper.mjs +129 -0
- package/dist/esm/tools/search/serper-scraper.mjs.map +1 -0
- package/dist/esm/tools/search/tool.mjs +45 -9
- package/dist/esm/tools/search/tool.mjs.map +1 -1
- package/dist/types/tools/search/firecrawl.d.ts +2 -1
- package/dist/types/tools/search/search.d.ts +1 -2
- package/dist/types/tools/search/serper-scraper.d.ts +59 -0
- package/dist/types/tools/search/tool.d.ts +21 -0
- package/dist/types/tools/search/types.d.ts +30 -1
- package/package.json +2 -2
- package/src/graphs/Graph.ts +2 -0
- package/src/scripts/search.ts +5 -1
- package/src/tools/search/firecrawl.ts +5 -2
- package/src/tools/search/search.ts +6 -8
- package/src/tools/search/serper-scraper.ts +155 -0
- package/src/tools/search/tool.ts +47 -8
- package/src/tools/search/types.ts +45 -0
- package/src/utils/llmConfig.ts +10 -0
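
In short, 2.4.84 makes the search tool's scraping layer pluggable: it adds a BaseScraper interface, a Serper-backed scraper (serper-scraper.ts), a scraperProvider option on createSearchTool (defaulting to the existing Firecrawl scraper), a version option for the Firecrawl API path, a 'post-reasoning' key for streamed content in StandardGraph, and a zhipu entry in the script LLM configs. A minimal usage sketch, assuming createSearchTool is re-exported from the package entry point (the option names come straight from this diff):

```typescript
import { createSearchTool } from '@librechat/agents'; // entry-point export is an assumption

const searchTool = createSearchTool({
  searchProvider: 'serper',
  scraperProvider: 'serper', // new in 2.4.84; defaults to 'firecrawl'
  serperApiKey: process.env.SERPER_API_KEY,
  serperScraperOptions: { includeMarkdown: true, timeout: 10000 },
});
```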

package/dist/types/tools/search/search.d.ts
CHANGED
@@ -1,9 +1,8 @@
 import type * as t from './types';
-import { FirecrawlScraper } from './firecrawl';
 export declare const createSearchAPI: (config: t.SearchConfig) => {
     getSources: (params: t.GetSourcesParams) => Promise<t.SearchResult>;
 };
-export declare const createSourceProcessor: (config?: t.ProcessSourcesConfig, scraperInstance?:
+export declare const createSourceProcessor: (config?: t.ProcessSourcesConfig, scraperInstance?: t.BaseScraper) => {
     processSources: (fields: t.ProcessSourcesFields) => Promise<t.SearchResultData>;
     topResults: number;
 };

package/dist/types/tools/search/serper-scraper.d.ts
ADDED
@@ -0,0 +1,59 @@
+import type * as t from './types';
+/**
+ * Serper scraper implementation
+ * Uses the Serper Scrape API (https://scrape.serper.dev) to scrape web pages
+ *
+ * Features:
+ * - Simple API with single endpoint
+ * - Returns both text and markdown content
+ * - Includes metadata from scraped pages
+ * - Credits-based pricing model
+ *
+ * @example
+ * ```typescript
+ * const scraper = createSerperScraper({
+ *   apiKey: 'your-serper-api-key',
+ *   includeMarkdown: true,
+ *   timeout: 10000
+ * });
+ *
+ * const [url, response] = await scraper.scrapeUrl('https://example.com');
+ * if (response.success) {
+ *   const [content] = scraper.extractContent(response);
+ *   console.log(content);
+ * }
+ * ```
+ */
+export declare class SerperScraper implements t.BaseScraper {
+    private apiKey;
+    private apiUrl;
+    private timeout;
+    private logger;
+    private includeMarkdown;
+    constructor(config?: t.SerperScraperConfig);
+    /**
+     * Scrape a single URL
+     * @param url URL to scrape
+     * @param options Scrape options
+     * @returns Scrape response
+     */
+    scrapeUrl(url: string, options?: t.SerperScrapeOptions): Promise<[string, t.SerperScrapeResponse]>;
+    /**
+     * Extract content from scrape response
+     * @param response Scrape response
+     * @returns Extracted content or empty string if not available
+     */
+    extractContent(response: t.SerperScrapeResponse): [string, undefined | t.References];
+    /**
+     * Extract metadata from scrape response
+     * @param response Scrape response
+     * @returns Metadata object
+     */
+    extractMetadata(response: t.SerperScrapeResponse): Record<string, string | number | boolean | null | undefined>;
+}
+/**
+ * Create a Serper scraper instance
+ * @param config Scraper configuration
+ * @returns Serper scraper instance
+ */
+export declare const createSerperScraper: (config?: t.SerperScraperConfig) => SerperScraper;

package/dist/types/tools/search/tool.d.ts
CHANGED
@@ -6,6 +6,27 @@ import { DATE_RANGE } from './schema';
  * Creates a search tool with a schema that dynamically includes the country field
  * only when the searchProvider is 'serper'.
  *
+ * Supports multiple scraper providers:
+ * - Firecrawl (default): Full-featured web scraping with multiple formats
+ * - Serper: Lightweight scraping using Serper's scrape API
+ *
+ * @example
+ * ```typescript
+ * // Using Firecrawl scraper (default)
+ * const searchTool = createSearchTool({
+ *   searchProvider: 'serper',
+ *   scraperProvider: 'firecrawl',
+ *   firecrawlApiKey: 'your-firecrawl-key'
+ * });
+ *
+ * // Using Serper scraper
+ * const searchTool = createSearchTool({
+ *   searchProvider: 'serper',
+ *   scraperProvider: 'serper',
+ *   serperApiKey: 'your-serper-key'
+ * });
+ * ```
+ *
  * @param config - The search tool configuration
  * @returns A DynamicStructuredTool with a schema that depends on the searchProvider
  */

package/dist/types/tools/search/types.d.ts
CHANGED
@@ -4,6 +4,7 @@ import type { RunnableConfig } from '@langchain/core/runnables';
 import type { BaseReranker } from './rerankers';
 import { DATE_RANGE } from './schema';
 export type SearchProvider = 'serper' | 'searxng';
+export type ScraperProvider = 'firecrawl' | 'serper';
 export type RerankerType = 'infinity' | 'jina' | 'cohere' | 'none';
 export interface Highlight {
     score: number;
@@ -85,8 +86,16 @@ export interface ProcessSourcesConfig {
 export interface FirecrawlConfig {
     firecrawlApiKey?: string;
     firecrawlApiUrl?: string;
+    firecrawlVersion?: string;
     firecrawlOptions?: FirecrawlScraperConfig;
 }
+export interface SerperScraperConfig {
+    apiKey?: string;
+    apiUrl?: string;
+    timeout?: number;
+    logger?: Logger;
+    includeMarkdown?: boolean;
+}
 export interface ScraperContentResult {
     content: string;
 }
@@ -133,7 +142,9 @@ export interface SearchToolConfig extends SearchConfig, ProcessSourcesConfig, Fi
     jinaApiUrl?: string;
     cohereApiKey?: string;
     rerankerType?: RerankerType;
+    scraperProvider?: ScraperProvider;
     scraperTimeout?: number;
+    serperScraperOptions?: SerperScraperConfig;
     onSearchResults?: (results: SearchResult, runnableConfig?: RunnableConfig) => void;
     onGetHighlights?: (link: string) => void;
 }
@@ -147,8 +158,15 @@ export type UsedReferences = {
     originalIndex: number;
     reference: MediaReference;
 }[];
+/** Base Scraper Interface */
+export interface BaseScraper {
+    scrapeUrl(url: string, options?: unknown): Promise<[string, FirecrawlScrapeResponse | SerperScrapeResponse]>;
+    extractContent(response: FirecrawlScrapeResponse | SerperScrapeResponse): [string, undefined | References];
+    extractMetadata(response: FirecrawlScrapeResponse | SerperScrapeResponse): ScrapeMetadata | Record<string, string | number | boolean | null | undefined>;
+}
 /** Firecrawl */
-export type FirecrawlScrapeOptions = Omit<FirecrawlScraperConfig, 'apiKey' | 'apiUrl' | 'logger'>;
+export type FirecrawlScrapeOptions = Omit<FirecrawlScraperConfig, 'apiKey' | 'apiUrl' | 'version' | 'logger'>;
+export type SerperScrapeOptions = Omit<SerperScraperConfig, 'apiKey' | 'apiUrl' | 'logger'>;
 export interface ScrapeMetadata {
     sourceURL?: string;
     url?: string;
@@ -214,9 +232,20 @@ export interface FirecrawlScrapeResponse {
     };
     error?: string;
 }
+export interface SerperScrapeResponse {
+    success: boolean;
+    data?: {
+        text?: string;
+        markdown?: string;
+        metadata?: Record<string, string | number | boolean | null | undefined>;
+        credits?: number;
+    };
+    error?: string;
+}
 export interface FirecrawlScraperConfig {
     apiKey?: string;
     apiUrl?: string;
+    version?: string;
     formats?: string[];
     timeout?: number;
     logger?: Logger;
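
The BaseScraper interface above is what lets createSourceProcessor accept any scraper. A minimal sketch of a custom implementation; StaticScraper is hypothetical, and the types are restated locally so the snippet is self-contained (in the package they come from './types'):

```typescript
// Response shape mirrors SerperScrapeResponse from the declarations above.
interface ScrapeResponse {
  success: boolean;
  data?: { text?: string; markdown?: string };
  error?: string;
}

// Hypothetical scraper satisfying the BaseScraper shape (scrapeUrl,
// extractContent, extractMetadata) declared in types.d.ts above.
class StaticScraper {
  async scrapeUrl(
    url: string,
    _options?: unknown
  ): Promise<[string, ScrapeResponse]> {
    // Pretend every page scrapes to a fixed string.
    return [url, { success: true, data: { text: `Contents of ${url}` } }];
  }

  extractContent(response: ScrapeResponse): [string, undefined] {
    // Prefer markdown, fall back to text, else empty — the same policy
    // the SerperScraper implementation below uses.
    return [response.data?.markdown ?? response.data?.text ?? '', undefined];
  }

  extractMetadata(
    response: ScrapeResponse
  ): Record<string, string | number | boolean | null | undefined> {
    return response.success ? { static: true } : {};
  }
}
```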
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@librechat/agents",
-  "version": "2.4.82",
+  "version": "2.4.84",
   "main": "./dist/cjs/main.cjs",
   "module": "./dist/esm/main.mjs",
   "types": "./dist/types/index.d.ts",
@@ -47,7 +47,7 @@
   "image": "node -r dotenv/config --loader ./tsconfig-paths-bootstrap.mjs --experimental-specifier-resolution=node ./src/scripts/image.ts --provider 'google' --name 'Jo' --location 'New York, NY'",
   "code_exec_files": "node -r dotenv/config --loader ./tsconfig-paths-bootstrap.mjs --experimental-specifier-resolution=node ./src/scripts/code_exec_files.ts --provider 'openAI' --name 'Jo' --location 'New York, NY'",
   "code_exec_simple": "node -r dotenv/config --loader ./tsconfig-paths-bootstrap.mjs --experimental-specifier-resolution=node ./src/scripts/code_exec_simple.ts --provider 'google' --name 'Jo' --location 'New York, NY'",
-  "simple": "node -r dotenv/config --loader ./tsconfig-paths-bootstrap.mjs --experimental-specifier-resolution=node ./src/scripts/simple.ts --provider '
+  "simple": "node -r dotenv/config --loader ./tsconfig-paths-bootstrap.mjs --experimental-specifier-resolution=node ./src/scripts/simple.ts --provider 'zhipu' --name 'Jo' --location 'New York, NY'",
   "caching": "node -r dotenv/config --loader ./tsconfig-paths-bootstrap.mjs --experimental-specifier-resolution=node ./src/scripts/caching.ts --name 'Jo' --location 'New York, NY'",
   "thinking": "node -r dotenv/config --loader ./tsconfig-paths-bootstrap.mjs --experimental-specifier-resolution=node ./src/scripts/thinking.ts --name 'Jo' --location 'New York, NY'",
   "memory": "node -r dotenv/config --loader ./tsconfig-paths-bootstrap.mjs --experimental-specifier-resolution=node ./src/scripts/memory.ts --provider 'openAI' --name 'Jo' --location 'New York, NY'",
package/src/graphs/Graph.ts
CHANGED
@@ -312,6 +312,8 @@ export class StandardGraph extends Graph<t.BaseGraphState, GraphNode> {
       this.currentTokenType === 'think_and_text'
     ) {
       keyList.push('reasoning');
+    } else if (this.tokenTypeSwitch === 'content') {
+      keyList.push('post-reasoning');
     }

     if (this.invokedToolIds != null && this.invokedToolIds.size > 0) {
package/src/scripts/search.ts
CHANGED
@@ -83,7 +83,11 @@ async function testStandardStreaming(): Promise<void> {
     graphConfig: {
       type: 'standard',
       llmConfig,
-      tools: [
+      tools: [
+        createSearchTool({
+          scraperProvider: 'serper',
+        }),
+      ],
       instructions:
         'You are a friendly AI assistant. Always address the user by their name.',
       // additional_instructions: `The user's name is ${userName} and they are located in ${location}.`,

package/src/tools/search/firecrawl.ts
CHANGED
@@ -7,9 +7,10 @@ import { createDefaultLogger } from './utils';
  * Firecrawl scraper implementation
  * Uses the Firecrawl API to scrape web pages
  */
-export class FirecrawlScraper {
+export class FirecrawlScraper implements t.BaseScraper {
   private apiKey: string;
   private apiUrl: string;
+  private version: string;
   private defaultFormats: string[];
   private timeout: number;
   private logger: t.Logger;
@@ -32,11 +33,13 @@ export class FirecrawlScraper {
   constructor(config: t.FirecrawlScraperConfig = {}) {
     this.apiKey = config.apiKey ?? process.env.FIRECRAWL_API_KEY ?? '';

+    this.version = config.version ?? 'v2';
+
     const baseUrl =
       config.apiUrl ??
       process.env.FIRECRAWL_BASE_URL ??
       'https://api.firecrawl.dev';
-    this.apiUrl = `${baseUrl.replace(/\/+$/, '')}/
+    this.apiUrl = `${baseUrl.replace(/\/+$/, '')}/${this.version}/scrape`;

     this.defaultFormats = config.formats ?? ['markdown', 'rawHtml'];
     this.timeout = config.timeout ?? 7500;

package/src/tools/search/search.ts
CHANGED
@@ -2,7 +2,6 @@ import axios from 'axios';
 import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
 import type * as t from './types';
 import { getAttribution, createDefaultLogger } from './utils';
-import { FirecrawlScraper } from './firecrawl';
 import { BaseReranker } from './rerankers';

 const chunker = {
@@ -434,7 +433,7 @@ export const createSearchAPI = (

 export const createSourceProcessor = (
   config: t.ProcessSourcesConfig = {},
-  scraperInstance?:
+  scraperInstance?: t.BaseScraper
 ): {
   processSources: (
     fields: t.ProcessSourcesFields
@@ -442,7 +441,7 @@ export const createSourceProcessor = (
   topResults: number;
 } => {
   if (!scraperInstance) {
-    throw new Error('
+    throw new Error('Scraper instance is required');
   }
   const {
     topResults = 5,
@@ -453,7 +452,7 @@ export const createSourceProcessor = (
   } = config;

   const logger_ = logger || createDefaultLogger();
-  const
+  const scraper = scraperInstance;

   const webScraper = {
     scrapeMany: async ({
@@ -465,12 +464,12 @@ export const createSourceProcessor = (
       links: string[];
       onGetHighlights: t.SearchToolConfig['onGetHighlights'];
     }): Promise<Array<t.ScrapeResult>> => {
-      logger_.debug(`Scraping ${links.length} links
+      logger_.debug(`Scraping ${links.length} links`);
       const promises: Array<Promise<t.ScrapeResult>> = [];
       try {
         for (let i = 0; i < links.length; i++) {
           const currentLink = links[i];
-          const promise: Promise<t.ScrapeResult> =
+          const promise: Promise<t.ScrapeResult> = scraper
             .scrapeUrl(currentLink, {})
             .then(([url, response]) => {
               const attribution = getAttribution(
@@ -479,8 +478,7 @@ export const createSourceProcessor = (
                 logger_
               );
               if (response.success && response.data) {
-                const [content, references] =
-                  firecrawlScraper.extractContent(response);
+                const [content, references] = scraper.extractContent(response);
                 return {
                   url,
                   references,
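
With the FirecrawlScraper import gone, createSourceProcessor is now scraper-agnostic and throws when no instance is supplied. A sketch of injecting the new scraper (relative import paths as in the package source):

```typescript
import { createSourceProcessor } from './search';
import { createSerperScraper } from './serper-scraper';

// Omitting the second argument now throws 'Scraper instance is required'.
const { processSources, topResults } = createSourceProcessor(
  { topResults: 5 },
  createSerperScraper()
);
console.log(topResults); // 5
```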

package/src/tools/search/serper-scraper.ts
ADDED
@@ -0,0 +1,155 @@
+import axios from 'axios';
+import type * as t from './types';
+import { createDefaultLogger } from './utils';
+
+/**
+ * Serper scraper implementation
+ * Uses the Serper Scrape API (https://scrape.serper.dev) to scrape web pages
+ *
+ * Features:
+ * - Simple API with single endpoint
+ * - Returns both text and markdown content
+ * - Includes metadata from scraped pages
+ * - Credits-based pricing model
+ *
+ * @example
+ * ```typescript
+ * const scraper = createSerperScraper({
+ *   apiKey: 'your-serper-api-key',
+ *   includeMarkdown: true,
+ *   timeout: 10000
+ * });
+ *
+ * const [url, response] = await scraper.scrapeUrl('https://example.com');
+ * if (response.success) {
+ *   const [content] = scraper.extractContent(response);
+ *   console.log(content);
+ * }
+ * ```
+ */
+export class SerperScraper implements t.BaseScraper {
+  private apiKey: string;
+  private apiUrl: string;
+  private timeout: number;
+  private logger: t.Logger;
+  private includeMarkdown: boolean;
+
+  constructor(config: t.SerperScraperConfig = {}) {
+    this.apiKey = config.apiKey ?? process.env.SERPER_API_KEY ?? '';
+
+    this.apiUrl =
+      config.apiUrl ??
+      process.env.SERPER_SCRAPE_URL ??
+      'https://scrape.serper.dev';
+
+    this.timeout = config.timeout ?? 7500;
+    this.includeMarkdown = config.includeMarkdown ?? true;
+
+    this.logger = config.logger || createDefaultLogger();
+
+    if (!this.apiKey) {
+      this.logger.warn('SERPER_API_KEY is not set. Scraping will not work.');
+    }
+
+    this.logger.debug(
+      `Serper scraper initialized with API URL: ${this.apiUrl}`
+    );
+  }
+
+  /**
+   * Scrape a single URL
+   * @param url URL to scrape
+   * @param options Scrape options
+   * @returns Scrape response
+   */
+  async scrapeUrl(
+    url: string,
+    options: t.SerperScrapeOptions = {}
+  ): Promise<[string, t.SerperScrapeResponse]> {
+    if (!this.apiKey) {
+      return [
+        url,
+        {
+          success: false,
+          error: 'SERPER_API_KEY is not set',
+        },
+      ];
+    }
+
+    try {
+      const payload = {
+        url,
+        includeMarkdown: options.includeMarkdown ?? this.includeMarkdown,
+      };
+
+      const response = await axios.post(this.apiUrl, payload, {
+        headers: {
+          'X-API-KEY': this.apiKey,
+          'Content-Type': 'application/json',
+        },
+        timeout: options.timeout ?? this.timeout,
+      });
+
+      return [url, { success: true, data: response.data }];
+    } catch (error) {
+      const errorMessage =
+        error instanceof Error ? error.message : String(error);
+      return [
+        url,
+        {
+          success: false,
+          error: `Serper Scrape API request failed: ${errorMessage}`,
+        },
+      ];
+    }
+  }
+
+  /**
+   * Extract content from scrape response
+   * @param response Scrape response
+   * @returns Extracted content or empty string if not available
+   */
+  extractContent(
+    response: t.SerperScrapeResponse
+  ): [string, undefined | t.References] {
+    if (!response.success || !response.data) {
+      return ['', undefined];
+    }
+
+    if (response.data.markdown != null) {
+      return [response.data.markdown, undefined];
+    }
+
+    if (response.data.text != null) {
+      return [response.data.text, undefined];
+    }
+
+    return ['', undefined];
+  }
+
+  /**
+   * Extract metadata from scrape response
+   * @param response Scrape response
+   * @returns Metadata object
+   */
+  extractMetadata(
+    response: t.SerperScrapeResponse
+  ): Record<string, string | number | boolean | null | undefined> {
+    if (!response.success || !response.data || !response.data.metadata) {
+      return {};
+    }
+
+    return response.data.metadata;
+  }
+}
+
+/**
+ * Create a Serper scraper instance
+ * @param config Scraper configuration
+ * @returns Serper scraper instance
+ */
+export const createSerperScraper = (
+  config: t.SerperScraperConfig = {}
+): SerperScraper => {
+  return new SerperScraper(config);
+};
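
A short usage sketch of the new scraper beyond the JSDoc example, showing the non-throwing error path and extractMetadata (import path as in the package source; SERPER_API_KEY is read from the environment when no apiKey is passed):

```typescript
import { createSerperScraper } from './serper-scraper';

async function demo(): Promise<void> {
  const scraper = createSerperScraper({ includeMarkdown: true });

  // scrapeUrl never throws: failures come back as { success: false, error }.
  const [url, response] = await scraper.scrapeUrl('https://example.com');
  if (!response.success) {
    console.error(`Failed to scrape ${url}: ${response.error}`);
    return;
  }

  const [content] = scraper.extractContent(response); // markdown, else text, else ''
  const metadata = scraper.extractMetadata(response); // {} when unavailable
  console.log(metadata, content.slice(0, 200));
}

void demo();
```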
package/src/tools/search/tool.ts
CHANGED
@@ -12,6 +12,7 @@ import {
   newsSchema,
 } from './schema';
 import { createSearchAPI, createSourceProcessor } from './search';
+import { createSerperScraper } from './serper-scraper';
 import { createFirecrawlScraper } from './firecrawl';
 import { expandHighlights } from './highlights';
 import { formatResultsForLLM } from './format';
@@ -328,6 +329,27 @@ Use anchor marker(s) immediately after the statement:
  * Creates a search tool with a schema that dynamically includes the country field
  * only when the searchProvider is 'serper'.
  *
+ * Supports multiple scraper providers:
+ * - Firecrawl (default): Full-featured web scraping with multiple formats
+ * - Serper: Lightweight scraping using Serper's scrape API
+ *
+ * @example
+ * ```typescript
+ * // Using Firecrawl scraper (default)
+ * const searchTool = createSearchTool({
+ *   searchProvider: 'serper',
+ *   scraperProvider: 'firecrawl',
+ *   firecrawlApiKey: 'your-firecrawl-key'
+ * });
+ *
+ * // Using Serper scraper
+ * const searchTool = createSearchTool({
+ *   searchProvider: 'serper',
+ *   scraperProvider: 'serper',
+ *   serperApiKey: 'your-serper-key'
+ * });
+ * ```
+ *
  * @param config - The search tool configuration
  * @returns A DynamicStructuredTool with a schema that depends on the searchProvider
  */
@@ -344,9 +366,12 @@ export const createSearchTool = (
   strategies = ['no_extraction'],
   filterContent = true,
   safeSearch = 1,
+  scraperProvider = 'firecrawl',
   firecrawlApiKey,
   firecrawlApiUrl,
+  firecrawlVersion,
   firecrawlOptions,
+  serperScraperOptions,
   scraperTimeout,
   jinaApiKey,
   jinaApiUrl,
@@ -385,13 +410,27 @@ export const createSearchTool = (
     searxngApiKey,
   });

-
-
-
-
-
-
-
+  /** Create scraper based on scraperProvider */
+  let scraperInstance: t.BaseScraper;
+
+  if (scraperProvider === 'serper') {
+    scraperInstance = createSerperScraper({
+      ...serperScraperOptions,
+      apiKey: serperApiKey,
+      timeout: scraperTimeout ?? serperScraperOptions?.timeout,
+      logger,
+    });
+  } else {
+    scraperInstance = createFirecrawlScraper({
+      ...firecrawlOptions,
+      apiKey: firecrawlApiKey ?? process.env.FIRECRAWL_API_KEY,
+      apiUrl: firecrawlApiUrl,
+      version: firecrawlVersion,
+      timeout: scraperTimeout ?? firecrawlOptions?.timeout,
+      formats: firecrawlOptions?.formats ?? ['markdown', 'rawHtml'],
+      logger,
+    });
+  }

   const selectedReranker = createReranker({
     rerankerType,
@@ -413,7 +452,7 @@ export const createSearchTool = (
       filterContent,
       logger,
     },
-
+    scraperInstance
   );

   const search = createSearchProcessor({
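
Note the timeout precedence in the selection block above: the shared scraperTimeout wins over the provider-specific option, and because the spreads come first, the named fields override anything in serperScraperOptions or firecrawlOptions. A condensed restatement (names local to this sketch):

```typescript
type SerperOpts = { timeout?: number; includeMarkdown?: boolean };

function resolveTimeout(
  scraperTimeout?: number,
  serperScraperOptions?: SerperOpts
): number | undefined {
  // Same expression as in createSearchTool above.
  return scraperTimeout ?? serperScraperOptions?.timeout;
}

console.log(resolveTimeout(undefined, { timeout: 5000 })); // 5000
console.log(resolveTimeout(9000, { timeout: 5000 })); // 9000 — shared setting wins
```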

package/src/tools/search/types.ts
CHANGED
@@ -5,6 +5,7 @@ import type { BaseReranker } from './rerankers';
 import { DATE_RANGE } from './schema';

 export type SearchProvider = 'serper' | 'searxng';
+export type ScraperProvider = 'firecrawl' | 'serper';
 export type RerankerType = 'infinity' | 'jina' | 'cohere' | 'none';

 export interface Highlight {
@@ -94,9 +95,18 @@ export interface ProcessSourcesConfig {
 export interface FirecrawlConfig {
   firecrawlApiKey?: string;
   firecrawlApiUrl?: string;
+  firecrawlVersion?: string;
   firecrawlOptions?: FirecrawlScraperConfig;
 }

+export interface SerperScraperConfig {
+  apiKey?: string;
+  apiUrl?: string;
+  timeout?: number;
+  logger?: Logger;
+  includeMarkdown?: boolean;
+}
+
 export interface ScraperContentResult {
   content: string;
 }
@@ -151,7 +161,9 @@ export interface SearchToolConfig
   jinaApiUrl?: string;
   cohereApiKey?: string;
   rerankerType?: RerankerType;
+  scraperProvider?: ScraperProvider;
   scraperTimeout?: number;
+  serperScraperOptions?: SerperScraperConfig;
   onSearchResults?: (
     results: SearchResult,
     runnableConfig?: RunnableConfig
@@ -170,9 +182,30 @@ export type UsedReferences = {
   reference: MediaReference;
 }[];

+/** Base Scraper Interface */
+export interface BaseScraper {
+  scrapeUrl(
+    url: string,
+    options?: unknown
+  ): Promise<[string, FirecrawlScrapeResponse | SerperScrapeResponse]>;
+  extractContent(
+    response: FirecrawlScrapeResponse | SerperScrapeResponse
+  ): [string, undefined | References];
+  extractMetadata(
+    response: FirecrawlScrapeResponse | SerperScrapeResponse
+  ):
+    | ScrapeMetadata
+    | Record<string, string | number | boolean | null | undefined>;
+}
+
 /** Firecrawl */
 export type FirecrawlScrapeOptions = Omit<
   FirecrawlScraperConfig,
+  'apiKey' | 'apiUrl' | 'version' | 'logger'
+>;
+
+export type SerperScrapeOptions = Omit<
+  SerperScraperConfig,
   'apiKey' | 'apiUrl' | 'logger'
 >;

@@ -251,9 +284,21 @@ export interface FirecrawlScrapeResponse {
   error?: string;
 }

+export interface SerperScrapeResponse {
+  success: boolean;
+  data?: {
+    text?: string;
+    markdown?: string;
+    metadata?: Record<string, string | number | boolean | null | undefined>;
+    credits?: number;
+  };
+  error?: string;
+}
+
 export interface FirecrawlScraperConfig {
   apiKey?: string;
   apiUrl?: string;
+  version?: string;
   formats?: string[];
   timeout?: number;
   logger?: Logger;
package/src/utils/llmConfig.ts
CHANGED
@@ -94,6 +94,16 @@ export const llmConfigs: Record<string, t.LLMConfig | undefined> = {
       baseURL: 'http://192.168.254.183:1233/v1',
     },
   },
+  zhipu: {
+    provider: Providers.OPENAI,
+    streaming: true,
+    streamUsage: false,
+    model: 'glm-4.5-air',
+    apiKey: process.env.ZHIPU_API_KEY,
+    configuration: {
+      baseURL: 'https://open.bigmodel.cn/api/paas/v4',
+    },
+  },
   [Providers.DEEPSEEK]: {
     provider: Providers.DEEPSEEK,
     model: 'deepseek-reasoner',
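
The new zhipu entry rides the OpenAI-compatible path (GLM via open.bigmodel.cn), which is why provider is Providers.OPENAI with a custom baseURL; the package.json "simple" script above now selects it with --provider 'zhipu'. A sketch of the kind of lookup the scripts perform (import path as in the package source):

```typescript
import { llmConfigs } from './llmConfig';

const llmConfig = llmConfigs['zhipu'];
if (!llmConfig) {
  throw new Error('No LLM config registered for provider "zhipu"');
}
console.log(llmConfig.model); // 'glm-4.5-air'
```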