@elizaos/plugin-research 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +400 -0
- package/dist/index.cjs +9366 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.js +9284 -0
- package/dist/index.js.map +1 -0
- package/package.json +80 -0
- package/src/__tests__/action-chaining.test.ts +532 -0
- package/src/__tests__/actions.test.ts +118 -0
- package/src/__tests__/cache-rate-limiter.test.ts +303 -0
- package/src/__tests__/content-extractors.test.ts +26 -0
- package/src/__tests__/deepresearch-bench-integration.test.ts +520 -0
- package/src/__tests__/deepresearch-bench-simplified.e2e.test.ts +290 -0
- package/src/__tests__/deepresearch-bench.e2e.test.ts +376 -0
- package/src/__tests__/e2e.test.ts +1870 -0
- package/src/__tests__/multi-benchmark-runner.ts +427 -0
- package/src/__tests__/providers.test.ts +156 -0
- package/src/__tests__/real-world.e2e.test.ts +788 -0
- package/src/__tests__/research-scenarios.test.ts +755 -0
- package/src/__tests__/research.e2e.test.ts +704 -0
- package/src/__tests__/research.test.ts +174 -0
- package/src/__tests__/search-providers.test.ts +174 -0
- package/src/__tests__/single-benchmark-runner.ts +735 -0
- package/src/__tests__/test-search-providers.ts +171 -0
- package/src/__tests__/verify-apis.test.ts +82 -0
- package/src/actions.ts +1677 -0
- package/src/benchmark/deepresearch-benchmark.ts +369 -0
- package/src/evaluation/research-evaluator.ts +444 -0
- package/src/examples/api-integration.md +498 -0
- package/src/examples/browserbase-integration.md +132 -0
- package/src/examples/debug-research-query.ts +162 -0
- package/src/examples/defi-code-scenarios.md +536 -0
- package/src/examples/defi-implementation-guide.md +454 -0
- package/src/examples/eliza-research-example.ts +142 -0
- package/src/examples/fix-renewable-energy-research.ts +209 -0
- package/src/examples/research-scenarios.md +408 -0
- package/src/examples/run-complete-renewable-research.ts +303 -0
- package/src/examples/run-deep-research.ts +352 -0
- package/src/examples/run-logged-research.ts +304 -0
- package/src/examples/run-real-research.ts +151 -0
- package/src/examples/save-research-output.ts +133 -0
- package/src/examples/test-file-logging.ts +199 -0
- package/src/examples/test-real-research.ts +67 -0
- package/src/examples/test-renewable-energy-research.ts +229 -0
- package/src/index.ts +28 -0
- package/src/integrations/cache.ts +128 -0
- package/src/integrations/content-extractors/firecrawl.ts +314 -0
- package/src/integrations/content-extractors/pdf-extractor.ts +350 -0
- package/src/integrations/content-extractors/playwright.ts +420 -0
- package/src/integrations/factory.ts +419 -0
- package/src/integrations/index.ts +18 -0
- package/src/integrations/rate-limiter.ts +181 -0
- package/src/integrations/search-providers/academic.ts +290 -0
- package/src/integrations/search-providers/exa.ts +205 -0
- package/src/integrations/search-providers/npm.ts +330 -0
- package/src/integrations/search-providers/pypi.ts +211 -0
- package/src/integrations/search-providers/serpapi.ts +277 -0
- package/src/integrations/search-providers/serper.ts +358 -0
- package/src/integrations/search-providers/stagehand-google.ts +87 -0
- package/src/integrations/search-providers/tavily.ts +187 -0
- package/src/processing/relevance-analyzer.ts +353 -0
- package/src/processing/research-logger.ts +450 -0
- package/src/processing/result-processor.ts +372 -0
- package/src/prompts/research-prompts.ts +419 -0
- package/src/providers/cacheProvider.ts +164 -0
- package/src/providers.ts +173 -0
- package/src/service.ts +2588 -0
- package/src/services/swe-bench.ts +286 -0
- package/src/strategies/research-strategies.ts +790 -0
- package/src/types/pdf-parse.d.ts +34 -0
- package/src/types.ts +551 -0
- package/src/verification/claim-verifier.ts +443 -0
|
@@ -0,0 +1,498 @@
|
|
|
1
|
+
# Deep Research Plugin - Real API Integration Guide
|
|
2
|
+
|
|
3
|
+
This guide shows how to integrate real search APIs with the deep research plugin for production use.
|
|
4
|
+
|
|
5
|
+
## Available Search API Providers
|
|
6
|
+
|
|
7
|
+
### 1. Tavily API Integration
|
|
8
|
+
|
|
9
|
+
Tavily provides a search API specifically designed for AI applications.
|
|
10
|
+
|
|
11
|
+
```typescript
|
|
12
|
+
// src/integrations/tavily.ts
|
|
13
|
+
import axios from 'axios';
|
|
14
|
+
import { SearchResult } from '../types';
|
|
15
|
+
|
|
16
|
+
/**
 * Search provider backed by the Tavily search API.
 *
 * Searching is best-effort: request or parsing failures are logged and an
 * empty result list is returned instead of an exception being thrown.
 */
export class TavilySearchProvider {
  private readonly apiKey: string;
  private readonly baseUrl = 'https://api.tavily.com/search';

  constructor(apiKey: string) {
    this.apiKey = apiKey;
  }

  /**
   * Runs an `advanced` depth search and maps the response to `SearchResult`s.
   *
   * @param query - Search query text.
   * @param maxResults - Maximum number of results to request (default 10).
   * @returns The mapped results, or an empty array when the request fails or
   *   the response carries no `results` array.
   */
  async search(query: string, maxResults: number = 10): Promise<SearchResult[]> {
    try {
      const response = await axios.post(this.baseUrl, {
        api_key: this.apiKey,
        query,
        search_depth: 'advanced',
        include_answer: true,
        include_raw_content: true,
        max_results: maxResults,
      });

      // A response without a results array is "no results", not a failure.
      const rawResults: unknown = response.data?.results;
      if (!Array.isArray(rawResults)) {
        return [];
      }

      return rawResults.map((result: any) => ({
        title: result.title,
        url: result.url,
        // Fall back to the first 200 characters of the content when the
        // result has no snippet of its own.
        snippet: result.snippet || result.content?.substring(0, 200),
        content: result.raw_content,
      }));
    } catch (error) {
      // Log only the message: the full Axios error object embeds the request
      // config, and the request body contains `api_key`.
      const reason = error instanceof Error ? error.message : String(error);
      console.error('Tavily search error:', reason);
      return [];
    }
  }
}
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
### 2. Serper API Integration
|
|
50
|
+
|
|
51
|
+
Serper provides Google search results via API.
|
|
52
|
+
|
|
53
|
+
```typescript
|
|
54
|
+
// src/integrations/serper.ts
|
|
55
|
+
import axios from 'axios';
|
|
56
|
+
import { SearchResult } from '../types';
|
|
57
|
+
|
|
58
|
+
/**
 * Search provider backed by the Serper (Google results) API.
 *
 * Searching is best-effort: request or parsing failures are logged and an
 * empty result list is returned instead of an exception being thrown.
 */
export class SerperSearchProvider {
  private readonly apiKey: string;
  private readonly baseUrl = 'https://google.serper.dev/search';

  constructor(apiKey: string) {
    this.apiKey = apiKey;
  }

  /**
   * Searches Serper and returns the knowledge-graph entry (when present)
   * followed by the organic results, truncated to `maxResults`.
   *
   * @param query - Search query text.
   * @param maxResults - Maximum number of results to request and return (default 10).
   * @returns The mapped results, or an empty array when the request fails.
   */
  async search(query: string, maxResults: number = 10): Promise<SearchResult[]> {
    try {
      const response = await axios.post(
        this.baseUrl,
        {
          q: query,
          num: maxResults,
        },
        {
          headers: {
            'X-API-KEY': this.apiKey,
            'Content-Type': 'application/json',
          },
        }
      );

      const results: SearchResult[] = [];

      // Process organic results (only when the field really is a list).
      const organic: unknown = response.data?.organic;
      if (Array.isArray(organic)) {
        results.push(
          ...organic.map((result: any) => ({
            title: result.title,
            url: result.link,
            snippet: result.snippet,
          }))
        );
      }

      // Put the knowledge graph entry first so it survives the slice below.
      const knowledgeGraph = response.data?.knowledgeGraph;
      if (knowledgeGraph) {
        results.unshift({
          title: knowledgeGraph.title,
          url: knowledgeGraph.website || '',
          snippet: knowledgeGraph.description,
        });
      }

      return results.slice(0, maxResults);
    } catch (error) {
      // Log only the message: the full Axios error object embeds the request
      // config, and its headers contain `X-API-KEY`.
      const reason = error instanceof Error ? error.message : String(error);
      console.error('Serper search error:', reason);
      return [];
    }
  }
}
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
### 3. Brave Search API Integration
|
|
112
|
+
|
|
113
|
+
```typescript
|
|
114
|
+
// src/integrations/brave.ts
|
|
115
|
+
import axios from 'axios';
|
|
116
|
+
import { SearchResult } from '../types';
|
|
117
|
+
|
|
118
|
+
/**
 * Search provider backed by the Brave Search web API.
 *
 * Searching is best-effort: request or parsing failures are logged and an
 * empty result list is returned instead of an exception being thrown.
 */
export class BraveSearchProvider {
  private readonly apiKey: string;
  private readonly baseUrl = 'https://api.search.brave.com/res/v1/web/search';

  constructor(apiKey: string) {
    this.apiKey = apiKey;
  }

  /**
   * Searches Brave and maps the web results to `SearchResult`s.
   *
   * @param query - Search query text.
   * @param maxResults - Maximum number of results to request (default 10).
   * @returns The mapped results, or an empty array when the request fails or
   *   the response has no web results.
   */
  async search(query: string, maxResults: number = 10): Promise<SearchResult[]> {
    try {
      const response = await axios.get(this.baseUrl, {
        headers: {
          'Accept': 'application/json',
          'X-Subscription-Token': this.apiKey,
        },
        params: {
          q: query,
          count: maxResults,
        },
      });

      // A response without a `web` section means "no web results"; treat it
      // as an empty list rather than letting a TypeError be logged as a failure.
      const webResults: unknown = response.data?.web?.results;
      if (!Array.isArray(webResults)) {
        return [];
      }

      return webResults.map((result: any) => ({
        title: result.title,
        url: result.url,
        snippet: result.description,
      }));
    } catch (error) {
      // Log only the message: the full Axios error object embeds the request
      // config, and its headers contain `X-Subscription-Token`.
      const reason = error instanceof Error ? error.message : String(error);
      console.error('Brave search error:', reason);
      return [];
    }
  }
}
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
## Content Extraction Integration
|
|
153
|
+
|
|
154
|
+
### 1. Firecrawl Integration
|
|
155
|
+
|
|
156
|
+
Firecrawl provides web scraping and content extraction.
|
|
157
|
+
|
|
158
|
+
```typescript
|
|
159
|
+
// src/integrations/firecrawl.ts
|
|
160
|
+
import axios from 'axios';
|
|
161
|
+
|
|
162
|
+
/**
 * Content extractor backed by the Firecrawl scrape endpoint.
 *
 * Extraction is best-effort: failures are logged and reported as `null`.
 */
export class FirecrawlContentExtractor {
  private readonly apiKey: string;
  private readonly baseUrl = 'https://api.firecrawl.dev/v0';

  constructor(apiKey: string) {
    this.apiKey = apiKey;
  }

  /**
   * Scrapes `url` and returns its content, preferring markdown.
   *
   * @param url - Page to scrape.
   * @returns The markdown (or, failing that, the `content` field) of the
   *   scrape result; `null` when the request fails or the response carries
   *   neither field.
   */
  async extractContent(url: string): Promise<string | null> {
    try {
      const response = await axios.post(
        `${this.baseUrl}/scrape`,
        { url },
        {
          headers: {
            'Authorization': `Bearer ${this.apiKey}`,
          },
        }
      );

      const payload = response.data?.data;
      const text: string | null | undefined = payload?.markdown || payload?.content;
      // Honor the declared `string | null` contract: never leak `undefined`.
      return text ?? null;
    } catch (error) {
      // Log only the message: the full Axios error object embeds the request
      // config, and its headers contain the Bearer token.
      const reason = error instanceof Error ? error.message : String(error);
      console.error('Firecrawl extraction error:', reason);
      return null;
    }
  }
}
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
### 2. Playwright/Puppeteer Integration
|
|
192
|
+
|
|
193
|
+
For JavaScript-heavy sites that require browser rendering:
|
|
194
|
+
|
|
195
|
+
```typescript
|
|
196
|
+
// src/integrations/browser-content.ts
|
|
197
|
+
import { chromium } from 'playwright';
|
|
198
|
+
|
|
199
|
+
/**
 * Content extractor that renders the page in headless Chromium (Playwright),
 * for JavaScript-heavy sites that need a real browser.
 */
export class BrowserContentExtractor {
  /**
   * Loads `url` in a fresh headless browser and returns its visible text.
   *
   * The text is taken from the first of `<main>`, `<article>` or
   * `[role="main"]` that yields non-empty text, falling back to the whole
   * `<body>`.
   *
   * @param url - Page to load.
   * @returns The extracted text, or `null` when loading or extraction fails.
   */
  async extractContent(url: string): Promise<string | null> {
    // Typed from `chromium.launch` itself so no extra type import is needed.
    let browser: Awaited<ReturnType<typeof chromium.launch>> | undefined;

    try {
      browser = await chromium.launch({ headless: true });
      const page = await browser.newPage();

      // Set a reasonable timeout
      await page.goto(url, { waitUntil: 'networkidle', timeout: 30000 });

      // Wait for content to load
      await page.waitForSelector('body', { timeout: 10000 });

      // Extract main content (this callback runs inside the page)
      const content = await page.evaluate(() => {
        // Remove script and style elements so their source is not part of the text
        document.querySelectorAll('script, style').forEach((el) => el.remove());

        // Try the usual main-content containers before falling back to <body>.
        // The attribute selector needs an explicit HTMLElement type argument:
        // plain `querySelector(string)` is typed as Element, which has no innerText.
        const mainContent =
          document.querySelector('main')?.innerText ||
          document.querySelector('article')?.innerText ||
          document.querySelector<HTMLElement>('[role="main"]')?.innerText ||
          document.body.innerText;

        return mainContent;
      });

      return content;
    } catch (error) {
      console.error('Browser extraction error:', error);
      return null;
    } finally {
      // Always release the browser process, even when extraction failed.
      if (browser) {
        await browser.close();
      }
    }
  }
}
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
## Updating the Research Service
|
|
243
|
+
|
|
244
|
+
To integrate these providers into the ResearchService:
|
|
245
|
+
|
|
246
|
+
```typescript
|
|
247
|
+
// Update src/service.ts
|
|
248
|
+
|
|
249
|
+
import { TavilySearchProvider } from './integrations/tavily';
|
|
250
|
+
import { SerperSearchProvider } from './integrations/serper';
|
|
251
|
+
import { FirecrawlContentExtractor } from './integrations/firecrawl';
|
|
252
|
+
import { BrowserContentExtractor } from './integrations/browser-content';
|
|
253
|
+
|
|
254
|
+
/**
 * Research service wired to real search and content-extraction providers.
 *
 * Provider selection happens once, in the constructor, from runtime settings.
 */
export class ResearchService extends Service {
  private searchProvider: TavilySearchProvider | SerperSearchProvider | null = null;
  private contentExtractor: FirecrawlContentExtractor | BrowserContentExtractor | null = null;

  constructor(runtime: IAgentRuntime, config?: ResearchConfig) {
    super(runtime);

    // Search provider: Tavily wins over Serper; stays null when neither key is set.
    const tavilyKey = runtime.getSetting('TAVILY_API_KEY');
    const serperKey = runtime.getSetting('SERPER_API_KEY');
    this.searchProvider = tavilyKey
      ? new TavilySearchProvider(tavilyKey)
      : serperKey
        ? new SerperSearchProvider(serperKey)
        : null;

    // Content extractor: Firecrawl when a key is set, otherwise a local browser.
    const firecrawlKey = runtime.getSetting('FIRECRAWL_API_KEY');
    this.contentExtractor = firecrawlKey
      ? new FirecrawlContentExtractor(firecrawlKey)
      : new BrowserContentExtractor();
  }

  /** Searches with the configured provider, or returns mock results when none is configured. */
  private async performWebSearch(query: string): Promise<SearchResult[]> {
    if (!this.searchProvider) {
      // Fallback to mock results if no provider configured
      return this.generateMockResults(query);
    }

    return this.searchProvider.search(query, this.researchConfig.maxSearchResults);
  }

  /** Extracts page content with the configured extractor, or returns mock content when none is set. */
  private async fetchWebContent(url: string): Promise<string | null> {
    if (!this.contentExtractor) {
      // Fallback to mock content
      return this.generateMockContent(url);
    }

    return this.contentExtractor.extractContent(url);
  }
}
|
|
298
|
+
```
|
|
299
|
+
|
|
300
|
+
## Environment Configuration
|
|
301
|
+
|
|
302
|
+
Add these to your `.env` file:
|
|
303
|
+
|
|
304
|
+
```bash
|
|
305
|
+
# Search API keys (set at least one; the ResearchService example above prefers Tavily, then Serper)
|
|
306
|
+
TAVILY_API_KEY=your_tavily_api_key_here
|
|
307
|
+
SERPER_API_KEY=your_serper_api_key_here
|
|
308
|
+
BRAVE_SEARCH_API_KEY=your_brave_api_key_here
|
|
309
|
+
|
|
310
|
+
# Content Extraction
|
|
311
|
+
FIRECRAWL_API_KEY=your_firecrawl_api_key_here
|
|
312
|
+
|
|
313
|
+
# Optional: Configure search preferences
|
|
314
|
+
RESEARCH_MAX_RESULTS=10
|
|
315
|
+
RESEARCH_TIMEOUT=300000
|
|
316
|
+
RESEARCH_ENABLE_CITATIONS=true
|
|
317
|
+
RESEARCH_LANGUAGE=en
|
|
318
|
+
```
|
|
319
|
+
|
|
320
|
+
## Production Deployment Considerations
|
|
321
|
+
|
|
322
|
+
### 1. Rate Limiting
|
|
323
|
+
Most APIs have rate limits. Implement rate limiting:
|
|
324
|
+
|
|
325
|
+
```typescript
|
|
326
|
+
import { RateLimiter } from 'limiter';
|
|
327
|
+
|
|
328
|
+
/** Search provider wrapper that throttles outgoing searches with a token bucket. */
class RateLimitedSearchProvider {
  private limiter: RateLimiter;

  /**
   * @param requestsPerMinute - Number of searches allowed per minute (default 60).
   */
  constructor(requestsPerMinute: number = 60) {
    const perMinuteBucket = new RateLimiter({
      tokensPerInterval: requestsPerMinute,
      interval: 'minute',
    });
    this.limiter = perMinuteBucket;
  }

  /** Takes one token from the limiter, then performs the search. */
  async search(query: string): Promise<SearchResult[]> {
    // Each search costs exactly one token.
    await this.limiter.removeTokens(1);

    return this.performSearch(query);
  }
}
|
|
343
|
+
```
|
|
344
|
+
|
|
345
|
+
### 2. Caching
|
|
346
|
+
Cache search results to reduce API calls:
|
|
347
|
+
|
|
348
|
+
```typescript
|
|
349
|
+
import { LRUCache } from 'lru-cache';
|
|
350
|
+
|
|
351
|
+
/** Search provider wrapper that memoizes results per query string in an LRU cache. */
class CachedSearchProvider {
  private cache: LRUCache<string, SearchResult[]>;

  constructor() {
    // Up to 1000 distinct queries, each kept for one hour.
    this.cache = new LRUCache({
      max: 1000,
      ttl: 1000 * 60 * 60, // 1 hour
    });
  }

  /** Returns the cached results for `query`, searching and caching them on a miss. */
  async search(query: string): Promise<SearchResult[]> {
    const hit = this.cache.get(query);

    if (!hit) {
      // Cache miss: run the real search and remember the answer.
      const fresh = await this.performSearch(query);
      this.cache.set(query, fresh);
      return fresh;
    }

    return hit;
  }
}
|
|
370
|
+
```
|
|
371
|
+
|
|
372
|
+
### 3. Error Handling and Fallbacks
|
|
373
|
+
Implement robust error handling with fallbacks:
|
|
374
|
+
|
|
375
|
+
```typescript
|
|
376
|
+
/** Tries each configured provider in order until one returns results. */
class ResilientSearchService {
  private providers: SearchProvider[] = [];

  /**
   * Returns the first non-empty result list produced by a provider.
   * A provider that throws is logged and skipped; an empty array is returned
   * when no provider produces results.
   */
  async search(query: string): Promise<SearchResult[]> {
    for (const candidate of this.providers) {
      let found: SearchResult[];

      try {
        found = await candidate.search(query);
      } catch (error) {
        console.error(`Provider ${candidate.name} failed:`, error);
        // Continue to next provider
        continue;
      }

      // An empty answer counts as a miss; fall through to the next provider.
      if (found.length > 0) {
        return found;
      }
    }

    // All providers failed, return empty results
    return [];
  }
}
|
|
394
|
+
```
|
|
395
|
+
|
|
396
|
+
## Testing with Real APIs
|
|
397
|
+
|
|
398
|
+
Update your E2E tests to use real APIs:
|
|
399
|
+
|
|
400
|
+
```typescript
|
|
401
|
+
// src/tests/real-api-test.ts
|
|
402
|
+
import { ResearchService } from '../service';
|
|
403
|
+
|
|
404
|
+
// End-to-end check against real search APIs; requires TAVILY_API_KEY or
// SERPER_API_KEY in the environment.
describe('Real API Integration Tests', () => {
  // How long the research run may take before the test gives up on it.
  const completionTimeoutMs = 120000; // 2 minutes
  // The test itself needs a larger budget than the wait above; otherwise the
  // test runner's default per-test timeout aborts the test first.
  const testTimeoutMs = completionTimeoutMs + 10000;

  let service: ResearchService;

  beforeAll(() => {
    // Ensure API keys are set in environment
    if (!process.env.TAVILY_API_KEY && !process.env.SERPER_API_KEY) {
      throw new Error('No search API keys configured for testing');
    }

    service = new ResearchService(runtime);
  });

  it(
    'should perform real web search',
    async () => {
      const project = await service.createResearchProject(
        'Latest TypeScript features in 2024'
      );

      // Wait for research to complete
      await waitForCompletion(project.id, completionTimeoutMs);

      const completed = await service.getProject(project.id);
      expect(completed.sources.length).toBeGreaterThan(0);
      expect(completed.findings.length).toBeGreaterThan(0);
      expect(completed.report).toBeDefined();
    },
    testTimeoutMs
  );
});
|
|
430
|
+
```
|
|
431
|
+
|
|
432
|
+
## Monitoring and Observability
|
|
433
|
+
|
|
434
|
+
Add logging and metrics:
|
|
435
|
+
|
|
436
|
+
```typescript
|
|
437
|
+
import { Logger } from 'winston';
|
|
438
|
+
import { Counter, Histogram } from 'prom-client';
|
|
439
|
+
|
|
440
|
+
// Counts every search, labelled by provider and by outcome ('success' | 'error').
const searchCounter = new Counter({
  name: 'research_searches_total',
  help: 'Total number of searches performed',
  labelNames: ['provider', 'status'],
});

// Records how long each search takes, labelled by provider.
const searchDuration = new Histogram({
  name: 'research_search_duration_seconds',
  help: 'Duration of search operations',
  labelNames: ['provider'],
});

/** Search provider wrapper that records a counter and a duration for every search. */
class MonitoredSearchProvider {
  /**
   * Performs the search while timing it; errors are counted and rethrown
   * unchanged.
   */
  async search(query: string): Promise<SearchResult[]> {
    const stopTimer = searchDuration.startTimer({ provider: this.name });

    try {
      const found = await this.performSearch(query);
      searchCounter.inc({ provider: this.name, status: 'success' });
      return found;
    } catch (failure) {
      searchCounter.inc({ provider: this.name, status: 'error' });
      throw failure;
    } finally {
      // Observe the duration on both the success and the error path.
      stopTimer();
    }
  }
}
|
|
468
|
+
```
|
|
469
|
+
|
|
470
|
+
## Cost Optimization
|
|
471
|
+
|
|
472
|
+
Different APIs have different pricing models:
|
|
473
|
+
|
|
474
|
+
- **Tavily**: $0.001 per search (as of 2024)
|
|
475
|
+
- **Serper**: $0.05 per 100 searches
|
|
476
|
+
- **Firecrawl**: $0.001 per page scraped
|
|
477
|
+
|
|
478
|
+
Implement cost tracking and budgets:
|
|
479
|
+
|
|
480
|
+
```typescript
|
|
481
|
+
/** Search service that refuses to search once a monthly spending budget would be exceeded. */
class CostAwareSearchService {
  private monthlyBudget: number;
  private currentSpend: number = 0;

  /**
   * @param monthlyBudget - Maximum total spend allowed. Defaults to no limit,
   *   which matches the behavior of constructing the service without a budget.
   * @throws RangeError when `monthlyBudget` is negative or NaN.
   */
  constructor(monthlyBudget: number = Number.POSITIVE_INFINITY) {
    if (Number.isNaN(monthlyBudget) || monthlyBudget < 0) {
      throw new RangeError('monthlyBudget must be a non-negative number');
    }
    // Without an initial value the comparison in search() would be against
    // `undefined`, which is always false, so the budget was never enforced.
    this.monthlyBudget = monthlyBudget;
  }

  /**
   * Performs a search if its estimated cost still fits in the budget.
   *
   * @throws Error when the estimated cost would push spend over the budget.
   */
  async search(query: string): Promise<SearchResult[]> {
    const estimatedCost = this.calculateSearchCost();

    if (this.currentSpend + estimatedCost > this.monthlyBudget) {
      throw new Error('Monthly search budget exceeded');
    }

    const results = await this.performSearch(query);
    // Spend is recorded only after the search succeeds, so a failed search
    // is not counted.
    this.currentSpend += estimatedCost;

    return results;
  }
}
|
|
498
|
+
```
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
# Browserbase Integration for Research Plugin
|
|
2
|
+
|
|
3
|
+
## Overview
|
|
4
|
+
|
|
5
|
+
The Research Plugin automatically detects and uses browserbase when available, providing enhanced web scraping capabilities that bypass blocks and CAPTCHAs.
|
|
6
|
+
|
|
7
|
+
## Setup
|
|
8
|
+
|
|
9
|
+
1. Install both plugins:
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
bun add @elizaos/plugin-research @elizaos/plugin-stagehand
|
|
13
|
+
# or
|
|
14
|
+
npm install @elizaos/plugin-research @elizaos/plugin-stagehand
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
2. Configure environment variables:
|
|
18
|
+
|
|
19
|
+
```env
|
|
20
|
+
# Browserbase Configuration (Optional but recommended)
|
|
21
|
+
BROWSERBASE_API_KEY=your_api_key
|
|
22
|
+
BROWSERBASE_PROJECT_ID=your_project_id
|
|
23
|
+
|
|
24
|
+
# Optional: Use specific AI model for extraction
|
|
25
|
+
OPENAI_API_KEY=your_openai_key
|
|
26
|
+
# or
|
|
27
|
+
ANTHROPIC_API_KEY=your_anthropic_key
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
3. Load both plugins in your ElizaOS config:
|
|
31
|
+
|
|
32
|
+
```typescript
|
|
33
|
+
import { researchPlugin } from '@elizaos/plugin-research';
|
|
34
|
+
import { stagehandPlugin } from '@elizaos/plugin-stagehand';
|
|
35
|
+
|
|
36
|
+
// Plugin order matters: stagehandPlugin (browserbase) is loaded first so that
// researchPlugin can auto-detect it.
const config = {
  plugins: [stagehandPlugin, researchPlugin],
};
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## How It Works
|
|
45
|
+
|
|
46
|
+
When browserbase is available, the research plugin will:
|
|
47
|
+
|
|
48
|
+
1. **Use Stagehand Google Search** instead of DuckDuckGo for better results
|
|
49
|
+
2. **Use Stagehand Content Extraction** with AI-powered extraction instead of Playwright
|
|
50
|
+
3. **Bypass blocks and CAPTCHAs** automatically
|
|
51
|
+
|
|
52
|
+
## Benefits
|
|
53
|
+
|
|
54
|
+
### Without Browserbase (Default)
|
|
55
|
+
|
|
56
|
+
- Uses DuckDuckGo (free but limited)
|
|
57
|
+
- Uses Playwright (often gets blocked)
|
|
58
|
+
- Times out on many sites (Forbes, WSJ, etc.)
|
|
59
|
+
- No CAPTCHA solving
|
|
60
|
+
|
|
61
|
+
### With Browserbase
|
|
62
|
+
|
|
63
|
+
- Uses Google search via Stagehand
|
|
64
|
+
- AI-powered content extraction
|
|
65
|
+
- Bypasses most anti-bot measures
|
|
66
|
+
- Handles CAPTCHAs automatically
|
|
67
|
+
- Much faster and more reliable
|
|
68
|
+
|
|
69
|
+
## Example Usage
|
|
70
|
+
|
|
71
|
+
```typescript
|
|
72
|
+
// The agent will automatically use browserbase if available
|
|
73
|
+
const response = await agent.sendMessage({ text: 'start_research Latest AI breakthroughs in 2024' });
|
|
76
|
+
|
|
77
|
+
// You'll see in logs:
|
|
78
|
+
// "Using Stagehand Google search provider"
|
|
79
|
+
// "Using Stagehand content extractor (via browserbase)"
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
## Fallback Chain
|
|
83
|
+
|
|
84
|
+
The research plugin has a smart fallback system:
|
|
85
|
+
|
|
86
|
+
1. **Search Providers** (in order):
|
|
87
|
+
|
|
88
|
+
- Tavily (if API key provided)
|
|
89
|
+
- Serper (if API key provided)
|
|
90
|
+
- Stagehand/Google (if browserbase available)
|
|
91
|
+
- DuckDuckGo (always available, no API needed)
|
|
92
|
+
|
|
93
|
+
2. **Content Extractors** (in order):
|
|
94
|
+
- Stagehand (if browserbase available)
|
|
95
|
+
- Firecrawl (if API key provided)
|
|
96
|
+
- Playwright (always available but often blocked)
|
|
97
|
+
|
|
98
|
+
## Testing
|
|
99
|
+
|
|
100
|
+
Run the included test to verify browserbase integration:
|
|
101
|
+
|
|
102
|
+
```bash
|
|
103
|
+
cd packages/plugin-research
|
|
104
|
+
./test-research-live.sh
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
You should see:
|
|
108
|
+
|
|
109
|
+
```
|
|
110
|
+
📋 Configuration:
|
|
111
|
+
Search Provider: Stagehand/Google
|
|
112
|
+
Content Extractor: Stagehand/Browserbase
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
## Troubleshooting
|
|
116
|
+
|
|
117
|
+
### "No stagehand service found"
|
|
118
|
+
|
|
119
|
+
- Ensure stagehandPlugin is loaded BEFORE researchPlugin
|
|
120
|
+
- Check that the Stagehand (browserbase) plugin initialized correctly
|
|
121
|
+
|
|
122
|
+
### Still getting blocked
|
|
123
|
+
|
|
124
|
+
- Verify BROWSERBASE_API_KEY is set correctly
|
|
125
|
+
- Check browserbase dashboard for usage/errors
|
|
126
|
+
- Some sites may still block even with browserbase
|
|
127
|
+
|
|
128
|
+
### Slow performance
|
|
129
|
+
|
|
130
|
+
- Browserbase sessions take time to initialize
|
|
131
|
+
- Consider caching results for repeated queries
|
|
132
|
+
- Use fewer search results (maxSearchResults: 5)
|