@elizaos/plugin-research 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71):
  1. package/README.md +400 -0
  2. package/dist/index.cjs +9366 -0
  3. package/dist/index.cjs.map +1 -0
  4. package/dist/index.js +9284 -0
  5. package/dist/index.js.map +1 -0
  6. package/package.json +80 -0
  7. package/src/__tests__/action-chaining.test.ts +532 -0
  8. package/src/__tests__/actions.test.ts +118 -0
  9. package/src/__tests__/cache-rate-limiter.test.ts +303 -0
  10. package/src/__tests__/content-extractors.test.ts +26 -0
  11. package/src/__tests__/deepresearch-bench-integration.test.ts +520 -0
  12. package/src/__tests__/deepresearch-bench-simplified.e2e.test.ts +290 -0
  13. package/src/__tests__/deepresearch-bench.e2e.test.ts +376 -0
  14. package/src/__tests__/e2e.test.ts +1870 -0
  15. package/src/__tests__/multi-benchmark-runner.ts +427 -0
  16. package/src/__tests__/providers.test.ts +156 -0
  17. package/src/__tests__/real-world.e2e.test.ts +788 -0
  18. package/src/__tests__/research-scenarios.test.ts +755 -0
  19. package/src/__tests__/research.e2e.test.ts +704 -0
  20. package/src/__tests__/research.test.ts +174 -0
  21. package/src/__tests__/search-providers.test.ts +174 -0
  22. package/src/__tests__/single-benchmark-runner.ts +735 -0
  23. package/src/__tests__/test-search-providers.ts +171 -0
  24. package/src/__tests__/verify-apis.test.ts +82 -0
  25. package/src/actions.ts +1677 -0
  26. package/src/benchmark/deepresearch-benchmark.ts +369 -0
  27. package/src/evaluation/research-evaluator.ts +444 -0
  28. package/src/examples/api-integration.md +498 -0
  29. package/src/examples/browserbase-integration.md +132 -0
  30. package/src/examples/debug-research-query.ts +162 -0
  31. package/src/examples/defi-code-scenarios.md +536 -0
  32. package/src/examples/defi-implementation-guide.md +454 -0
  33. package/src/examples/eliza-research-example.ts +142 -0
  34. package/src/examples/fix-renewable-energy-research.ts +209 -0
  35. package/src/examples/research-scenarios.md +408 -0
  36. package/src/examples/run-complete-renewable-research.ts +303 -0
  37. package/src/examples/run-deep-research.ts +352 -0
  38. package/src/examples/run-logged-research.ts +304 -0
  39. package/src/examples/run-real-research.ts +151 -0
  40. package/src/examples/save-research-output.ts +133 -0
  41. package/src/examples/test-file-logging.ts +199 -0
  42. package/src/examples/test-real-research.ts +67 -0
  43. package/src/examples/test-renewable-energy-research.ts +229 -0
  44. package/src/index.ts +28 -0
  45. package/src/integrations/cache.ts +128 -0
  46. package/src/integrations/content-extractors/firecrawl.ts +314 -0
  47. package/src/integrations/content-extractors/pdf-extractor.ts +350 -0
  48. package/src/integrations/content-extractors/playwright.ts +420 -0
  49. package/src/integrations/factory.ts +419 -0
  50. package/src/integrations/index.ts +18 -0
  51. package/src/integrations/rate-limiter.ts +181 -0
  52. package/src/integrations/search-providers/academic.ts +290 -0
  53. package/src/integrations/search-providers/exa.ts +205 -0
  54. package/src/integrations/search-providers/npm.ts +330 -0
  55. package/src/integrations/search-providers/pypi.ts +211 -0
  56. package/src/integrations/search-providers/serpapi.ts +277 -0
  57. package/src/integrations/search-providers/serper.ts +358 -0
  58. package/src/integrations/search-providers/stagehand-google.ts +87 -0
  59. package/src/integrations/search-providers/tavily.ts +187 -0
  60. package/src/processing/relevance-analyzer.ts +353 -0
  61. package/src/processing/research-logger.ts +450 -0
  62. package/src/processing/result-processor.ts +372 -0
  63. package/src/prompts/research-prompts.ts +419 -0
  64. package/src/providers/cacheProvider.ts +164 -0
  65. package/src/providers.ts +173 -0
  66. package/src/service.ts +2588 -0
  67. package/src/services/swe-bench.ts +286 -0
  68. package/src/strategies/research-strategies.ts +790 -0
  69. package/src/types/pdf-parse.d.ts +34 -0
  70. package/src/types.ts +551 -0
  71. package/src/verification/claim-verifier.ts +443 -0
@@ -0,0 +1,450 @@
1
+ import { elizaLogger, IAgentRuntime } from '@elizaos/core';
2
+ import { SearchResult, ResearchSource, ResearchFinding, ResearchProject } from '../types';
3
+ import { RelevanceScore, RelevanceAnalysis } from './relevance-analyzer';
4
+ import fs from 'fs/promises';
5
+ import path from 'path';
6
+
7
/**
 * Record of one search-provider query issued during a research session,
 * together with the per-result processing state that the logger updates
 * as the pipeline moves on to content and finding extraction.
 */
export interface SearchLog {
  /** Epoch milliseconds at which the search was logged. */
  timestamp: number;
  /** Identifier of the research project (session key). */
  projectId: string;
  /** Query string actually sent to the provider. */
  query: string;
  /** The session's original research query. */
  originalQuery: string;
  /** Name of the search provider that produced the results. */
  provider: string;
  /** Number of results returned by the provider. */
  resultsCount: number;
  /** One entry per returned result. */
  results: {
    title: string;
    url: string;
    snippet: string;
    /** Relevance assessment for this result, when one was supplied. */
    relevanceScore?: RelevanceScore;
    /** Set once a content-extraction attempt has been logged for this URL. */
    processed: boolean;
    /** Whether that content-extraction attempt succeeded. */
    contentExtracted: boolean;
    /** Number of findings logged for this URL. */
    findingsExtracted: number;
  }[];
}
24
+
25
/**
 * Record of one attempt to extract the content of a source URL.
 */
export interface ContentExtractionLog {
  /** Epoch milliseconds at which the attempt was logged. */
  timestamp: number;
  /** Identifier of the research project (session key). */
  projectId: string;
  /** URL whose content was extracted. */
  url: string;
  /** Title of the source. */
  sourceTitle: string;
  /** Extraction method, e.g. 'stagehand', 'firecrawl', 'playwright', 'pdf'. */
  method: string;
  /** Whether the extraction succeeded. */
  success: boolean;
  /** Length of the extracted content, in characters. */
  contentLength: number;
  /** Error description, when one was reported. */
  error?: string;
  /** Relevance assessment of the source, when one was supplied. */
  relevanceScore?: RelevanceScore;
}
36
+
37
/**
 * Record of the findings extracted from a single source's content.
 */
export interface FindingExtractionLog {
  /** Epoch milliseconds at which the extraction was logged. */
  timestamp: number;
  /** Identifier of the research project (session key). */
  projectId: string;
  /** URL of the source the findings came from. */
  sourceUrl: string;
  /** The session's original research query. */
  originalQuery: string;
  /** Length of the content the findings were extracted from. */
  contentLength: number;
  /** Number of findings extracted from this source. */
  findingsExtracted: number;
  /** The extracted findings, each optionally paired with a relevance assessment. */
  findings: {
    content: string;
    relevance: number;
    confidence: number;
    category: string;
    relevanceScore?: RelevanceScore;
  }[];
  /** Mean query alignment across `findings`. */
  queryAlignment: number;
}
53
+
54
/**
 * Everything recorded for one research project: the per-phase logs plus the
 * running counters that `ResearchLogger` updates as entries are added.
 */
export interface ResearchSession {
  /** Identifier of the research project (session key). */
  projectId: string;
  /** The research query the session was opened with. */
  originalQuery: string;
  /** Epoch milliseconds at which the session was initialized. */
  startTime: number;
  /** Analysis of the original query supplied when the session was opened. */
  queryAnalysis: RelevanceAnalysis;
  /** Logged searches, in the order they were recorded. */
  searchLogs: SearchLog[];
  /** Logged content-extraction attempts, in the order they were recorded. */
  extractionLogs: ContentExtractionLog[];
  /** Logged finding extractions, in the order they were recorded. */
  findingLogs: FindingExtractionLog[];
  /** Aggregate counters and the final assessment of the session. */
  summary: {
    totalSearches: number;
    totalResults: number;
    /** Results whose relevance score met the logger's relevance threshold. */
    relevantResults: number;
    /** Content-extraction attempts logged (successful or not). */
    totalSources: number;
    successfulExtractions: number;
    totalFindings: number;
    /** Findings whose relevance met the logger's relevance threshold. */
    relevantFindings: number;
    /** Mean query alignment over all finding logs; set on finalization. */
    overallRelevance: number;
    /** Gaps supplied on finalization. */
    gaps: string[];
    /** Recommendations supplied on finalization. */
    recommendations: string[];
  };
}
75
+
76
/**
 * Comprehensive logging system for research operations to track relevance
 * throughout the pipeline.
 *
 * A session is opened per project with {@link ResearchLogger.initializeSession},
 * fed by the `log*` methods as searches, content extractions and finding
 * extractions happen, and closed with {@link ResearchLogger.finalizeSession},
 * which writes a JSON log and a Markdown summary under `<cwd>/research_logs`.
 *
 * Persistence is best-effort: file-system failures are logged and never
 * propagated to the caller. Sessions stay in memory after finalization so
 * that {@link ResearchLogger.getSessionSummary} keeps working.
 */
export class ResearchLogger {
  /** Minimum `RelevanceScore.score` for a search result to count as relevant. */
  private static readonly RELEVANT_RESULT_THRESHOLD = 0.6;
  /** Minimum `relevance` for a finding to count as relevant. */
  private static readonly RELEVANT_FINDING_THRESHOLD = 0.7;

  private sessions: Map<string, ResearchSession> = new Map();
  private logsDir: string;

  constructor(private runtime: IAgentRuntime) {
    this.logsDir = path.join(process.cwd(), 'research_logs');
  }

  /**
   * Registers a fresh session for `projectId`, replacing any existing one,
   * and makes sure the logs directory exists.
   *
   * @param projectId - Key under which the session is stored.
   * @param originalQuery - The research query the session is about.
   * @param queryAnalysis - Analysis of the query, echoed into the summary report.
   */
  async initializeSession(
    projectId: string,
    originalQuery: string,
    queryAnalysis: RelevanceAnalysis
  ): Promise<void> {
    elizaLogger.info(`[ResearchLogger] Initializing session for project: ${projectId}`);

    const session: ResearchSession = {
      projectId,
      originalQuery,
      startTime: Date.now(),
      queryAnalysis,
      searchLogs: [],
      extractionLogs: [],
      findingLogs: [],
      summary: {
        totalSearches: 0,
        totalResults: 0,
        relevantResults: 0,
        totalSources: 0,
        successfulExtractions: 0,
        totalFindings: 0,
        relevantFindings: 0,
        overallRelevance: 0,
        gaps: [],
        recommendations: []
      }
    };

    this.sessions.set(projectId, session);

    // Best-effort, like saveSessionLog: an unwritable logs directory must not
    // abort the research run. The directory is created again before saving.
    try {
      await fs.mkdir(this.logsDir, { recursive: true });
    } catch (error) {
      elizaLogger.error('[ResearchLogger] Failed to create logs directory:', error);
    }
  }

  /**
   * Records one provider query and its results. Does nothing when no session
   * exists for `projectId`.
   *
   * @param projectId - Session key.
   * @param query - Query sent to the provider.
   * @param provider - Name of the search provider.
   * @param results - Results returned by the provider.
   * @param relevanceScores - Optional relevance scores keyed by result URL.
   */
  async logSearch(
    projectId: string,
    query: string,
    provider: string,
    results: SearchResult[],
    relevanceScores?: Map<string, RelevanceScore>
  ): Promise<void> {
    const session = this.sessions.get(projectId);
    if (!session) return;

    elizaLogger.info(`[ResearchLogger] Logging search: ${query} (${results.length} results)`);

    const searchLog: SearchLog = {
      timestamp: Date.now(),
      projectId,
      query,
      originalQuery: session.originalQuery,
      provider,
      resultsCount: results.length,
      results: results.map(result => ({
        title: result.title,
        url: result.url,
        snippet: result.snippet,
        relevanceScore: relevanceScores?.get(result.url),
        processed: false,
        contentExtracted: false,
        findingsExtracted: 0
      }))
    };

    session.searchLogs.push(searchLog);
    session.summary.totalSearches++;
    session.summary.totalResults += results.length;

    const relevantCount = this.countRelevantResults(searchLog);
    session.summary.relevantResults += relevantCount;

    elizaLogger.info(`[ResearchLogger] Search logged: ${relevantCount}/${results.length} relevant results`);
  }

  /**
   * Records one content-extraction attempt and flags every logged search
   * result with the same URL as processed. Does nothing when no session
   * exists for `projectId`.
   *
   * @param projectId - Session key.
   * @param url - URL whose content was extracted.
   * @param sourceTitle - Title of the source.
   * @param method - Extraction method used.
   * @param success - Whether the extraction succeeded.
   * @param contentLength - Length of the extracted content, in characters.
   * @param error - Optional error description.
   * @param relevanceScore - Optional relevance assessment of the source.
   */
  async logContentExtraction(
    projectId: string,
    url: string,
    sourceTitle: string,
    method: string,
    success: boolean,
    contentLength: number,
    error?: string,
    relevanceScore?: RelevanceScore
  ): Promise<void> {
    const session = this.sessions.get(projectId);
    if (!session) return;

    elizaLogger.info(`[ResearchLogger] Logging content extraction: ${url} (${success ? 'success' : 'failed'})`);

    const extractionLog: ContentExtractionLog = {
      timestamp: Date.now(),
      projectId,
      url,
      sourceTitle,
      method,
      success,
      contentLength,
      error,
      relevanceScore
    };

    session.extractionLogs.push(extractionLog);
    session.summary.totalSources++;

    if (success) {
      session.summary.successfulExtractions++;
    }

    this.updateSearchResults(session, url, result => {
      result.processed = true;
      result.contentExtracted = success;
    });

    elizaLogger.debug(`[ResearchLogger] Content extraction logged: ${sourceTitle} - ${contentLength} chars`);
  }

  /**
   * Records the findings extracted from one source and stores the finding
   * count on every logged search result with the same URL. Does nothing when
   * no session exists for `projectId`.
   *
   * @param projectId - Session key.
   * @param sourceUrl - URL of the source the findings came from.
   * @param contentLength - Length of the content the findings came from.
   * @param findings - The extracted findings.
   * @param findingRelevanceScores - Optional relevance scores keyed by finding content.
   */
  async logFindingExtraction(
    projectId: string,
    sourceUrl: string,
    contentLength: number,
    findings: Array<{
      content: string;
      relevance: number;
      confidence: number;
      category: string;
    }>,
    findingRelevanceScores?: Map<string, RelevanceScore>
  ): Promise<void> {
    const session = this.sessions.get(projectId);
    if (!session) return;

    elizaLogger.info(`[ResearchLogger] Logging finding extraction: ${sourceUrl} (${findings.length} findings)`);

    const findingsWithScores = findings.map(finding => ({
      ...finding,
      relevanceScore: findingRelevanceScores?.get(finding.content)
    }));

    // `??` rather than `||`: a scored alignment of exactly 0 is a real
    // measurement and must not be replaced by the finding's raw relevance.
    const alignmentSum = findingsWithScores.reduce(
      (sum, finding) => sum + (finding.relevanceScore?.queryAlignment ?? finding.relevance),
      0
    );

    const findingLog: FindingExtractionLog = {
      timestamp: Date.now(),
      projectId,
      sourceUrl,
      originalQuery: session.originalQuery,
      contentLength,
      findingsExtracted: findings.length,
      findings: findingsWithScores,
      queryAlignment: alignmentSum / Math.max(findings.length, 1)
    };

    session.findingLogs.push(findingLog);
    session.summary.totalFindings += findings.length;

    const relevantCount = findings.filter(
      finding => finding.relevance >= ResearchLogger.RELEVANT_FINDING_THRESHOLD
    ).length;
    session.summary.relevantFindings += relevantCount;

    this.updateSearchResults(session, sourceUrl, result => {
      result.findingsExtracted = findings.length;
    });

    elizaLogger.info(`[ResearchLogger] Finding extraction logged: ${relevantCount}/${findings.length} relevant findings`);
  }

  /**
   * Computes the overall relevance, stores the supplied gaps and
   * recommendations, persists the session to disk (best-effort) and logs a
   * summary.
   *
   * @param projectId - Session key.
   * @param gaps - Gaps identified in the research.
   * @param recommendations - Recommendations for the research.
   * @returns The finalized session (the same object kept in memory).
   * @throws Error when no session exists for `projectId`.
   */
  async finalizeSession(
    projectId: string,
    gaps: string[],
    recommendations: string[]
  ): Promise<ResearchSession> {
    const session = this.sessions.get(projectId);
    if (!session) throw new Error('Session not found');

    elizaLogger.info(`[ResearchLogger] Finalizing session for project: ${projectId}`);

    // Overall relevance is the mean query alignment across finding logs.
    const totalRelevanceScore = session.findingLogs.reduce((sum, log) => sum + log.queryAlignment, 0);
    session.summary.overallRelevance = totalRelevanceScore / Math.max(session.findingLogs.length, 1);

    session.summary.gaps = gaps;
    session.summary.recommendations = recommendations;

    await this.saveSessionLog(session);

    elizaLogger.info(`[ResearchLogger] Session summary for ${projectId}:`, {
      duration: Date.now() - session.startTime,
      searches: session.summary.totalSearches,
      results: session.summary.totalResults,
      relevantResults: session.summary.relevantResults,
      sources: session.summary.totalSources,
      successfulExtractions: session.summary.successfulExtractions,
      findings: session.summary.totalFindings,
      relevantFindings: session.summary.relevantFindings,
      overallRelevance: session.summary.overallRelevance,
      gaps: gaps.length,
      recommendations: recommendations.length
    });

    return session;
  }

  /**
   * Returns the in-memory session for `projectId`, or `undefined` when none
   * has been initialized.
   */
  getSessionSummary(projectId: string): ResearchSession | undefined {
    return this.sessions.get(projectId);
  }

  /** Applies `update` to every logged search result whose URL equals `url`. */
  private updateSearchResults(
    session: ResearchSession,
    url: string,
    update: (result: SearchLog['results'][number]) => void
  ): void {
    // The same URL can be returned by several searches; keep all of its
    // entries in sync instead of stopping at the first match.
    for (const searchLog of session.searchLogs) {
      for (const result of searchLog.results) {
        if (result.url === url) {
          update(result);
        }
      }
    }
  }

  /** Counts the results of one search whose score meets the relevance threshold (unscored counts as 0). */
  private countRelevantResults(searchLog: SearchLog): number {
    return searchLog.results.filter(
      result => (result.relevanceScore?.score ?? 0) >= ResearchLogger.RELEVANT_RESULT_THRESHOLD
    ).length;
  }

  /** Share of relevant/successful items per pipeline phase; empty phases yield 0. */
  private computePhaseRatios(session: ResearchSession): {
    searchResults: number;
    contentExtraction: number;
    findingExtraction: number;
  } {
    const { summary } = session;
    return {
      searchResults: summary.relevantResults / Math.max(summary.totalResults, 1),
      contentExtraction: summary.successfulExtractions / Math.max(summary.totalSources, 1),
      findingExtraction: summary.relevantFindings / Math.max(summary.totalFindings, 1)
    };
  }

  /**
   * Writes the session as `<timestamp>_<query>_research-log.json` plus a
   * `..._research-log_summary.md` report into the logs directory. Failures
   * are logged and swallowed.
   */
  private async saveSessionLog(session: ResearchSession): Promise<void> {
    try {
      const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
      const sanitizedQuery = session.originalQuery
        .toLowerCase()
        .replace(/[^a-z0-9]+/g, '-')
        .substring(0, 50);

      // Build both paths from one base name. Rewriting '.json' in the full
      // path would hit the first occurrence, which may be a directory name.
      const baseName = `${timestamp}_${sanitizedQuery}_research-log`;
      const filepath = path.join(this.logsDir, `${baseName}.json`);
      const summaryPath = path.join(this.logsDir, `${baseName}_summary.md`);

      const logData = {
        ...session,
        metadata: {
          savedAt: Date.now(),
          version: '1.0',
          description: 'Comprehensive research session log with relevance tracking'
        },
        analysis: {
          relevanceByPhase: this.computePhaseRatios(session),
          bottlenecks: this.identifyBottlenecks(session),
          recommendations: this.generateTechnicalRecommendations(session)
        }
      };

      // The directory may be missing if it could not be created at session
      // start or was removed since.
      await fs.mkdir(this.logsDir, { recursive: true });

      await fs.writeFile(filepath, JSON.stringify(logData, null, 2), 'utf-8');
      elizaLogger.info(`[ResearchLogger] Session log saved to: ${filepath}`);

      await fs.writeFile(summaryPath, this.generateSummaryReport(session), 'utf-8');
      elizaLogger.info(`[ResearchLogger] Summary report saved to: ${summaryPath}`);
    } catch (error) {
      elizaLogger.error('[ResearchLogger] Failed to save session log:', error);
    }
  }

  /** Lists the pipeline phases whose ratios fall below fixed thresholds. */
  private identifyBottlenecks(session: ResearchSession): string[] {
    const bottlenecks: string[] = [];
    const ratios = this.computePhaseRatios(session);

    if (ratios.searchResults < 0.5) {
      bottlenecks.push('Low search result relevance - improve query generation or search provider selection');
    }

    if (ratios.contentExtraction < 0.7) {
      bottlenecks.push('Poor content extraction success rate - improve extraction methods or fallbacks');
    }

    if (ratios.findingExtraction < 0.6) {
      bottlenecks.push('Low finding relevance - improve extraction prompts or relevance filtering');
    }

    if (session.summary.overallRelevance < 0.7) {
      bottlenecks.push('Overall low relevance - review entire pipeline for query alignment');
    }

    return bottlenecks;
  }

  /** Derives tuning recommendations from per-search and per-source averages. */
  private generateTechnicalRecommendations(session: ResearchSession): string[] {
    const recommendations: string[] = [];
    const { summary } = session;

    const avgResultsPerSearch = summary.totalResults / Math.max(summary.totalSearches, 1);
    if (avgResultsPerSearch < 10) {
      recommendations.push('Increase search breadth - too few results per search');
    }

    const avgFindingsPerSource = summary.totalFindings / Math.max(summary.successfulExtractions, 1);
    if (avgFindingsPerSource < 2) {
      recommendations.push('Improve finding extraction - too few findings per source');
    }

    if (summary.overallRelevance < 0.8) {
      recommendations.push('Enhance relevance filtering throughout pipeline');
    }

    return recommendations;
  }

  /** Renders the session as a Markdown report. */
  private generateSummaryReport(session: ResearchSession): string {
    const { summary, queryAnalysis } = session;
    const durationMin = Math.round((Date.now() - session.startTime) / 60000);
    const ratios = this.computePhaseRatios(session);
    const percent = (ratio: number): string => (ratio * 100).toFixed(1);

    const searchDetails = session.searchLogs
      .map((log, i) =>
        [
          '',
          `### Search ${i + 1}: ${log.query}`,
          `- Provider: ${log.provider}`,
          `- Results: ${log.resultsCount}`,
          `- Relevant: ${this.countRelevantResults(log)}`,
          ''
        ].join('\n')
      )
      .join('\n');

    const extractionDetails = session.extractionLogs
      .map((log, i) =>
        [
          '',
          `### Extraction ${i + 1}: ${log.sourceTitle}`,
          `- URL: ${log.url}`,
          `- Method: ${log.method}`,
          `- Success: ${log.success ? 'Yes' : 'No'}`,
          `- Content Length: ${log.contentLength} chars`,
          log.error ? `- Error: ${log.error}` : '',
          ''
        ].join('\n')
      )
      .join('\n');

    return [
      '# Research Session Summary',
      '',
      '## Query',
      `**Original Query:** ${session.originalQuery}`,
      '',
      `**Duration:** ${durationMin} minutes`,
      '',
      '## Query Analysis',
      `- **Intent:** ${queryAnalysis.queryIntent}`,
      `- **Key Topics:** ${queryAnalysis.keyTopics.join(', ')}`,
      `- **Required Elements:** ${queryAnalysis.requiredElements.join(', ')}`,
      '',
      '## Results Summary',
      `- **Searches:** ${summary.totalSearches}`,
      `- **Total Results:** ${summary.totalResults}`,
      `- **Relevant Results:** ${summary.relevantResults} (${percent(ratios.searchResults)}%)`,
      `- **Sources Processed:** ${summary.totalSources}`,
      `- **Successful Extractions:** ${summary.successfulExtractions} (${percent(ratios.contentExtraction)}%)`,
      `- **Total Findings:** ${summary.totalFindings}`,
      `- **Relevant Findings:** ${summary.relevantFindings} (${percent(ratios.findingExtraction)}%)`,
      '',
      '## Relevance Score',
      `**Overall Relevance:** ${percent(summary.overallRelevance)}%`,
      '',
      '## Identified Gaps',
      summary.gaps.map(gap => `- ${gap}`).join('\n'),
      '',
      '## Recommendations',
      summary.recommendations.map(rec => `- ${rec}`).join('\n'),
      '',
      '## Search Details',
      searchDetails,
      '',
      '## Content Extraction',
      extractionDetails,
      ''
    ].join('\n');
  }
}