@elizaos/plugin-research 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. package/README.md +400 -0
  2. package/dist/index.cjs +9366 -0
  3. package/dist/index.cjs.map +1 -0
  4. package/dist/index.js +9284 -0
  5. package/dist/index.js.map +1 -0
  6. package/package.json +80 -0
  7. package/src/__tests__/action-chaining.test.ts +532 -0
  8. package/src/__tests__/actions.test.ts +118 -0
  9. package/src/__tests__/cache-rate-limiter.test.ts +303 -0
  10. package/src/__tests__/content-extractors.test.ts +26 -0
  11. package/src/__tests__/deepresearch-bench-integration.test.ts +520 -0
  12. package/src/__tests__/deepresearch-bench-simplified.e2e.test.ts +290 -0
  13. package/src/__tests__/deepresearch-bench.e2e.test.ts +376 -0
  14. package/src/__tests__/e2e.test.ts +1870 -0
  15. package/src/__tests__/multi-benchmark-runner.ts +427 -0
  16. package/src/__tests__/providers.test.ts +156 -0
  17. package/src/__tests__/real-world.e2e.test.ts +788 -0
  18. package/src/__tests__/research-scenarios.test.ts +755 -0
  19. package/src/__tests__/research.e2e.test.ts +704 -0
  20. package/src/__tests__/research.test.ts +174 -0
  21. package/src/__tests__/search-providers.test.ts +174 -0
  22. package/src/__tests__/single-benchmark-runner.ts +735 -0
  23. package/src/__tests__/test-search-providers.ts +171 -0
  24. package/src/__tests__/verify-apis.test.ts +82 -0
  25. package/src/actions.ts +1677 -0
  26. package/src/benchmark/deepresearch-benchmark.ts +369 -0
  27. package/src/evaluation/research-evaluator.ts +444 -0
  28. package/src/examples/api-integration.md +498 -0
  29. package/src/examples/browserbase-integration.md +132 -0
  30. package/src/examples/debug-research-query.ts +162 -0
  31. package/src/examples/defi-code-scenarios.md +536 -0
  32. package/src/examples/defi-implementation-guide.md +454 -0
  33. package/src/examples/eliza-research-example.ts +142 -0
  34. package/src/examples/fix-renewable-energy-research.ts +209 -0
  35. package/src/examples/research-scenarios.md +408 -0
  36. package/src/examples/run-complete-renewable-research.ts +303 -0
  37. package/src/examples/run-deep-research.ts +352 -0
  38. package/src/examples/run-logged-research.ts +304 -0
  39. package/src/examples/run-real-research.ts +151 -0
  40. package/src/examples/save-research-output.ts +133 -0
  41. package/src/examples/test-file-logging.ts +199 -0
  42. package/src/examples/test-real-research.ts +67 -0
  43. package/src/examples/test-renewable-energy-research.ts +229 -0
  44. package/src/index.ts +28 -0
  45. package/src/integrations/cache.ts +128 -0
  46. package/src/integrations/content-extractors/firecrawl.ts +314 -0
  47. package/src/integrations/content-extractors/pdf-extractor.ts +350 -0
  48. package/src/integrations/content-extractors/playwright.ts +420 -0
  49. package/src/integrations/factory.ts +419 -0
  50. package/src/integrations/index.ts +18 -0
  51. package/src/integrations/rate-limiter.ts +181 -0
  52. package/src/integrations/search-providers/academic.ts +290 -0
  53. package/src/integrations/search-providers/exa.ts +205 -0
  54. package/src/integrations/search-providers/npm.ts +330 -0
  55. package/src/integrations/search-providers/pypi.ts +211 -0
  56. package/src/integrations/search-providers/serpapi.ts +277 -0
  57. package/src/integrations/search-providers/serper.ts +358 -0
  58. package/src/integrations/search-providers/stagehand-google.ts +87 -0
  59. package/src/integrations/search-providers/tavily.ts +187 -0
  60. package/src/processing/relevance-analyzer.ts +353 -0
  61. package/src/processing/research-logger.ts +450 -0
  62. package/src/processing/result-processor.ts +372 -0
  63. package/src/prompts/research-prompts.ts +419 -0
  64. package/src/providers/cacheProvider.ts +164 -0
  65. package/src/providers.ts +173 -0
  66. package/src/service.ts +2588 -0
  67. package/src/services/swe-bench.ts +286 -0
  68. package/src/strategies/research-strategies.ts +790 -0
  69. package/src/types/pdf-parse.d.ts +34 -0
  70. package/src/types.ts +551 -0
  71. package/src/verification/claim-verifier.ts +443 -0
@@ -0,0 +1,443 @@
1
+ import { IAgentRuntime, elizaLogger } from '@elizaos/core';
2
+ import crypto from 'crypto';
3
+ import { RESEARCH_PROMPTS, formatPrompt, getPromptConfig } from '../prompts/research-prompts';
4
+ import {
5
+ FactualClaim,
6
+ ResearchSource,
7
+ VerificationStatus
8
+ } from '../types';
9
+
10
/**
 * Outcome of checking one claim against one source.
 */
export interface VerificationEvidence {
  /** URL of the source the claim was checked against. */
  sourceUrl: string;
  /** Portion of the source text that was shown to the model; empty when verification failed. */
  relevantExcerpt: string;
  /** Whether the source was judged to support the claim. */
  supports: boolean;
  /** Confidence reported for the judgement; 0 when verification failed. */
  confidence: number;
  /** Explanation accompanying the judgement. */
  reasoning: string;
}
17
+
18
/**
 * Combined verdict for a claim after checking it against its primary source
 * and any related sources.
 */
export interface CrossReferenceResult {
  /** The claim that was verified. */
  claim: FactualClaim;
  /** Evidence obtained from the claim's primary source. */
  primaryEvidence: VerificationEvidence;
  /** Evidence from related sources that were judged to support the claim. */
  corroboratingEvidence: VerificationEvidence[];
  /** Evidence from related sources that confidently failed to support the claim. */
  contradictingEvidence: VerificationEvidence[];
  /** Overall status derived from the supporting/contradicting tallies. */
  overallVerificationStatus: VerificationStatus;
  /** Confidence aggregated over all counted evidence, clamped to [0, 1]. */
  aggregateConfidence: number;
  /** How strongly the consulted sources agree with each other. */
  consensusLevel: 'strong' | 'moderate' | 'weak' | 'disputed';
}
27
+
28
/**
 * Reliability profile for a source domain.
 *
 * NOTE(review): nothing in this file populates these records; the field
 * descriptions below follow the field names only — confirm against the producer.
 */
export interface SourceReliability {
  /** Domain the profile applies to. */
  domain: string;
  /** Overall reliability score (scale not established in this file). */
  score: number;
  /** Individual signals that feed the score. */
  factors: {
    /** Whether the source is peer reviewed. */
    peerReviewed: boolean;
    /** Whether author credentials are available. */
    authorCredentials: boolean;
    /** Reputation of the publication (scale not established in this file). */
    publicationReputation: number;
    /** Number of citations attributed to the source. */
    citationCount: number;
  };
}
38
+
39
/**
 * Claim verification that re-reads source material rather than trusting a
 * claim's own citations.
 *
 * For each claim the verifier asks the model whether the primary source
 * supports it, repeats the check against up to five topically related sources,
 * and folds the individual answers into one {@link CrossReferenceResult}.
 * Results are memoised in memory, keyed by claim statement and source URLs.
 */
export class ClaimVerifier {
  /** Stored content shorter than this triggers a fresh extraction. */
  private static readonly MIN_CONTENT_LENGTH = 1000;
  /** Upper bound on the excerpt handed to the model, in characters. */
  private static readonly MAX_EXCERPT_LENGTH = 5000;
  /** Number of best-matching sentences used to build an excerpt. */
  private static readonly MAX_EXCERPT_SENTENCES = 5;
  /** Marker placed between non-contiguous excerpt fragments. */
  private static readonly EXCERPT_SEPARATOR = '\n\n[...]\n\n';
  /** Maximum number of related sources consulted per claim. */
  private static readonly MAX_RELATED_SOURCES = 5;
  /** Minimum share of claim terms a source must mention to count as related. */
  private static readonly MIN_SOURCE_RELEVANCE = 0.3;
  /** A non-supporting answer only counts as a contradiction above this confidence. */
  private static readonly CONTRADICTION_CONFIDENCE = 0.6;
  /** Number of claims verified concurrently by {@link batchVerifyClaims}. */
  private static readonly BATCH_SIZE = 5;

  /** Matches "Status: X" in free text; `\W+` also tolerates `**Status:**` and `"status": "X"`. */
  private static readonly STATUS_PATTERN =
    /Status\W+(PARTIALLY[_\s]VERIFIED|PARTIAL|UNVERIFIED|VERIFIED|CONTRADICTED)\b/i;
  /** Matches "Confidence: 0.8" (or "85") in free text. */
  private static readonly CONFIDENCE_PATTERN = /Confidence\W+([0-9]*\.?[0-9]+)/i;

  /** Common words ignored when extracting key terms from a claim. */
  private static readonly STOP_WORDS = new Set([
    'the', 'is', 'at', 'which', 'on', 'and', 'a', 'an', 'as', 'are', 'was', 'were',
    'been', 'be', 'have', 'has', 'had', 'that', 'with', 'for', 'of', 'in', 'to'
  ]);

  private verificationCache = new Map<string, CrossReferenceResult>();
  private sourceReliabilityCache = new Map<string, SourceReliability>();

  /**
   * @param runtime Agent runtime; its `useModel` is called to judge each source.
   * @param contentExtractor Object exposing `extractContent(url)` that resolves
   *   to a value with a `content` string (or a falsy value when nothing was found).
   */
  constructor(
    private runtime: IAgentRuntime,
    private contentExtractor: any
  ) {}

  /**
   * Verify a claim against its primary source and cross-reference it with
   * related sources.
   *
   * @param claim Claim to verify.
   * @param primarySource Source the claim is attributed to.
   * @param allSources Pool of sources from which related ones are selected.
   * @returns The aggregated verdict; repeated calls for the same claim return
   *   the cached result.
   */
  async verifyClaim(
    claim: FactualClaim,
    primarySource: ResearchSource,
    allSources: ResearchSource[]
  ): Promise<CrossReferenceResult> {
    const cacheKey = this.generateCacheKey(claim);

    const cached = this.verificationCache.get(cacheKey);
    if (cached !== undefined) {
      return cached;
    }

    elizaLogger.info(`[ClaimVerifier] Verifying claim: "${claim.statement}"`);

    // Step 1: check the source the claim came from.
    const primaryEvidence = await this.verifyAgainstSource(claim, primarySource);

    // Step 2: pick other sources that talk about the same thing.
    const relatedSources = this.findRelatedSources(claim, allSources, primarySource);
    const corroboratingEvidence: VerificationEvidence[] = [];
    const contradictingEvidence: VerificationEvidence[] = [];

    // Step 3: check each related source. Kept sequential so a single claim
    // never issues more than one model call at a time.
    for (const source of relatedSources) {
      const evidence = await this.verifyAgainstSource(claim, source);

      if (evidence.supports) {
        corroboratingEvidence.push(evidence);
      } else if (evidence.confidence > ClaimVerifier.CONTRADICTION_CONFIDENCE) {
        // Low-confidence "no" answers (including failed checks, which carry
        // confidence 0) are treated as silence rather than contradiction.
        contradictingEvidence.push(evidence);
      }
    }

    // Step 4: combine everything into one verdict.
    const result = this.aggregateVerificationResults(
      claim,
      primaryEvidence,
      corroboratingEvidence,
      contradictingEvidence
    );

    this.verificationCache.set(cacheKey, result);

    elizaLogger.info(`[ClaimVerifier] Verification complete: ${result.overallVerificationStatus} (${result.aggregateConfidence})`);

    return result;
  }

  /**
   * Check a claim against one source by showing the model the most relevant
   * excerpt of the source's content.
   *
   * Never rejects: any failure is logged and reported as non-supporting
   * evidence with confidence 0.
   */
  private async verifyAgainstSource(
    claim: FactualClaim,
    source: ResearchSource
  ): Promise<VerificationEvidence> {
    try {
      const sourceContent = await this.loadSourceContent(source);
      const relevantExcerpt = this.findRelevantExcerpt(claim.statement, sourceContent);

      // Without any text there is nothing to judge; skip the model call so an
      // empty source cannot be mistaken for a confident contradiction.
      if (relevantExcerpt.trim().length === 0) {
        return {
          sourceUrl: source.url,
          relevantExcerpt: '',
          supports: false,
          confidence: 0,
          reasoning: 'No source content available to verify against'
        };
      }

      const verificationPrompt = formatPrompt(RESEARCH_PROMPTS.CLAIM_VERIFICATION, {
        claim: claim.statement,
        sourceUrl: source.url,
        evidence: (claim.supportingEvidence ?? []).join(' '),
        sourceContent: relevantExcerpt
      });

      const config = getPromptConfig('verification');
      const response = await this.runtime.useModel(config.modelType, {
        messages: [
          {
            role: 'system',
            content: 'You are a rigorous fact-checker. Be extremely strict about verification.'
          },
          { role: 'user', content: verificationPrompt }
        ],
        temperature: config.temperature,
        max_tokens: config.maxTokens || 1500,
      });

      const result = this.parseVerificationResponse(response);

      return {
        sourceUrl: source.url,
        relevantExcerpt,
        // parseVerificationResponse normalises case and maps
        // PARTIALLY_VERIFIED to PARTIAL, so these two comparisons are exhaustive.
        supports: result.status === 'VERIFIED' || result.status === 'PARTIAL',
        confidence: result.confidence,
        reasoning: result.reasoning
      };
    } catch (error) {
      elizaLogger.error(`[ClaimVerifier] Error verifying against ${source.url}:`, error);
      return {
        sourceUrl: source.url,
        relevantExcerpt: '',
        supports: false,
        confidence: 0,
        reasoning: 'Failed to verify due to extraction error'
      };
    }
  }

  /**
   * Return the best available text for a source.
   *
   * Stored content is used as-is when it is long enough; otherwise a fresh
   * extraction is attempted. If extraction fails or yields nothing, the short
   * stored content is preferred over the snippet.
   */
  private async loadSourceContent(source: ResearchSource): Promise<string> {
    const stored = source.fullContent ?? '';
    if (stored.length >= ClaimVerifier.MIN_CONTENT_LENGTH) {
      return stored;
    }

    elizaLogger.info(`[ClaimVerifier] Extracting content from ${source.url}`);
    try {
      const extracted = await this.contentExtractor.extractContent(source.url);
      if (typeof extracted?.content === 'string' && extracted.content.length > 0) {
        return extracted.content;
      }
    } catch (error) {
      elizaLogger.error(
        `[ClaimVerifier] Content extraction failed for ${source.url}; falling back to stored content:`,
        error
      );
    }

    return stored || source.snippet || '';
  }

  /**
   * Build an excerpt from the sentences that share the most terms with the
   * claim, each with one sentence of context on either side.
   *
   * @returns At most 5000 characters; the empty string when `content` has no text.
   */
  private findRelevantExcerpt(claim: string, content: string): string {
    // Split where sentence punctuation is followed by whitespace (so decimals
    // like "3.5" and dotted names stay intact) and on bare line breaks.
    const sentences = content
      .split(/(?<=[.!?])\s+|\n+/)
      .map(sentence => sentence.trim())
      .filter(sentence => sentence.length > 0);

    if (sentences.length === 0) {
      return '';
    }

    // Strip punctuation glued to a word ("2023.", "(growth") so it can match.
    const claimTerms = claim
      .toLowerCase()
      .split(/\s+/)
      .map(term => term.replace(/^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu, ''))
      .filter(term => term.length > 3);

    // Rank by share of claim terms present. Indices are tracked explicitly so
    // duplicate sentences keep their own context. With no usable terms every
    // score is 0 and the stable sort leaves the document order untouched.
    const ranked = sentences
      .map((sentence, index) => {
        const sentenceLower = sentence.toLowerCase();
        const hits = claimTerms.filter(term => sentenceLower.includes(term)).length;
        return { index, score: claimTerms.length > 0 ? hits / claimTerms.length : 0 };
      })
      .sort((a, b) => b.score - a.score)
      .slice(0, ClaimVerifier.MAX_EXCERPT_SENTENCES);

    const fragments: string[] = [];
    for (const { index } of ranked) {
      const contextStart = Math.max(0, index - 1);
      const contextEnd = Math.min(sentences.length - 1, index + 1);
      const fragment = sentences.slice(contextStart, contextEnd + 1).join(' ');

      // Skip context windows already covered by a higher-ranked fragment.
      if (!fragments.some(existing => existing.includes(fragment))) {
        fragments.push(fragment);
      }
    }

    return fragments
      .join(ClaimVerifier.EXCERPT_SEPARATOR)
      .substring(0, ClaimVerifier.MAX_EXCERPT_LENGTH);
  }

  /**
   * Select up to five sources, other than `excludeSource`, whose title and
   * snippet mention more than 30% of the claim's key terms, most relevant first.
   */
  private findRelatedSources(
    claim: FactualClaim,
    allSources: ResearchSource[],
    excludeSource: ResearchSource
  ): ResearchSource[] {
    const claimTerms = this.extractKeyTerms(claim.statement);

    // Nothing to match on; also avoids a 0/0 relevance score below.
    if (claimTerms.length === 0) {
      return [];
    }

    return allSources
      .filter(source => source.id !== excludeSource.id)
      .map(source => {
        // Missing fields are treated as empty rather than the text "undefined".
        const sourceText = `${source.title ?? ''} ${source.snippet ?? ''}`.toLowerCase();
        const matchCount = claimTerms.filter(term => sourceText.includes(term)).length;

        return { source, relevanceScore: matchCount / claimTerms.length };
      })
      .filter(item => item.relevanceScore > ClaimVerifier.MIN_SOURCE_RELEVANCE)
      .sort((a, b) => b.relevanceScore - a.relevanceScore)
      .slice(0, ClaimVerifier.MAX_RELATED_SOURCES)
      .map(item => item.source);
  }

  /**
   * Extract lowercase key terms from a claim: words longer than three
   * characters, stripped of punctuation, excluding common stop words.
   * Letters and digits of any script are preserved.
   */
  private extractKeyTerms(claim: string): string[] {
    return claim
      .split(/\s+/)
      .map(word => word.toLowerCase().replace(/[^\p{L}\p{M}\p{N}]/gu, ''))
      .filter(word => word.length > 3 && !ClaimVerifier.STOP_WORDS.has(word));
  }

  /**
   * Parse the model's verification answer.
   *
   * Accepts either a JSON object (anywhere in the text) with `status`,
   * `confidence` and `reasoning`/`justification`, or free text containing
   * "Status: ..." and "Confidence: ...". Malformed JSON falls back to the
   * free-text form instead of discarding the answer.
   *
   * @returns `status` upper-cased with `PARTIALLY_VERIFIED` mapped to
   *   `PARTIAL` (defaults to `UNVERIFIED`); `confidence` within [0, 1].
   */
  private parseVerificationResponse(response: any): {
    status: string;
    confidence: number;
    reasoning: string;
  } {
    try {
      const raw = typeof response === 'string' ? response : response?.content;
      const content = typeof raw === 'string' ? raw : '';

      // Preferred form: a JSON object somewhere in the answer.
      const jsonMatch = content.match(/\{[\s\S]*\}/);
      if (jsonMatch) {
        let parsed: unknown;
        try {
          parsed = JSON.parse(jsonMatch[0]);
        } catch {
          parsed = undefined; // Not valid JSON; try the free-text form below.
        }

        if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
          const fields = parsed as Record<string, unknown>;
          // Only trust the object if it actually looks like a verdict.
          if ('status' in fields || 'confidence' in fields) {
            const reasoning = [fields.reasoning, fields.justification].find(
              (value): value is string => typeof value === 'string' && value.length > 0
            );
            return {
              status: ClaimVerifier.normalizeStatus(fields.status),
              confidence: ClaimVerifier.normalizeConfidence(fields.confidence),
              reasoning: reasoning ?? ''
            };
          }
        }
      }

      // Fallback: pull the fields out of free text.
      const statusMatch = content.match(ClaimVerifier.STATUS_PATTERN);
      const confidenceMatch = content.match(ClaimVerifier.CONFIDENCE_PATTERN);

      return {
        status: ClaimVerifier.normalizeStatus(statusMatch?.[1]),
        confidence: ClaimVerifier.normalizeConfidence(confidenceMatch?.[1]),
        reasoning: content
      };
    } catch (error) {
      elizaLogger.error('[ClaimVerifier] Error parsing verification response:', error);
      return {
        status: 'UNVERIFIED',
        confidence: 0,
        reasoning: 'Failed to parse verification response'
      };
    }
  }

  /** Canonicalise a status label: upper-case, `_`-separated, `PARTIALLY_VERIFIED` → `PARTIAL`. */
  private static normalizeStatus(value: unknown): string {
    if (typeof value !== 'string') {
      return 'UNVERIFIED';
    }
    const label = value.trim().toUpperCase().replace(/[\s-]+/g, '_');
    if (label.length === 0) {
      return 'UNVERIFIED';
    }
    return label === 'PARTIALLY_VERIFIED' ? 'PARTIAL' : label;
  }

  /**
   * Coerce a reported confidence into [0, 1]. Numeric strings are accepted and
   * values in (1, 100] are read as percentages; anything unusable becomes 0.
   */
  private static normalizeConfidence(value: unknown): number {
    const numeric =
      typeof value === 'number' ? value : typeof value === 'string' ? parseFloat(value) : NaN;
    if (!Number.isFinite(numeric) || numeric <= 0) {
      return 0;
    }
    const fraction = numeric > 1 && numeric <= 100 ? numeric / 100 : numeric;
    return Math.min(1, fraction);
  }

  /**
   * Combine per-source evidence into one verdict.
   *
   * Confidence is the summed supporting confidence minus half the summed
   * contradicting confidence, averaged over all counted evidence and clamped
   * to [0, 1]. The primary evidence only counts when it supports the claim.
   */
  private aggregateVerificationResults(
    claim: FactualClaim,
    primaryEvidence: VerificationEvidence,
    corroboratingEvidence: VerificationEvidence[],
    contradictingEvidence: VerificationEvidence[]
  ): CrossReferenceResult {
    const supportingEvidence = primaryEvidence.supports
      ? [primaryEvidence, ...corroboratingEvidence]
      : corroboratingEvidence;

    const totalSupporting = supportingEvidence.length;
    const totalContradicting = contradictingEvidence.length;
    const totalEvidence = totalSupporting + totalContradicting;

    let aggregateConfidence = 0;
    if (totalEvidence > 0) {
      const supportWeight = supportingEvidence.reduce((sum, e) => sum + e.confidence, 0);
      const contradictWeight = contradictingEvidence.reduce((sum, e) => sum + e.confidence, 0);

      // Contradictions are weighted at half strength.
      const weighted = (supportWeight - contradictWeight * 0.5) / totalEvidence;
      aggregateConfidence = Math.max(0, Math.min(1, weighted));
    }

    let overallStatus: VerificationStatus;
    let consensusLevel: 'strong' | 'moderate' | 'weak' | 'disputed';

    if (totalContradicting > totalSupporting) {
      overallStatus = VerificationStatus.DISPUTED;
      consensusLevel = 'disputed';
    } else if (totalSupporting === 0) {
      // No evidence either way.
      overallStatus = VerificationStatus.UNVERIFIED;
      consensusLevel = 'weak';
    } else if (totalContradicting === 0 && totalSupporting >= 2) {
      overallStatus = VerificationStatus.VERIFIED;
      consensusLevel = 'strong';
    } else if (totalSupporting > totalContradicting) {
      // Either a single unopposed supporter, or supporters outnumbering dissent.
      overallStatus = VerificationStatus.PARTIAL;
      consensusLevel = totalContradicting > 0 ? 'moderate' : 'weak';
    } else {
      // Equal, non-zero support and contradiction.
      overallStatus = VerificationStatus.UNVERIFIED;
      consensusLevel = 'weak';
    }

    return {
      claim,
      primaryEvidence,
      corroboratingEvidence,
      contradictingEvidence,
      overallVerificationStatus: overallStatus,
      aggregateConfidence,
      consensusLevel
    };
  }

  /**
   * Derive the in-memory cache key for a claim from its statement and source
   * URLs. MD5 is used purely as a compact fingerprint, not for security.
   */
  private generateCacheKey(claim: FactualClaim): string {
    const urls = Array.isArray(claim.sourceUrls) ? claim.sourceUrls : [];
    // The newline keeps ("ab", []) and ("a", ["b"]) from hashing identically.
    return crypto
      .createHash('md5')
      .update(`${claim.statement}\n${urls.join(',')}`)
      .digest('hex');
  }

  /**
   * Verify several claims, five at a time.
   *
   * @param claims Claims paired with their primary sources.
   * @param allSources Pool of sources used for cross-referencing every claim.
   * @returns One result per input claim, in input order.
   */
  async batchVerifyClaims(
    claims: Array<{
      claim: FactualClaim;
      primarySource: ResearchSource;
    }>,
    allSources: ResearchSource[]
  ): Promise<CrossReferenceResult[]> {
    elizaLogger.info(`[ClaimVerifier] Batch verifying ${claims.length} claims`);

    const results: CrossReferenceResult[] = [];

    // Each batch runs concurrently; batches run one after another to bound
    // the number of simultaneous model and extraction calls.
    for (let start = 0; start < claims.length; start += ClaimVerifier.BATCH_SIZE) {
      const batch = claims.slice(start, start + ClaimVerifier.BATCH_SIZE);
      const batchResults = await Promise.all(
        batch.map(({ claim, primarySource }) =>
          this.verifyClaim(claim, primarySource, allSources)
        )
      );
      results.push(...batchResults);
    }

    return results;
  }

  /**
   * Summarise a set of verification results.
   *
   * @returns Counts per status, the number of results with strong consensus,
   *   and the mean aggregate confidence (0 for an empty input).
   */
  getVerificationStats(results: CrossReferenceResult[]): {
    verified: number;
    partiallyVerified: number;
    unverified: number;
    disputed: number;
    averageConfidence: number;
    strongConsensus: number;
  } {
    let verified = 0;
    let partiallyVerified = 0;
    let unverified = 0;
    let disputed = 0;
    let strongConsensus = 0;
    let totalConfidence = 0;

    for (const result of results) {
      switch (result.overallVerificationStatus) {
        case VerificationStatus.VERIFIED:
          verified++;
          break;
        case VerificationStatus.PARTIAL:
          partiallyVerified++;
          break;
        case VerificationStatus.UNVERIFIED:
          unverified++;
          break;
        case VerificationStatus.DISPUTED:
          disputed++;
          break;
      }

      totalConfidence += result.aggregateConfidence;
      if (result.consensusLevel === 'strong') {
        strongConsensus++;
      }
    }

    return {
      verified,
      partiallyVerified,
      unverified,
      disputed,
      averageConfidence: results.length > 0 ? totalConfidence / results.length : 0,
      strongConsensus,
    };
  }
}