@elizaos/plugin-research 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. package/README.md +400 -0
  2. package/dist/index.cjs +9366 -0
  3. package/dist/index.cjs.map +1 -0
  4. package/dist/index.js +9284 -0
  5. package/dist/index.js.map +1 -0
  6. package/package.json +80 -0
  7. package/src/__tests__/action-chaining.test.ts +532 -0
  8. package/src/__tests__/actions.test.ts +118 -0
  9. package/src/__tests__/cache-rate-limiter.test.ts +303 -0
  10. package/src/__tests__/content-extractors.test.ts +26 -0
  11. package/src/__tests__/deepresearch-bench-integration.test.ts +520 -0
  12. package/src/__tests__/deepresearch-bench-simplified.e2e.test.ts +290 -0
  13. package/src/__tests__/deepresearch-bench.e2e.test.ts +376 -0
  14. package/src/__tests__/e2e.test.ts +1870 -0
  15. package/src/__tests__/multi-benchmark-runner.ts +427 -0
  16. package/src/__tests__/providers.test.ts +156 -0
  17. package/src/__tests__/real-world.e2e.test.ts +788 -0
  18. package/src/__tests__/research-scenarios.test.ts +755 -0
  19. package/src/__tests__/research.e2e.test.ts +704 -0
  20. package/src/__tests__/research.test.ts +174 -0
  21. package/src/__tests__/search-providers.test.ts +174 -0
  22. package/src/__tests__/single-benchmark-runner.ts +735 -0
  23. package/src/__tests__/test-search-providers.ts +171 -0
  24. package/src/__tests__/verify-apis.test.ts +82 -0
  25. package/src/actions.ts +1677 -0
  26. package/src/benchmark/deepresearch-benchmark.ts +369 -0
  27. package/src/evaluation/research-evaluator.ts +444 -0
  28. package/src/examples/api-integration.md +498 -0
  29. package/src/examples/browserbase-integration.md +132 -0
  30. package/src/examples/debug-research-query.ts +162 -0
  31. package/src/examples/defi-code-scenarios.md +536 -0
  32. package/src/examples/defi-implementation-guide.md +454 -0
  33. package/src/examples/eliza-research-example.ts +142 -0
  34. package/src/examples/fix-renewable-energy-research.ts +209 -0
  35. package/src/examples/research-scenarios.md +408 -0
  36. package/src/examples/run-complete-renewable-research.ts +303 -0
  37. package/src/examples/run-deep-research.ts +352 -0
  38. package/src/examples/run-logged-research.ts +304 -0
  39. package/src/examples/run-real-research.ts +151 -0
  40. package/src/examples/save-research-output.ts +133 -0
  41. package/src/examples/test-file-logging.ts +199 -0
  42. package/src/examples/test-real-research.ts +67 -0
  43. package/src/examples/test-renewable-energy-research.ts +229 -0
  44. package/src/index.ts +28 -0
  45. package/src/integrations/cache.ts +128 -0
  46. package/src/integrations/content-extractors/firecrawl.ts +314 -0
  47. package/src/integrations/content-extractors/pdf-extractor.ts +350 -0
  48. package/src/integrations/content-extractors/playwright.ts +420 -0
  49. package/src/integrations/factory.ts +419 -0
  50. package/src/integrations/index.ts +18 -0
  51. package/src/integrations/rate-limiter.ts +181 -0
  52. package/src/integrations/search-providers/academic.ts +290 -0
  53. package/src/integrations/search-providers/exa.ts +205 -0
  54. package/src/integrations/search-providers/npm.ts +330 -0
  55. package/src/integrations/search-providers/pypi.ts +211 -0
  56. package/src/integrations/search-providers/serpapi.ts +277 -0
  57. package/src/integrations/search-providers/serper.ts +358 -0
  58. package/src/integrations/search-providers/stagehand-google.ts +87 -0
  59. package/src/integrations/search-providers/tavily.ts +187 -0
  60. package/src/processing/relevance-analyzer.ts +353 -0
  61. package/src/processing/research-logger.ts +450 -0
  62. package/src/processing/result-processor.ts +372 -0
  63. package/src/prompts/research-prompts.ts +419 -0
  64. package/src/providers/cacheProvider.ts +164 -0
  65. package/src/providers.ts +173 -0
  66. package/src/service.ts +2588 -0
  67. package/src/services/swe-bench.ts +286 -0
  68. package/src/strategies/research-strategies.ts +790 -0
  69. package/src/types/pdf-parse.d.ts +34 -0
  70. package/src/types.ts +551 -0
  71. package/src/verification/claim-verifier.ts +443 -0
package/src/types/pdf-parse.d.ts ADDED
@@ -0,0 +1,34 @@
/**
 * Ambient type declarations for the `pdf-parse` package.
 *
 * The module is declared with a CommonJS-style export (`export =`): a single
 * function that takes a PDF buffer plus optional settings and resolves to the
 * parsed document data.
 */
declare module 'pdf-parse' {
  /** Document information entries; every entry is optional. */
  interface PDFInfo {
    PDFFormatVersion?: string;
    IsAcroFormPresent?: boolean;
    IsXFAPresent?: boolean;
    Title?: string;
    Author?: string;
    Subject?: string;
    Keywords?: string;
    Creator?: string;
    Producer?: string;
    CreationDate?: string;
    ModDate?: string;
  }

  /** Value that the promise returned by `pdf()` resolves to. */
  interface PDFData {
    numpages: number;
    numrender: number;
    info: PDFInfo;
    // Kept as `any`: no shape for the metadata object is declared here.
    metadata: any;
    text: string;
    version: string;
  }

  /** Optional settings accepted as the second argument of `pdf()`. */
  interface PDFOptions {
    /**
     * Custom per-page text renderer. It may return the page text directly or
     * a promise of it; the library consumes the result through a promise
     * chain, so both forms are accepted.
     */
    pagerender?: (pageData: any) => string | Promise<string>;
    max?: number;
    version?: string;
  }

  /**
   * Parses a PDF held in memory.
   *
   * @param dataBuffer - Raw bytes of the PDF file.
   * @param options - Optional parsing settings.
   * @returns A promise resolving to the parsed document data.
   */
  function pdf(dataBuffer: Buffer, options?: PDFOptions): Promise<PDFData>;

  export = pdf;
}
package/src/types.ts ADDED
@@ -0,0 +1,551 @@
1
+ // Core Research Types for DeepResearch Bench Compatibility
2
+
/**
 * Top-level record for one research run: the query under investigation, its
 * lifecycle status and phase, the findings and sources collected, and the
 * optional report, error message and evaluation results.
 */
export interface ResearchProject {
  id: string;
  query: string;
  status: ResearchStatus;
  phase: ResearchPhase;
  // Numeric timestamps. NOTE(review): presumably epoch milliseconds — confirm
  // against the code that sets them.
  createdAt: number;
  updatedAt: number;
  completedAt?: number;
  findings: ResearchFinding[];
  sources: ResearchSource[];
  report?: ResearchReport;
  error?: string;
  metadata: ResearchMetadata;
  evaluationResults?: EvaluationResults;
}
18
+
/**
 * Classification, planning and bookkeeping data attached to a
 * {@link ResearchProject}.
 */
export interface ResearchMetadata {
  domain: ResearchDomain;
  taskType: TaskType;
  language: string;
  depth: ResearchDepth;
  queryPlan: QueryPlan;
  evaluationCriteria: EvaluationCriteria;
  iterationHistory: IterationRecord[];
  performanceMetrics: PerformanceMetrics;
  /** Optional free-text analysis keyed by string (key meaning is not defined here). */
  categoryAnalysis?: Record<string, string>;
  synthesis?: string;
}
31
+
32
+ // DeepResearch Bench Domain Support (22 domains)
/**
 * Subject areas a research project can be classified under.
 *
 * There are 22 members in total, including the catch-all `GENERAL`. Each
 * member's runtime value is its lowercase snake_case string.
 */
export enum ResearchDomain {
  PHYSICS = 'physics',
  CHEMISTRY = 'chemistry',
  BIOLOGY = 'biology',
  ENVIRONMENTAL_SCIENCE = 'environmental_science',
  ENGINEERING = 'engineering',
  COMPUTER_SCIENCE = 'computer_science',
  MATHEMATICS = 'mathematics',
  MEDICINE = 'medicine',
  PSYCHOLOGY = 'psychology',
  ECONOMICS = 'economics',
  FINANCE = 'finance',
  BUSINESS = 'business',
  MARKETING = 'marketing',
  HUMAN_RESOURCES = 'human_resources',
  LAW = 'law',
  POLITICS = 'politics',
  HISTORY = 'history',
  PHILOSOPHY = 'philosophy',
  ART_DESIGN = 'art_design',
  ENTERTAINMENT = 'entertainment',
  TRANSPORTATION = 'transportation',
  GENERAL = 'general'
}
57
+
/** Kind of research task; each member's value is its lowercase name. */
export enum TaskType {
  EXPLORATORY = 'exploratory',
  COMPARATIVE = 'comparative',
  ANALYTICAL = 'analytical',
  SYNTHETIC = 'synthetic',
  EVALUATIVE = 'evaluative',
  PREDICTIVE = 'predictive'
}
66
+
/**
 * How thorough a research run should be, from `SURFACE` to `PHD_LEVEL`.
 *
 * Note that `PHD_LEVEL` serialises with a hyphen (`'phd-level'`), unlike the
 * underscore in its member name.
 */
export enum ResearchDepth {
  SURFACE = 'surface',
  MODERATE = 'moderate',
  DEEP = 'deep',
  PHD_LEVEL = 'phd-level'
}
73
+
/**
 * Plan for answering a research query: the main query, its decomposition
 * into sub-queries, and the search strategy and domain approach to apply.
 */
export interface QueryPlan {
  mainQuery: string;
  subQueries: SubQuery[];
  searchStrategy: SearchStrategy;
  expectedSources: number;
  iterationCount: number;
  adaptiveRefinement: boolean;
  domainSpecificApproach: DomainApproach;
}
83
+
/**
 * One unit of a {@link QueryPlan}: a query string with its purpose, priority,
 * dependencies, target providers, completion flag and optional results.
 */
export interface SubQuery {
  id: string;
  query: string;
  purpose: string;
  // Ordering direction (higher vs. lower first) is not defined by this type.
  priority: number;
  // NOTE(review): presumably ids of other SubQuery entries — confirm with the planner.
  dependsOn: string[];
  searchProviders: string[];
  expectedResultType: ResultType;
  completed: boolean;
  results?: SubQueryResult;
}
95
+
/**
 * Outcome of running a {@link SubQuery}: the sources and findings it
 * produced, a numeric quality value, and optional follow-up query strings.
 */
export interface SubQueryResult {
  sources: ResearchSource[];
  findings: ResearchFinding[];
  // Scale of the quality value is not defined by this type.
  quality: number;
  nextQueries?: string[];
}
102
+
/** Kind of result a sub-query is expected to yield; values are lowercase names. */
export enum ResultType {
  FACTUAL = 'factual',
  STATISTICAL = 'statistical',
  THEORETICAL = 'theoretical',
  PRACTICAL = 'practical',
  COMPARATIVE = 'comparative'
}
110
+
/**
 * Search settings for a {@link QueryPlan}: the overall approach, which source
 * types to use, a quality threshold, and optional temporal/geographic scope.
 */
export interface SearchStrategy {
  approach: SearchApproach;
  sourceTypes: SourceType[];
  // Scale of the threshold is not defined by this type.
  qualityThreshold: number;
  diversityRequirement: boolean;
  temporalFocus?: TemporalFocus;
  geographicScope?: string[];
  languagePreferences: string[];
}
120
+
/**
 * Named search approaches selectable in a {@link SearchStrategy}.
 *
 * Runtime values are the hyphenated lowercase form of each member name.
 */
export enum SearchApproach {
  BREADTH_FIRST = 'breadth-first',
  DEPTH_FIRST = 'depth-first',
  ITERATIVE_REFINEMENT = 'iterative-refinement',
  HYPOTHESIS_DRIVEN = 'hypothesis-driven',
  CITATION_CHAINING = 'citation-chaining'
}
128
+
/** Time orientation a search may be focused on; values are hyphenated lowercase. */
export enum TemporalFocus {
  HISTORICAL = 'historical',
  CURRENT = 'current',
  RECENT = 'recent',
  FUTURE_ORIENTED = 'future-oriented'
}
135
+
/**
 * Domain-specific guidance carried by a query plan: a methodology
 * description plus lists of key terms, authority sources and evaluation
 * focus points. All values are free-form strings.
 */
export interface DomainApproach {
  methodology: string;
  keyTerms: string[];
  authoritySource: string[];
  evaluationFocus: string[];
}
142
+
/**
 * Lifecycle status of a research project.
 *
 * `EVALUATING` shares its string value (`'evaluating'`) with
 * `ResearchPhase.EVALUATING`; the two enums are nevertheless distinct types.
 */
export enum ResearchStatus {
  PENDING = 'pending',
  ACTIVE = 'active',
  COMPLETED = 'completed',
  FAILED = 'failed',
  PAUSED = 'paused',
  EVALUATING = 'evaluating'
}
151
+
/**
 * Named phases of a research run, declared from `INITIALIZATION` through
 * `COMPLETE`.
 *
 * The terminal member is `COMPLETE` (`'complete'`), which differs from
 * `ResearchStatus.COMPLETED` (`'completed'`).
 */
export enum ResearchPhase {
  INITIALIZATION = 'initialization',
  PLANNING = 'planning',
  SEARCHING = 'searching',
  ANALYZING = 'analyzing',
  SYNTHESIZING = 'synthesizing',
  EVALUATING = 'evaluating',
  REPORTING = 'reporting',
  COMPLETE = 'complete'
}
162
+
/**
 * A single piece of extracted content together with the source it came from,
 * its scores, categorisation, citations, factual claims and verification
 * status.
 */
export interface ResearchFinding {
  id: string;
  content: string;
  source: ResearchSource;
  // Scales for relevance and confidence are not defined by this type.
  relevance: number;
  confidence: number;
  timestamp: number;
  category: string;
  subcategory?: string;
  citations: Citation[];
  factualClaims: FactualClaim[];
  // NOTE(review): presumably ids of other findings — confirm with the producer.
  relatedFindings: string[];
  verificationStatus: VerificationStatus;
  extractionMethod: string;
}
178
+
/**
 * A statement extracted from a finding, with its supporting evidence, source
 * URLs, verification status, confidence score and any recorded
 * contradictions.
 */
export interface FactualClaim {
  id: string;
  statement: string;
  supportingEvidence: string[];
  sourceUrls: string[];
  verificationStatus: VerificationStatus;
  confidenceScore: number;
  contradictions?: Contradiction[];
  // NOTE(review): presumably ids of other claims — confirm with the producer.
  relatedClaims: string[];
}
189
+
/**
 * A recorded conflict involving a claim, with a description, a three-level
 * severity and an optional resolution note.
 */
export interface Contradiction {
  claimId: string;
  description: string;
  severity: 'minor' | 'moderate' | 'major';
  resolution?: string;
}
196
+
/** Verification state of a claim, citation or finding; values are lowercase names. */
export enum VerificationStatus {
  VERIFIED = 'verified',
  UNVERIFIED = 'unverified',
  DISPUTED = 'disputed',
  PARTIAL = 'partial',
  PENDING = 'pending'
}
204
+
/**
 * A source consulted during research: where it lives, what was retrieved
 * from it, how it is classified, and optional bibliographic details.
 */
export interface ResearchSource {
  id: string;
  url: string;
  title: string;
  snippet?: string;
  fullContent?: string;
  accessedAt: number;
  type: SourceType;
  // Scale of the reliability value is not defined by this type.
  reliability: number;
  domain?: string;
  /** List of author names (the property name is singular, the value is an array). */
  author?: string[];
  // Dates are plain strings; no format is enforced by this type.
  publishDate?: string;
  lastModified?: string;
  /** A count, not a list — contrast with `Citation[]` fields elsewhere in this file. */
  citations?: number;
  peerReviewed?: boolean;
  metadata: SourceMetadata;
}
222
+
/** Categories of source material; values are lowercase names. */
export enum SourceType {
  WEB = 'web',
  ACADEMIC = 'academic',
  NEWS = 'news',
  TECHNICAL = 'technical',
  BOOK = 'book',
  VIDEO = 'video',
  DATASET = 'dataset',
  GOVERNMENT = 'government',
  ORGANIZATION = 'organization'
}
234
+
/**
 * Bibliographic and descriptive details for a source. Only `language` is
 * required; every other entry is optional.
 */
export interface SourceMetadata {
  journal?: string;
  doi?: string;
  isbn?: string;
  conference?: string;
  institution?: string;
  license?: string;
  language: string;
  wordCount?: number;
  readingLevel?: string;
}
246
+
/**
 * A quoted or referenced passage tied to the source it came from, with its
 * location, confidence, verification status, surrounding context and a usage
 * counter.
 */
export interface Citation {
  id: string;
  text: string;
  source: ResearchSource;
  pageNumber?: number;
  section?: string;
  confidence: number;
  verificationStatus: VerificationStatus;
  context: string;
  usageCount: number;
}
258
+
/**
 * The generated report for a research project: title, abstract, summary,
 * sectioned body, citations, bibliography, size statistics, evaluation
 * metrics and the list of export formats.
 */
export interface ResearchReport {
  id: string;
  title: string;
  abstract: string;
  summary: string;
  sections: ReportSection[];
  citations: Citation[];
  bibliography: BibliographyEntry[];
  generatedAt: number;
  wordCount: number;
  // Unit is not defined by this type. NOTE(review): presumably minutes — confirm.
  readingTime: number;
  evaluationMetrics: EvaluationMetrics;
  exportFormats: ExportFormat[];
}
273
+
/**
 * One section of a report. Sections nest recursively through the optional
 * `subsections` array.
 */
export interface ReportSection {
  id: string;
  heading: string;
  level: number;
  content: string;
  // String references, not embedded objects. NOTE(review): presumably
  // ResearchFinding ids — confirm with the report generator.
  findings: string[];
  citations: Citation[];
  subsections?: ReportSection[];
  metadata: SectionMetadata;
}
284
+
/**
 * Statistics attached to a report section: word count, citation density,
 * readability score and key terms. Score scales are not defined by this type.
 */
export interface SectionMetadata {
  wordCount: number;
  citationDensity: number;
  readabilityScore: number;
  keyTerms: string[];
}
291
+
/**
 * A formatted bibliography line for a source, tagged with the citation style
 * used and an access counter.
 */
export interface BibliographyEntry {
  id: string;
  /** The formatted citation text. */
  citation: string;
  format: 'APA' | 'MLA' | 'Chicago' | 'Harvard';
  source: ResearchSource;
  accessCount: number;
}
299
+
/**
 * One export target for a report: the format name, an optional URL, and a
 * flag recording whether it has been generated.
 */
export interface ExportFormat {
  format: 'json' | 'markdown' | 'pdf' | 'deepresearch' | 'latex' | 'docx';
  url?: string;
  generated: boolean;
}
305
+
306
+ // RACE Evaluation Framework
/**
 * The set of criteria a report is judged against: four fixed criteria plus
 * an optional map of domain-specific ones keyed by name.
 */
export interface EvaluationCriteria {
  comprehensiveness: CriteriaDefinition;
  depth: CriteriaDefinition;
  instructionFollowing: CriteriaDefinition;
  readability: CriteriaDefinition;
  domainSpecific?: Record<string, CriteriaDefinition>;
}
314
+
/**
 * Definition of one evaluation criterion: its name, description, weight,
 * rubric entries and the scoring method to apply.
 */
export interface CriteriaDefinition {
  name: string;
  description: string;
  // Whether weights are expected to be normalised is not defined by this type.
  weight: number;
  rubric: RubricItem[];
  scoringMethod: ScoringMethod;
}
322
+
/** One rubric entry: a score, what that score means, and optional examples. */
export interface RubricItem {
  score: number;
  description: string;
  examples?: string[];
}
328
+
/** How a criterion is scored; values are lowercase names. */
export enum ScoringMethod {
  BINARY = 'binary',
  SCALE = 'scale',
  RUBRIC = 'rubric',
  COMPARATIVE = 'comparative'
}
335
+
/**
 * Evaluation scores stored on a report: the RACE and FACT score objects plus
 * when they were produced and by which evaluator version.
 */
export interface EvaluationMetrics {
  raceScore: RACEScore;
  factScore: FACTScore;
  timestamp: number;
  evaluatorVersion: string;
}
342
+
/**
 * RACE evaluation scores: an overall value, one value per fixed criterion
 * (mirroring the keys of `EvaluationCriteria`), optional domain-specific
 * values, and a per-criterion breakdown. Score scales are not defined by
 * this type.
 */
export interface RACEScore {
  overall: number;
  comprehensiveness: number;
  depth: number;
  instructionFollowing: number;
  readability: number;
  domainSpecific?: Record<string, number>;
  breakdown: ScoreBreakdown[];
}
352
+
/**
 * Detail for one scored criterion: the score achieved out of a maximum, the
 * justification, and suggested improvements.
 */
export interface ScoreBreakdown {
  criterion: string;
  score: number;
  maxScore: number;
  justification: string;
  improvements: string[];
}
360
+
361
+ // FACT Evaluation Framework
/**
 * FACT evaluation scores: citation counts and ratios plus a per-source
 * breakdown.
 *
 * NOTE(review): which fields are counts and which are ratios is not stated
 * by this type; `totalCitations`, `verifiedCitations` and `disputedCitations`
 * read as counts — confirm against the evaluator.
 */
export interface FACTScore {
  citationAccuracy: number;
  effectiveCitations: number;
  totalCitations: number;
  verifiedCitations: number;
  disputedCitations: number;
  citationCoverage: number;
  sourceCredibility: number;
  breakdown: FactBreakdown[];
}
372
+
/**
 * Per-source slice of a FACT evaluation: citation counts for the source, its
 * credibility score, and any issues noted.
 */
export interface FactBreakdown {
  sourceId: string;
  citationsFromSource: number;
  verifiedFromSource: number;
  credibilityScore: number;
  issues: string[];
}
380
+
381
+ // Performance and Optimization
/**
 * Resource and timing counters for a research run.
 */
export interface PerformanceMetrics {
  totalDuration: number;
  /**
   * Timing per phase. `Record<ResearchPhase, …>` requires an entry for every
   * member of `ResearchPhase`, so a value of this type must be fully
   * populated even before later phases have run.
   */
  phaseBreakdown: Record<ResearchPhase, PhaseTiming>;
  searchQueries: number;
  sourcesProcessed: number;
  tokensGenerated: number;
  cacheHits: number;
  parallelOperations: number;
}
391
+
/**
 * Timing record for one phase: start, end and duration values, a retry
 * count, and any error messages collected. Time units are not defined by
 * this type.
 */
export interface PhaseTiming {
  startTime: number;
  endTime: number;
  duration: number;
  retries: number;
  errors: string[];
}
399
+
/**
 * Log entry for one research iteration: which queries were used, how many
 * sources and findings resulted, a quality score, and an optional reason
 * for refinement.
 */
export interface IterationRecord {
  iteration: number;
  timestamp: number;
  queriesUsed: string[];
  sourcesFound: number;
  findingsExtracted: number;
  qualityScore: number;
  refinementReason?: string;
}
409
+
410
+ // Research Configuration
/**
 * Configuration for a research run: limits, feature toggles, provider list,
 * and default language, depth and domain. Every field is required.
 */
export interface ResearchConfig {
  maxSearchResults: number;
  maxDepth: number;
  // Unit is not defined by this type. NOTE(review): presumably milliseconds — confirm.
  timeout: number;
  enableCitations: boolean;
  enableImages: boolean;
  searchProviders: string[];
  language: string;
  researchDepth: ResearchDepth;
  domain: ResearchDomain;
  evaluationEnabled: boolean;
  cacheEnabled: boolean;
  parallelSearches: number;
  retryAttempts: number;
  qualityThreshold: number;
}
427
+
428
+ // Search and Content Types
/**
 * One hit returned by a search provider: title, URL, snippet, optional full
 * content, a score, the provider name and provider metadata.
 */
export interface SearchResult {
  title: string;
  url: string;
  snippet: string;
  content?: string;
  // Scale of the score is not defined by this type.
  score: number;
  provider: string;
  metadata: SearchMetadata;
}
438
+
/**
 * Extra details attached to a search hit. Only `language` is required.
 */
export interface SearchMetadata {
  /** List of author names (the property name is singular, the value is an array). */
  author?: string[];
  publishDate?: string;
  domain?: string;
  /** Free-form string here, not the `SourceType` enum. */
  type?: string;
  language: string;
  location?: string;
}
447
+
448
+ // Progress Tracking
/**
 * Progress update for a project: current phase, a message, a numeric
 * progress value, and optional finer-grained sub-progress and completion
 * estimate.
 */
export interface ResearchProgress {
  projectId: string;
  phase: ResearchPhase;
  message: string;
  // Scale (0–1 vs. 0–100) is not defined by this type.
  progress: number;
  timestamp: number;
  subProgress?: SubProgress;
  estimatedCompletion?: number;
}
458
+
/**
 * Finer-grained progress within a phase: position out of a total, a
 * description, and a list of item labels.
 */
export interface SubProgress {
  current: number;
  total: number;
  description: string;
  items: string[];
}
465
+
466
+ // Evaluation Results
/**
 * Combined evaluation outcome for a project: the RACE and FACT evaluations,
 * an overall score, recommendations and a timestamp.
 */
export interface EvaluationResults {
  projectId: string;
  raceEvaluation: RACEEvaluation;
  factEvaluation: FACTEvaluation;
  overallScore: number;
  recommendations: string[];
  timestamp: number;
}
475
+
/**
 * RACE evaluation output: the scores, an optional comparison against a
 * reference, and per-section feedback.
 */
export interface RACEEvaluation {
  scores: RACEScore;
  referenceComparison?: ReferenceComparison;
  detailedFeedback: DetailedFeedback[];
}
481
+
/**
 * FACT evaluation output: the scores, the claim/source citation map, and the
 * individual verification records.
 */
export interface FACTEvaluation {
  scores: FACTScore;
  citationMap: CitationMap;
  verificationDetails: VerificationDetail[];
}
487
+
/**
 * Result of comparing a report with a reference: the reference id, a
 * similarity score, and lists of strengths and gaps.
 */
export interface ReferenceComparison {
  referenceId: string;
  similarityScore: number;
  strengths: string[];
  gaps: string[];
}
494
+
/**
 * Feedback for one section: the section name, its score, the feedback text
 * and a list of suggestions.
 */
export interface DetailedFeedback {
  section: string;
  score: number;
  feedback: string;
  suggestions: string[];
}
501
+
/**
 * Two-way lookup between claims and source URLs, plus a verification status
 * map.
 *
 * All three members are `Map` instances. `JSON.stringify` turns a `Map` into
 * `{}`, so a value of this type needs explicit conversion (for example
 * `Array.from(map.entries())`) before being serialised.
 */
export interface CitationMap {
  /** Claim -> source URLs. */
  claims: Map<string, string[]>;
  /** Source URL -> claims. */
  sources: Map<string, string[]>;
  // The key's meaning is not stated here. NOTE(review): presumably a claim
  // identifier — confirm against the code that fills this map.
  verification: Map<string, VerificationStatus>;
}
507
+
/**
 * Record of one verification attempt: which claim was checked against which
 * source URL, the method used, the resulting status, optional evidence and a
 * confidence value.
 */
export interface VerificationDetail {
  claimId: string;
  sourceUrl: string;
  method: string;
  result: VerificationStatus;
  evidence?: string;
  confidence: number;
}
516
+
517
+ // DeepResearch Bench Format
/**
 * A result record in DeepResearch Bench format: an id, the prompt, the
 * generated article text and accompanying metadata.
 */
export interface DeepResearchBenchResult {
  id: string;
  prompt: string;
  article: string;
  metadata: DeepResearchMetadata;
}
524
+
/**
 * Metadata attached to a {@link DeepResearchBenchResult}.
 *
 * Unlike `ResearchMetadata`, `domain` and `taskType` are plain strings here
 * rather than enum types, and `generatedAt` is a string rather than a number.
 */
export interface DeepResearchMetadata {
  domain: string;
  taskType: string;
  generatedAt: string;
  modelVersion: string;
  evaluationScores: {
    race: RACEScore;
    fact: FACTScore;
  };
}
535
+
536
+ // Action Chaining Support
/**
 * Context handed from one action to the next in a chain: the project id,
 * the previous action and its result, suggested follow-up actions and a
 * free-form state bag.
 */
export interface ActionContext {
  projectId: string;
  previousAction?: string;
  // Left as `any`: the result shape depends on the previous action and is
  // not constrained by this type.
  previousResult?: any;
  suggestedNextActions: string[];
  // Free-form key/value state; values are unchecked (`any`).
  state: Record<string, any>;
}
544
+
/**
 * Outcome of an action: a success flag, optional payload or error message,
 * follow-up action names and free-form metadata.
 */
export interface ActionResult {
  success: boolean;
  // Left as `any`: the payload shape depends on the action.
  data?: any;
  error?: string;
  nextActions: string[];
  // Free-form key/value metadata; values are unchecked (`any`).
  metadata: Record<string, any>;
}