@elizaos/plugin-research 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. package/README.md +400 -0
  2. package/dist/index.cjs +9366 -0
  3. package/dist/index.cjs.map +1 -0
  4. package/dist/index.js +9284 -0
  5. package/dist/index.js.map +1 -0
  6. package/package.json +80 -0
  7. package/src/__tests__/action-chaining.test.ts +532 -0
  8. package/src/__tests__/actions.test.ts +118 -0
  9. package/src/__tests__/cache-rate-limiter.test.ts +303 -0
  10. package/src/__tests__/content-extractors.test.ts +26 -0
  11. package/src/__tests__/deepresearch-bench-integration.test.ts +520 -0
  12. package/src/__tests__/deepresearch-bench-simplified.e2e.test.ts +290 -0
  13. package/src/__tests__/deepresearch-bench.e2e.test.ts +376 -0
  14. package/src/__tests__/e2e.test.ts +1870 -0
  15. package/src/__tests__/multi-benchmark-runner.ts +427 -0
  16. package/src/__tests__/providers.test.ts +156 -0
  17. package/src/__tests__/real-world.e2e.test.ts +788 -0
  18. package/src/__tests__/research-scenarios.test.ts +755 -0
  19. package/src/__tests__/research.e2e.test.ts +704 -0
  20. package/src/__tests__/research.test.ts +174 -0
  21. package/src/__tests__/search-providers.test.ts +174 -0
  22. package/src/__tests__/single-benchmark-runner.ts +735 -0
  23. package/src/__tests__/test-search-providers.ts +171 -0
  24. package/src/__tests__/verify-apis.test.ts +82 -0
  25. package/src/actions.ts +1677 -0
  26. package/src/benchmark/deepresearch-benchmark.ts +369 -0
  27. package/src/evaluation/research-evaluator.ts +444 -0
  28. package/src/examples/api-integration.md +498 -0
  29. package/src/examples/browserbase-integration.md +132 -0
  30. package/src/examples/debug-research-query.ts +162 -0
  31. package/src/examples/defi-code-scenarios.md +536 -0
  32. package/src/examples/defi-implementation-guide.md +454 -0
  33. package/src/examples/eliza-research-example.ts +142 -0
  34. package/src/examples/fix-renewable-energy-research.ts +209 -0
  35. package/src/examples/research-scenarios.md +408 -0
  36. package/src/examples/run-complete-renewable-research.ts +303 -0
  37. package/src/examples/run-deep-research.ts +352 -0
  38. package/src/examples/run-logged-research.ts +304 -0
  39. package/src/examples/run-real-research.ts +151 -0
  40. package/src/examples/save-research-output.ts +133 -0
  41. package/src/examples/test-file-logging.ts +199 -0
  42. package/src/examples/test-real-research.ts +67 -0
  43. package/src/examples/test-renewable-energy-research.ts +229 -0
  44. package/src/index.ts +28 -0
  45. package/src/integrations/cache.ts +128 -0
  46. package/src/integrations/content-extractors/firecrawl.ts +314 -0
  47. package/src/integrations/content-extractors/pdf-extractor.ts +350 -0
  48. package/src/integrations/content-extractors/playwright.ts +420 -0
  49. package/src/integrations/factory.ts +419 -0
  50. package/src/integrations/index.ts +18 -0
  51. package/src/integrations/rate-limiter.ts +181 -0
  52. package/src/integrations/search-providers/academic.ts +290 -0
  53. package/src/integrations/search-providers/exa.ts +205 -0
  54. package/src/integrations/search-providers/npm.ts +330 -0
  55. package/src/integrations/search-providers/pypi.ts +211 -0
  56. package/src/integrations/search-providers/serpapi.ts +277 -0
  57. package/src/integrations/search-providers/serper.ts +358 -0
  58. package/src/integrations/search-providers/stagehand-google.ts +87 -0
  59. package/src/integrations/search-providers/tavily.ts +187 -0
  60. package/src/processing/relevance-analyzer.ts +353 -0
  61. package/src/processing/research-logger.ts +450 -0
  62. package/src/processing/result-processor.ts +372 -0
  63. package/src/prompts/research-prompts.ts +419 -0
  64. package/src/providers/cacheProvider.ts +164 -0
  65. package/src/providers.ts +173 -0
  66. package/src/service.ts +2588 -0
  67. package/src/services/swe-bench.ts +286 -0
  68. package/src/strategies/research-strategies.ts +790 -0
  69. package/src/types/pdf-parse.d.ts +34 -0
  70. package/src/types.ts +551 -0
  71. package/src/verification/claim-verifier.ts +443 -0
@@ -0,0 +1,290 @@
1
+ import { elizaLogger } from '@elizaos/core';
2
+ import axios from 'axios';
3
+ import { SearchResult, SourceType } from '../../types';
4
+
5
/**
 * Optional settings for {@link AcademicSearchProvider}.
 *
 * Every field may be omitted; the provider's constructor supplies defaults
 * for `timeout` and `useProxy`.
 */
export interface AcademicSearchConfig {
  /**
   * Per-request timeout handed to axios for each backend call.
   * The provider's constructor defaults this to 30000.
   */
  timeout?: number;

  /**
   * Proxy toggle. The provider's constructor defaults it to `false`.
   * NOTE(review): no code visible in this file reads the flag — confirm
   * whether it is consumed elsewhere.
   */
  useProxy?: boolean;

  /** When set, sent to Semantic Scholar as the `x-api-key` request header. */
  semanticScholarApiKey?: string;
}
10
+
11
/**
 * Searches Semantic Scholar, arXiv and CrossRef in parallel and merges the
 * hits into a single list ordered by descending score.
 *
 * Every backend is best-effort: a backend that fails logs the problem and
 * contributes no results instead of failing the whole search.
 */
export class AcademicSearchProvider {
  /** Named XML entities understood by {@link decodeXmlEntities}. */
  private static readonly XML_ENTITIES: Record<string, string> = {
    amp: '&',
    lt: '<',
    gt: '>',
    quot: '"',
    apos: "'",
  };

  public readonly name = 'Academic';
  private config: AcademicSearchConfig;

  /**
   * @param config Optional overrides; `timeout` defaults to 30000 and
   *   `useProxy` to `false`.
   */
  constructor(config: AcademicSearchConfig = {}) {
    this.config = {
      timeout: 30000,
      useProxy: false,
      ...config,
    };
  }

  /**
   * Queries all three backends concurrently and returns the best hits.
   *
   * @param query Free-text search query.
   * @param maxResults Upper bound on returned results; each backend is asked
   *   for a third of it (rounded up).
   * @returns At most `maxResults` results sorted by descending `score`.
   *   Never rejects because of a single backend failure.
   */
  async search(query: string, maxResults: number = 20): Promise<SearchResult[]> {
    elizaLogger.info(`[Academic] Searching for: ${query}`);

    // A non-positive (or NaN) limit can never yield results. Bail out before
    // hitting the network, and avoid slice(0, -n) silently dropping the tail.
    if (!(maxResults > 0)) {
      return [];
    }

    const perSource = Math.ceil(maxResults / 3);
    const outcomes = await Promise.allSettled([
      this.searchSemanticScholar(query, perSource),
      this.searchArxiv(query, perSource),
      this.searchCrossRef(query, perSource),
    ]);

    const results: SearchResult[] = [];
    for (const outcome of outcomes) {
      if (outcome.status === 'fulfilled') {
        results.push(...outcome.value);
      } else {
        elizaLogger.warn(`[Academic] Search failed:`, outcome.reason);
      }
    }

    return results.sort((a, b) => b.score - a.score).slice(0, maxResults);
  }

  /**
   * Searches the Semantic Scholar Graph API.
   *
   * When an API key is configured and the keyed request is rate limited
   * (HTTP 429), the request is repeated once without the key. Any remaining
   * 4xx response, or any thrown error, yields an empty list.
   */
  private async searchSemanticScholar(query: string, limit: number): Promise<SearchResult[]> {
    try {
      const url = 'https://api.semanticscholar.org/graph/v1/paper/search';
      const apiKey = this.config.semanticScholarApiKey;
      const publicHeaders: Record<string, string> = {
        'User-Agent': 'Mozilla/5.0 (compatible; ElizaOS/1.0)',
      };
      const request = {
        params: {
          query,
          limit,
          fields: 'paperId,title,abstract,authors,year,citationCount,url,venue,publicationDate',
        },
        timeout: this.config.timeout,
        // 4xx responses are inspected below instead of being thrown.
        validateStatus: (status: number) => status < 500,
      };

      let response = await axios.get(url, {
        ...request,
        headers: apiKey ? { ...publicHeaders, 'x-api-key': apiKey } : publicHeaders,
      });

      // Retrying only makes sense when there is a key to drop; without one
      // the retry would be the identical request that was just rejected.
      if (response.status === 429 && apiKey) {
        elizaLogger.warn('[Semantic Scholar] Rate limited, falling back to public rate');
        response = await axios.get(url, { ...request, headers: publicHeaders });
      }

      if (response.status >= 400) {
        elizaLogger.warn(`[Semantic Scholar] HTTP ${response.status}: ${response.statusText}`);
        return [];
      }

      const papers: any[] = Array.isArray(response.data?.data) ? response.data.data : [];
      const results: SearchResult[] = papers.map((paper: any) => ({
        title: paper.title || 'Untitled',
        // Fall back to the paper page on the website, not the API host.
        url: paper.url || `https://www.semanticscholar.org/paper/${paper.paperId}`,
        snippet: paper.abstract || 'No abstract available',
        score: this.calculateRelevanceScore(paper, query),
        provider: 'semantic-scholar',
        metadata: {
          type: 'academic',
          language: 'en',
          domain: 'semanticscholar.org',
          author: paper.authors?.map((a: any) => a.name),
          publishDate: paper.publicationDate,
          citationCount: paper.citationCount,
          venue: paper.venue,
          paperId: paper.paperId,
        } as any,
      }));

      elizaLogger.info(`[Semantic Scholar] Found ${results.length} results`);
      return results;
    } catch (error) {
      elizaLogger.error('[Semantic Scholar] Search error:', error);
      return [];
    }
  }

  /**
   * Searches the arXiv Atom API and extracts entries with lightweight regex
   * parsing. Entries lacking a title or id are skipped; any error yields an
   * empty list.
   */
  private async searchArxiv(query: string, limit: number): Promise<SearchResult[]> {
    try {
      // HTTPS keeps the query and response from travelling in clear text.
      const url = 'https://export.arxiv.org/api/query';
      const params = {
        search_query: `all:${query}`,
        start: 0,
        max_results: limit,
        sortBy: 'relevance',
        sortOrder: 'descending',
      };

      const response = await axios.get(url, {
        params,
        timeout: this.config.timeout,
      });

      // Only a string body can be scanned as XML; anything else has no entries.
      const xml: string = typeof response.data === 'string' ? response.data : '';
      const entries = xml.match(/<entry>([\s\S]*?)<\/entry>/g) ?? [];
      const results: SearchResult[] = [];

      for (const entry of entries) {
        const title = this.extractXmlValue(entry, 'title');
        const summary = this.extractXmlValue(entry, 'summary');
        const id = this.extractXmlValue(entry, 'id');
        const published = this.extractXmlValue(entry, 'published');
        const authors = this.extractXmlAuthors(entry);

        if (title && id) {
          results.push({
            // Feed text is hard-wrapped; fold the line breaks back into spaces.
            title: AcademicSearchProvider.collapseWhitespace(title),
            url: id,
            snippet:
              (summary && AcademicSearchProvider.collapseWhitespace(summary)) ||
              'No summary available',
            score: 0.85, // arXiv is highly reliable
            provider: 'arxiv',
            metadata: {
              type: 'academic',
              language: 'en',
              domain: 'arxiv.org',
              author: authors,
              publishDate: published,
              arxivId: id.split('/').pop(),
            } as any,
          });
        }
      }

      elizaLogger.info(`[arXiv] Found ${results.length} results`);
      return results;
    } catch (error) {
      elizaLogger.error('[arXiv] Search error:', error);
      return [];
    }
  }

  /**
   * Searches the CrossRef works endpoint. 4xx responses and thrown errors
   * yield an empty list.
   */
  private async searchCrossRef(query: string, limit: number): Promise<SearchResult[]> {
    try {
      // CrossRef requires more specific queries, so enhance simple queries
      const enhancedQuery = query.length < 5 ? `${query} research paper` : query;

      const url = 'https://api.crossref.org/works';
      const params = {
        query: enhancedQuery,
        rows: limit,
        select: 'DOI,title,author,published-print,abstract,container-title,URL,cited-by-count',
      };

      const response = await axios.get(url, {
        params,
        headers: {
          'User-Agent': 'ElizaOS/1.0 (mailto:research@eliza.ai)',
        },
        timeout: this.config.timeout,
        validateStatus: (status: number) => status < 500, // Don't throw on 4xx
      });

      if (response.status >= 400) {
        elizaLogger.warn(`[CrossRef] HTTP ${response.status}: Query too short or invalid`);
        return [];
      }

      const rawItems = response.data?.message?.items;
      const items: any[] = Array.isArray(rawItems) ? rawItems : [];
      const results: SearchResult[] = [];

      for (const item of items) {
        const title = Array.isArray(item.title) ? item.title[0] : item.title;
        // Abstracts arrive with markup; strip the tags and tidy what is left.
        const abstract =
          typeof item.abstract === 'string'
            ? AcademicSearchProvider.collapseWhitespace(item.abstract.replace(/<[^>]*>/g, ' '))
            : undefined;
        const authors = Array.isArray(item.author)
          ? item.author
              .map((a: any) => AcademicSearchProvider.formatCrossRefAuthor(a))
              .filter((name: string) => name.length > 0)
          : undefined;

        results.push({
          title: title || 'Untitled',
          url: item.URL || `https://doi.org/${item.DOI}`,
          snippet: abstract || 'No abstract available',
          score: this.calculateCrossRefScore(item, query),
          provider: 'crossref',
          metadata: {
            type: 'academic',
            language: 'en',
            domain: 'crossref.org',
            doi: item.DOI,
            author: authors,
            publishDate: AcademicSearchProvider.formatDateParts(
              item['published-print']?.['date-parts']?.[0]
            ),
            citationCount: item['cited-by-count'],
            journal: item['container-title']?.[0],
          } as any,
        });
      }

      elizaLogger.info(`[CrossRef] Found ${results.length} results`);
      return results;
    } catch (error: any) {
      if (axios.isAxiosError(error)) {
        elizaLogger.error(`[CrossRef] API error: ${error.message}`, {
          status: error.response?.status,
          statusText: error.response?.statusText,
          data: error.response?.data,
        });
      } else {
        elizaLogger.error('[CrossRef] Search error:', error.message || error);
      }
      return [];
    }
  }

  /**
   * Scores a Semantic Scholar paper: 0.7 base, plus boosts for citation
   * count, recency (published within the last two years) and the share of
   * query terms found in the title. Capped at 1.0.
   */
  private calculateRelevanceScore(paper: any, query: string): number {
    let score = 0.7; // Base score for academic papers

    // Boost for citation count
    if (paper.citationCount > 100) score += 0.1;
    else if (paper.citationCount > 50) score += 0.05;

    // Boost for recent papers
    if (paper.year && paper.year >= new Date().getFullYear() - 2) score += 0.05;

    // Boost for title match
    score += AcademicSearchProvider.titleMatchRatio(paper.title, query) * 0.1;

    return Math.min(score, 1.0);
  }

  /**
   * Scores a CrossRef item: 0.65 base, plus boosts for more than 50
   * citations, having an abstract, and the share of query terms found in the
   * title. Capped at 1.0.
   */
  private calculateCrossRefScore(item: any, query: string): number {
    let score = 0.65; // Base score

    if (item['cited-by-count'] > 50) score += 0.1;
    if (item.abstract) score += 0.1;

    // Title relevance
    score += AcademicSearchProvider.titleMatchRatio(item.title, query) * 0.15;

    return Math.min(score, 1.0);
  }

  /**
   * Returns the trimmed, entity-decoded text of the first `<tag>` element in
   * `xml`, or `undefined` when the element is absent.
   */
  private extractXmlValue(xml: string, tag: string): string | undefined {
    const safeTag = tag.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    // The opening tag must end right after the name or continue with
    // whitespace + attributes, so `id` cannot match `<idx>`.
    const regex = new RegExp(`<${safeTag}(?:\\s[^>]*)?>([\\s\\S]*?)<\\/${safeTag}>`, 'i');
    const match = xml.match(regex);
    return match ? AcademicSearchProvider.decodeXmlEntities(match[1].trim()) : undefined;
  }

  /** Collects the `<name>` of every `<author>` element found in `xml`. */
  private extractXmlAuthors(xml: string): string[] {
    const authorBlocks = xml.match(/<author>[\s\S]*?<\/author>/g) ?? [];
    const authors: string[] = [];

    for (const authorXml of authorBlocks) {
      const name = this.extractXmlValue(authorXml, 'name');
      if (name) authors.push(name);
    }

    return authors;
  }

  /**
   * Fraction (0..1) of whitespace-separated query terms that occur in the
   * title. Accepts a plain string or an array whose first element is the
   * title. Empty terms are ignored, so an empty query scores 0 rather than
   * matching everything.
   */
  private static titleMatchRatio(title: unknown, query: string): number {
    const terms = query.toLowerCase().split(/\s+/).filter(Boolean);
    if (terms.length === 0) return 0;

    const rawTitle: unknown = Array.isArray(title) ? title[0] : title;
    const titleLower = typeof rawTitle === 'string' ? rawTitle.toLowerCase() : '';
    const matched = terms.filter((term) => titleLower.includes(term)).length;
    return matched / terms.length;
  }

  /**
   * Builds a display name from a CrossRef author record, using whichever of
   * `given`/`family` exist and falling back to `name`. Returns '' when the
   * record carries no usable name.
   */
  private static formatCrossRefAuthor(author: any): string {
    const personal = [author?.given, author?.family]
      .filter((part: unknown) => typeof part === 'string' && part.trim().length > 0)
      .join(' ');
    if (personal) return personal;
    return typeof author?.name === 'string' ? author.name : '';
  }

  /**
   * Renders a `[year, month?, day?]` array as `YYYY`, `YYYY-MM` or
   * `YYYY-MM-DD` with zero-padded month/day. Returns `undefined` when the
   * year is missing.
   */
  private static formatDateParts(parts: unknown): string | undefined {
    if (!Array.isArray(parts) || parts.length === 0 || parts[0] == null) {
      return undefined;
    }
    const [year, ...rest] = parts;
    const padded = rest
      .filter((part: unknown) => part != null)
      .map((part: unknown) => String(part).padStart(2, '0'));
    return [String(year), ...padded].join('-');
  }

  /** Replaces every run of whitespace with one space and trims the ends. */
  private static collapseWhitespace(text: string): string {
    return text.replace(/\s+/g, ' ').trim();
  }

  /**
   * Decodes the five predefined XML entities plus numeric character
   * references in a single pass, so `&amp;lt;` becomes `&lt;`, not `<`.
   * Unrecognised or out-of-range references are left untouched.
   */
  private static decodeXmlEntities(text: string): string {
    return text.replace(
      /&(#x[0-9a-f]+|#\d+|amp|lt|gt|quot|apos);/gi,
      (whole: string, entity: string): string => {
        const key = entity.toLowerCase();
        if (key.startsWith('#')) {
          const codePoint = key.startsWith('#x')
            ? parseInt(key.slice(2), 16)
            : parseInt(key.slice(1), 10);
          return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : whole;
        }
        return AcademicSearchProvider.XML_ENTITIES[key] ?? whole;
      }
    );
  }
}
@@ -0,0 +1,205 @@
1
+ import axios from 'axios';
2
+ import { SearchResult } from '../../types';
3
+ import { elizaLogger } from '@elizaos/core';
4
+
5
/**
 * Settings for {@link ExaSearchProvider}.
 *
 * Only `apiKey` is mandatory; the provider's constructor throws when it is
 * empty.
 */
export interface ExaConfig {
  /** Exa API key, sent as the `x-api-key` header on every request. */
  apiKey: string;

  /** Result language tag; the provider's constructor defaults it to `'en'`. */
  language?: string;

  /** Exa content category, for example `'research paper'`. */
  category?: string;

  /** Exa search mode; the provider's constructor defaults it to `'auto'`. */
  searchType?: 'neural' | 'keyword' | 'auto';
}
11
+
12
/**
 * Thin client for the Exa search API (`/search` and `/findSimilar`).
 *
 * Unlike a best-effort provider, every method here rethrows request
 * failures after logging them, so callers must handle rejections.
 */
export class ExaSearchProvider {
  /** Upper bound on how long a single Exa request may take, in milliseconds. */
  private static readonly REQUEST_TIMEOUT_MS = 30000;

  private readonly apiKey: string;
  private readonly baseUrl = 'https://api.exa.ai';
  private readonly config: ExaConfig;
  public readonly name = 'Exa';

  /**
   * @param config Provider settings; `searchType` defaults to `'auto'` and
   *   `language` to `'en'`.
   * @throws Error when `config.apiKey` is empty.
   */
  constructor(config: ExaConfig) {
    if (!config.apiKey) {
      throw new Error('Exa API key is required');
    }
    this.apiKey = config.apiKey;
    this.config = {
      searchType: 'auto',
      language: 'en',
      ...config,
    };
  }

  /**
   * Runs a general web search.
   *
   * @param query Free-text query.
   * @param maxResults Number of results to request (10 when omitted or 0).
   * @returns Mapped results, or `[]` when the response carries none.
   * @throws Error `'Invalid Exa API key'` on HTTP 401, `'Exa API rate limit
   *   exceeded'` on HTTP 429; any other failure is rethrown unchanged.
   */
  async search(query: string, maxResults?: number): Promise<SearchResult[]> {
    try {
      elizaLogger.info(`[Exa] Searching for: ${query}`);

      const data = await this.post('/search', {
        query,
        type: this.config.searchType || 'auto',
        // Honour the configured category instead of silently ignoring it.
        ...(this.config.category ? { category: this.config.category } : {}),
        numResults: maxResults || 10,
        text: true, // Get text content
        summary: {
          query: 'Key points and main findings',
        },
        highlights: {
          numSentences: 3,
          highlightsPerUrl: 2,
        },
      });

      if (!data || !data.results) {
        elizaLogger.warn('[Exa] No results found');
        return [];
      }

      const results: SearchResult[] = data.results.map((result: any, index: number) => ({
        title: result.title || 'Untitled',
        url: result.url,
        snippet: result.summary || result.text?.substring(0, 200) || 'No description available',
        score: ExaSearchProvider.resolveScore(result.score, index, 0.95, 0.05),
        provider: 'exa',
        metadata: {
          language: this.config.language || 'en',
          type: this.config.searchType || 'auto',
          resolvedSearchType: data.resolvedSearchType,
          author: result.author,
          publishedDate: result.publishedDate,
          highlights: result.highlights,
          image: result.image,
          favicon: result.favicon,
        },
      }));

      elizaLogger.info(`[Exa] Found ${results.length} results`);
      return results;
    } catch (error: any) {
      if (error.response) {
        elizaLogger.error(`[Exa] API error: ${error.response.status} - ${JSON.stringify(error.response.data)}`);
        if (error.response.status === 401) {
          throw new Error('Invalid Exa API key');
        }
        if (error.response.status === 429) {
          throw new Error('Exa API rate limit exceeded');
        }
      } else {
        elizaLogger.error(`[Exa] Search error:`, error);
      }
      throw error;
    }
  }

  /**
   * Runs a neural search restricted to the `'research paper'` category.
   *
   * @param query Free-text query.
   * @param maxResults Number of results to request (10 when omitted or 0).
   * @returns Mapped results, or `[]` when the response carries none.
   * @throws Rethrows any request failure after logging it.
   */
  async searchAcademic(query: string, maxResults?: number): Promise<SearchResult[]> {
    try {
      elizaLogger.info(`[Exa] Searching academic papers for: ${query}`);

      const data = await this.post('/search', {
        query,
        type: 'neural', // Neural search works better for academic content
        category: 'research paper',
        numResults: maxResults || 10,
        text: true,
        summary: {
          query: 'Main contributions and findings',
        },
        highlights: {
          numSentences: 5,
          highlightsPerUrl: 3,
        },
      });

      if (!data || !data.results) {
        elizaLogger.warn('[Exa] No academic results found');
        return [];
      }

      const results: SearchResult[] = data.results.map((result: any, index: number) => ({
        title: result.title || 'Untitled',
        url: result.url,
        snippet: result.summary || result.text?.substring(0, 300) || 'No abstract available',
        score: ExaSearchProvider.resolveScore(result.score, index, 0.95, 0.03),
        provider: 'exa',
        metadata: {
          language: 'en',
          type: 'research_paper',
          category: 'research paper',
          author: result.author,
          publishedDate: result.publishedDate,
          highlights: result.highlights,
          image: result.image,
        },
      }));

      elizaLogger.info(`[Exa] Found ${results.length} academic results`);
      return results;
    } catch (error: any) {
      elizaLogger.error(`[Exa] Academic search error:`, error);
      throw error;
    }
  }

  /**
   * Finds pages similar to the given URL.
   *
   * @param url Page to use as the similarity seed.
   * @param maxResults Number of results to request (10 when omitted or 0).
   * @returns Mapped results, or `[]` when the response carries none.
   * @throws Rethrows any request failure after logging it.
   */
  async findSimilar(url: string, maxResults?: number): Promise<SearchResult[]> {
    try {
      elizaLogger.info(`[Exa] Finding similar pages to: ${url}`);

      const data = await this.post('/findSimilar', {
        url,
        numResults: maxResults || 10,
        text: true,
        summary: {
          query: 'Key similarities and main points',
        },
      });

      if (!data || !data.results) {
        elizaLogger.warn('[Exa] No similar results found');
        return [];
      }

      const results: SearchResult[] = data.results.map((result: any, index: number) => ({
        title: result.title || 'Untitled',
        url: result.url,
        snippet: result.summary || result.text?.substring(0, 200) || 'No description available',
        score: ExaSearchProvider.resolveScore(result.score, index, 0.9, 0.05),
        provider: 'exa',
        metadata: {
          language: 'en',
          type: 'similar',
          author: result.author,
          publishedDate: result.publishedDate,
          image: result.image,
        },
      }));

      elizaLogger.info(`[Exa] Found ${results.length} similar results`);
      return results;
    } catch (error: any) {
      elizaLogger.error(`[Exa] Find similar error:`, error);
      throw error;
    }
  }

  /**
   * POSTs a JSON body to an Exa endpoint with the auth headers and a bounded
   * timeout, returning the parsed response body.
   */
  private async post(path: string, body: Record<string, unknown>): Promise<any> {
    const response = await axios.post(`${this.baseUrl}${path}`, body, {
      headers: {
        'x-api-key': this.apiKey,
        'Content-Type': 'application/json',
      },
      // Without a timeout a stalled connection would hang the caller forever.
      timeout: ExaSearchProvider.REQUEST_TIMEOUT_MS,
    });
    return response.data;
  }

  /**
   * Picks the score for a result: the API-provided score when it is a
   * finite, non-zero number, otherwise a rank-based fallback of
   * `base - index * step`.
   *
   * A score of 0 is treated as "not provided", matching the previous `||`
   * behaviour. The fallback is clamped at 0 so deep result pages can no
   * longer produce negative scores.
   */
  private static resolveScore(score: unknown, index: number, base: number, step: number): number {
    if (typeof score === 'number' && Number.isFinite(score) && score !== 0) {
      return score;
    }
    return Math.max(0, base - index * step);
  }
}