@elizaos/plugin-research 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +400 -0
- package/dist/index.cjs +9366 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.js +9284 -0
- package/dist/index.js.map +1 -0
- package/package.json +80 -0
- package/src/__tests__/action-chaining.test.ts +532 -0
- package/src/__tests__/actions.test.ts +118 -0
- package/src/__tests__/cache-rate-limiter.test.ts +303 -0
- package/src/__tests__/content-extractors.test.ts +26 -0
- package/src/__tests__/deepresearch-bench-integration.test.ts +520 -0
- package/src/__tests__/deepresearch-bench-simplified.e2e.test.ts +290 -0
- package/src/__tests__/deepresearch-bench.e2e.test.ts +376 -0
- package/src/__tests__/e2e.test.ts +1870 -0
- package/src/__tests__/multi-benchmark-runner.ts +427 -0
- package/src/__tests__/providers.test.ts +156 -0
- package/src/__tests__/real-world.e2e.test.ts +788 -0
- package/src/__tests__/research-scenarios.test.ts +755 -0
- package/src/__tests__/research.e2e.test.ts +704 -0
- package/src/__tests__/research.test.ts +174 -0
- package/src/__tests__/search-providers.test.ts +174 -0
- package/src/__tests__/single-benchmark-runner.ts +735 -0
- package/src/__tests__/test-search-providers.ts +171 -0
- package/src/__tests__/verify-apis.test.ts +82 -0
- package/src/actions.ts +1677 -0
- package/src/benchmark/deepresearch-benchmark.ts +369 -0
- package/src/evaluation/research-evaluator.ts +444 -0
- package/src/examples/api-integration.md +498 -0
- package/src/examples/browserbase-integration.md +132 -0
- package/src/examples/debug-research-query.ts +162 -0
- package/src/examples/defi-code-scenarios.md +536 -0
- package/src/examples/defi-implementation-guide.md +454 -0
- package/src/examples/eliza-research-example.ts +142 -0
- package/src/examples/fix-renewable-energy-research.ts +209 -0
- package/src/examples/research-scenarios.md +408 -0
- package/src/examples/run-complete-renewable-research.ts +303 -0
- package/src/examples/run-deep-research.ts +352 -0
- package/src/examples/run-logged-research.ts +304 -0
- package/src/examples/run-real-research.ts +151 -0
- package/src/examples/save-research-output.ts +133 -0
- package/src/examples/test-file-logging.ts +199 -0
- package/src/examples/test-real-research.ts +67 -0
- package/src/examples/test-renewable-energy-research.ts +229 -0
- package/src/index.ts +28 -0
- package/src/integrations/cache.ts +128 -0
- package/src/integrations/content-extractors/firecrawl.ts +314 -0
- package/src/integrations/content-extractors/pdf-extractor.ts +350 -0
- package/src/integrations/content-extractors/playwright.ts +420 -0
- package/src/integrations/factory.ts +419 -0
- package/src/integrations/index.ts +18 -0
- package/src/integrations/rate-limiter.ts +181 -0
- package/src/integrations/search-providers/academic.ts +290 -0
- package/src/integrations/search-providers/exa.ts +205 -0
- package/src/integrations/search-providers/npm.ts +330 -0
- package/src/integrations/search-providers/pypi.ts +211 -0
- package/src/integrations/search-providers/serpapi.ts +277 -0
- package/src/integrations/search-providers/serper.ts +358 -0
- package/src/integrations/search-providers/stagehand-google.ts +87 -0
- package/src/integrations/search-providers/tavily.ts +187 -0
- package/src/processing/relevance-analyzer.ts +353 -0
- package/src/processing/research-logger.ts +450 -0
- package/src/processing/result-processor.ts +372 -0
- package/src/prompts/research-prompts.ts +419 -0
- package/src/providers/cacheProvider.ts +164 -0
- package/src/providers.ts +173 -0
- package/src/service.ts +2588 -0
- package/src/services/swe-bench.ts +286 -0
- package/src/strategies/research-strategies.ts +790 -0
- package/src/types/pdf-parse.d.ts +34 -0
- package/src/types.ts +551 -0
- package/src/verification/claim-verifier.ts +443 -0
|
@@ -0,0 +1,286 @@
|
|
|
1
|
+
import { elizaLogger } from '@elizaos/core';
|
|
2
|
+
import { ResearchService } from '../service';
|
|
3
|
+
import { ResearchConfig, ResearchDepth, TaskType, ResearchStatus } from '../types';
|
|
4
|
+
import * as fs from 'fs/promises';
|
|
5
|
+
import * as path from 'path';
|
|
6
|
+
|
|
7
|
+
/**
 * Description of one SWE-bench style task that can be registered with and
 * executed by `SWEBenchService`.
 */
export interface SWEBenchTask {
  /** Identifier of the task; the service stores and looks tasks up by this value. */
  id: string;

  /** Repository the task relates to (the built-in tasks use the `owner/name` form). */
  repository: string;

  /** Free-text statement of the problem; becomes the first part of the research query. */
  description: string;

  /** Paths of files related to the task; listed in the research query when non-empty. */
  files: string[];

  /** What should hold once the task is solved; appended to the research query. */
  expectedBehavior: string;

  /** Optional test command; the service only runs its test step when this is set. */
  testCommand?: string;

  /** Kind of work the task represents. */
  category:
    | 'bug_fix'
    | 'feature'
    | 'refactor'
    | 'documentation';

  /** Difficulty rating, used by the service to choose research depth and limits. */
  difficulty:
    | 'easy'
    | 'medium'
    | 'hard';
}
|
|
17
|
+
|
|
18
|
+
/**
 * Outcome recorded by `SWEBenchService` after executing one task.
 */
export interface SWEBenchResult {
  /** Identifier of the task this result belongs to. */
  taskId: string;

  /** Research project object returned for the task (shape is defined by the research service). */
  research: any;

  /** Generated implementation text; absent when no implementation step was run. */
  implementation?: string;

  /** Test outcome; absent when the task had no test command. */
  testPassed?: boolean;

  /** Elapsed time of the task execution, measured with `Date.now()` (milliseconds). */
  duration: number;

  /** Tokens consumed while executing the task. */
  tokenUsage: number;
}
|
|
26
|
+
|
|
27
|
+
/**
 * Runs SWE-bench style tasks through the research service and keeps the
 * per-task results in memory.
 *
 * Typical use: `loadTasks()` → `executeTask(id)` for each task →
 * `evaluatePerformance()`.
 */
export class SWEBenchService {
  /** Delay between two status polls of a running research project. */
  private static readonly POLL_INTERVAL_MS = 2000;

  /**
   * Upper bound on how long a single research project is polled before the
   * task is failed, so a project that never leaves the ACTIVE state cannot
   * hang the benchmark forever.
   */
  private static readonly MAX_RESEARCH_WAIT_MS = 30 * 60 * 1000;

  /** Registered tasks, keyed by task id. */
  private tasks: Map<string, SWEBenchTask> = new Map();

  /** Results of executed tasks, keyed by task id (a re-run overwrites the previous result). */
  private results: Map<string, SWEBenchResult> = new Map();

  /**
   * @param runtime Agent runtime handle; stored but not used by this class itself.
   * @param researchService Service used to create and poll research projects.
   */
  constructor(
    private runtime: any,
    private researchService: ResearchService
  ) {}

  /**
   * Load SWE-bench TypeScript tasks from a JSON file.
   *
   * The file must contain a JSON array of task objects, each with a string
   * `id`. On any failure (missing file, unreadable file, malformed content)
   * a warning is logged and the built-in default tasks are registered
   * instead; this method does not throw for those cases.
   *
   * @param tasksPath Optional path of the tasks file. When omitted (or empty),
   *   `data/swe-bench-tasks.json` two directories above this module is used.
   */
  async loadTasks(tasksPath?: string): Promise<void> {
    const filePath = this.resolveTasksPath(tasksPath);

    try {
      if (filePath === undefined) {
        throw new Error('default tasks path is unavailable because __dirname is not defined');
      }

      const data = await fs.readFile(filePath, 'utf-8');
      // Validate everything before touching `this.tasks`, so a bad entry in
      // the middle of the file cannot leave a half-loaded task set behind.
      const tasks = SWEBenchService.parseTasks(data);

      for (const task of tasks) {
        this.tasks.set(task.id, task);
      }

      elizaLogger.info(`[SWEBench] Loaded ${tasks.length} tasks`);
    } catch (error) {
      if (SWEBenchService.isFileNotFound(error)) {
        elizaLogger.warn('[SWEBench] No tasks file found, using default tasks');
      } else {
        // Report the real cause instead of claiming the file is missing.
        const reason = error instanceof Error ? error.message : String(error);
        elizaLogger.warn(`[SWEBench] Could not load tasks (${reason}), using default tasks`);
      }
      // Load some default TypeScript-focused tasks
      this.loadDefaultTasks();
    }
  }

  /**
   * Pick the tasks file to read: the caller's path when given, otherwise the
   * bundled default location. Returns `undefined` when no path was given and
   * `__dirname` does not exist (ES-module builds), because the default
   * location cannot be derived there.
   */
  private resolveTasksPath(tasksPath?: string): string | undefined {
    if (tasksPath) {
      return tasksPath;
    }
    if (typeof __dirname === 'undefined') {
      return undefined;
    }
    return path.join(__dirname, '../../data/swe-bench-tasks.json');
  }

  /**
   * Parse the raw tasks file and check its overall shape.
   *
   * @throws Error when the text is not valid JSON, is not an array, or
   *   contains an entry without a string `id`.
   */
  private static parseTasks(raw: string): SWEBenchTask[] {
    const parsed: unknown = JSON.parse(raw);
    if (!Array.isArray(parsed)) {
      throw new Error('tasks file must contain a JSON array');
    }

    parsed.forEach((entry: unknown, index: number) => {
      const id =
        typeof entry === 'object' && entry !== null
          ? (entry as { id?: unknown }).id
          : undefined;
      if (typeof id !== 'string') {
        throw new Error(`task at index ${index} has no string "id"`);
      }
    });

    return parsed as SWEBenchTask[];
  }

  /** True when `error` carries the `ENOENT` code that Node sets for a missing file. */
  private static isFileNotFound(error: unknown): boolean {
    return (
      typeof error === 'object' &&
      error !== null &&
      (error as { code?: unknown }).code === 'ENOENT'
    );
  }

  /**
   * Execute a SWE-bench task: research it, produce an implementation (except
   * for documentation tasks), run its tests when a test command exists, and
   * store the result.
   *
   * @param taskId Id of a previously loaded task.
   * @returns The recorded result for the task.
   * @throws Error when the task id is unknown; errors from the individual
   *   steps are logged and re-thrown.
   */
  async executeTask(taskId: string): Promise<SWEBenchResult> {
    const startTime = Date.now();
    const task = this.tasks.get(taskId);

    if (!task) {
      throw new Error(`Task ${taskId} not found`);
    }

    elizaLogger.info(`[SWEBench] Executing task: ${taskId}`);

    try {
      // Step 1: Research the problem
      const research = await this.researchForTask(task);

      // Step 2: Generate implementation approach (optional)
      let implementation: string | undefined;
      if (task.category !== 'documentation') {
        implementation = await this.generateImplementation(task, research);
      }

      // Step 3: Test if possible (simplified for now)
      const testPassed = task.testCommand ? await this.runTests(task) : undefined;

      const result: SWEBenchResult = {
        taskId,
        research,
        implementation,
        testPassed,
        duration: Date.now() - startTime,
        tokenUsage: 0 // TODO: Track actual usage
      };

      this.results.set(taskId, result);
      return result;
    } catch (error) {
      elizaLogger.error(`[SWEBench] Task ${taskId} failed:`, error);
      throw error;
    }
  }

  /**
   * Create a research project for the task and poll it until it is no longer
   * ACTIVE.
   *
   * @returns The most recent project object seen when polling stopped.
   * @throws Error when the project is still ACTIVE after
   *   `MAX_RESEARCH_WAIT_MS`.
   */
  private async researchForTask(task: SWEBenchTask): Promise<any> {
    // Build a research query based on the task
    const query = this.buildResearchQuery(task);

    // Configure research based on task difficulty
    const config: Partial<ResearchConfig> = {
      researchDepth: this.getDepthForDifficulty(task.difficulty),
      maxDepth: task.difficulty === 'hard' ? 3 : 1,
      maxSearchResults: task.difficulty === 'hard' ? 30 : 20
    };

    // Start research project
    const project = await this.researchService.createResearchProject(query, config);

    // Wait for completion, but never longer than the configured deadline.
    const projectId = project.id;
    const deadline = Date.now() + SWEBenchService.MAX_RESEARCH_WAIT_MS;
    let currentProject = project;
    do {
      if (Date.now() >= deadline) {
        throw new Error(
          `Research project ${projectId} did not finish within ${SWEBenchService.MAX_RESEARCH_WAIT_MS} ms`
        );
      }

      await new Promise<void>(resolve => setTimeout(resolve, SWEBenchService.POLL_INTERVAL_MS));
      const updated = await this.researchService.getProject(projectId);
      // Keep the last known state when the lookup returns nothing.
      if (updated) {
        currentProject = updated;
      }
    } while (currentProject.status === ResearchStatus.ACTIVE);

    // Return the final project
    return currentProject;
  }

  /**
   * Build the research query text from the task: description, repository,
   * related files (only when there are any) and expected behavior, joined
   * with ". ".
   */
  private buildResearchQuery(task: SWEBenchTask): string {
    const parts = [
      task.description,
      `Repository: ${task.repository}`,
      task.files.length > 0 ? `Related files: ${task.files.join(', ')}` : '',
      `Expected: ${task.expectedBehavior}`
    ].filter(Boolean);

    return parts.join('. ');
  }

  /**
   * Generate implementation based on research.
   *
   * Placeholder: returns a fixed comment stub that only mentions the task id;
   * the `research` argument is not consulted yet.
   */
  private async generateImplementation(task: SWEBenchTask, research: any): Promise<string> {
    // Simplified implementation generation
    // In a real system, this would use the research to generate actual code
    return `// Implementation for ${task.id}\n// Based on research findings\n// TODO: Actual implementation`;
  }

  /**
   * Run tests for a task (simplified).
   *
   * NOTE: this does NOT execute `task.testCommand`. It returns a random mock
   * outcome, so `testPassed` values and the pass rate derived from them are
   * simulated and differ between runs.
   */
  private async runTests(task: SWEBenchTask): Promise<boolean> {
    // In a real implementation, this would execute the test command
    // For now, return a mock result
    return Math.random() > 0.3; // 70% pass rate
  }

  /**
   * Map a task difficulty to a research depth; unknown values fall back to
   * MODERATE.
   */
  private getDepthForDifficulty(difficulty: string): ResearchDepth {
    switch (difficulty) {
      case 'easy': return ResearchDepth.SURFACE;
      case 'medium': return ResearchDepth.MODERATE;
      case 'hard': return ResearchDepth.DEEP;
      default: return ResearchDepth.MODERATE;
    }
  }

  /**
   * Map a task category to a research task type; unknown values fall back to
   * EXPLORATORY. Not referenced by the other methods of this class.
   */
  private getTaskTypeForCategory(category: string): TaskType {
    switch (category) {
      case 'bug_fix': return TaskType.ANALYTICAL;
      case 'feature': return TaskType.EXPLORATORY;
      case 'refactor': return TaskType.EVALUATIVE;
      case 'documentation': return TaskType.SYNTHETIC;
      default: return TaskType.EXPLORATORY;
    }
  }

  /**
   * Register the built-in TypeScript-focused tasks. Entries already present
   * under the same id are overwritten.
   */
  private loadDefaultTasks(): void {
    const defaultTasks: SWEBenchTask[] = [
      {
        id: 'ts-express-middleware',
        repository: 'expressjs/express',
        description: 'Research how to implement custom TypeScript middleware in Express with proper type safety',
        files: ['lib/router/index.js', 'lib/middleware/init.js'],
        expectedBehavior: 'Understand middleware typing patterns and best practices',
        category: 'feature',
        difficulty: 'medium'
      },
      {
        id: 'ts-typeorm-relations',
        repository: 'typeorm/typeorm',
        description: 'Research TypeORM many-to-many relations with custom join table properties',
        files: ['src/decorator/relations/ManyToMany.ts', 'src/metadata/RelationMetadata.ts'],
        expectedBehavior: 'Understand how to implement complex relations with TypeORM',
        category: 'feature',
        difficulty: 'hard'
      },
      {
        id: 'ts-zod-validation',
        repository: 'colinhacks/zod',
        description: 'Research how Zod implements recursive schema validation',
        files: ['src/types.ts', 'src/ZodError.ts'],
        expectedBehavior: 'Understand Zod\'s validation architecture',
        category: 'bug_fix',
        difficulty: 'medium'
      },
      {
        id: 'ts-prisma-migrations',
        repository: 'prisma/prisma',
        description: 'Research Prisma migration system and how it handles schema changes',
        files: ['packages/migrate/src/commands/MigrateDev.ts'],
        expectedBehavior: 'Understand Prisma\'s migration strategy',
        category: 'refactor',
        difficulty: 'hard'
      },
      {
        id: 'ts-async-patterns',
        repository: 'nodejs/node',
        description: 'Research best practices for async/await error handling in Node.js',
        files: ['lib/async_hooks.js', 'lib/internal/async_hooks.js'],
        expectedBehavior: 'Document async error handling patterns',
        category: 'documentation',
        difficulty: 'easy'
      }
    ];

    for (const task of defaultTasks) {
      this.tasks.set(task.id, task);
    }
  }

  /**
   * Get all available tasks, in registration order.
   */
  getTasks(): SWEBenchTask[] {
    return Array.from(this.tasks.values());
  }

  /**
   * Get the stored result for a task, or `undefined` when it has not been
   * executed successfully yet.
   */
  getResult(taskId: string): SWEBenchResult | undefined {
    return this.results.get(taskId);
  }

  /**
   * Summarize the results recorded so far.
   *
   * `passRate` is the share of recorded results whose `testPassed` is exactly
   * `true`; results without a test outcome count in the denominator. All
   * averages and the pass rate are 0 when no result has been recorded.
   */
  evaluatePerformance(): {
    totalTasks: number;
    completedTasks: number;
    passRate: number;
    avgDuration: number;
    avgTokenUsage: number;
  } {
    const results = Array.from(this.results.values());
    const passed = results.filter(r => r.testPassed === true).length;
    const totalDuration = results.reduce((sum, r) => sum + r.duration, 0);
    const totalTokens = results.reduce((sum, r) => sum + r.tokenUsage, 0);

    return {
      totalTasks: this.tasks.size,
      completedTasks: results.length,
      passRate: results.length > 0 ? passed / results.length : 0,
      avgDuration: results.length > 0 ? totalDuration / results.length : 0,
      avgTokenUsage: results.length > 0 ? totalTokens / results.length : 0
    };
  }
}
|