@elizaos/plugin-research 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +400 -0
- package/dist/index.cjs +9366 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.js +9284 -0
- package/dist/index.js.map +1 -0
- package/package.json +80 -0
- package/src/__tests__/action-chaining.test.ts +532 -0
- package/src/__tests__/actions.test.ts +118 -0
- package/src/__tests__/cache-rate-limiter.test.ts +303 -0
- package/src/__tests__/content-extractors.test.ts +26 -0
- package/src/__tests__/deepresearch-bench-integration.test.ts +520 -0
- package/src/__tests__/deepresearch-bench-simplified.e2e.test.ts +290 -0
- package/src/__tests__/deepresearch-bench.e2e.test.ts +376 -0
- package/src/__tests__/e2e.test.ts +1870 -0
- package/src/__tests__/multi-benchmark-runner.ts +427 -0
- package/src/__tests__/providers.test.ts +156 -0
- package/src/__tests__/real-world.e2e.test.ts +788 -0
- package/src/__tests__/research-scenarios.test.ts +755 -0
- package/src/__tests__/research.e2e.test.ts +704 -0
- package/src/__tests__/research.test.ts +174 -0
- package/src/__tests__/search-providers.test.ts +174 -0
- package/src/__tests__/single-benchmark-runner.ts +735 -0
- package/src/__tests__/test-search-providers.ts +171 -0
- package/src/__tests__/verify-apis.test.ts +82 -0
- package/src/actions.ts +1677 -0
- package/src/benchmark/deepresearch-benchmark.ts +369 -0
- package/src/evaluation/research-evaluator.ts +444 -0
- package/src/examples/api-integration.md +498 -0
- package/src/examples/browserbase-integration.md +132 -0
- package/src/examples/debug-research-query.ts +162 -0
- package/src/examples/defi-code-scenarios.md +536 -0
- package/src/examples/defi-implementation-guide.md +454 -0
- package/src/examples/eliza-research-example.ts +142 -0
- package/src/examples/fix-renewable-energy-research.ts +209 -0
- package/src/examples/research-scenarios.md +408 -0
- package/src/examples/run-complete-renewable-research.ts +303 -0
- package/src/examples/run-deep-research.ts +352 -0
- package/src/examples/run-logged-research.ts +304 -0
- package/src/examples/run-real-research.ts +151 -0
- package/src/examples/save-research-output.ts +133 -0
- package/src/examples/test-file-logging.ts +199 -0
- package/src/examples/test-real-research.ts +67 -0
- package/src/examples/test-renewable-energy-research.ts +229 -0
- package/src/index.ts +28 -0
- package/src/integrations/cache.ts +128 -0
- package/src/integrations/content-extractors/firecrawl.ts +314 -0
- package/src/integrations/content-extractors/pdf-extractor.ts +350 -0
- package/src/integrations/content-extractors/playwright.ts +420 -0
- package/src/integrations/factory.ts +419 -0
- package/src/integrations/index.ts +18 -0
- package/src/integrations/rate-limiter.ts +181 -0
- package/src/integrations/search-providers/academic.ts +290 -0
- package/src/integrations/search-providers/exa.ts +205 -0
- package/src/integrations/search-providers/npm.ts +330 -0
- package/src/integrations/search-providers/pypi.ts +211 -0
- package/src/integrations/search-providers/serpapi.ts +277 -0
- package/src/integrations/search-providers/serper.ts +358 -0
- package/src/integrations/search-providers/stagehand-google.ts +87 -0
- package/src/integrations/search-providers/tavily.ts +187 -0
- package/src/processing/relevance-analyzer.ts +353 -0
- package/src/processing/research-logger.ts +450 -0
- package/src/processing/result-processor.ts +372 -0
- package/src/prompts/research-prompts.ts +419 -0
- package/src/providers/cacheProvider.ts +164 -0
- package/src/providers.ts +173 -0
- package/src/service.ts +2588 -0
- package/src/services/swe-bench.ts +286 -0
- package/src/strategies/research-strategies.ts +790 -0
- package/src/types/pdf-parse.d.ts +34 -0
- package/src/types.ts +551 -0
- package/src/verification/claim-verifier.ts +443 -0
|
@@ -0,0 +1,704 @@
|
|
|
1
|
+
import { IAgentRuntime, Memory, elizaLogger } from '@elizaos/core';
|
|
2
|
+
import { ResearchService } from '../service';
|
|
3
|
+
import { ResearchProject, ResearchStatus } from '../types';
|
|
4
|
+
|
|
5
|
+
// Test helper to wait for research completion with longer timeout for real operations
|
|
6
|
+
/**
 * Polls the research service until the project reaches a terminal status or
 * the time budget is used up.
 *
 * @param service - Research service used to look the project up.
 * @param projectId - Identifier of the project to watch.
 * @param maxWaitTime - Total time budget in milliseconds (defaults to 5 minutes).
 * @param pollIntervalMs - Delay between status checks in milliseconds (defaults to 5 seconds).
 * @returns `true` only when the project status is `ResearchStatus.COMPLETED`;
 *   `false` when it failed, the project lookup returned nothing, or the budget ran out.
 */
async function waitForResearchCompletion(
  service: ResearchService,
  projectId: string,
  maxWaitTime: number = 300000, // 5 minutes for real web operations
  pollIntervalMs: number = 5000
): Promise<boolean> {
  const deadline = Date.now() + maxWaitTime;

  while (Date.now() < deadline) {
    const project = await service.getProject(projectId);
    if (!project) return false;

    if (project.status === ResearchStatus.COMPLETED) return true;
    if (project.status === ResearchStatus.FAILED) return false;

    // Clamp the sleep to the remaining budget so the helper never overshoots
    // maxWaitTime by a whole poll interval.
    const remaining = deadline - Date.now();
    if (remaining <= 0) break;
    await new Promise<void>(resolve => setTimeout(resolve, Math.min(pollIntervalMs, remaining)));
  }

  return false;
}
|
|
28
|
+
|
|
29
|
+
// Helper to validate research quality
|
|
30
|
+
/**
 * Runs basic sanity checks over a research project and lists what looks wrong.
 *
 * Checks performed: at least one source, at least one finding, more than one
 * source hostname once there are more than two sources, an average finding
 * length of at least 100 characters, and an average relevance of at least 0.3.
 * The two average-based checks are skipped when there are no findings, so an
 * empty project reports only the "nothing collected" issues.
 *
 * @param project - Project whose `sources` and `findings` are inspected.
 * @returns `isValid` (true when no issue was recorded) and the list of issue messages.
 */
function validateResearchQuality(project: ResearchProject): {
  isValid: boolean;
  issues: string[];
} {
  const issues: string[] = [];
  const { sources, findings } = project;

  if (sources.length === 0) {
    issues.push('No sources found');
  }

  if (findings.length === 0) {
    issues.push('No findings collected');
  }

  // Check for diverse sources; unparseable URLs are all grouped under 'unknown'.
  const uniqueDomains = new Set<string>();
  for (const source of sources) {
    try {
      uniqueDomains.add(new URL(source.url).hostname);
    } catch {
      uniqueDomains.add('unknown');
    }
  }

  if (uniqueDomains.size < 2 && sources.length > 2) {
    issues.push('Sources lack diversity (all from same domain)');
  }

  // Averages are only meaningful when there is at least one finding.
  if (findings.length > 0) {
    let totalLength = 0;
    let totalRelevance = 0;
    for (const finding of findings) {
      totalLength += finding.content.length;
      totalRelevance += finding.relevance;
    }

    if (totalLength / findings.length < 100) {
      issues.push('Findings are too short (average < 100 chars)');
    }

    if (totalRelevance / findings.length < 0.3) {
      issues.push('Low average relevance score');
    }
  }

  return {
    isValid: issues.length === 0,
    issues
  };
}
|
|
76
|
+
|
|
77
|
+
// Test 1: Code/Feature Research - Simulating research before building a feature
|
|
78
|
+
/**
 * E2E scenario: research a technical topic the way a developer would before
 * implementing a feature (OAuth2 in Node.js/TypeScript).
 *
 * Creates a research project, logs phase transitions while it runs, waits up
 * to 3 minutes for completion, then inspects the findings for code-like and
 * security-related content. Content heuristics only produce warnings.
 *
 * @param runtime - Agent runtime that provides the `research` service.
 * @throws Error if the service is missing, the research does not complete in
 *   time, or the finished project cannot be retrieved.
 */
export async function testCodeFeatureResearch(runtime: IAgentRuntime): Promise<void> {
  elizaLogger.info('Starting E2E Test: Code/Feature Research');

  const service = runtime.getService<ResearchService>('research');
  if (!service) {
    throw new Error('Research service not available');
  }

  // Research query that a developer would make before implementing a feature
  const featureQuery = 'How to implement OAuth2 authentication in Node.js with TypeScript best practices security 2024';

  const project = await service.createResearchProject(featureQuery, {
    maxSearchResults: 5,
    language: 'en',
    metadata: {
      researchType: 'technical',
      purpose: 'feature_implementation'
    }
  });

  elizaLogger.info(`Created code research project: ${project.id}`);

  // Monitor progress: record how long after the previous transition each phase was first seen.
  let lastUpdate = Date.now();
  const phaseProgress: Record<string, number> = {};

  const checkProgress = setInterval(async () => {
    try {
      const current = await service.getProject(project.id);
      if (!current) return;

      if (phaseProgress[current.phase] === undefined) {
        phaseProgress[current.phase] = Date.now() - lastUpdate;
        elizaLogger.info(`Phase ${current.phase} started after ${phaseProgress[current.phase]}ms`);
        lastUpdate = Date.now();
      }

      if (current.status === ResearchStatus.COMPLETED ||
          current.status === ResearchStatus.FAILED) {
        clearInterval(checkProgress);
      }
    } catch (error) {
      // A failed progress probe must not surface as an unhandled rejection.
      elizaLogger.warn(`Progress check failed: ${error instanceof Error ? error.message : String(error)}`);
    }
  }, 3000);

  // Wait for completion; always stop the monitor, even if waiting throws.
  let completed: boolean;
  try {
    completed = await waitForResearchCompletion(service, project.id, 180000);
  } finally {
    clearInterval(checkProgress);
  }

  if (!completed) {
    throw new Error('Code research did not complete within timeout');
  }

  const finalProject = await service.getProject(project.id);
  if (!finalProject) {
    throw new Error('Could not retrieve completed project');
  }

  // Validate research quality
  const quality = validateResearchQuality(finalProject);
  if (!quality.isValid) {
    elizaLogger.warn(`Research quality issues: ${quality.issues.join(', ')}`);
  }

  // Verify we found technical content (case-sensitive markers, as typically written in code)
  const codeMarkers = ['npm', 'const', 'import', 'OAuth'];
  const hasCodeExamples = finalProject.findings.some(f =>
    codeMarkers.some(marker => f.content.includes(marker))
  );

  if (!hasCodeExamples) {
    elizaLogger.warn('No code examples found in technical research');
  }

  // Check for security considerations (important for auth research)
  const securityTerms = ['security', 'csrf', 'token'];
  const hasSecurityInfo = finalProject.findings.some(f => {
    const content = f.content.toLowerCase();
    return securityTerms.some(term => content.includes(term));
  });

  if (!hasSecurityInfo) {
    elizaLogger.warn('No security information found in OAuth research');
  }

  elizaLogger.info(`Code research completed with ${finalProject.sources.length} sources and ${finalProject.findings.length} findings`);
  elizaLogger.info(`Found code examples: ${hasCodeExamples}, Security info: ${hasSecurityInfo}`);

  if (finalProject.report) {
    elizaLogger.info(`Report generated with ${finalProject.report.sections.length} sections`);
  }

  elizaLogger.success('E2E Test Passed: Code/Feature Research');
}
|
|
171
|
+
|
|
172
|
+
// Test 2: Person Research - Researching a public figure
|
|
173
|
+
/**
 * E2E scenario: background research on a public figure (Vitalik Buterin).
 *
 * Waits up to 2.5 minutes. An incomplete run is tolerated as long as some
 * findings were collected; the test then requires at least one finding that
 * mentions the person or Ethereum.
 *
 * @param runtime - Agent runtime that provides the `research` service.
 * @throws Error if the service is missing, nothing was collected before the
 *   timeout, the project cannot be retrieved, or no finding is relevant.
 */
export async function testPersonResearch(runtime: IAgentRuntime): Promise<void> {
  elizaLogger.info('Starting E2E Test: Person Research');

  const service = runtime.getService<ResearchService>('research');
  if (!service) {
    throw new Error('Research service not available');
  }

  // Research a well-known tech figure
  const personQuery = 'Vitalik Buterin Ethereum founder recent projects writings 2024';
  const researchOptions = {
    maxSearchResults: 4,
    metadata: {
      researchType: 'person',
      purpose: 'background_research'
    }
  };

  const project = await service.createResearchProject(personQuery, researchOptions);
  elizaLogger.info(`Created person research project: ${project.id}`);

  // Wait for completion; a partial result is acceptable if it contains findings
  const completed = await waitForResearchCompletion(service, project.id, 150000);
  if (!completed) {
    const partial = await service.getProject(project.id);
    const partialCount = partial ? partial.findings.length : 0;
    if (partialCount === 0) {
      throw new Error('Person research did not complete and no findings collected');
    }
    elizaLogger.warn(`Person research incomplete but found ${partialCount} findings`);
  }

  const finalProject = await service.getProject(project.id);
  if (!finalProject) {
    throw new Error('Could not retrieve completed project');
  }

  // Validate we found relevant information about the person
  const personTerms = ['vitalik', 'ethereum', 'buterin'];
  const relevantFindings = finalProject.findings.filter(finding => {
    const text = finding.content.toLowerCase();
    return personTerms.some(term => text.includes(term));
  });

  if (relevantFindings.length === 0) {
    throw new Error('No relevant findings about the person');
  }

  // Check for recent information (years matched verbatim, "recent" case-insensitively)
  const hasRecentInfo = finalProject.findings.some(finding => {
    const text = finding.content;
    if (text.includes('2024') || text.includes('2023')) {
      return true;
    }
    return text.toLowerCase().includes('recent');
  });

  elizaLogger.info(`Person research completed: ${relevantFindings.length}/${finalProject.findings.length} relevant findings`);
  elizaLogger.info(`Found recent information: ${hasRecentInfo}`);

  // Sample a finding
  const [firstRelevant] = relevantFindings;
  if (firstRelevant) {
    const sample = firstRelevant.content.substring(0, 200);
    elizaLogger.info(`Sample finding: "${sample}..."`);
  }

  elizaLogger.success('E2E Test Passed: Person Research');
}
|
|
240
|
+
|
|
241
|
+
// Test 3: News/Current Events Research
|
|
242
|
+
/**
 * E2E scenario: research current AI news.
 *
 * Creates a news-oriented project, logs the first news-like source that shows
 * up while it runs, waits up to 3 minutes, then reports how many findings are
 * AI-related and whether they mention recent dates. An incomplete run is
 * tolerated only if it produced at least one finding.
 *
 * @param runtime - Agent runtime that provides the `research` service.
 * @throws Error if the service is missing, the project cannot be retrieved,
 *   or the research neither completed nor collected any findings.
 */
export async function testNewsResearch(runtime: IAgentRuntime): Promise<void> {
  elizaLogger.info('Starting E2E Test: News/Current Events Research');

  const service = runtime.getService<ResearchService>('research');
  if (!service) {
    throw new Error('Research service not available');
  }

  // Research current AI developments
  const newsQuery = 'latest artificial intelligence breakthroughs news December 2024 ChatGPT Claude Gemini';

  const project = await service.createResearchProject(newsQuery, {
    maxSearchResults: 5,
    metadata: {
      researchType: 'news',
      timeframe: 'current',
      purpose: 'market_intelligence'
    }
  });

  elizaLogger.info(`Created news research project: ${project.id}`);

  // Monitor for news-specific content
  let checkCount = 0;
  let newsSourceLogged = false;
  const newsCheckInterval = setInterval(async () => {
    checkCount++;
    try {
      const current = await service.getProject(project.id);
      if (!current) return;

      // Log the first news-like source once, whenever it first appears
      // (not only if it happens to exist on the very first tick).
      if (!newsSourceLogged) {
        const newsSource = current.sources.find(s =>
          s.url.includes('news') ||
          s.url.includes('article') ||
          s.url.includes('blog') ||
          s.title.toLowerCase().includes('2024')
        );
        if (newsSource) {
          newsSourceLogged = true;
          elizaLogger.info(`Found news source: ${newsSource.title}`);
        }
      }

      if (current.status === ResearchStatus.COMPLETED ||
          current.status === ResearchStatus.FAILED ||
          checkCount > 30) {
        clearInterval(newsCheckInterval);
      }
    } catch (error) {
      // A failed progress probe must not surface as an unhandled rejection.
      elizaLogger.warn(`News progress check failed: ${error instanceof Error ? error.message : String(error)}`);
    }
  }, 5000);

  // Wait for completion; always stop the monitor, even if waiting throws.
  let completed: boolean;
  try {
    completed = await waitForResearchCompletion(service, project.id, 180000);
  } finally {
    clearInterval(newsCheckInterval);
  }

  const finalProject = await service.getProject(project.id);
  if (!finalProject) {
    throw new Error('Could not retrieve completed project');
  }

  // Do not report success for a run that neither finished nor produced anything.
  if (!completed) {
    if (finalProject.findings.length === 0) {
      throw new Error('News research did not complete and no findings collected');
    }
    elizaLogger.warn(`News research incomplete but found ${finalProject.findings.length} findings`);
  }

  // Validate news research quality
  const quality = validateResearchQuality(finalProject);

  // Check for AI-related content. "ai" is matched as a whole word so that
  // words such as "said" or "available" do not count.
  const aiWord = /\bai\b/;
  const aiPhrases = ['artificial intelligence', 'chatgpt', 'claude', 'gemini', 'machine learning'];
  const aiFindings = finalProject.findings.filter(f => {
    const content = f.content.toLowerCase();
    return aiWord.test(content) || aiPhrases.some(phrase => content.includes(phrase));
  });

  if (aiFindings.length === 0) {
    elizaLogger.warn('No AI-related findings in news research');
  }

  // Check for recent dates
  const hasRecentDates = finalProject.findings.some(f => {
    const content = f.content;
    return content.includes('2024') ||
           content.includes('December') ||
           content.includes('November') ||
           content.toLowerCase().includes('recent');
  });

  elizaLogger.info(`News research completed: ${aiFindings.length} AI-related findings out of ${finalProject.findings.length} total`);
  elizaLogger.info(`Contains recent dates: ${hasRecentDates}`);
  elizaLogger.info(`Research quality: ${quality.isValid ? 'Good' : 'Issues: ' + quality.issues.join(', ')}`);

  elizaLogger.success('E2E Test Passed: News/Current Events Research');
}
|
|
332
|
+
|
|
333
|
+
// Test 4: Technical Documentation Research
|
|
334
|
+
/**
 * E2E scenario: research technical documentation (React Server Components).
 *
 * Waits up to 2.5 minutes, then logs whether documentation-like sources,
 * code-like content and best-practice wording were found. These content
 * heuristics are informational only. An incomplete run is tolerated only if
 * it produced at least one finding.
 *
 * @param runtime - Agent runtime that provides the `research` service.
 * @throws Error if the service is missing, the project cannot be retrieved,
 *   or the research neither completed nor collected any findings.
 */
export async function testDocumentationResearch(runtime: IAgentRuntime): Promise<void> {
  elizaLogger.info('Starting E2E Test: Technical Documentation Research');

  const service = runtime.getService<ResearchService>('research');
  if (!service) {
    throw new Error('Research service not available');
  }

  // Research technical documentation
  const docQuery = 'React Server Components documentation examples best practices performance optimization';

  const project = await service.createResearchProject(docQuery, {
    maxSearchResults: 4,
    metadata: {
      researchType: 'documentation',
      purpose: 'learning'
    }
  });

  elizaLogger.info(`Created documentation research project: ${project.id}`);

  // Wait for completion
  const completed = await waitForResearchCompletion(service, project.id, 150000);

  const finalProject = await service.getProject(project.id);
  if (!finalProject) {
    throw new Error('Could not retrieve completed project');
  }

  // Do not report success for a run that neither finished nor produced anything.
  if (!completed) {
    if (finalProject.findings.length === 0) {
      throw new Error('Documentation research did not complete and no findings collected');
    }
    elizaLogger.warn(`Documentation research incomplete but found ${finalProject.findings.length} findings`);
  }

  // Check for documentation-specific content
  const docUrlMarkers = ['react.dev', 'docs', 'documentation', 'github.com'];
  const hasDocSources = finalProject.sources.some(s =>
    docUrlMarkers.some(marker => s.url.includes(marker))
  );

  // Rough, case-sensitive markers of code or JSX in the finding text
  const codeMarkers = ['```', '<', '/>', 'function', 'const'];
  const hasCodeExamples = finalProject.findings.some(f =>
    codeMarkers.some(marker => f.content.includes(marker))
  );

  const practiceTerms = ['best practice', 'recommendation', 'should', 'avoid'];
  const hasBestPractices = finalProject.findings.some(f => {
    const content = f.content.toLowerCase();
    return practiceTerms.some(term => content.includes(term));
  });

  elizaLogger.info(`Documentation research completed:`);
  elizaLogger.info(`- Found documentation sources: ${hasDocSources}`);
  elizaLogger.info(`- Contains code examples: ${hasCodeExamples}`);
  elizaLogger.info(`- Includes best practices: ${hasBestPractices}`);
  elizaLogger.info(`- Total findings: ${finalProject.findings.length}`);

  elizaLogger.success('E2E Test Passed: Technical Documentation Research');
}
|
|
394
|
+
|
|
395
|
+
// Test 5: Competitive Analysis Research
|
|
396
|
+
/**
 * E2E scenario: competitive analysis of AI agent frameworks.
 *
 * Waits up to 3 minutes, then counts how many findings mention each named
 * competitor and logs whether comparison and feature wording is present.
 * Low mention counts only produce a warning. An incomplete run is tolerated
 * only if it produced at least one finding.
 *
 * @param runtime - Agent runtime that provides the `research` service.
 * @throws Error if the service is missing, the project cannot be retrieved,
 *   or the research neither completed nor collected any findings.
 */
export async function testCompetitiveAnalysis(runtime: IAgentRuntime): Promise<void> {
  elizaLogger.info('Starting E2E Test: Competitive Analysis Research');

  const service = runtime.getService<ResearchService>('research');
  if (!service) {
    throw new Error('Research service not available');
  }

  // Research competitors in the AI agent space
  const competitorQuery = 'AI agent frameworks comparison AutoGPT BabyAGI LangChain CrewAI features pricing';

  const project = await service.createResearchProject(competitorQuery, {
    maxSearchResults: 5,
    metadata: {
      researchType: 'competitive_analysis',
      purpose: 'market_research'
    }
  });

  elizaLogger.info(`Created competitive analysis project: ${project.id}`);

  // Wait for completion
  const completed = await waitForResearchCompletion(service, project.id, 180000);

  const finalProject = await service.getProject(project.id);
  if (!finalProject) {
    throw new Error('Could not retrieve completed project');
  }

  // Do not report success for a run that neither finished nor produced anything.
  if (!completed) {
    if (finalProject.findings.length === 0) {
      throw new Error('Competitive analysis did not complete and no findings collected');
    }
    elizaLogger.warn(`Competitive analysis incomplete but found ${finalProject.findings.length} findings`);
  }

  // Lowercase each finding once; every check below is case-insensitive.
  const loweredFindings = finalProject.findings.map(f => f.content.toLowerCase());

  // Check for competitor mentions (number of findings mentioning each name)
  const competitors = ['AutoGPT', 'BabyAGI', 'LangChain', 'CrewAI'];
  const competitorMentions: Record<string, number> = {};

  for (const competitor of competitors) {
    const needle = competitor.toLowerCase();
    competitorMentions[competitor] = loweredFindings.filter(content => content.includes(needle)).length;
  }

  const totalMentions = Object.values(competitorMentions).reduce((a, b) => a + b, 0);

  // Check for comparison content. "vs" is matched as a whole word so that it
  // does not fire inside unrelated words such as "canvas".
  const vsWord = /\bvs\b/;
  const comparisonTerms = ['compar', 'versus', 'better', 'advantage'];
  const hasComparison = loweredFindings.some(content =>
    vsWord.test(content) || comparisonTerms.some(term => content.includes(term))
  );

  // Check for feature analysis
  const featureTerms = ['feature', 'capability', 'functionality', 'support'];
  const hasFeatures = loweredFindings.some(content =>
    featureTerms.some(term => content.includes(term))
  );

  elizaLogger.info(`Competitive analysis completed:`);
  elizaLogger.info(`- Competitor mentions: ${JSON.stringify(competitorMentions)}`);
  elizaLogger.info(`- Total competitor mentions: ${totalMentions}`);
  elizaLogger.info(`- Contains comparisons: ${hasComparison}`);
  elizaLogger.info(`- Analyzes features: ${hasFeatures}`);

  if (totalMentions < 2) {
    elizaLogger.warn('Limited competitor information found');
  }

  elizaLogger.success('E2E Test Passed: Competitive Analysis Research');
}
|
|
468
|
+
|
|
469
|
+
// Test 6: DeFi Research Integration
|
|
470
|
+
/**
 * E2E scenario: DeFi security research started through the DeFi action.
 *
 * Invokes `defiSecurityResearchAction.handler` with a mock message, extracts
 * the project ID from the response text, monitors the project while it runs
 * (up to 2 minutes), then logs whether security, audit and code-related
 * content was found. An incomplete run is tolerated only if it produced at
 * least one finding.
 *
 * @param runtime - Agent runtime that provides the `research` service.
 * @throws Error if the service is missing, the action returns no usable
 *   result or project ID, the project cannot be retrieved, or the research
 *   neither completed nor collected any findings.
 */
export async function testDeFiResearch(runtime: IAgentRuntime): Promise<void> {
  elizaLogger.info('Starting E2E Test: DeFi Research Integration');

  const service = runtime.getService<ResearchService>('research');
  if (!service) {
    throw new Error('Research service not available');
  }

  // Import DeFi action to test
  // NOTE(review): confirm this module path exists in the published package;
  // if it does not resolve, the import rejects here and the test fails.
  const { defiSecurityResearchAction } = await import('../actions/defi-actions');

  // Template-literal shape of the identifiers used in the mock message.
  type UuidLike = `${string}-${string}-${string}-${string}-${string}`;

  // Create a mock message for DeFi research
  const mockMessage: Memory = {
    id: '00000000-0000-0000-0000-000000000001' as UuidLike,
    entityId: '00000000-0000-0000-0000-000000000002' as UuidLike,
    roomId: '00000000-0000-0000-0000-000000000003' as UuidLike,
    content: {
      text: 'Research smart contract security vulnerabilities and audit best practices for DeFi protocols'
    },
    createdAt: Date.now()
  };

  // Execute DeFi security research
  const result = await defiSecurityResearchAction.handler(
    runtime,
    mockMessage,
    undefined,
    {}
  );

  if (!result || typeof result !== 'object') {
    throw new Error('DeFi research action returned invalid result');
  }

  // Extract project ID from response; a non-string `text` is treated as empty.
  const { text } = result as { text?: unknown };
  const responseText = typeof text === 'string' ? text : '';
  const projectIdMatch = responseText.match(/([a-f0-9-]{36})/);

  if (!projectIdMatch) {
    throw new Error('Could not extract project ID from DeFi research response');
  }

  const projectId = projectIdMatch[1];
  elizaLogger.info(`DeFi research started with project ID: ${projectId}`);

  // Monitor DeFi-specific findings
  let defiCheckCount = 0;
  let defiFindingsLogged = false;
  const defiTerms = ['smart contract', 'defi', 'audit', 'vulnerability', 'security'];
  const defiCheckInterval = setInterval(async () => {
    defiCheckCount++;
    try {
      const current = await service.getProject(projectId);
      if (!current) return;

      // Log once, the first time DeFi-specific findings show up
      // (not only if they already exist on the very first tick).
      if (!defiFindingsLogged) {
        const defiFindings = current.findings.filter(f => {
          const content = f.content.toLowerCase();
          return defiTerms.some(term => content.includes(term));
        });
        if (defiFindings.length > 0) {
          defiFindingsLogged = true;
          elizaLogger.info(`Found ${defiFindings.length} DeFi-specific findings`);
        }
      }

      if (current.status === ResearchStatus.COMPLETED ||
          current.status === ResearchStatus.FAILED ||
          defiCheckCount > 20) {
        clearInterval(defiCheckInterval);
      }
    } catch (error) {
      // A failed progress probe must not surface as an unhandled rejection.
      elizaLogger.warn(`DeFi progress check failed: ${error instanceof Error ? error.message : String(error)}`);
    }
  }, 5000);

  // Wait for completion; always stop the monitor, even if waiting throws.
  let completed: boolean;
  try {
    completed = await waitForResearchCompletion(service, projectId, 120000);
  } finally {
    clearInterval(defiCheckInterval);
  }

  const finalProject = await service.getProject(projectId);
  if (!finalProject) {
    throw new Error('Could not retrieve DeFi research project');
  }

  // Do not report success for a run that neither finished nor produced anything.
  if (!completed) {
    if (finalProject.findings.length === 0) {
      throw new Error('DeFi research did not complete and no findings collected');
    }
    elizaLogger.warn(`DeFi research incomplete but found ${finalProject.findings.length} findings`);
  }

  // Validate DeFi research quality (all checks are case-insensitive, so
  // capitalised words such as "Solidity" are matched too).
  const loweredFindings = finalProject.findings.map(f => f.content.toLowerCase());
  const mentionsAny = (terms: string[]): boolean =>
    loweredFindings.some(content => terms.some(term => content.includes(term)));

  const hasSecurityContent = mentionsAny(['security', 'vulnerability', 'exploit']);
  const hasAuditContent = mentionsAny(['audit', 'review', 'verification']);
  const hasCodeExamples = mentionsAny(['solidity', 'contract', 'function', 'require']);

  elizaLogger.info(`DeFi research completed:`);
  elizaLogger.info(`- Security content found: ${hasSecurityContent}`);
  elizaLogger.info(`- Audit content found: ${hasAuditContent}`);
  elizaLogger.info(`- Code examples found: ${hasCodeExamples}`);
  elizaLogger.info(`- Total findings: ${finalProject.findings.length}`);

  elizaLogger.success('E2E Test Passed: DeFi Research Integration');
}
|
|
580
|
+
|
|
581
|
+
// Test 7: Research Quality and Relevance
|
|
582
|
+
/**
 * E2E scenario: quality and relevance analysis of a narrowly scoped query.
 *
 * Waits up to 2 minutes, then logs keyword coverage, the relevance
 * distribution, source diversity and the average finding length. Weak
 * coverage or relevance only produces warnings. An incomplete run is
 * tolerated only if it produced at least one finding.
 *
 * @param runtime - Agent runtime that provides the `research` service.
 * @throws Error if the service is missing, the project cannot be retrieved,
 *   or the research neither completed nor collected any findings.
 */
export async function testResearchQualityAssurance(runtime: IAgentRuntime): Promise<void> {
  elizaLogger.info('Starting E2E Test: Research Quality Assurance');

  const service = runtime.getService<ResearchService>('research');
  if (!service) {
    throw new Error('Research service not available');
  }

  // Create a very specific research query
  const specificQuery = 'ElizaOS plugin development tutorial TypeScript examples 2024';

  const project = await service.createResearchProject(specificQuery, {
    maxSearchResults: 3,
    metadata: {
      researchType: 'tutorial',
      expectedKeywords: ['ElizaOS', 'plugin', 'TypeScript', 'development']
    }
  });

  elizaLogger.info(`Created quality assurance research project: ${project.id}`);

  // Wait for completion
  const completed = await waitForResearchCompletion(service, project.id, 120000);

  const finalProject = await service.getProject(project.id);
  if (!finalProject) {
    throw new Error('Could not retrieve completed project');
  }

  // Do not report success for a run that neither finished nor produced anything.
  if (!completed) {
    if (finalProject.findings.length === 0) {
      throw new Error('Quality assurance research did not complete and no findings collected');
    }
    elizaLogger.warn(`Quality assurance research incomplete but found ${finalProject.findings.length} findings`);
  }

  const findings = finalProject.findings;
  const totalFindings = findings.length;

  // Detailed quality checks: number of findings mentioning each keyword (case-insensitive)
  const expectedKeywords = ['ElizaOS', 'plugin', 'TypeScript', 'development', 'tutorial'];
  const loweredFindings = findings.map(f => f.content.toLowerCase());
  const keywordCoverage: Record<string, number> = {};

  for (const keyword of expectedKeywords) {
    const needle = keyword.toLowerCase();
    keywordCoverage[keyword] = loweredFindings.filter(content => content.includes(needle)).length;
  }

  // Calculate relevance metrics
  const highRelevanceFindings = findings.filter(f => f.relevance > 0.7).length;
  const mediumRelevanceFindings = findings.filter(f => f.relevance > 0.4 && f.relevance <= 0.7).length;
  const lowRelevanceFindings = findings.filter(f => f.relevance <= 0.4).length;

  // Check source diversity; unparseable URLs are all grouped under 'unknown'.
  const sourceDomains = new Set(
    finalProject.sources.map(s => {
      try {
        return new URL(s.url).hostname;
      } catch {
        return 'unknown';
      }
    })
  );

  // Guard the average against an empty findings list (would otherwise log "NaN chars").
  const totalChars = findings.reduce((sum, f) => sum + f.content.length, 0);
  const averageFindingLength = totalFindings > 0 ? Math.round(totalChars / totalFindings) : 0;

  // Quality report
  elizaLogger.info(`Research Quality Analysis:`);
  elizaLogger.info(`- Keyword coverage: ${JSON.stringify(keywordCoverage)}`);
  elizaLogger.info(`- Relevance distribution:`);
  elizaLogger.info(`  * High (>0.7): ${highRelevanceFindings}/${totalFindings}`);
  elizaLogger.info(`  * Medium (0.4-0.7): ${mediumRelevanceFindings}/${totalFindings}`);
  elizaLogger.info(`  * Low (<0.4): ${lowRelevanceFindings}/${totalFindings}`);
  elizaLogger.info(`- Source diversity: ${sourceDomains.size} unique domains`);
  elizaLogger.info(`- Average finding length: ${averageFindingLength} chars`);

  // Validate minimum quality standards
  const keywordsCovered = Object.values(keywordCoverage).filter(count => count > 0).length;
  if (keywordsCovered < 2) {
    elizaLogger.warn(`Low keyword coverage: only ${keywordsCovered}/${expectedKeywords.length} keywords found`);
  }

  if (highRelevanceFindings < totalFindings * 0.3) {
    elizaLogger.warn(`Low proportion of high-relevance findings: ${highRelevanceFindings}/${totalFindings}`);
  }

  elizaLogger.success('E2E Test Passed: Research Quality Assurance');
}
|
|
660
|
+
|
|
661
|
+
// Export all tests as a TestSuite for the ElizaOS test runner
|
|
662
|
+
// Scenario table: [name, description, test function], in execution order.
const realWorldScenarios: Array<[string, string, (runtime: IAgentRuntime) => Promise<void>]> = [
  [
    'Code/Feature Research',
    'Simulates researching technical implementation details before building a feature',
    testCodeFeatureResearch
  ],
  [
    'Person Research',
    'Tests researching information about a public figure',
    testPersonResearch
  ],
  [
    'News/Current Events Research',
    'Tests researching latest news and current developments',
    testNewsResearch
  ],
  [
    'Technical Documentation Research',
    'Tests researching technical documentation and best practices',
    testDocumentationResearch
  ],
  [
    'Competitive Analysis',
    'Tests researching competitors and market analysis',
    testCompetitiveAnalysis
  ],
  [
    'DeFi Research Integration',
    'Tests DeFi-specific research scenarios',
    testDeFiResearch
  ],
  [
    'Research Quality Assurance',
    'Tests research quality, relevance, and comprehensive coverage',
    testResearchQualityAssurance
  ]
];

/**
 * All real-world scenarios bundled as a single suite: a list with one entry
 * holding a suite name, a description, and the named test cases
 * (`name`, `description`, `fn`).
 */
export const researchE2ETests = [
  {
    name: 'Research Plugin E2E Tests - Real World Scenarios',
    description: 'Comprehensive end-to-end tests simulating real-world research use cases',
    tests: realWorldScenarios.map(([name, description, fn]) => ({ name, description, fn }))
  }
];
|