@elizaos/plugin-research 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +400 -0
- package/dist/index.cjs +9366 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.js +9284 -0
- package/dist/index.js.map +1 -0
- package/package.json +80 -0
- package/src/__tests__/action-chaining.test.ts +532 -0
- package/src/__tests__/actions.test.ts +118 -0
- package/src/__tests__/cache-rate-limiter.test.ts +303 -0
- package/src/__tests__/content-extractors.test.ts +26 -0
- package/src/__tests__/deepresearch-bench-integration.test.ts +520 -0
- package/src/__tests__/deepresearch-bench-simplified.e2e.test.ts +290 -0
- package/src/__tests__/deepresearch-bench.e2e.test.ts +376 -0
- package/src/__tests__/e2e.test.ts +1870 -0
- package/src/__tests__/multi-benchmark-runner.ts +427 -0
- package/src/__tests__/providers.test.ts +156 -0
- package/src/__tests__/real-world.e2e.test.ts +788 -0
- package/src/__tests__/research-scenarios.test.ts +755 -0
- package/src/__tests__/research.e2e.test.ts +704 -0
- package/src/__tests__/research.test.ts +174 -0
- package/src/__tests__/search-providers.test.ts +174 -0
- package/src/__tests__/single-benchmark-runner.ts +735 -0
- package/src/__tests__/test-search-providers.ts +171 -0
- package/src/__tests__/verify-apis.test.ts +82 -0
- package/src/actions.ts +1677 -0
- package/src/benchmark/deepresearch-benchmark.ts +369 -0
- package/src/evaluation/research-evaluator.ts +444 -0
- package/src/examples/api-integration.md +498 -0
- package/src/examples/browserbase-integration.md +132 -0
- package/src/examples/debug-research-query.ts +162 -0
- package/src/examples/defi-code-scenarios.md +536 -0
- package/src/examples/defi-implementation-guide.md +454 -0
- package/src/examples/eliza-research-example.ts +142 -0
- package/src/examples/fix-renewable-energy-research.ts +209 -0
- package/src/examples/research-scenarios.md +408 -0
- package/src/examples/run-complete-renewable-research.ts +303 -0
- package/src/examples/run-deep-research.ts +352 -0
- package/src/examples/run-logged-research.ts +304 -0
- package/src/examples/run-real-research.ts +151 -0
- package/src/examples/save-research-output.ts +133 -0
- package/src/examples/test-file-logging.ts +199 -0
- package/src/examples/test-real-research.ts +67 -0
- package/src/examples/test-renewable-energy-research.ts +229 -0
- package/src/index.ts +28 -0
- package/src/integrations/cache.ts +128 -0
- package/src/integrations/content-extractors/firecrawl.ts +314 -0
- package/src/integrations/content-extractors/pdf-extractor.ts +350 -0
- package/src/integrations/content-extractors/playwright.ts +420 -0
- package/src/integrations/factory.ts +419 -0
- package/src/integrations/index.ts +18 -0
- package/src/integrations/rate-limiter.ts +181 -0
- package/src/integrations/search-providers/academic.ts +290 -0
- package/src/integrations/search-providers/exa.ts +205 -0
- package/src/integrations/search-providers/npm.ts +330 -0
- package/src/integrations/search-providers/pypi.ts +211 -0
- package/src/integrations/search-providers/serpapi.ts +277 -0
- package/src/integrations/search-providers/serper.ts +358 -0
- package/src/integrations/search-providers/stagehand-google.ts +87 -0
- package/src/integrations/search-providers/tavily.ts +187 -0
- package/src/processing/relevance-analyzer.ts +353 -0
- package/src/processing/research-logger.ts +450 -0
- package/src/processing/result-processor.ts +372 -0
- package/src/prompts/research-prompts.ts +419 -0
- package/src/providers/cacheProvider.ts +164 -0
- package/src/providers.ts +173 -0
- package/src/service.ts +2588 -0
- package/src/services/swe-bench.ts +286 -0
- package/src/strategies/research-strategies.ts +790 -0
- package/src/types/pdf-parse.d.ts +34 -0
- package/src/types.ts +551 -0
- package/src/verification/claim-verifier.ts +443 -0
|
@@ -0,0 +1,788 @@
|
|
|
1
|
+
import { IAgentRuntime, Memory, elizaLogger } from '@elizaos/core';
|
|
2
|
+
import { ResearchService } from '../service';
|
|
3
|
+
import { ResearchProject, ResearchStatus, ResearchPhase } from '../types';
|
|
4
|
+
|
|
5
|
+
// Helper to create a realistic test memory object
|
|
6
|
+
/**
 * Builds a minimal `Memory` object for tests.
 *
 * The entity and room ids are fixed placeholder UUIDs; the memory id is freshly
 * generated on every call and `createdAt` is the current time.
 *
 * @param text - Message text stored under `content.text`.
 * @returns A memory whose `id` is a random, UUID-shaped string.
 */
function createTestMemory(text: string): Memory {
  type UuidLike = `${string}-${string}-${string}-${string}-${string}`;

  // Random lowercase hex string of the requested length.
  const hex = (length: number): string =>
    Array.from({ length }, () => Math.floor(Math.random() * 16).toString(16)).join('');

  return {
    // The id is asserted to be a five-segment UUID-shaped string, so it is built
    // with the 8-4-4-4-12 layout rather than a two-segment "<timestamp>-<random>" value.
    id: `${hex(8)}-${hex(4)}-${hex(4)}-${hex(4)}-${hex(12)}` as UuidLike,
    entityId: '00000000-0000-0000-0000-000000000001' as UuidLike,
    roomId: '00000000-0000-0000-0000-000000000002' as UuidLike,
    content: { text },
    createdAt: Date.now(),
  };
}
|
|
17
|
+
|
|
18
|
+
// Helper to wait and monitor research progress
|
|
19
|
+
/**
 * Polls a research project until it completes, fails, disappears, or the timeout elapses.
 *
 * On every poll the project is fetched with `service.getProject`, phase changes are logged,
 * and `onProgress` (if given) is invoked with the fetched project. Exceptions thrown by
 * `onProgress` are not caught here and reject the returned promise.
 *
 * @param service - Research service used to look the project up.
 * @param projectId - Id of the project to monitor.
 * @param options.timeout - Maximum total monitoring time in milliseconds (default 180000).
 * @param options.checkInterval - Delay between polls in milliseconds (default 5000).
 * @param options.onProgress - Optional callback invoked with the project on every poll.
 * @returns The project once its status is COMPLETED or FAILED; `null` if a lookup returns
 *          nothing; otherwise whatever a final lookup returns after the timeout.
 */
async function monitorResearch(
  service: ResearchService,
  projectId: string,
  options: {
    timeout?: number;
    checkInterval?: number;
    onProgress?: (project: ResearchProject) => void;
  } = {}
): Promise<ResearchProject | null> {
  const {
    timeout = 180000, // 3 minutes default
    checkInterval = 5000,
    onProgress,
  } = options;

  const startTime = Date.now();
  let lastPhase: ResearchPhase | null = null;

  while (Date.now() - startTime < timeout) {
    const project = await service.getProject(projectId);
    if (!project) return null;

    // Log phase changes
    if (project.phase !== lastPhase) {
      elizaLogger.info(`Research phase: ${lastPhase ?? 'START'} → ${project.phase}`);
      lastPhase = project.phase;
    }

    // Call progress callback
    if (onProgress) {
      onProgress(project);
    }

    // Check completion
    if (project.status === ResearchStatus.COMPLETED || project.status === ResearchStatus.FAILED) {
      return project;
    }

    // Never sleep past the deadline: a full checkInterval here could overrun the
    // caller's timeout by up to one interval.
    const remaining = timeout - (Date.now() - startTime);
    if (remaining <= 0) break;
    const delay = Math.max(0, Math.min(checkInterval, remaining));
    await new Promise<void>((resolve) => setTimeout(resolve, delay));
  }

  // Return whatever we have after timeout
  const finalProject = await service.getProject(projectId);
  return finalProject ?? null;
}
|
|
64
|
+
|
|
65
|
+
// Test 1: Research for Building a New Feature (Real Developer Workflow)
|
|
66
|
+
/**
 * Real-world scenario 1: a developer researching how to build a WebSocket feature.
 *
 * Creates one research project per aspect (implementation, comparison, security),
 * monitors them in parallel, then logs whether the combined findings mention
 * implementation, security and performance keywords and how many sources come from
 * developer-oriented sites.
 *
 * @param runtime - Agent runtime that must expose the `research` service.
 * @throws Error if the research service is unavailable or any project can no longer
 *         be looked up while it is being monitored.
 */
export async function testFeatureDevelopmentResearch(runtime: IAgentRuntime): Promise<void> {
  elizaLogger.info('🔨 Starting Real-World Test: Feature Development Research');

  const service = runtime.getService<ResearchService>('research');
  if (!service) throw new Error('Research service not available');

  // Scenario: Developer needs to implement WebSocket real-time features.
  // Each query is paired with its aspect so the two cannot drift apart by index.
  const researchPlan = [
    {
      aspect: 'implementation',
      query: 'WebSocket implementation Node.js TypeScript scaling best practices 2024',
    },
    {
      aspect: 'comparison',
      query: 'Socket.io vs native WebSocket performance comparison production',
    },
    {
      aspect: 'security',
      query: 'WebSocket authentication JWT security implementation examples',
    },
  ];

  elizaLogger.info('Researching WebSocket implementation across multiple aspects...');

  const projects = await Promise.all(
    researchPlan.map(({ aspect, query }) =>
      service.createResearchProject(query, {
        maxSearchResults: 3,
        metadata: {
          aspect,
          featureType: 'websocket',
        },
      })
    )
  );

  // Monitor all projects
  const results = await Promise.all(
    projects.map((project) => {
      // Remember the last reported count so an unchanged count is not re-logged on every poll.
      let lastLoggedCount = 0;
      return monitorResearch(service, project.id, {
        timeout: 120000,
        onProgress: (p) => {
          const count = p.findings.length;
          if (count > 0 && count % 3 === 0 && count !== lastLoggedCount) {
            lastLoggedCount = count;
            elizaLogger.info(
              `Project ${project.query.substring(0, 30)}... has ${count} findings`
            );
          }
        },
      });
    })
  );

  // A null result means the project could not be looked up; fail like the other scenarios do.
  const finished = results.filter((r): r is ResearchProject => r !== null);
  if (finished.length !== results.length) {
    throw new Error('Feature development research failed to complete');
  }

  // Analyze combined results
  const allFindings = finished.flatMap((r) => r.findings || []);
  const allSources = finished.flatMap((r) => r.sources || []);

  // True when at least one finding contains any of the given (lower-case) keywords.
  const anyFindingMentions = (keywords: string[]): boolean =>
    allFindings.some((f) => {
      const content = f.content.toLowerCase();
      return keywords.some((keyword) => content.includes(keyword));
    });

  // Check for implementation details
  const hasImplementationDetails = anyFindingMentions(['const', 'server', 'client', 'connection']);

  // Check for security considerations
  const hasSecurityInfo = anyFindingMentions(['auth', 'security', 'jwt', 'cors']);

  // Check for performance insights
  const hasPerformanceInfo = anyFindingMentions(['performance', 'scaling', 'benchmark', 'latency']);

  // Find Stack Overflow or GitHub sources (developer favorites)
  const devSites = ['stackoverflow.com', 'github.com', 'dev.to', 'medium.com'];
  const devSources = allSources.filter((s) => devSites.some((site) => s.url.includes(site)));

  elizaLogger.info('📊 Feature Development Research Results:');
  elizaLogger.info(`- Total findings across aspects: ${allFindings.length}`);
  elizaLogger.info(`- Implementation details found: ${hasImplementationDetails}`);
  elizaLogger.info(`- Security considerations found: ${hasSecurityInfo}`);
  elizaLogger.info(`- Performance insights found: ${hasPerformanceInfo}`);
  elizaLogger.info(`- Developer-focused sources: ${devSources.length}/${allSources.length}`);

  const [firstDevSource] = devSources;
  if (firstDevSource) {
    elizaLogger.info(`- Sample dev source: ${firstDevSource.title}`);
  }

  // Simulate decision-making based on research
  if (hasImplementationDetails && hasSecurityInfo && hasPerformanceInfo) {
    elizaLogger.success(
      '✅ Research provides comprehensive information for feature implementation'
    );
  } else {
    elizaLogger.warn('⚠️ Some aspects missing - may need additional research');
  }

  elizaLogger.success('✅ Real-World Test Passed: Feature Development Research');
}
|
|
177
|
+
|
|
178
|
+
// Test 2: Research a Person for Hiring/Partnership (Real HR/Business Workflow)
|
|
179
|
+
/**
 * Real-world scenario 2: researching a person's professional background.
 *
 * Runs a single research project, logs which professional-profile hosts show up while
 * searching, then scores the findings on employment history, education, recent activity
 * and technical work using keyword heuristics.
 *
 * @param runtime - Agent runtime that must expose the `research` service.
 * @throws Error if the research service is unavailable or monitoring returns no project.
 */
export async function testPersonBackgroundResearch(runtime: IAgentRuntime): Promise<void> {
  elizaLogger.info('👤 Starting Real-World Test: Person Background Research');

  const service = runtime.getService<ResearchService>('research');
  if (!service) throw new Error('Research service not available');

  // Scenario: Researching a potential technical advisor or hire
  const personQuery = 'Andrej Karpathy AI research contributions Tesla OpenAI recent projects 2024';

  const project = await service.createResearchProject(personQuery, {
    maxSearchResults: 5,
    metadata: {
      researchType: 'person_background',
      purpose: 'professional_evaluation',
    },
  });

  elizaLogger.info('Researching professional background...');

  // Last profile summary that was logged, so identical summaries are not repeated every poll.
  let lastProfileSummary = '';

  const result = await monitorResearch(service, project.id, {
    timeout: 150000,
    onProgress: (p) => {
      // Track what types of sources we're finding. A malformed URL is skipped instead of
      // throwing, because an exception here would abort the whole monitoring loop.
      const hosts = p.sources.flatMap((s) => {
        try {
          return [new URL(s.url).hostname.toLowerCase()];
        } catch {
          return [];
        }
      });

      const hasLinkedIn = hosts.some((h) => h.includes('linkedin'));
      // Match x.com as a host, not as a substring (which would also hit e.g. dropbox.com).
      const hasTwitter = hosts.some(
        (h) => h.includes('twitter') || h === 'x.com' || h.endsWith('.x.com')
      );
      const hasGitHub = hosts.some((h) => h.includes('github'));

      if ((hasLinkedIn || hasTwitter || hasGitHub) && p.phase === ResearchPhase.SEARCHING) {
        const summary = `Found professional profiles: LinkedIn=${hasLinkedIn}, Twitter=${hasTwitter}, GitHub=${hasGitHub}`;
        if (summary !== lastProfileSummary) {
          lastProfileSummary = summary;
          elizaLogger.info(summary);
        }
      }
    },
  });

  if (!result) throw new Error('Person research failed to complete');

  // Analyze findings for key information
  const findings = result.findings;

  // True when at least one finding contains any of the given (lower-case) keywords.
  const anyFindingMentions = (keywords: string[]): boolean =>
    findings.some((f) => {
      const content = f.content.toLowerCase();
      return keywords.some((keyword) => content.includes(keyword));
    });

  // Professional history
  const hasTeslaInfo = anyFindingMentions(['tesla']);
  const hasOpenAIInfo = anyFindingMentions(['openai']);
  const hasEducation = anyFindingMentions(['stanford', 'phd', 'university']);

  // Recent activities
  const hasRecentActivity = anyFindingMentions(['2024', '2023', 'recent', 'latest']);

  // Technical contributions
  const hasTechnicalWork = anyFindingMentions(['paper', 'research', 'model', 'algorithm', 'course']);

  // Extract key achievements. "led" is matched as a whole word; as a plain substring it
  // would also match "called", "failed", "enabled", and so on.
  const achievementPattern = /founded|created|developed|\bled\b|published/;
  const achievements = findings.filter((f) => achievementPattern.test(f.content.toLowerCase()));

  elizaLogger.info('📊 Person Background Research Results:');
  elizaLogger.info(
    `- Professional history coverage: Tesla=${hasTeslaInfo}, OpenAI=${hasOpenAIInfo}`
  );
  elizaLogger.info(`- Education info found: ${hasEducation}`);
  elizaLogger.info(`- Recent activity (2023-2024): ${hasRecentActivity}`);
  elizaLogger.info(`- Technical contributions: ${hasTechnicalWork}`);
  elizaLogger.info(`- Key achievements identified: ${achievements.length}`);

  const [firstAchievement] = achievements;
  if (firstAchievement) {
    const sample = firstAchievement.content.substring(0, 150);
    elizaLogger.info(`- Sample achievement: "${sample}..."`);
  }

  // Professional assessment: one point per covered category.
  const professionalScore = [
    hasTeslaInfo,
    hasOpenAIInfo,
    hasEducation,
    hasRecentActivity,
    hasTechnicalWork,
  ].filter(Boolean).length;

  if (professionalScore >= 4) {
    elizaLogger.success('✅ Comprehensive professional profile assembled');
  } else if (professionalScore >= 2) {
    elizaLogger.info('ℹ️ Partial professional profile - may need additional sources');
  } else {
    elizaLogger.warn('⚠️ Limited professional information found');
  }

  elizaLogger.success('✅ Real-World Test Passed: Person Background Research');
}
|
|
298
|
+
|
|
299
|
+
// Test 3: Breaking News Research (Real Journalist/Analyst Workflow)
|
|
300
|
+
/**
 * Real-world scenario 3: researching breaking AI news.
 *
 * Runs a single research project, then classifies its sources by publisher type,
 * checks the findings for timeliness indicators and announcement wording, and counts
 * mentions of a fixed list of companies. Everything is reported through `elizaLogger`.
 *
 * @param runtime - Agent runtime that must expose the `research` service.
 * @throws Error if the research service is unavailable or monitoring returns no project.
 */
export async function testBreakingNewsResearch(runtime: IAgentRuntime): Promise<void> {
  elizaLogger.info('📰 Starting Real-World Test: Breaking News Research');

  const service = runtime.getService<ResearchService>('research');
  if (!service) throw new Error('Research service not available');

  // Scenario: Researching breaking AI news
  const newsQuery =
    'AI artificial intelligence news today latest announcements breakthroughs December 2024';

  const project = await service.createResearchProject(newsQuery, {
    maxSearchResults: 6,
    metadata: {
      researchType: 'breaking_news',
      timeframe: 'current',
      industry: 'AI/ML',
    },
  });

  elizaLogger.info('Scanning for breaking AI news...');

  // Last reported count, so an unchanged count is not re-logged on every poll.
  let lastNewsSourceCount = 0;

  const result = await monitorResearch(service, project.id, {
    timeout: 150000,
    checkInterval: 3000,
    onProgress: (p) => {
      // Track news sources as they're found
      if (p.phase === ResearchPhase.SEARCHING && p.sources.length > 0) {
        const urlHints = ['news', 'article', 'press', 'announcement'];
        const newsSources = p.sources.filter((s) => {
          const url = s.url.toLowerCase();
          return urlHints.some((hint) => url.includes(hint));
        });

        if (newsSources.length > 0 && newsSources.length !== lastNewsSourceCount) {
          lastNewsSourceCount = newsSources.length;
          elizaLogger.info(`Found ${newsSources.length} news sources`);
        }
      }
    },
  });

  if (!result) throw new Error('News research failed to complete');

  // Analyze news findings
  const findings = result.findings;
  const sources = result.sources;

  // Lower-cased hostname of a URL, or '' when it cannot be parsed. A scheme-less value
  // such as "reuters.com/x" is retried with an https:// prefix.
  const hostOf = (url: string): string => {
    for (const candidate of [url, `https://${url}`]) {
      try {
        return new URL(candidate).hostname.toLowerCase();
      } catch {
        // try the next candidate
      }
    }
    return '';
  };

  // True when the host is one of the domains or a subdomain of one. Comparing hosts avoids
  // substring false positives such as "x.com" matching "netflix.com".
  const isOnDomain = (host: string, domains: string[]): boolean =>
    domains.some((domain) => host === domain || host.endsWith(`.${domain}`));

  const sourcesOn = (domains: string[]) =>
    sources.filter((s) => isOnDomain(hostOf(s.url), domains));

  // Identify news sources by domain
  const newsSourceTypes = {
    mainstream: sourcesOn(['reuters.com', 'bloomberg.com', 'wsj.com', 'nytimes.com']),
    tech: sourcesOn(['techcrunch.com', 'theverge.com', 'arstechnica.com', 'wired.com']),
    ai_specific: sources.filter((s) => {
      const host = hostOf(s.url);
      return (
        isOnDomain(host, ['openai.com', 'anthropic.com', 'deepmind.com']) ||
        // An "ai" host label (ai.example.com); a raw 'ai.' substring would also hit "mail.".
        host.startsWith('ai.') ||
        host.includes('.ai.')
      );
    }),
    social: sourcesOn(['twitter.com', 'x.com', 'reddit.com']),
  };

  // Check for time-sensitive content
  const currentMonth = new Date().toLocaleDateString('en-US', { month: 'long' });
  const currentYear = new Date().getFullYear();

  const hasCurrentMonth = findings.some((f) => f.content.includes(currentMonth));
  const hasCurrentYear = findings.some((f) => f.content.includes(currentYear.toString()));
  // "just" is matched as a whole word so that "adjust" or "justice" do not count.
  const timeWordPattern = /today|yesterday|this week|announced|\bjust\b/;
  const hasTimeWords = findings.some((f) => timeWordPattern.test(f.content.toLowerCase()));

  // Identify major announcements (stems, so "announces"/"announcement" both match)
  const announcementStems = ['announc', 'launch', 'releas', 'unveil', 'introduc'];
  const announcements = findings.filter((f) => {
    const content = f.content.toLowerCase();
    return announcementStems.some((stem) => content.includes(stem));
  });

  // Extract companies/organizations mentioned
  const companies = ['OpenAI', 'Google', 'Microsoft', 'Anthropic', 'Meta', 'Amazon', 'Apple'];
  const companyMentions: Record<string, number> = {};

  for (const company of companies) {
    companyMentions[company] = findings.filter((f) => f.content.includes(company)).length;
  }

  elizaLogger.info('📊 Breaking News Research Results:');
  elizaLogger.info(`- Source distribution:`);
  elizaLogger.info(`  * Mainstream media: ${newsSourceTypes.mainstream.length}`);
  elizaLogger.info(`  * Tech media: ${newsSourceTypes.tech.length}`);
  elizaLogger.info(`  * AI companies: ${newsSourceTypes.ai_specific.length}`);
  elizaLogger.info(`  * Social media: ${newsSourceTypes.social.length}`);
  elizaLogger.info(`- Timeliness indicators:`);
  elizaLogger.info(`  * Current month mentioned: ${hasCurrentMonth}`);
  elizaLogger.info(`  * Current year mentioned: ${hasCurrentYear}`);
  elizaLogger.info(`  * Time-sensitive words: ${hasTimeWords}`);
  elizaLogger.info(`- Announcements found: ${announcements.length}`);
  elizaLogger.info(
    `- Company mentions: ${Object.entries(companyMentions)
      .filter(([, count]) => count > 0)
      .map(([company, count]) => `${company}=${count}`)
      .join(', ')}`
  );

  const [firstAnnouncement] = announcements;
  if (firstAnnouncement) {
    const latestAnnouncement = firstAnnouncement.content.substring(0, 200);
    elizaLogger.info(`- Latest announcement: "${latestAnnouncement}..."`);
  }

  // News quality assessment
  const hasRecentNews = hasCurrentMonth || hasTimeWords;
  const hasDiverseSources =
    Object.values(newsSourceTypes).filter((group) => group.length > 0).length >= 2;
  const hasAnnouncements = announcements.length > 0;

  if (hasRecentNews && hasDiverseSources && hasAnnouncements) {
    elizaLogger.success('✅ High-quality breaking news coverage achieved');
  } else {
    elizaLogger.info(
      `ℹ️ News coverage: Recent=${hasRecentNews}, Diverse=${hasDiverseSources}, Announcements=${hasAnnouncements}`
    );
  }

  elizaLogger.success('✅ Real-World Test Passed: Breaking News Research');
}
|
|
455
|
+
|
|
456
|
+
// Test 4: Market/Competitive Intelligence (Real Business Strategy Workflow)
|
|
457
|
+
/**
 * Real-world scenario 4: market / competitive intelligence on AI agent frameworks.
 *
 * Runs a single research project, then uses keyword heuristics on the findings to build
 * per-competitor data (mention count, feature tags, pricing and adoption signals), detect
 * trend keywords, comparisons and use cases, and logs a summary.
 *
 * @param runtime - Agent runtime that must expose the `research` service.
 * @throws Error if the research service is unavailable or monitoring returns no project.
 */
export async function testMarketIntelligenceResearch(runtime: IAgentRuntime): Promise<void> {
  elizaLogger.info('📈 Starting Real-World Test: Market Intelligence Research');

  const service = runtime.getService<ResearchService>('research');
  if (!service) throw new Error('Research service not available');

  // Scenario: Analyzing the AI agent framework market
  const marketQuery =
    'AI agent frameworks market analysis 2024 LangChain AutoGPT CrewAI pricing features comparison adoption';

  const project = await service.createResearchProject(marketQuery, {
    maxSearchResults: 5,
    metadata: {
      researchType: 'market_intelligence',
      competitors: ['LangChain', 'AutoGPT', 'CrewAI'],
      analysisType: 'competitive',
    },
  });

  elizaLogger.info('Conducting market intelligence analysis...');

  const result = await monitorResearch(service, project.id, {
    timeout: 180000,
    onProgress: (p) => {
      if (p.phase === ResearchPhase.ANALYZING && p.findings.length > 5) {
        elizaLogger.info(`Analyzing ${p.findings.length} market data points...`);
      }
    },
  });

  if (!result) throw new Error('Market research failed to complete');

  // Market analysis
  const findings = result.findings;
  const competitors = ['LangChain', 'AutoGPT', 'CrewAI', 'BabyAGI', 'AgentGPT'];

  // True when the (lower-case) text contains any of the given substrings.
  const containsAny = (text: string, needles: string[]): boolean =>
    needles.some((needle) => text.includes(needle));

  // "star" is matched as a whole word ("star", "stars", "starred"); as a plain substring it
  // would match "start"/"started" and flag almost any getting-started text as adoption.
  const adoptionPattern = /user|download|\bstar(s|red)?\b|popular/;

  // Competitor analysis
  const competitorData: Record<
    string,
    {
      mentions: number;
      features: string[];
      pricing: boolean;
      adoption: boolean;
    }
  > = {};

  for (const competitor of competitors) {
    const needle = competitor.toLowerCase();
    const competitorFindings = findings.filter((f) => f.content.toLowerCase().includes(needle));

    const features = new Set<string>();
    let hasPricing = false;
    let hasAdoption = false;

    for (const finding of competitorFindings) {
      const content = finding.content.toLowerCase();

      // Extract features (only from findings that talk about features/capabilities)
      if (containsAny(content, ['feature', 'capability'])) {
        if (content.includes('memory')) features.add('memory');
        if (containsAny(content, ['tool', 'function'])) features.add('tools');
        if (containsAny(content, ['chain', 'workflow'])) features.add('workflow');
        if (containsAny(content, ['llm', 'model'])) features.add('multi-llm');
      }

      // Check for pricing info
      if (containsAny(content, ['price', 'cost', 'free', '$'])) {
        hasPricing = true;
      }

      // Check for adoption metrics
      if (adoptionPattern.test(content)) {
        hasAdoption = true;
      }
    }

    competitorData[competitor] = {
      mentions: competitorFindings.length,
      features: [...features],
      pricing: hasPricing,
      adoption: hasAdoption,
    };
  }

  // Market trends
  const trendKeywords = ['growth', 'trend', 'future', 'emerging', 'adoption', 'market size'];
  const trendsFound = trendKeywords.filter((keyword) =>
    findings.some((f) => f.content.toLowerCase().includes(keyword))
  );

  // Technical comparisons. "vs" is matched as a whole word so that words merely
  // containing those letters (e.g. "devs") do not count as a comparison.
  const comparisonPattern = /compar|versus|\bvs\b|better|advantage/;
  const hasComparisons = findings.some((f) => comparisonPattern.test(f.content.toLowerCase()));

  // Use cases and applications
  const useCases = findings.filter((f) =>
    containsAny(f.content.toLowerCase(), ['use case', 'application', 'example', 'implementation'])
  );

  elizaLogger.info('📊 Market Intelligence Results:');
  elizaLogger.info('- Competitor Analysis:');
  for (const [competitor, data] of Object.entries(competitorData)) {
    if (data.mentions > 0) {
      elizaLogger.info(
        `  * ${competitor}: ${data.mentions} mentions, features=[${data.features.join(',')}], pricing=${data.pricing}, adoption=${data.adoption}`
      );
    }
  }
  elizaLogger.info(`- Market trends identified: ${trendsFound.join(', ')}`);
  elizaLogger.info(`- Comparative analysis found: ${hasComparisons}`);
  elizaLogger.info(`- Use cases documented: ${useCases.length}`);

  // Strategic insights
  const wellCoveredCompetitors = Object.entries(competitorData)
    .filter(([, data]) => data.mentions >= 2)
    .map(([name]) => name);

  const hasComprehensiveData =
    wellCoveredCompetitors.length >= 2 && hasComparisons && useCases.length > 0;

  if (hasComprehensiveData) {
    elizaLogger.success('✅ Comprehensive market intelligence gathered');
    elizaLogger.info(`Key competitors analyzed: ${wellCoveredCompetitors.join(', ')}`);
  } else {
    elizaLogger.info('ℹ️ Partial market intelligence - consider additional research');
  }

  elizaLogger.success('✅ Real-World Test Passed: Market Intelligence Research');
}
|
|
612
|
+
|
|
613
|
+
// Test 5: Technical Problem Solving Research (Real Developer Debug Workflow)
|
|
614
|
+
/**
 * Test 5: technical problem-solving research (developer debugging workflow).
 *
 * Creates a research project for a JavaScript runtime-error query, hands it to
 * `monitorResearch`, then classifies the returned sources and findings with
 * plain substring heuristics and logs a summary. The quality checks are
 * informational only: once a result has been obtained, nothing here throws.
 *
 * @param runtime - Agent runtime used to look up the `research` service.
 * @throws Error when the research service is not registered, or when
 *   `monitorResearch` yields a falsy result.
 */
export async function testProblemSolvingResearch(runtime: IAgentRuntime): Promise<void> {
  elizaLogger.info('🔧 Starting Real-World Test: Technical Problem Solving Research');

  const service = runtime.getService<ResearchService>('research');
  if (!service) throw new Error('Research service not available');

  // True when `text` contains at least one of `terms` as a plain substring.
  const containsAny = (text: string, terms: readonly string[]): boolean =>
    terms.some((term) => text.includes(term));

  // Scenario: Debugging a complex technical issue
  const problemQuery =
    'TypeError cannot read property undefined JavaScript async await Promise debugging stack trace fix';

  const project = await service.createResearchProject(problemQuery, {
    maxSearchResults: 4,
    metadata: {
      researchType: 'debugging',
      problemType: 'runtime_error',
      technology: 'JavaScript',
    },
  });

  elizaLogger.info('Researching technical problem solutions...');

  // Announce Stack Overflow once, the first time it shows up among the sources,
  // regardless of how many other sources have already been collected.
  let stackOverflowAnnounced = false;

  const result = await monitorResearch(service, project.id, {
    timeout: 120000, // presumably milliseconds — confirm against monitorResearch
    onProgress: (p) => {
      if (stackOverflowAnnounced) return;
      if (p.sources.some((s) => s.url.includes('stackoverflow.com'))) {
        stackOverflowAnnounced = true;
        elizaLogger.info('Found Stack Overflow - good sign for debugging!');
      }
    },
  });

  if (!result) throw new Error('Problem solving research failed to complete');

  // Analyze debugging findings
  const findings = result.findings;
  const sources = result.sources;

  // Categorize sources by URL substring; a source may land in several buckets.
  const debuggingSources = {
    stackoverflow: sources.filter((s) => s.url.includes('stackoverflow.com')),
    github: sources.filter((s) => s.url.includes('github.com')),
    documentation: sources.filter((s) =>
      containsAny(s.url, ['developer.mozilla.org', 'javascript.info', 'docs.'])
    ),
    blogs: sources.filter((s) => containsAny(s.url, ['blog', 'medium.com', 'dev.to'])),
  };

  // Findings that talk about a solution (case-insensitive match).
  const solutionFindings = findings.filter((f) =>
    containsAny(f.content.toLowerCase(), ['solution', 'fix', 'resolve', 'solved', 'work'])
  );

  // Findings that appear to contain code. Matching is case-sensitive on purpose:
  // the markers are code fences and JavaScript keywords.
  const codeExampleFindings = findings.filter((f) =>
    containsAny(f.content, [
      '```',
      'const ',
      'let ',
      'function',
      'async ',
      'await ',
      'try',
      'catch',
    ])
  );

  // Findings that explain why the problem happens (case-insensitive match).
  const explanationFindings = findings.filter((f) =>
    containsAny(f.content.toLowerCase(), ['because', 'reason', 'cause', 'happen', 'occur'])
  );

  // Findings that reference comparable problems (case-insensitive match).
  const similarIssues = findings.filter((f) =>
    containsAny(f.content.toLowerCase(), ['similar', 'same error', 'same issue', 'also'])
  );

  elizaLogger.info('📊 Problem Solving Research Results:');
  elizaLogger.info('- Source distribution:');
  elizaLogger.info(` * Stack Overflow: ${debuggingSources.stackoverflow.length}`);
  elizaLogger.info(` * GitHub Issues: ${debuggingSources.github.length}`);
  elizaLogger.info(` * Documentation: ${debuggingSources.documentation.length}`);
  elizaLogger.info(` * Technical Blogs: ${debuggingSources.blogs.length}`);
  elizaLogger.info(`- Solutions found: ${solutionFindings.length}`);
  elizaLogger.info(`- Code examples: ${codeExampleFindings.length}`);
  elizaLogger.info(`- Explanations: ${explanationFindings.length}`);
  elizaLogger.info(`- Similar issues: ${similarIssues.length}`);

  // Problem solving quality
  const hasGoodSources =
    debuggingSources.stackoverflow.length > 0 || debuggingSources.documentation.length > 0;
  const hasGoodContent = solutionFindings.length > 0 && codeExampleFindings.length > 0;
  const hasContext = explanationFindings.length > 0;

  // Extract a solution if found
  if (hasGoodContent) {
    elizaLogger.success('✅ Found solutions with code examples!');

    // Pick the most relevant solution without reordering the array. The strict
    // `>` keeps the earliest finding on ties, like a stable descending sort.
    // `reduce` without a seed is safe: hasGoodContent implies a non-empty array.
    const bestSolution = solutionFindings.reduce((best, candidate) =>
      candidate.relevance > best.relevance ? candidate : best
    );
    const preview = bestSolution.content.substring(0, 250);
    elizaLogger.info(`Top solution preview: "${preview}..."`);
  }

  if (hasGoodSources && hasGoodContent && hasContext) {
    elizaLogger.success('✅ Comprehensive debugging information found');
  } else {
    elizaLogger.info(
      `ℹ️ Debugging info: Sources=${hasGoodSources}, Solutions=${hasGoodContent}, Context=${hasContext}`
    );
  }

  elizaLogger.success('✅ Real-World Test Passed: Technical Problem Solving Research');
}
|
|
754
|
+
|
|
755
|
+
// Export all tests as a TestSuite for the ElizaOS test runner
|
|
756
|
+
/**
 * Real-world research scenarios, grouped into one suite for the ElizaOS test runner.
 *
 * The array holds a single suite object; each entry in its `tests` list pairs a
 * display name and description with the scenario function to execute.
 */
export const realWorldE2ETests = [
  {
    name: 'Real-World Research E2E Tests',
    description: 'End-to-end tests simulating real-world research workflows',
    tests: [
      // Scenario 1: researching how to build a feature
      {
        name: 'Feature Development Research',
        description: 'Simulates a developer researching how to implement a new AI feature',
        fn: testFeatureDevelopmentResearch,
      },
      // Scenario 2: researching a person's professional background
      {
        name: 'Person Background Research',
        description: 'Simulates researching professional background information',
        fn: testPersonBackgroundResearch,
      },
      // Scenario 3: researching current events
      {
        name: 'Breaking News Research',
        description: 'Simulates journalist/analyst workflow for current events',
        fn: testBreakingNewsResearch,
      },
      // Scenario 4: market and competitor research
      {
        name: 'Market Intelligence Research',
        description: 'Simulates business strategy and competitive analysis workflow',
        fn: testMarketIntelligenceResearch,
      },
      // Scenario 5: debugging a technical problem
      {
        name: 'Technical Problem Solving Research',
        description: 'Simulates developer debugging and troubleshooting workflow',
        fn: testProblemSolvingResearch,
      },
    ],
  },
];
|