elsabro 2.3.0 → 3.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. package/README.md +668 -20
  2. package/bin/install.js +0 -0
  3. package/flows/development-flow.json +452 -0
  4. package/flows/quick-flow.json +118 -0
  5. package/package.json +3 -2
  6. package/references/SYSTEM_INDEX.md +379 -5
  7. package/references/agent-marketplace.md +2274 -0
  8. package/references/agent-protocol.md +1126 -0
  9. package/references/ai-code-suggestions.md +2413 -0
  10. package/references/checkpointing.md +595 -0
  11. package/references/collaboration-patterns.md +851 -0
  12. package/references/collaborative-sessions.md +1081 -0
  13. package/references/configuration-management.md +1810 -0
  14. package/references/cost-tracking.md +1095 -0
  15. package/references/enterprise-sso.md +2001 -0
  16. package/references/error-contracts-v2.md +968 -0
  17. package/references/event-driven.md +1031 -0
  18. package/references/flow-orchestration.md +940 -0
  19. package/references/flow-visualization.md +1557 -0
  20. package/references/ide-integrations.md +3513 -0
  21. package/references/interrupt-system.md +681 -0
  22. package/references/kubernetes-deployment.md +3099 -0
  23. package/references/memory-system.md +683 -0
  24. package/references/mobile-companion.md +3236 -0
  25. package/references/multi-llm-providers.md +2494 -0
  26. package/references/multi-project-memory.md +1182 -0
  27. package/references/observability.md +793 -0
  28. package/references/output-schemas.md +858 -0
  29. package/references/performance-profiler.md +955 -0
  30. package/references/plugin-system.md +1526 -0
  31. package/references/prompt-management.md +292 -0
  32. package/references/sandbox-execution.md +303 -0
  33. package/references/security-system.md +1253 -0
  34. package/references/streaming.md +696 -0
  35. package/references/testing-framework.md +1151 -0
  36. package/references/time-travel.md +802 -0
  37. package/references/tool-registry.md +886 -0
  38. package/references/voice-commands.md +3296 -0
  39. package/templates/agent-marketplace-config.json +220 -0
  40. package/templates/agent-protocol-config.json +136 -0
  41. package/templates/ai-suggestions-config.json +100 -0
  42. package/templates/checkpoint-state.json +61 -0
  43. package/templates/collaboration-config.json +157 -0
  44. package/templates/collaborative-sessions-config.json +153 -0
  45. package/templates/configuration-config.json +245 -0
  46. package/templates/cost-tracking-config.json +148 -0
  47. package/templates/enterprise-sso-config.json +438 -0
  48. package/templates/events-config.json +148 -0
  49. package/templates/flow-visualization-config.json +196 -0
  50. package/templates/ide-integrations-config.json +442 -0
  51. package/templates/kubernetes-config.json +764 -0
  52. package/templates/memory-state.json +84 -0
  53. package/templates/mobile-companion-config.json +600 -0
  54. package/templates/multi-llm-config.json +544 -0
  55. package/templates/multi-project-memory-config.json +145 -0
  56. package/templates/observability-config.json +109 -0
  57. package/templates/performance-profiler-config.json +125 -0
  58. package/templates/plugin-config.json +170 -0
  59. package/templates/prompt-management-config.json +86 -0
  60. package/templates/sandbox-config.json +185 -0
  61. package/templates/schemas-config.json +65 -0
  62. package/templates/security-config.json +120 -0
  63. package/templates/streaming-config.json +72 -0
  64. package/templates/testing-config.json +81 -0
  65. package/templates/timetravel-config.json +62 -0
  66. package/templates/tool-registry-config.json +109 -0
  67. package/templates/voice-commands-config.json +658 -0
@@ -0,0 +1,1151 @@
1
+ # Testing Framework (v3.4)
2
+
3
+ Framework de pruebas para agentes de IA con mocking, simulación, fixtures y aserciones de comportamiento.
4
+
5
+ ## Arquitectura
6
+
7
+ ```
8
+ ┌─────────────────────────────────────────────────────────────────────────┐
9
+ │ TESTING FRAMEWORK │
10
+ ├─────────────────────────────────────────────────────────────────────────┤
11
+ │ │
12
+ │ ┌─────────────────────────────────────────────────────────────────┐ │
13
+ │ │ TEST RUNNER │ │
14
+ │ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ │
15
+ │ │ │ Unit │ │ Integration │ │ E2E │ │ │
16
+ │ │ │ Tests │ │ Tests │ │ Tests │ │ │
17
+ │ │ └─────────────┘ └─────────────┘ └─────────────┘ │ │
18
+ │ └─────────────────────────────────────────────────────────────────┘ │
19
+ │ │ │
20
+ │ ▼ │
21
+ │ ┌─────────────────────────────────────────────────────────────────┐ │
22
+ │ │ MOCK PROVIDERS │ │
23
+ │ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ │
24
+ │ │ │ MockLLM │ │ MockTools │ │ MockEvents │ │ │
25
+ │ │ │ (responses) │ │ (behavior) │ │ (triggers) │ │ │
26
+ │ │ └─────────────┘ └─────────────┘ └─────────────┘ │ │
27
+ │ └─────────────────────────────────────────────────────────────────┘ │
28
+ │ │ │
29
+ │ ▼ │
30
+ │ ┌─────────────────────────────────────────────────────────────────┐ │
31
+ │ │ SIMULATION ENGINE │ │
32
+ │ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ │
33
+ │ │ │ Scenarios │ │ Personas │ │ Environment │ │ │
34
+ │ │ │ (workflows) │ │ (users) │ │ (context) │ │ │
35
+ │ │ └─────────────┘ └─────────────┘ └─────────────┘ │ │
36
+ │ └─────────────────────────────────────────────────────────────────┘ │
37
+ │ │ │
38
+ │ ▼ │
39
+ │ ┌─────────────────────────────────────────────────────────────────┐ │
40
+ │ │ ASSERTIONS │ │
41
+ │ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ │
42
+ │ │ │ Behavioral │ │ Property │ │ Semantic │ │ │
43
+ │ │ │ (actions) │ │(invariants) │ │ (meaning) │ │ │
44
+ │ │ └─────────────┘ └─────────────┘ └─────────────┘ │ │
45
+ │ └─────────────────────────────────────────────────────────────────┘ │
46
+ │ │
47
+ └─────────────────────────────────────────────────────────────────────────┘
48
+ ```
49
+
50
+ ---
51
+
52
+ ## AgentTestRunner
53
+
54
+ ### API Principal
55
+
56
+ ```typescript
57
+ interface TestCase { // Fields shared by every test definition.
58
+ name: string; // Copied into TestResult.name by runTest.
59
+ description?: string; // Not read by the runner code in this section.
60
+ setup?: () => Promise<void>; // Awaited by runTest before mocks are configured and the agent is created.
61
+ teardown?: () => Promise<void>; // Awaited in runTest's finally block, so it runs whether the test passed, failed or errored.
62
+ timeout?: number; // Handed to setTimeout (milliseconds); a falsy value falls back to config.defaultTimeout.
63
+ retries?: number; // Read by runSuite to decide how often a non-passing test is re-run.
64
+ tags?: string[]; // Not read by the runner code in this section.
65
+ }
66
+
67
+ interface AgentTestCase extends TestCase { // A test that runs one agent against one input.
68
+ agent: string | AgentConfig; // A string goes to AgentFactory.create; a config object goes to AgentFactory.createFromConfig.
69
+ input: AgentInput; // Passed unchanged to agent.run().
70
+ expected?: ExpectedBehavior; // Not read by the runner code in this section.
71
+ mocks?: MockConfig; // When set, configures the MockProvider; mocks.llm / mocks.tools decide which providers are swapped on the agent.
72
+ assertions: Assertion[]; // Each one is checked against the agent result; the test passes only if all of them pass.
73
+ }
74
+
75
+ interface TestResult { // Outcome of a single runTest call.
76
+ name: string; // Copied from the test case.
77
+ status: 'passed' | 'failed' | 'skipped' | 'error'; // runTest produces 'passed'/'failed' from the assertion outcomes and 'error' when something threw; 'skipped' is only counted by summarize in this section.
78
+ duration_ms: number; // Wall-clock time measured with Date.now() from the start of runTest.
79
+ assertions: AssertionResult[]; // Results gathered so far; may be partial when status is 'error'.
80
+ error?: Error; // Set only on the 'error' path.
81
+ logs?: string[]; // runTest returns the array it creates; nothing in this section appends to it.
82
+ metrics?: TestMetrics; // Filled by collectMetrics only when the run finished without throwing.
83
+ }
84
+
85
+ interface TestSuite { // A named group of tests with optional lifecycle hooks.
86
+ name: string; // Copied into the SuiteResult.
87
+ tests: AgentTestCase[]; // Run one after another, in array order.
88
+ beforeAll?: () => Promise<void>; // Awaited once before the first test.
89
+ afterAll?: () => Promise<void>; // Awaited once after the test loop.
90
+ beforeEach?: () => Promise<void>; // Awaited once per test (not per retry attempt), before it runs.
91
+ afterEach?: () => Promise<void>; // Awaited once per test, after its last attempt.
92
+ }
93
+
94
/**
 * Runs agent test cases and suites, swapping in mock LLM/tool providers when a
 * test asks for them.
 *
 * Tests are executed strictly one at a time: every test shares the same
 * MockProvider instance, which is configured before and reset after each run.
 */
class AgentTestRunner {
  private mockProvider: MockProvider;
  private assertionEngine: AssertionEngine;
  private config: TestRunnerConfig;

  constructor(config: TestRunnerConfig) {
    this.config = config;
    this.mockProvider = new MockProvider();
    this.assertionEngine = new AssertionEngine();
  }

  /**
   * Runs a single test: setup, mock configuration, agent execution under a
   * timeout, then every assertion in order.
   *
   * @returns status 'passed' when every assertion passed, 'failed' when at
   *   least one did not, and 'error' when setup, agent creation, the agent run
   *   (including a timeout) or an assertion check threw.
   * @throws Only when `test.teardown` itself throws; the mocks are still reset.
   */
  async runTest(test: AgentTestCase): Promise<TestResult> {
    const startTime = Date.now();
    const logs: string[] = [];
    const assertionResults: AssertionResult[] = [];

    try {
      if (test.setup) await test.setup();

      if (test.mocks) {
        this.mockProvider.configure(test.mocks);
      }

      // The agent only receives mock providers for the kinds listed in test.mocks.
      const agent = await this.createTestAgent(test.agent, test.mocks);

      const result = await this.runWithTimeout(
        agent.run(test.input),
        test.timeout || this.config.defaultTimeout
      );

      for (const assertion of test.assertions) {
        const assertionResult = await this.assertionEngine.check(
          assertion,
          result,
          { input: test.input, agent }
        );
        assertionResults.push(assertionResult);
      }

      const allPassed = assertionResults.every(r => r.passed);

      return {
        name: test.name,
        status: allPassed ? 'passed' : 'failed',
        duration_ms: Date.now() - startTime,
        assertions: assertionResults,
        logs,
        metrics: this.collectMetrics(result)
      };
    } catch (error) {
      return {
        name: test.name,
        status: 'error',
        duration_ms: Date.now() - startTime,
        assertions: assertionResults,
        // Non-Error throwables (strings, plain objects) are wrapped so that
        // consumers can rely on `.message`.
        error: error instanceof Error ? error : new Error(String(error)),
        logs
      };
    } finally {
      try {
        if (test.teardown) await test.teardown();
      } finally {
        // Always reset, even when teardown throws, so mocks never leak into
        // the next test.
        this.mockProvider.reset();
      }
    }
  }

  /**
   * Runs every test of a suite sequentially, with lifecycle hooks and retries.
   *
   * `beforeEach`/`afterEach` wrap each test once (not each retry attempt).
   * `afterEach` and `afterAll` run even when a hook or test run throws.
   * When `config.stopOnFirstFailure` is set, the loop stops after the first
   * test that ends as 'failed' or 'error'.
   */
  async runSuite(suite: TestSuite): Promise<SuiteResult> {
    const results: TestResult[] = [];

    if (suite.beforeAll) await suite.beforeAll();

    try {
      for (const test of suite.tests) {
        if (suite.beforeEach) await suite.beforeEach();

        let result: TestResult;
        try {
          result = await this.runWithRetries(test);
        } finally {
          if (suite.afterEach) await suite.afterEach();
        }

        results.push(result);

        if (this.config.stopOnFirstFailure && this.isUnsuccessful(result)) {
          break;
        }
      }
    } finally {
      if (suite.afterAll) await suite.afterAll();
    }

    return {
      name: suite.name,
      results,
      summary: this.summarize(results)
    };
  }

  /** Discovers the suites matching `pattern` and runs them one after another. */
  async runAll(pattern?: string): Promise<SuiteResult[]> {
    const suites = await this.discoverTests(pattern);
    const results: SuiteResult[] = [];

    for (const suite of suites) {
      results.push(await this.runSuite(suite));
    }

    return results;
  }

  /**
   * Runs all tests once, then re-runs them whenever a `.test.ts` or
   * `.test.json` file under `config.testDir` changes.
   *
   * Runs never overlap (they share one MockProvider): a change that arrives
   * while a run is in progress schedules exactly one follow-up run.
   */
  async watch(pattern?: string): Promise<void> {
    console.log('Watching for changes...');

    await this.runAll(pattern);

    let running = false;
    let rerunRequested = false;

    const rerun = async (): Promise<void> => {
      if (running) {
        rerunRequested = true;
        return;
      }
      running = true;
      try {
        do {
          rerunRequested = false;
          await this.runAll(pattern);
        } while (rerunRequested);
      } catch (error) {
        // Keep watching: a broken test file must not end watch mode through
        // an unhandled rejection.
        console.error('Test run failed:', error);
      } finally {
        running = false;
      }
    };

    // NOTE(review): assumes `fs.watch` returns an emitter-style watcher here
    // while `fs.readFile` (below) returns a promise — confirm which fs module
    // the file imports.
    const watcher = fs.watch(this.config.testDir, { recursive: true });

    watcher.on('change', (eventType, filename) => {
      if (filename?.endsWith('.test.ts') || filename?.endsWith('.test.json')) {
        console.log(`\nFile changed: ${filename}`);
        void rerun();
      }
    });
  }

  /** Re-runs a test until it passes or `1 + retries` attempts have been made. */
  private async runWithRetries(test: AgentTestCase): Promise<TestResult> {
    // `retries` counts additional attempts after the first one.
    const maxAttempts = 1 + Math.max(0, test.retries ?? 0);

    let result = await this.runTest(test);
    for (let attempt = 1; attempt < maxAttempts && this.isUnsuccessful(result); attempt++) {
      result = await this.runTest(test);
    }
    return result;
  }

  /** True for results that count as a failure: 'failed' and 'error'. */
  private isUnsuccessful(result: TestResult): boolean {
    return result.status === 'failed' || result.status === 'error';
  }

  /**
   * Creates the agent under test and replaces its LLM and/or tool provider
   * with the mock provider, depending on which keys `mocks` contains.
   */
  private async createTestAgent(
    agentConfig: string | AgentConfig,
    mocks?: MockConfig
  ): Promise<Agent> {
    const agent = typeof agentConfig === 'string'
      ? await AgentFactory.create(agentConfig)
      : await AgentFactory.createFromConfig(agentConfig);

    if (mocks?.llm) {
      agent.setLLMProvider(this.mockProvider.getLLM());
    }
    if (mocks?.tools) {
      agent.setToolProvider(this.mockProvider.getTools());
    }

    return agent;
  }

  /**
   * Resolves or rejects with `promise`, or rejects with 'Test timeout' when it
   * has not settled after `timeout` milliseconds. The timer is cleared as soon
   * as the race settles so finished tests leave no pending timers behind.
   */
  private async runWithTimeout<T>(promise: Promise<T>, timeout: number): Promise<T> {
    let timer: ReturnType<typeof setTimeout> | undefined;
    const expiry = new Promise<never>((_, reject) => {
      timer = setTimeout(() => reject(new Error('Test timeout')), timeout);
    });

    try {
      return await Promise.race([promise, expiry]);
    } finally {
      clearTimeout(timer);
    }
  }

  /** Counts results per status and sums their durations. */
  private summarize(results: TestResult[]): TestSummary {
    const countOf = (status: TestResult['status']): number =>
      results.filter(r => r.status === status).length;

    return {
      total: results.length,
      passed: countOf('passed'),
      failed: countOf('failed'),
      skipped: countOf('skipped'),
      errors: countOf('error'),
      duration_ms: results.reduce((sum, r) => sum + r.duration_ms, 0)
    };
  }

  /** Extracts token, tool-call and cost figures, defaulting each to 0. */
  private collectMetrics(result: AgentResult): TestMetrics {
    return {
      tokens_used: result.usage?.total_tokens || 0,
      tool_calls: result.toolCalls?.length || 0,
      cost: result.cost?.total || 0
    };
  }

  /**
   * Loads every suite file matching `pattern`, or every `*.test.ts` /
   * `*.test.json` file under `config.testDir` when no pattern is given.
   */
  private async discoverTests(pattern?: string): Promise<TestSuite[]> {
    const testFiles = await glob(
      pattern || `${this.config.testDir}/**/*.test.{ts,json}`
    );

    const suites: TestSuite[] = [];

    for (const file of testFiles) {
      suites.push(await this.loadTestFile(file));
    }

    return suites;
  }

  /**
   * Reads a suite from a JSON file or from a module's default export (falling
   * back to the module namespace itself).
   *
   * @throws Error when the loaded value is not an object with a `tests` array.
   */
  private async loadTestFile(file: string): Promise<TestSuite> {
    let suite: unknown;

    if (file.endsWith('.json')) {
      suite = JSON.parse(await fs.readFile(file, 'utf-8'));
    } else {
      const loaded = await import(file);
      suite = loaded.default || loaded;
    }

    // Fail here with the file name instead of later with an opaque TypeError
    // inside runSuite.
    if (typeof suite !== 'object' || suite === null || !Array.isArray((suite as TestSuite).tests)) {
      throw new Error(`Invalid test suite in ${file}: expected an object with a "tests" array`);
    }

    return suite as TestSuite;
  }
}
313
+ ```
314
+
315
+ ---
316
+
317
+ ## MockProvider
318
+
319
+ ```typescript
320
+ interface MockLLMConfig { // Behaviour of the mock LLM returned by MockProvider.getLLM().
321
+ responses?: MockResponse[]; // Tried in array order; the first entry whose matcher accepts the prompt wins.
322
+ defaultResponse?: string; // Reply used when no entry in responses matches.
323
+ delay?: number; // Milliseconds awaited before each completion is answered.
324
+ errorRate?: number; // Compared against Math.random(): the chance (0..1) that a completion throws 'Mock LLM error'.
325
+ tokenUsage?: TokenUsage; // Returned verbatim as usage; when absent, usage is estimated from the text lengths.
326
+ }
327
+
328
+ interface MockResponse { // One canned reply plus the rule that selects it.
329
+ match: string | RegExp | ((input: string) => boolean); // string: substring test via includes(); RegExp: test(); function: called with the prompt.
330
+ response: string | (() => string); // A function is invoked at match time to produce the text.
331
+ toolCalls?: ToolCall[]; // Returned alongside the text when this entry matches.
332
+ }
333
+
334
+ interface MockToolConfig { // Behaviour of the mock tool provider returned by MockProvider.getTools().
335
+ tools: Record<string, MockToolBehavior>; // Keyed by tool name; calling a tool without an entry throws 'No mock configured for tool: <name>'.
336
+ }
337
+
338
+ interface MockToolBehavior { // What a mocked tool does when it is called.
339
+ response?: unknown; // Value the call resolves with when no error is configured.
340
+ error?: string; // When truthy, the call throws an Error with this message.
341
+ delay?: number; // Milliseconds awaited before the side effect, error or response.
342
+ sideEffect?: (params: unknown) => void; // Invoked with the call parameters, before the error check.
343
+ }
344
+
345
/**
 * Supplies mock LLM and tool providers driven by a MockConfig and records
 * every call they receive.
 *
 * The provider keeps its own copies of the response list and the tool map, so
 * the `mock*` helper methods never mutate the configuration object that was
 * passed to `configure`.
 */
class MockProvider {
  private llmConfig: MockLLMConfig = {};
  private toolConfig: MockToolConfig = { tools: {} };
  private callHistory: CallRecord[] = [];

  /**
   * Replaces the LLM and/or tool configuration. Sections absent from `config`
   * keep their current value.
   */
  configure(config: MockConfig): void {
    if (config.llm) {
      this.llmConfig = { ...config.llm };
      if (config.llm.responses) {
        // Copy the list: mockLLMResponse() appends to it later.
        this.llmConfig.responses = [...config.llm.responses];
      }
    }
    if (config.tools) {
      // Copy the map: mockTool*() assigns into it later.
      this.toolConfig = { ...config.tools, tools: { ...config.tools.tools } };
    }
  }

  /** Clears all configuration and the recorded call history. */
  reset(): void {
    this.llmConfig = {};
    this.toolConfig = { tools: {} };
    this.callHistory = [];
  }

  /**
   * Returns a mock LLM whose `complete(prompt)` records the call, applies the
   * configured delay and random error rate, and answers with the first
   * matching canned response (or the default response).
   *
   * When no `tokenUsage` is configured, usage is estimated at one token per
   * four characters, rounded up so the counts are whole numbers.
   */
  getLLM(): MockLLM {
    return {
      complete: async (prompt: string) => {
        this.callHistory.push({
          type: 'llm',
          input: prompt,
          timestamp: new Date()
        });

        if (this.llmConfig.delay) {
          await this.delay(this.llmConfig.delay);
        }

        if (this.llmConfig.errorRate && Math.random() < this.llmConfig.errorRate) {
          throw new Error('Mock LLM error');
        }

        const response = this.findMatchingResponse(prompt);
        const inputTokens = Math.ceil(prompt.length / 4);
        const outputTokens = Math.ceil(response.text.length / 4);

        return {
          content: response.text,
          toolCalls: response.toolCalls,
          usage: this.llmConfig.tokenUsage || {
            input_tokens: inputTokens,
            output_tokens: outputTokens,
            total_tokens: inputTokens + outputTokens
          }
        };
      }
    };
  }

  /**
   * Returns a mock tool provider whose `call(name, params)` records the call
   * and then plays back the configured behaviour: delay, side effect, error,
   * response — in that order.
   *
   * @throws Error('No mock configured for tool: <name>') for unknown tools.
   */
  getTools(): MockToolProvider {
    return {
      call: async (name: string, params: unknown) => {
        this.callHistory.push({
          type: 'tool',
          name,
          input: params,
          timestamp: new Date()
        });

        const behavior = this.toolConfig.tools[name];

        if (!behavior) {
          throw new Error(`No mock configured for tool: ${name}`);
        }

        if (behavior.delay) {
          await this.delay(behavior.delay);
        }

        if (behavior.sideEffect) {
          behavior.sideEffect(params);
        }

        if (behavior.error) {
          throw new Error(behavior.error);
        }

        return behavior.response;
      }
    };
  }

  /** Returns a shallow copy of the recorded LLM and tool calls, oldest first. */
  getCallHistory(): CallRecord[] {
    return [...this.callHistory];
  }

  /** Appends a canned LLM response; earlier entries take precedence. */
  mockLLMResponse(match: string | RegExp, response: string): void {
    if (!this.llmConfig.responses) {
      this.llmConfig.responses = [];
    }
    this.llmConfig.responses.push({ match, response });
  }

  /** Registers (or replaces) the behaviour of tool `name`. */
  mockTool(name: string, behavior: MockToolBehavior): void {
    this.toolConfig.tools[name] = behavior;
  }

  /** Makes tool `name` resolve with `response`. */
  mockToolSuccess(name: string, response: unknown): void {
    this.toolConfig.tools[name] = { response };
  }

  /** Makes tool `name` throw an Error with message `error`. */
  mockToolError(name: string, error: string): void {
    this.toolConfig.tools[name] = { error };
  }

  /**
   * Picks the first configured response whose matcher accepts `prompt`, or the
   * default response ('Mock response' when none is configured).
   */
  private findMatchingResponse(prompt: string): { text: string; toolCalls?: ToolCall[] } {
    for (const mock of this.llmConfig.responses || []) {
      let matches = false;

      if (typeof mock.match === 'string') {
        matches = prompt.includes(mock.match);
      } else if (mock.match instanceof RegExp) {
        // Global/sticky regexes resume from lastIndex; rewind so the same
        // prompt matches on every call instead of every other call.
        mock.match.lastIndex = 0;
        matches = mock.match.test(prompt);
      } else if (typeof mock.match === 'function') {
        matches = mock.match(prompt);
      }

      if (matches) {
        const text = typeof mock.response === 'function'
          ? mock.response()
          : mock.response;
        return { text, toolCalls: mock.toolCalls };
      }
    }

    return {
      // `??` keeps an explicitly configured empty-string default.
      text: this.llmConfig.defaultResponse ?? 'Mock response'
    };
  }

  private delay(ms: number): Promise<void> {
    return new Promise(resolve => setTimeout(resolve, ms));
  }
}
490
+ ```
491
+
492
+ ---
493
+
494
+ ## SimulationEngine
495
+
496
+ ```typescript
497
+ interface Scenario { // A scripted interaction that SimulationEngine replays against an agent.
498
+ name: string; // Key under which registerScenario stores the scenario; also reported in the result.
499
+ description: string; // Not read by the engine code in this section.
500
+ steps: ScenarioStep[]; // Executed in array order.
501
+ environment?: EnvironmentConfig; // Applied by setupEnvironment before the first step.
502
+ persona?: PersonaConfig; // Not read by the engine code in this section.
503
+ }
504
+
505
+ interface ScenarioStep { // One action of a scenario; `data` is interpreted according to `action`.
506
+ action: 'user_input' | 'wait' | 'trigger_event' | 'assert' | 'checkpoint'; // Selects the branch taken in runStep.
507
+ data?: unknown; // user_input: message string; wait: milliseconds; trigger_event: object with `event` and `payload`; assert: an Assertion; checkpoint: unused.
508
+ timeout?: number; // Not read by the engine code in this section.
509
+ }
510
+
511
+ interface EnvironmentConfig { // Environment prepared for the duration of one simulation.
512
+ files?: Record<string, string>; // Path (joined onto a fresh temp directory) -> file content to write.
513
+ env?: Record<string, string>; // Assigned onto process.env for the simulation and undone in cleanup.
514
+ time?: string; // Frozen time; not read by setupEnvironment in this section.
515
+ }
516
+
517
+ interface PersonaConfig { // Description of the simulated user; no engine code in this section reads it.
518
+ name: string;
519
+ behavior: 'cooperative' | 'adversarial' | 'confused' | 'expert'; // NOTE(review): presumably how the simulated user reacts — confirm against the consumer.
520
+ style?: 'verbose' | 'terse' | 'random'; // NOTE(review): presumably the phrasing style of the simulated user — confirm against the consumer.
521
+ }
522
+
523
/**
 * Replays registered scenarios against an agent inside a temporary
 * environment (temp directory plus environment variables) and reports the
 * outcome of every step.
 *
 * One engine instance runs one simulation at a time: the running simulation is
 * held in instance state.
 */
class SimulationEngine {
  private scenarios: Map<string, Scenario>;
  private currentSimulation: SimulationState | null = null;
  private assertionEngine: AssertionEngine;
  // Most recent user_input exchange; 'assert' steps are checked against it.
  private lastExchange: { input: { message: string }; result: AgentResult } | null = null;

  constructor() {
    this.scenarios = new Map();
    this.assertionEngine = new AssertionEngine();
  }

  /** Stores `scenario` under its name, replacing any scenario with that name. */
  registerScenario(scenario: Scenario): void {
    this.scenarios.set(scenario.name, scenario);
  }

  /**
   * Runs the named scenario step by step against `agent`.
   *
   * The environment is set up before the first step and always torn down
   * afterwards. A failing step does not stop the run unless
   * `options.stopOnFailure` is set.
   *
   * @throws Error when the scenario is unknown or the environment cannot be
   *   prepared.
   */
  async simulate(
    scenarioName: string,
    agent: Agent,
    options?: SimulationOptions
  ): Promise<SimulationResult> {
    const scenario = this.scenarios.get(scenarioName);
    if (!scenario) throw new Error(`Scenario not found: ${scenarioName}`);

    const env = await this.setupEnvironment(scenario.environment);

    const state: SimulationState = {
      scenario,
      agent,
      env,
      stepResults: [],
      startedAt: new Date()
    };
    this.currentSimulation = state;
    this.lastExchange = null;

    try {
      for (let i = 0; i < scenario.steps.length; i++) {
        const stepResult = await this.runStep(scenario.steps[i], i);
        state.stepResults.push(stepResult);

        if (!stepResult.success && options?.stopOnFailure) {
          break;
        }
      }

      return this.buildResult();
    } finally {
      try {
        await this.cleanup(env);
      } finally {
        // Clear the state even if cleanup throws.
        this.currentSimulation = null;
        this.lastExchange = null;
      }
    }
  }

  /**
   * Derives one basic test case ('responds' + 'no_error') from every
   * `user_input` step of the named scenario.
   *
   * @throws Error when the scenario is unknown.
   */
  async generateTestCases(scenarioName: string): Promise<AgentTestCase[]> {
    const scenario = this.scenarios.get(scenarioName);
    if (!scenario) throw new Error(`Scenario not found: ${scenarioName}`);

    const testCases: AgentTestCase[] = [];

    scenario.steps.forEach((step, i) => {
      if (step.action !== 'user_input') return;

      testCases.push({
        name: `${scenario.name} - Step ${i + 1}`,
        description: `Test step: ${step.action}`,
        agent: 'default',
        input: { message: step.data as string },
        assertions: [
          { type: 'responds', timeout: 30000 },
          { type: 'no_error' }
        ]
      });
    });

    return testCases;
  }

  /**
   * Executes a single step. Anything thrown while doing so is converted into
   * a failed StepResult instead of propagating.
   */
  private async runStep(step: ScenarioStep, index: number): Promise<StepResult> {
    const startTime = Date.now();

    try {
      switch (step.action) {
        case 'user_input':
          return await this.handleUserInput(step.data as string);

        case 'wait':
          await this.delay(step.data as number);
          return { success: true, duration_ms: step.data as number };

        case 'trigger_event':
          await EventBus.publish(
            (step.data as any).event,
            (step.data as any).payload
          );
          return { success: true, duration_ms: Date.now() - startTime };

        case 'assert': {
          // Braces scope the declaration to this case clause.
          const outcome = await this.checkAssertion(step.data as Assertion);
          return {
            success: outcome.passed,
            duration_ms: Date.now() - startTime,
            assertion: outcome
          };
        }

        case 'checkpoint':
          // Snapshot taken for later analysis.
          return {
            success: true,
            duration_ms: Date.now() - startTime,
            checkpoint: this.captureState()
          };

        default:
          throw new Error(`Unknown step action: ${step.action}`);
      }
    } catch (error) {
      return {
        success: false,
        duration_ms: Date.now() - startTime,
        error: error instanceof Error ? error : new Error(String(error))
      };
    }
  }

  /** Sends `input` to the agent and remembers the exchange for 'assert' steps. */
  private async handleUserInput(input: string): Promise<StepResult> {
    if (!this.currentSimulation) throw new Error('No simulation running');

    const startTime = Date.now();
    const message = { message: input };
    const result = await this.currentSimulation.agent.run(message);
    this.lastExchange = { input: message, result };

    return {
      success: true,
      duration_ms: Date.now() - startTime,
      agentResult: result
    };
  }

  /**
   * Creates the temp directory, writes the configured files into it and
   * applies the configured environment variables.
   *
   * File keys must stay inside the temp directory; a key that resolves outside
   * it (for example via `..`) is rejected. If anything fails, the partially
   * built environment is torn down before the error is rethrown.
   */
  private async setupEnvironment(config?: EnvironmentConfig): Promise<SimulationEnv> {
    const env: SimulationEnv = {
      tempDir: await fs.mkdtemp(path.join(os.tmpdir(), 'elsabro-sim-')),
      originalEnv: { ...process.env }
    };

    try {
      if (config?.files) {
        for (const [filePath, content] of Object.entries(config.files)) {
          const fullPath = path.join(env.tempDir, filePath);
          const relative = path.relative(env.tempDir, fullPath);
          const escapes =
            relative === '' ||
            relative === '..' ||
            relative.startsWith(`..${path.sep}`) ||
            path.isAbsolute(relative);
          if (escapes) {
            throw new Error(`Environment file path escapes the simulation directory: ${filePath}`);
          }
          await fs.mkdir(path.dirname(fullPath), { recursive: true });
          await fs.writeFile(fullPath, content);
        }
      }

      if (config?.env) {
        Object.assign(process.env, config.env);
      }
    } catch (error) {
      await this.cleanup(env);
      throw error;
    }

    return env;
  }

  /**
   * Restores the environment variables captured at setup and removes the temp
   * directory.
   */
  private async cleanup(env: SimulationEnv): Promise<void> {
    // Restore in place: replacing the process.env object would swap Node's
    // special env object for a plain one and invalidate references to it.
    for (const key of Object.keys(process.env)) {
      if (!(key in env.originalEnv)) {
        delete process.env[key];
      }
    }
    Object.assign(process.env, env.originalEnv);

    await fs.rm(env.tempDir, { recursive: true, force: true });
  }

  /** Summarises the step results of the running simulation. */
  private buildResult(): SimulationResult {
    if (!this.currentSimulation) throw new Error('No simulation');

    const { scenario, stepResults, startedAt } = this.currentSimulation;
    const passedSteps = stepResults.filter(r => r.success).length;

    return {
      scenario: scenario.name,
      success: passedSteps === stepResults.length,
      steps: stepResults,
      duration_ms: Date.now() - startedAt.getTime(),
      summary: {
        totalSteps: stepResults.length,
        passedSteps,
        failedSteps: stepResults.length - passedSteps
      }
    };
  }

  /** Captures a timestamped snapshot of exported memory and the task list. */
  private captureState(): SimulationCheckpoint {
    return {
      timestamp: new Date().toISOString(),
      memory: MemoryManager.export(),
      tasks: TaskList()
    };
  }

  /**
   * Checks `assertion` against the result of the most recent `user_input`
   * step.
   *
   * @throws Error when no simulation is running or no `user_input` step has
   *   produced a result yet (runStep turns this into a failed step).
   */
  private async checkAssertion(assertion: Assertion): Promise<AssertionResult> {
    if (!this.currentSimulation) throw new Error('No simulation running');
    if (!this.lastExchange) {
      throw new Error('Assert step has no agent result to check; run a user_input step first');
    }

    return this.assertionEngine.check(assertion, this.lastExchange.result, {
      input: this.lastExchange.input,
      agent: this.currentSimulation.agent
    });
  }

  private delay(ms: number): Promise<void> {
    return new Promise(resolve => setTimeout(resolve, ms));
  }
}
734
+ ```
735
+
736
+ ---
737
+
738
+ ## AssertionEngine
739
+
740
+ ```typescript
741
+ type AssertionType =
742
+ | 'responds' // Agent output is neither undefined nor null
743
+ | 'no_error' // Agent result carries no error
744
+ | 'tool_called' // A tool call with the given name is present
745
+ | 'tool_not_called' // No tool call with the given name is present
746
+ | 'output_contains' // Stringified output contains the given text
747
+ | 'output_matches' // Stringified output matches the given regex source
748
+ | 'semantic_match' // An LLM answers "yes" when asked whether output and expected text are similar
749
+ | 'behavior' // BehaviorSpec check (required/forbidden tools, asked user, custom check)
750
+ | 'property' // PropertySpec check (invariant and postcondition callbacks)
751
+ | 'cost_under' // Total cost is at or below the threshold (<=)
752
+ | 'time_under'; // Duration in ms is at or below the threshold (<=)
753
+
754
+ interface Assertion { // One check to run against an agent result.
755
+ type: AssertionType; // Selects the handler in AssertionEngine.check.
756
+ value?: unknown; // Handler-specific argument: tool name, text, regex source, BehaviorSpec, PropertySpec or numeric threshold.
757
+ message?: string; // Not read by the handlers visible in this section.
758
+ timeout?: number; // Not read by the handlers visible in this section.
759
+ }
760
+
761
+ interface AssertionResult { // Outcome of one assertion check.
762
+ assertion: Assertion; // The built-in handlers visible here rebuild a { type, value } object rather than echoing the input assertion.
763
+ passed: boolean; // AgentTestRunner marks a test 'passed' only when this is true for every assertion.
764
+ actual?: unknown; // Observed value; the output-based handlers truncate it to the first 200 characters.
765
+ expected?: unknown; // Expected value or threshold, when the handler reports one.
766
+ message?: string; // Human-readable explanation of the outcome.
767
+ }
768
+
769
+ class AssertionEngine {
770
+ private customAssertions: Map<string, AssertionHandler>;
771
+
772
+ constructor() {
773
+ this.customAssertions = new Map();
774
+ }
775
+
776
+ // Check assertion
777
+ async check(
778
+ assertion: Assertion,
779
+ result: AgentResult,
780
+ context?: AssertionContext
781
+ ): Promise<AssertionResult> {
782
+ switch (assertion.type) {
783
+ case 'responds':
784
+ return this.assertResponds(result);
785
+
786
+ case 'no_error':
787
+ return this.assertNoError(result);
788
+
789
+ case 'tool_called':
790
+ return this.assertToolCalled(result, assertion.value as string);
791
+
792
+ case 'tool_not_called':
793
+ return this.assertToolNotCalled(result, assertion.value as string);
794
+
795
+ case 'output_contains':
796
+ return this.assertOutputContains(result, assertion.value as string);
797
+
798
+ case 'output_matches':
799
+ return this.assertOutputMatches(result, assertion.value as string);
800
+
801
+ case 'semantic_match':
802
+ return await this.assertSemanticMatch(result, assertion.value as string);
803
+
804
+ case 'behavior':
805
+ return await this.assertBehavior(result, assertion.value as BehaviorSpec, context);
806
+
807
+ case 'property':
808
+ return this.assertProperty(result, assertion.value as PropertySpec);
809
+
810
+ case 'cost_under':
811
+ return this.assertCostUnder(result, assertion.value as number);
812
+
813
+ case 'time_under':
814
+ return this.assertTimeUnder(result, assertion.value as number);
815
+
816
+ default:
817
+ // Check custom assertions
818
+ const handler = this.customAssertions.get(assertion.type);
819
+ if (handler) {
820
+ return handler(assertion, result, context);
821
+ }
822
+ throw new Error(`Unknown assertion type: ${assertion.type}`);
823
+ }
824
+ }
825
+
826
+ // Register custom assertion
827
+ registerAssertion(type: string, handler: AssertionHandler): void {
828
+ this.customAssertions.set(type, handler);
829
+ }
830
+
831
+ // Built-in assertions
832
+ private assertResponds(result: AgentResult): AssertionResult {
833
+ const passed = result.output !== undefined && result.output !== null;
834
+ return {
835
+ assertion: { type: 'responds' },
836
+ passed,
837
+ actual: result.output,
838
+ message: passed ? 'Agent responded' : 'Agent did not respond'
839
+ };
840
+ }
841
+
842
+ private assertNoError(result: AgentResult): AssertionResult {
843
+ const passed = !result.error;
844
+ return {
845
+ assertion: { type: 'no_error' },
846
+ passed,
847
+ actual: result.error,
848
+ message: passed ? 'No error' : `Error: ${result.error?.message}`
849
+ };
850
+ }
851
+
852
+ private assertToolCalled(result: AgentResult, toolName: string): AssertionResult {
853
+ const called = result.toolCalls?.some(tc => tc.name === toolName) || false;
854
+ return {
855
+ assertion: { type: 'tool_called', value: toolName },
856
+ passed: called,
857
+ actual: result.toolCalls?.map(tc => tc.name),
858
+ expected: toolName,
859
+ message: called ? `Tool ${toolName} was called` : `Tool ${toolName} was not called`
860
+ };
861
+ }
862
+
863
+ private assertToolNotCalled(result: AgentResult, toolName: string): AssertionResult {
864
+ const called = result.toolCalls?.some(tc => tc.name === toolName) || false;
865
+ return {
866
+ assertion: { type: 'tool_not_called', value: toolName },
867
+ passed: !called,
868
+ actual: result.toolCalls?.map(tc => tc.name),
869
+ message: !called ? `Tool ${toolName} was not called` : `Tool ${toolName} was called`
870
+ };
871
+ }
872
+
873
+ private assertOutputContains(result: AgentResult, text: string): AssertionResult {
874
+ const output = String(result.output || '');
875
+ const contains = output.includes(text);
876
+ return {
877
+ assertion: { type: 'output_contains', value: text },
878
+ passed: contains,
879
+ actual: output.slice(0, 200),
880
+ expected: text,
881
+ message: contains ? 'Output contains expected text' : 'Output does not contain expected text'
882
+ };
883
+ }
884
+
885
+ private assertOutputMatches(result: AgentResult, pattern: string): AssertionResult {
886
+ const output = String(result.output || '');
887
+ const regex = new RegExp(pattern);
888
+ const matches = regex.test(output);
889
+ return {
890
+ assertion: { type: 'output_matches', value: pattern },
891
+ passed: matches,
892
+ actual: output.slice(0, 200),
893
+ expected: pattern,
894
+ message: matches ? 'Output matches pattern' : 'Output does not match pattern'
895
+ };
896
+ }
897
+
898
+ private async assertSemanticMatch(result: AgentResult, expected: string): Promise<AssertionResult> {
899
+ // Use LLM to check semantic similarity
900
+ const output = String(result.output || '');
901
+
902
+ // Simple semantic check (could use embeddings in production)
903
+ const prompt = `Compare these two texts for semantic similarity.
904
+ Text 1: "${output.slice(0, 500)}"
905
+ Text 2: "${expected}"
906
+
907
+ Are they semantically similar in meaning? Answer only "yes" or "no".`;
908
+
909
+ const response = await this.quickLLMCheck(prompt);
910
+ const similar = response.toLowerCase().includes('yes');
911
+
912
+ return {
913
+ assertion: { type: 'semantic_match', value: expected },
914
+ passed: similar,
915
+ actual: output.slice(0, 200),
916
+ expected,
917
+ message: similar ? 'Semantically similar' : 'Not semantically similar'
918
+ };
919
+ }
920
+
921
/**
 * Checks an agent result against a behavioral specification.
 *
 * Every rule in `spec` is evaluated and each broken rule adds one entry to a
 * list of violations, in this order: required tools, forbidden tools, the
 * "must ask the user" rule, then the optional custom check.
 *
 * @param result - Agent result providing `toolCalls` and `askedUser`.
 * @param spec - Rules to enforce (`mustCallTools`, `mustNotCallTools`,
 *   `mustAsk`, `customCheck`); absent rules are skipped.
 * @param context - Optional context forwarded unchanged to `spec.customCheck`.
 * @returns Assertion result that passes when no rule was violated; otherwise
 *   `message` lists the violations joined with "; ".
 */
private async assertBehavior(
  result: AgentResult,
  spec: BehaviorSpec,
  context?: AssertionContext
): Promise<AssertionResult> {
  const violations: string[] = [];

  // Required tools: a result with no recorded tool calls fails each of them.
  for (const tool of spec.mustCallTools || []) {
    const wasCalled = result.toolCalls?.some(call => call.name === tool);
    if (!wasCalled) {
      violations.push(`Did not call required tool: ${tool}`);
    }
  }

  // Forbidden tools: only an actual matching call counts as a violation.
  for (const tool of spec.mustNotCallTools || []) {
    const wasCalled = result.toolCalls?.some(call => call.name === tool);
    if (wasCalled) {
      violations.push(`Called forbidden tool: ${tool}`);
    }
  }

  if (spec.mustAsk && !result.askedUser) {
    violations.push('Did not ask user when expected');
  }

  if (spec.customCheck) {
    const outcome = await spec.customCheck(result, context);
    if (!outcome.passed) {
      violations.push(outcome.reason || 'Custom check failed');
    }
  }

  const passed = violations.length === 0;
  return {
    assertion: { type: 'behavior', value: spec },
    passed,
    message: passed ? 'Behavior matches spec' : violations.join('; ')
  };
}
967
+
968
/**
 * Property-based assertion: evaluates the optional `invariant` and
 * `postcondition` predicates of `spec` against the result.
 *
 * Both predicates are always evaluated when present, so a single failure
 * message can report both problems.
 *
 * @param result - Agent result handed to each predicate.
 * @param spec - Property specification; missing predicates are skipped.
 * @returns Assertion result that passes when every present predicate returned
 *   a truthy value; otherwise `message` lists the failures joined with "; ".
 */
private assertProperty(result: AgentResult, spec: PropertySpec): AssertionResult {
  const failures: string[] = [];

  if (spec.invariant && !spec.invariant(result)) {
    failures.push('Invariant violated');
  }

  if (spec.postcondition && !spec.postcondition(result)) {
    failures.push('Postcondition not satisfied');
  }

  const passed = failures.length === 0;
  return {
    assertion: { type: 'property', value: spec },
    passed,
    message: passed ? 'Properties satisfied' : failures.join('; ')
  };
}
995
+
996
/**
 * Asserts that the recorded cost of a run does not exceed `maxCost`.
 *
 * A result without `cost.total` (or with a falsy total) is treated as cost 0.
 * The comparison is inclusive: a cost equal to `maxCost` passes.
 *
 * @param result - Agent result carrying the optional `cost.total`.
 * @param maxCost - Upper bound, inclusive.
 * @returns Assertion result with the measured cost in `actual`.
 */
private assertCostUnder(result: AgentResult, maxCost: number): AssertionResult {
  const spent = result.cost?.total || 0;
  const withinBudget = spent <= maxCost;
  const message = withinBudget
    ? `Cost $${spent} is under $${maxCost}`
    : `Cost $${spent} exceeds $${maxCost}`;

  return {
    assertion: { type: 'cost_under', value: maxCost },
    passed: withinBudget,
    actual: spent,
    expected: `<= ${maxCost}`,
    message
  };
}
1007
+
1008
/**
 * Asserts that a run finished within `maxTime` milliseconds.
 *
 * A result without `duration_ms` (or with a falsy value) is treated as 0 ms.
 * The comparison is inclusive: a duration equal to `maxTime` passes.
 *
 * @param result - Agent result carrying the optional `duration_ms`.
 * @param maxTime - Upper bound in milliseconds, inclusive.
 * @returns Assertion result with the measured duration in `actual`.
 */
private assertTimeUnder(result: AgentResult, maxTime: number): AssertionResult {
  const elapsed = result.duration_ms || 0;
  const withinLimit = elapsed <= maxTime;
  const message = withinLimit
    ? `Time ${elapsed}ms is under ${maxTime}ms`
    : `Time ${elapsed}ms exceeds ${maxTime}ms`;

  return {
    assertion: { type: 'time_under', value: maxTime },
    passed: withinLimit,
    actual: elapsed,
    expected: `<= ${maxTime}ms`,
    message
  };
}
1019
+
1020
/**
 * Quick LLM check used by the semantic assertions.
 *
 * NOTE: this is a placeholder. It ignores `prompt` and always resolves to
 * 'yes', so any assertion that relies on it passes unconditionally until a
 * real model call is wired in (in production, use a small/fast model).
 *
 * @param prompt - Question to put to the model; currently unused.
 * @returns The model's reply; currently the constant 'yes'.
 */
private async quickLLMCheck(prompt: string): Promise<string> {
  void prompt; // intentionally unused until a real model is connected
  const placeholderReply = 'yes';
  return placeholderReply;
}
1025
+ }
1026
+ ```
1027
+
1028
+ ---
1029
+
1030
+ ## Test Fixtures
1031
+
1032
+ ```typescript
1033
/**
 * Registry of named, reusable test inputs.
 *
 * A handful of built-in fixtures are registered on construction; further
 * fixtures can be registered directly, loaded from a file's text content, or
 * created as temporary files on disk.
 */
class TestFixtures {
  private readonly fixtures: Map<string, unknown>;

  constructor() {
    this.fixtures = new Map();
    this.loadBuiltinFixtures();
  }

  /**
   * Returns the fixture registered under `name`.
   *
   * Presence is decided by the key, not by the value's truthiness, so falsy
   * fixtures (`0`, `''`, `false`, `null`) are returned like any other value
   * instead of being reported as missing.
   *
   * @param name - Fixture key.
   * @returns The stored value, cast (unchecked) to `T`.
   * @throws Error when no fixture is registered under `name`.
   */
  get<T>(name: string): T {
    if (!this.fixtures.has(name)) {
      throw new Error(`Fixture not found: ${name}`);
    }
    return this.fixtures.get(name) as T;
  }

  /**
   * Registers `value` under `name`, replacing any existing fixture with that key.
   */
  register(name: string, value: unknown): void {
    this.fixtures.set(name, value);
  }

  /**
   * Reads the file at `path` as UTF-8 text and registers its content under `name`.
   *
   * @param name - Fixture key to store the content under.
   * @param path - Location of the file to read (shadows the `path` module here).
   */
  async loadFile(name: string, path: string): Promise<void> {
    const content = await fs.readFile(path, 'utf-8');
    this.fixtures.set(name, content);
  }

  /**
   * Writes `content` to a file called `name` inside a freshly created
   * temporary directory and registers the resulting path as `file:<name>`.
   *
   * @param name - File name inside the new temporary directory.
   * @param content - Text to write.
   * @returns Path of the created file.
   */
  async createTempFile(name: string, content: string): Promise<string> {
    const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), 'fixture-'));
    const filePath = path.join(tempDir, name);
    await fs.writeFile(filePath, content);
    this.fixtures.set(`file:${name}`, filePath);
    return filePath;
  }

  /** Registers the common fixtures that are available in every instance. */
  private loadBuiltinFixtures(): void {
    this.fixtures.set('emptyInput', { message: '' });
    this.fixtures.set('simpleTask', { message: 'Hello, world!' });
    this.fixtures.set('codeTask', {
      message: 'Write a function to add two numbers'
    });
    this.fixtures.set('errorInput', { message: null });
  }
}
1078
+ ```
1079
+
1080
+ ---
1081
+
1082
+ ## Comandos
1083
+
1084
+ ```bash
1085
+ /elsabro:test # Ejecutar todos los tests
1086
+ /elsabro:test run explore.test # Ejecutar test específico
1087
+ /elsabro:test watch # Modo watch
1088
+ /elsabro:test coverage # Reporte de cobertura
1089
+ /elsabro:test simulate scenario1 # Ejecutar simulación
1090
+ ```
1091
+
1092
+ ---
1093
+
1094
+ ## Ejemplo de Test
1095
+
1096
+ ```typescript
1097
+ // tests/explore-agent.test.ts
1098
+ export default {
1099
+ name: 'Explore Agent Tests',
1100
+ tests: [
1101
+ {
1102
+ name: 'should find TypeScript files',
1103
+ agent: 'Explore',
1104
+ input: { message: 'Find all TypeScript files in src/' },
1105
+ mocks: {
1106
+ tools: {
1107
+ Glob: { response: ['src/index.ts', 'src/utils.ts'] }
1108
+ }
1109
+ },
1110
+ assertions: [
1111
+ { type: 'responds' },
1112
+ { type: 'no_error' },
1113
+ { type: 'tool_called', value: 'Glob' },
1114
+ { type: 'output_contains', value: '.ts' }
1115
+ ]
1116
+ }
1117
+ ]
1118
+ };
1119
+ ```
1120
+
1121
+ ---
1122
+
1123
+ ## Configuración
1124
+
1125
+ ```json
1126
+ {
1127
+ "testing": {
1128
+ "enabled": true,
1129
+ "testDir": ".elsabro/tests",
1130
+ "defaultTimeout": 30000,
1131
+ "stopOnFirstFailure": false,
1132
+ "retries": 1,
1133
+ "parallel": false,
1134
+ "coverage": {
1135
+ "enabled": true,
1136
+ "threshold": 80
1137
+ }
1138
+ }
1139
+ }
1140
+ ```
1141
+
1142
+ ---
1143
+
1144
+ ## Changelog
1145
+
1146
+ - **v3.4.0**: Initial Testing Framework
1147
+ - AgentTestRunner with suites
1148
+ - MockProvider for LLM and tools
1149
+ - SimulationEngine for scenarios
1150
+ - AssertionEngine with 12+ assertion types
1151
+ - TestFixtures for reusable data