npm - @operor/testing - Versions diffs - 0.1.0 - Mend

@operor/testing 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

package/API_VALIDATION.md +572 -0
package/dist/index.d.ts +414 -0
package/dist/index.d.ts.map +1 -0
package/dist/index.js +1608 -0
package/dist/index.js.map +1 -0
package/fixtures/sample-tests.csv +10 -0
package/package.json +31 -0
package/src/CSVLoader.ts +83 -0
package/src/ConversationEvaluator.ts +254 -0
package/src/ConversationRunner.ts +267 -0
package/src/CustomerSimulator.ts +106 -0
package/src/MockShopifySkill.ts +336 -0
package/src/SimulationRunner.ts +425 -0
package/src/SkillTestHarness.ts +220 -0
package/src/TestCaseEvaluator.ts +296 -0
package/src/TestSuiteRunner.ts +151 -0
package/src/__tests__/CSVLoader.test.ts +122 -0
package/src/__tests__/ConversationEvaluator.test.ts +221 -0
package/src/__tests__/ConversationRunner.test.ts +270 -0
package/src/__tests__/CustomerSimulator.test.ts +160 -0
package/src/__tests__/SimulationRunner.test.ts +281 -0
package/src/__tests__/SkillTestHarness.test.ts +181 -0
package/src/__tests__/scenarios.test.ts +71 -0
package/src/index.ts +32 -0
package/src/scenarios/edge-cases.ts +52 -0
package/src/scenarios/general.ts +37 -0
package/src/scenarios/index.ts +32 -0
package/src/scenarios/order-tracking.ts +56 -0
package/src/scenarios.ts +142 -0
package/src/types.ts +133 -0
package/src/utils.ts +6 -0
package/tsconfig.json +9 -0
package/tsdown.config.ts +10 -0

package/src/__tests__/SimulationRunner.test.ts ADDED Viewed

@@ -0,0 +1,281 @@
+import { describe, it, expect } from 'vitest';
+import { SimulationRunner } from '../SimulationRunner.js';
+import { ECOMMERCE_SCENARIOS } from '../scenarios.js';
+import type { SimulationReport, ConversationScenario } from '../types.js';
+import { EventEmitter } from 'node:events';
+// Minimal mock Operor that emits events for each simulated message
+/**
+ * Builds a minimal stand-in for an Operor instance: an EventEmitter carrying a
+ * `providers` map with a single provider registered under the key 'mock'.
+ *
+ * @param responseText Text placed in `response.text` of every emitted event.
+ * @returns The emitter itself, extended with the `providers` map.
+ */
+function createMockOperor(responseText = 'I can help with that.') {
+  const emitter = new EventEmitter();
+  const mockProvider = {
+    name: 'mock',
+    // Both arguments are ignored: every incoming message gets the same canned reply.
+    simulateIncomingMessage: (_from: string, _text: string): void => {
+      // The reply is emitted from a 5 ms timer rather than synchronously;
+      // presumably so the caller can attach its listener first - TODO confirm.
+      setTimeout(() => {
+        emitter.emit('message:processed', {
+          response: {
+            text: responseText,
+            toolCalls: [],
+          },
+          cost: 0.001,
+        });
+      }, 5);
+    },
+  };
+  // Typed explicitly so the mock does not expose a Map<any, any>.
+  const providers = new Map<string, typeof mockProvider>([[mockProvider.name, mockProvider]]);
+  return Object.assign(emitter, { providers });
+}
+describe('SimulationRunner', () => {
+  it('runs conversation scenarios and produces a report', async () => {
+    // A single one-turn scripted scenario that is expected to pass its only criterion.
+    const scenario: ConversationScenario = {
+      id: 'test-scenario',
+      name: 'Test Scenario',
+      description: 'Simple test',
+      persona: 'polite',
+      maxTurns: 1,
+      scriptedResponses: ['Hello'],
+      successCriteria: [{ type: 'turns_under', value: 10 }],
+    };
+    const runner = new SimulationRunner({
+      agentOS: createMockOperor('Your order is on its way!') as any,
+      config: { conversationScenarios: [scenario] },
+    });
+    const { timestamp, duration, conversationResults, summary, overallPassed } = await runner.run();
+    expect(timestamp).toBeInstanceOf(Date);
+    expect(duration).toBeGreaterThanOrEqual(0);
+    expect(conversationResults).toHaveLength(1);
+    expect(summary.totalConversations).toBe(1);
+    expect(summary.passedConversations).toBe(1);
+    expect(summary.overallPassRate).toBe(1);
+    expect(overallPassed).toBe(true);
+  });
+  it('resolves "builtin" to ECOMMERCE_SCENARIOS', async () => {
+    // The string 'builtin' should yield one conversation per exported scenario.
+    const runner = new SimulationRunner({
+      agentOS: createMockOperor('Here is your order info.') as any,
+      config: { conversationScenarios: 'builtin' },
+    });
+    const { conversationResults, summary } = await runner.run();
+    const expectedCount = ECOMMERCE_SCENARIOS.length;
+    expect(conversationResults).toHaveLength(expectedCount);
+    expect(summary.totalConversations).toBe(expectedCount);
+  });
+  it('returns empty report when no scenarios or test files configured', async () => {
+    // An empty config gives the runner nothing to execute.
+    const runner = new SimulationRunner({
+      agentOS: createMockOperor() as any,
+      config: {},
+    });
+    const { testSuiteResults, conversationResults, summary, overallPassed } = await runner.run();
+    expect(testSuiteResults).toHaveLength(0);
+    expect(conversationResults).toHaveLength(0);
+    expect(summary.totalTests).toBe(0);
+    expect(summary.totalConversations).toBe(0);
+    expect(overallPassed).toBe(false); // No items = not passed
+  });
+  it('handles failed scenarios in the report', async () => {
+    // The scenario requires a get_order tool call; the report is expected to show it as failed.
+    const scenario: ConversationScenario = {
+      id: 'fail-scenario',
+      name: 'Failing Scenario',
+      description: 'Should fail criteria',
+      persona: 'frustrated',
+      maxTurns: 1,
+      scriptedResponses: ['Fix my order!'],
+      successCriteria: [{ type: 'tool_called', value: 'get_order' }],
+    };
+    const runner = new SimulationRunner({
+      agentOS: createMockOperor('Sorry, I cannot help.') as any,
+      config: { conversationScenarios: [scenario] },
+    });
+    const { overallPassed, summary } = await runner.run();
+    expect(overallPassed).toBe(false);
+    expect(summary.failedConversations).toBe(1);
+    expect(summary.overallPassRate).toBe(0);
+  });
+  it('calculates mixed pass rate correctly', async () => {
+    // One scenario expected to pass and one expected to fail, run in that order.
+    const scenarios: ConversationScenario[] = [
+      {
+        id: 'pass',
+        name: 'Passing',
+        description: 'Will pass',
+        persona: 'polite',
+        maxTurns: 1,
+        scriptedResponses: ['Hi'],
+        successCriteria: [{ type: 'turns_under', value: 10 }],
+      },
+      {
+        id: 'fail',
+        name: 'Failing',
+        description: 'Will fail',
+        persona: 'frustrated',
+        maxTurns: 1,
+        scriptedResponses: ['Help!'],
+        successCriteria: [{ type: 'tool_called', value: 'nonexistent' }],
+      },
+    ];
+    const runner = new SimulationRunner({
+      agentOS: createMockOperor('Your order is being processed.') as any,
+      config: { conversationScenarios: scenarios },
+    });
+    const { summary, overallPassed } = await runner.run();
+    expect(summary.passedConversations).toBe(1);
+    expect(summary.failedConversations).toBe(1);
+    expect(summary.overallPassRate).toBe(0.5);
+    expect(overallPassed).toBe(false);
+  });
+  describe('formatReport', () => {
+    it('formats a report with conversations', () => {
+      const report: SimulationReport = {
+        timestamp: new Date('2026-02-15T00:00:00Z'),
+        duration: 1500,
+        totalConversations: 1,
+        passed: 1,
+        failed: 0,
+        averageScores: { accuracy: 4, toolUsage: 3, tone: 5, resolution: 4 },
+        scenarioBreakdown: [{ scenario: 'Order Check', runs: 1, passRate: 1, avgScore: 4 }],
+        toolUsageStats: {},
+        commonFailurePatterns: [],
+        recommendations: [],
+        totalCost: 0.01,
+        testSuiteResults: [],
+        conversationResults: [
+          {
+            scenario: {
+              id: 'test',
+              name: 'Order Check',
+              description: 'Test',
+              persona: 'polite',
+              maxTurns: 3,
+            },
+            passed: true,
+            turns: [
+              { role: 'customer', message: 'Hi' },
+              { role: 'agent', message: 'Hello!' },
+            ],
+            evaluation: {
+              overall: 'pass',
+              scores: { accuracy: 4, toolUsage: 3, tone: 5, resolution: 4 },
+              feedback: 'Good interaction',
+            },
+            duration: 500,
+            cost: 0.01,
+          },
+        ],
+        overallPassed: true,
+        summary: {
+          totalTests: 0,
+          passedTests: 0,
+          failedTests: 0,
+          totalConversations: 1,
+          passedConversations: 1,
+          failedConversations: 0,
+          overallPassRate: 1,
+        },
+      };
+      const output = SimulationRunner.formatReport(report);
+      expect(output).toContain('Simulation Report');
+      expect(output).toContain('1.5s');
+      expect(output).toContain('Order Check');
+      expect(output).toContain('100% pass rate');
+      expect(output).toContain('pass rate: 100.0%');
+      expect(output).toContain('PASSED');
+    });
+    it('formats a failed report', () => {
+      const report: SimulationReport = {
+        timestamp: new Date('2026-02-15T00:00:00Z'),
+        duration: 3000,
+        totalConversations: 1,
+        passed: 0,
+        failed: 1,
+        averageScores: { accuracy: 1, toolUsage: 1, tone: 2, resolution: 1 },
+        scenarioBreakdown: [{ scenario: 'Bad Scenario', runs: 1, passRate: 0, avgScore: 1.25 }],
+        toolUsageStats: {},
+        commonFailurePatterns: ['Did not resolve the issue'],
+        recommendations: [],
+        totalCost: 0.02,
+        testSuiteResults: [],
+        conversationResults: [
+          {
+            scenario: {
+              id: 'fail',
+              name: 'Bad Scenario',
+              description: 'Fails',
+              persona: 'frustrated',
+              maxTurns: 2,
+            },
+            passed: false,
+            turns: [
+              { role: 'customer', message: 'Help!' },
+              { role: 'agent', message: 'Sorry.' },
+              { role: 'customer', message: 'Useless!' },
+              { role: 'agent', message: 'I apologize.' },
+            ],
+            evaluation: {
+              overall: 'fail',
+              scores: { accuracy: 1, toolUsage: 1, tone: 2, resolution: 1 },
+              feedback: 'Did not resolve the issue',
+            },
+            duration: 2000,
+            cost: 0.02,
+          },
+        ],
+        overallPassed: false,
+        summary: {
+          totalTests: 0,
+          passedTests: 0,
+          failedTests: 0,
+          totalConversations: 1,
+          passedConversations: 0,
+          failedConversations: 1,
+          overallPassRate: 0,
+        },
+      };
+      const output = SimulationRunner.formatReport(report);
+      expect(output).toContain('Bad Scenario');
+      expect(output).toContain('0% pass rate');
+      expect(output).toContain('FAILED');
+      expect(output).toContain('pass rate: 0.0%');
+    });
+  });
+});

package/src/__tests__/SkillTestHarness.test.ts ADDED Viewed

@@ -0,0 +1,181 @@
+import { describe, it, expect, beforeEach } from 'vitest';
+import { SkillTestHarness } from '../SkillTestHarness.js';
+import type { Skill, Tool } from '@operor/core';
+/**
+ * Creates the skill that the harness wraps in these tests. It is named
+ * 'test-integration', reports itself ready, and exposes five tools whose
+ * `execute` functions return fixed shapes derived from their parameters.
+ */
+function createMockSkill(): Skill {
+  // Local factory so each tool's fields are assembled in one place.
+  const tool = (
+    name: string,
+    description: string,
+    parameters: Tool['parameters'],
+    execute: Tool['execute'],
+  ): Tool => ({ name, description, parameters, execute });
+
+  return {
+    name: 'test-integration',
+    initialize: async () => {},
+    isReady: () => true,
+    tools: {
+      get_order: tool(
+        'get_order',
+        'Get order details',
+        { orderId: { type: 'string', required: true } },
+        async (params: any) => ({ found: true, id: params.orderId, status: 'delivered' }),
+      ),
+      search_products: tool(
+        'search_products',
+        'Search products',
+        { query: { type: 'string', required: true } },
+        async (params: any) => ({ found: 1, products: [{ title: params.query }] }),
+      ),
+      create_discount: tool(
+        'create_discount',
+        'Create discount',
+        { percent: { type: 'number', required: true } },
+        async (params: any) => ({ code: `DISC${params.percent}`, percent: params.percent }),
+      ),
+      stripe_create_refund: tool(
+        'stripe_create_refund',
+        'Create refund',
+        { amount: { type: 'number', required: true } },
+        async (params: any) => ({ refunded: true, amount: params.amount }),
+      ),
+      unknown_tool: tool(
+        'unknown_tool',
+        'Some unknown tool',
+        {},
+        async () => ({ ok: true }),
+      ),
+    },
+  };
+}
+describe('SkillTestHarness', () => {
+  let inner: Skill;
+  beforeEach(() => {
+    inner = createMockSkill();
+  });
+  it('should preserve the inner integration name', () => {
+    const harness = new SkillTestHarness(inner);
+    expect(harness.name).toBe('test-integration');
+  });
+  it('should delegate initialize and isReady', async () => {
+    const harness = new SkillTestHarness(inner);
+    await harness.initialize();
+    expect(harness.isReady()).toBe(true);
+  });
+  it('should expose all inner tools', () => {
+    const harness = new SkillTestHarness(inner);
+    expect(Object.keys(harness.tools)).toEqual(Object.keys(inner.tools));
+  });
+  describe('read operations', () => {
+    it('should always allow read tools', async () => {
+      const harness = new SkillTestHarness(inner, { allowWrites: false });
+      const result = await harness.tools.get_order.execute({ orderId: '123' });
+      expect(result).toEqual({ found: true, id: '123', status: 'delivered' });
+    });
+    it('should allow search_products as read', async () => {
+      const harness = new SkillTestHarness(inner, { allowWrites: false });
+      const result = await harness.tools.search_products.execute({ query: 'test' });
+      expect(result.found).toBe(1);
+    });
+  });
+  describe('write operations', () => {
+    it('should block write tools when allowWrites=false', async () => {
+      const harness = new SkillTestHarness(inner, { allowWrites: false });
+      await expect(harness.tools.create_discount.execute({ percent: 10 }))
+        .rejects.toThrow('Write operation');
+    });
+    it('should allow write tools when allowWrites=true', async () => {
+      const harness = new SkillTestHarness(inner, { allowWrites: true });
+      const result = await harness.tools.create_discount.execute({ percent: 10 });
+      expect(result).toEqual({ code: 'DISC10', percent: 10 });
+    });
+  });
+  describe('destructive operations', () => {
+    it('should block destructive tools by default', async () => {
+      const harness = new SkillTestHarness(inner, { allowWrites: true });
+      await expect(harness.tools.stripe_create_refund.execute({ amount: 50 }))
+        .rejects.toThrow('Destructive operation');
+    });
+    it('should allow destructive tools when allowDestructive=true', async () => {
+      const harness = new SkillTestHarness(inner, { allowWrites: true, allowDestructive: true });
+      const result = await harness.tools.stripe_create_refund.execute({ amount: 50 });
+      expect(result).toEqual({ refunded: true, amount: 50 });
+    });
+  });
+  describe('unknown tools', () => {
+    it('should classify unknown tools as write (safe by default)', async () => {
+      const harness = new SkillTestHarness(inner, { allowWrites: false });
+      await expect(harness.tools.unknown_tool.execute({}))
+        .rejects.toThrow('Write operation');
+    });
+  });
+  describe('maxOperations', () => {
+    it('should enforce operation limit', async () => {
+      const harness = new SkillTestHarness(inner, { maxOperations: 2 });
+      await harness.tools.get_order.execute({ orderId: '1' });
+      await harness.tools.get_order.execute({ orderId: '2' });
+      await expect(harness.tools.get_order.execute({ orderId: '3' }))
+        .rejects.toThrow('Max operations limit reached');
+    });
+  });
+  describe('dryRun', () => {
+    it('should return dry-run result without executing', async () => {
+      const harness = new SkillTestHarness(inner, { dryRun: true, allowWrites: true });
+      const result = await harness.tools.create_discount.execute({ percent: 15 });
+      expect(result).toEqual({
+        dryRun: true,
+        wouldExecute: 'create_discount',
+        params: { percent: 15 },
+      });
+    });
+    it('should still enforce permission checks in dry-run mode', async () => {
+      const harness = new SkillTestHarness(inner, { dryRun: true, allowWrites: false });
+      await expect(harness.tools.create_discount.execute({ percent: 10 }))
+        .rejects.toThrow('Write operation');
+    });
+  });
+  describe('audit log', () => {
+    it('should record all operations', async () => {
+      const harness = new SkillTestHarness(inner, { allowWrites: true });
+      await harness.tools.get_order.execute({ orderId: '123' });
+      await harness.tools.create_discount.execute({ percent: 10 });
+      const log = harness.getAuditLog();
+      expect(log).toHaveLength(2);
+      expect(log[0].name).toBe('get_order');
+      expect(log[0].classification).toBe('read');
+      expect(log[1].name).toBe('create_discount');
+      expect(log[1].classification).toBe('write');
+    });
+    it('should not record blocked operations in audit log', async () => {
+      const harness = new SkillTestHarness(inner, { allowWrites: false });
+      // Assert the rejection explicitly rather than swallowing it, so the test
+      // also fails if the write is unexpectedly allowed through.
+      await expect(harness.tools.create_discount.execute({ percent: 10 }))
+        .rejects.toThrow('Write operation');
+      // A call the harness refused is expected to leave no audit entry.
+      expect(harness.getAuditLog()).toHaveLength(0);
+    });
+    it('should reset audit log and operation count', async () => {
+      const harness = new SkillTestHarness(inner);
+      await harness.tools.get_order.execute({ orderId: '1' });
+      expect(harness.getOperationCount()).toBe(1);
+      harness.resetAuditLog();
+      expect(harness.getAuditLog()).toHaveLength(0);
+      expect(harness.getOperationCount()).toBe(0);
+    });
+  });
+});

package/src/__tests__/scenarios.test.ts ADDED Viewed

@@ -0,0 +1,71 @@
+import { describe, it, expect } from 'vitest';
+import { ECOMMERCE_SCENARIOS } from '../scenarios.js';
+// Shape checks for the exported ECOMMERCE_SCENARIOS list.
+describe('ECOMMERCE_SCENARIOS', () => {
+  // Evaluated inside each test so a broken export fails the test, not collection.
+  const scenarioIds = () => ECOMMERCE_SCENARIOS.map((scenario) => scenario.id);
+  const scenarioById = (id: string) =>
+    ECOMMERCE_SCENARIOS.find((scenario) => scenario.id === id);
+
+  it('should export 10 scenarios', () => {
+    expect(ECOMMERCE_SCENARIOS).toHaveLength(10);
+  });
+  it('should have unique IDs', () => {
+    const ids = scenarioIds();
+    expect(new Set(ids).size).toBe(ids.length);
+  });
+  it('should have unique names', () => {
+    const names = ECOMMERCE_SCENARIOS.map((scenario) => scenario.name);
+    expect(new Set(names).size).toBe(names.length);
+  });
+  it('every scenario should have required fields', () => {
+    ECOMMERCE_SCENARIOS.forEach(
+      ({ id, name, description, persona, maxTurns, scriptedResponses }) => {
+        expect(id).toBeTruthy();
+        expect(name).toBeTruthy();
+        expect(description).toBeTruthy();
+        expect(persona).toBeTruthy();
+        expect(maxTurns).toBeGreaterThan(0);
+        expect(scriptedResponses).toBeDefined();
+        // Scripted replies must exist and must not outnumber the turn budget.
+        expect(scriptedResponses!.length).toBeGreaterThan(0);
+        expect(scriptedResponses!.length).toBeLessThanOrEqual(maxTurns);
+      },
+    );
+  });
+  it('should include a delayed order scenario with get_order and create_discount', () => {
+    const delayed = scenarioById('delayed-order-compensation');
+    expect(delayed).toBeDefined();
+    expect(delayed!.expectedTools).toContain('get_order');
+    expect(delayed!.expectedTools).toContain('create_discount');
+  });
+  it('should include an order-not-found scenario', () => {
+    const notFound = scenarioById('order-not-found');
+    expect(notFound).toBeDefined();
+    expect(notFound!.expectedTools).toContain('get_order');
+  });
+  it('should include a product inquiry scenario with search_products', () => {
+    const inquiry = scenarioById('product-inquiry');
+    expect(inquiry).toBeDefined();
+    expect(inquiry!.expectedTools).toContain('search_products');
+  });
+  it('should include a greeting scenario with no tools', () => {
+    const greeting = scenarioById('greeting');
+    expect(greeting).toBeDefined();
+    expect(greeting!.expectedTools).toEqual([]);
+  });
+  it('should include a multi-issue scenario', () => {
+    const multi = scenarioById('multi-issue');
+    expect(multi).toBeDefined();
+    expect(multi!.expectedTools).toContain('get_order');
+    expect(multi!.expectedTools).toContain('search_products');
+  });
+  it('should include return, billing, lead-qualification, and escalation scenarios', () => {
+    const ids = scenarioIds();
+    expect(ids).toContain('return-request');
+    expect(ids).toContain('billing-dispute');
+    expect(ids).toContain('lead-qualification');
+    expect(ids).toContain('frustrated-escalation');
+  });
+});

package/src/index.ts ADDED Viewed

@@ -0,0 +1,32 @@
+// Public types shared across the package.
+export type {
+  TestCase,
+  TestCaseResult,
+  TestSuiteResult,
+  CustomerPersona,
+  ConversationSuccessCriteria,
+  ConversationScenario,
+  ConversationTurn,
+  CustomerSimulatorResponse,
+  ConversationEvaluation,
+  ConversationTestResult,
+  CriteriaResult,
+  SimulationConfig,
+  SimulationReport,
+} from './types.js';
+
+// Classes and their companion types, one statement per source module.
+export { CSVLoader } from './CSVLoader.js';
+export { TestCaseEvaluator, type EvaluationResult } from './TestCaseEvaluator.js';
+export { TestSuiteRunner, type TestSuiteRunnerConfig } from './TestSuiteRunner.js';
+export {
+  SkillTestHarness,
+  type SkillTestHarnessConfig,
+  type AuditLogEntry,
+} from './SkillTestHarness.js';
+export { CustomerSimulator } from './CustomerSimulator.js';
+export { ConversationEvaluator } from './ConversationEvaluator.js';
+export { ConversationRunner, type ConversationRunnerConfig } from './ConversationRunner.js';
+export { SimulationRunner, type SimulationRunnerOptions } from './SimulationRunner.js';
+
+// Built-in scenarios, the mock skill and utilities.
+export { ECOMMERCE_SCENARIOS } from './scenarios.js';
+export {
+  MockShopifySkill,
+  type MockOrder,
+  type MockProduct,
+  type MockDiscount,
+} from './MockShopifySkill.js';
+export { formatTimestamp } from './utils.js';

package/src/scenarios/edge-cases.ts ADDED Viewed

@@ -0,0 +1,52 @@
+import type { ConversationScenario } from '../types.js';
+/**
+ * Edge case: the customer's first scripted message is whitespace only and the
+ * second is a real question. No tool calls are expected.
+ */
+export const emptyMessageScenario: ConversationScenario = {
+  id: 'edge-empty-message',
+  name: 'Empty Message',
+  description: 'Customer sends an empty or whitespace-only message',
+  persona: 'terse',
+  maxTurns: 2,
+  expectedTools: [],
+  expectedOutcome: 'Agent handles gracefully and asks how it can help',
+  successCriteria: [{ type: 'turns_under', value: 3 }],
+  scriptedResponses: ['   ', 'Oh sorry, I meant to ask about my order.'],
+};
+/**
+ * Edge case: the opening message is a long paragraph with the order number
+ * (#12345) buried in the middle. Success requires a `get_order` tool call.
+ */
+export const longMessageScenario: ConversationScenario = {
+  id: 'edge-long-message',
+  name: 'Very Long Message',
+  description: 'Customer sends an extremely long message with embedded order question',
+  persona: 'verbose',
+  maxTurns: 2,
+  expectedTools: ['get_order'],
+  expectedOutcome: 'Agent extracts the key question and responds appropriately',
+  successCriteria: [{ type: 'tool_called', value: 'get_order' }],
+  scriptedResponses: [
+    'So I ordered something a while back and I have been checking every day and it still has not arrived. I remember when I placed the order I was really excited because it was exactly what I was looking for and the price was great too. Anyway the order number is #12345 and I just want to know where it is because I really need it for an event coming up soon. Can you please help me track it down? I would really appreciate it. Thanks so much in advance for your help with this matter.',
+    'Got it, thanks for looking into that for me.',
+  ],
+};
+/**
+ * Edge case: both scripted customer messages are written in Spanish. No tool
+ * calls are expected; the only criterion is finishing in under three turns.
+ */
+export const multiLanguageScenario: ConversationScenario = {
+  id: 'edge-multi-language',
+  name: 'Multi-language Message',
+  description: 'Customer writes in a different language (Spanish)',
+  persona: 'polite',
+  maxTurns: 2,
+  expectedTools: [],
+  expectedOutcome: 'Agent responds helpfully despite language difference',
+  successCriteria: [{ type: 'turns_under', value: 3 }],
+  scriptedResponses: ['Hola, donde esta mi pedido numero 12345?', 'Gracias por la ayuda.'],
+};

package/src/scenarios/general.ts ADDED Viewed

@@ -0,0 +1,37 @@
+import type { ConversationScenario } from '../types.js';
+/**
+ * Single-turn scenario: the customer says hello. No tool calls are expected
+ * and the conversation must finish in under two turns.
+ */
+export const greetingScenario: ConversationScenario = {
+  id: 'general-greeting',
+  name: 'Simple Greeting',
+  description: 'Customer says hello and expects a friendly response',
+  persona: 'polite',
+  maxTurns: 1,
+  expectedTools: [],
+  expectedOutcome: 'Agent responds with a friendly greeting',
+  successCriteria: [{ type: 'turns_under', value: 2 }],
+  scriptedResponses: ['Hello!'],
+};
+/**
+ * Four-turn scenario with an angry customer complaining about order #12345.
+ * Success requires a response containing 'sorry' and fewer than five turns.
+ */
+export const frustratedCustomerScenario: ConversationScenario = {
+  id: 'general-frustrated',
+  name: 'Frustrated Customer with Multiple Complaints',
+  description: 'Angry customer escalates through multiple complaints; agent should stay professional',
+  persona: 'frustrated',
+  maxTurns: 4,
+  expectedTools: ['get_order'],
+  expectedOutcome: 'Agent remains professional and offers resolution',
+  successCriteria: [
+    { type: 'response_contains', value: 'sorry' },
+    { type: 'turns_under', value: 5 },
+  ],
+  // One scripted customer line per turn, in order.
+  scriptedResponses: [
+    'This is ridiculous! My order #12345 is STILL not here!',
+    'I have been waiting forever. This is the worst service I have ever experienced.',
+    'I want a refund or at least some kind of compensation for this mess.',
+    'Fine. I guess that will have to do.',
+  ],
+};

package/src/scenarios/index.ts ADDED Viewed

@@ -0,0 +1,32 @@
+// Source modules are imported in the same order as before: order tracking, general, edge cases.
+import { delayedOrderScenario, onTimeOrderScenario, orderNotFoundScenario } from './order-tracking.js';
+import { greetingScenario, frustratedCustomerScenario } from './general.js';
+import { emptyMessageScenario, longMessageScenario, multiLanguageScenario } from './edge-cases.js';
+import type { ConversationScenario } from '../types.js';
+
+// Re-export every scenario by name so callers can pick individual ones.
+export {
+  delayedOrderScenario,
+  onTimeOrderScenario,
+  orderNotFoundScenario,
+  greetingScenario,
+  frustratedCustomerScenario,
+  emptyMessageScenario,
+  longMessageScenario,
+  multiLanguageScenario,
+};
+
+/** All scenarios re-exported by this module, collected into a single list. */
+export const allScenarios: ConversationScenario[] = [
+  delayedOrderScenario,
+  onTimeOrderScenario,
+  orderNotFoundScenario,
+  greetingScenario,
+  frustratedCustomerScenario,
+  emptyMessageScenario,
+  longMessageScenario,
+  multiLanguageScenario,
+];