@orq-ai/evaluatorq 1.2.2 → 1.2.3-rc.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. package/dist/lib/integrations/ai-sdk/index.d.ts +2 -0
  2. package/dist/lib/integrations/ai-sdk/index.d.ts.map +1 -1
  3. package/dist/lib/integrations/ai-sdk/index.js +1 -0
  4. package/dist/lib/integrations/ai-sdk/simulation-adapter.d.ts +47 -0
  5. package/dist/lib/integrations/ai-sdk/simulation-adapter.d.ts.map +1 -0
  6. package/dist/lib/integrations/ai-sdk/simulation-adapter.js +58 -0
  7. package/dist/lib/integrations/langchain/index.d.ts +2 -0
  8. package/dist/lib/integrations/langchain/index.d.ts.map +1 -1
  9. package/dist/lib/integrations/langchain/index.js +1 -0
  10. package/dist/lib/integrations/langchain/simulation-adapter.d.ts +49 -0
  11. package/dist/lib/integrations/langchain/simulation-adapter.d.ts.map +1 -0
  12. package/dist/lib/integrations/langchain/simulation-adapter.js +110 -0
  13. package/dist/lib/integrations/simulation/adapters.d.ts +57 -0
  14. package/dist/lib/integrations/simulation/adapters.d.ts.map +1 -0
  15. package/dist/lib/integrations/simulation/adapters.js +64 -0
  16. package/dist/lib/integrations/simulation/agents/base.d.ts +90 -0
  17. package/dist/lib/integrations/simulation/agents/base.d.ts.map +1 -0
  18. package/dist/lib/integrations/simulation/agents/base.js +227 -0
  19. package/dist/lib/integrations/simulation/agents/index.d.ts +10 -0
  20. package/dist/lib/integrations/simulation/agents/index.d.ts.map +1 -0
  21. package/dist/lib/integrations/simulation/agents/index.js +6 -0
  22. package/dist/lib/integrations/simulation/agents/judge.d.ts +50 -0
  23. package/dist/lib/integrations/simulation/agents/judge.d.ts.map +1 -0
  24. package/dist/lib/integrations/simulation/agents/judge.js +313 -0
  25. package/dist/lib/integrations/simulation/agents/user-simulator.d.ts +41 -0
  26. package/dist/lib/integrations/simulation/agents/user-simulator.d.ts.map +1 -0
  27. package/dist/lib/integrations/simulation/agents/user-simulator.js +82 -0
  28. package/dist/lib/integrations/simulation/convert.d.ts +22 -0
  29. package/dist/lib/integrations/simulation/convert.d.ts.map +1 -0
  30. package/dist/lib/integrations/simulation/convert.js +124 -0
  31. package/dist/lib/integrations/simulation/evaluators/index.d.ts +50 -0
  32. package/dist/lib/integrations/simulation/evaluators/index.d.ts.map +1 -0
  33. package/dist/lib/integrations/simulation/evaluators/index.js +100 -0
  34. package/dist/lib/integrations/simulation/generators/datapoint-generator.d.ts +60 -0
  35. package/dist/lib/integrations/simulation/generators/datapoint-generator.d.ts.map +1 -0
  36. package/dist/lib/integrations/simulation/generators/datapoint-generator.js +223 -0
  37. package/dist/lib/integrations/simulation/generators/first-message-generator.d.ts +38 -0
  38. package/dist/lib/integrations/simulation/generators/first-message-generator.d.ts.map +1 -0
  39. package/dist/lib/integrations/simulation/generators/first-message-generator.js +131 -0
  40. package/dist/lib/integrations/simulation/generators/index.d.ts +15 -0
  41. package/dist/lib/integrations/simulation/generators/index.d.ts.map +1 -0
  42. package/dist/lib/integrations/simulation/generators/index.js +10 -0
  43. package/dist/lib/integrations/simulation/generators/persona-generator.d.ts +60 -0
  44. package/dist/lib/integrations/simulation/generators/persona-generator.d.ts.map +1 -0
  45. package/dist/lib/integrations/simulation/generators/persona-generator.js +333 -0
  46. package/dist/lib/integrations/simulation/generators/scenario-generator.d.ts +77 -0
  47. package/dist/lib/integrations/simulation/generators/scenario-generator.d.ts.map +1 -0
  48. package/dist/lib/integrations/simulation/generators/scenario-generator.js +545 -0
  49. package/dist/lib/integrations/simulation/index.d.ts +33 -0
  50. package/dist/lib/integrations/simulation/index.d.ts.map +1 -0
  51. package/dist/lib/integrations/simulation/index.js +35 -0
  52. package/dist/lib/integrations/simulation/quality/index.d.ts +5 -0
  53. package/dist/lib/integrations/simulation/quality/index.d.ts.map +1 -0
  54. package/dist/lib/integrations/simulation/quality/index.js +4 -0
  55. package/dist/lib/integrations/simulation/quality/message-perturbation.d.ts +25 -0
  56. package/dist/lib/integrations/simulation/quality/message-perturbation.d.ts.map +1 -0
  57. package/dist/lib/integrations/simulation/quality/message-perturbation.js +150 -0
  58. package/dist/lib/integrations/simulation/runner/index.d.ts +5 -0
  59. package/dist/lib/integrations/simulation/runner/index.d.ts.map +1 -0
  60. package/dist/lib/integrations/simulation/runner/index.js +4 -0
  61. package/dist/lib/integrations/simulation/runner/simulation.d.ts +57 -0
  62. package/dist/lib/integrations/simulation/runner/simulation.d.ts.map +1 -0
  63. package/dist/lib/integrations/simulation/runner/simulation.js +336 -0
  64. package/dist/lib/integrations/simulation/schemas.d.ts +104 -0
  65. package/dist/lib/integrations/simulation/schemas.d.ts.map +1 -0
  66. package/dist/lib/integrations/simulation/schemas.js +76 -0
  67. package/dist/lib/integrations/simulation/simulation/index.d.ts +49 -0
  68. package/dist/lib/integrations/simulation/simulation/index.d.ts.map +1 -0
  69. package/dist/lib/integrations/simulation/simulation/index.js +159 -0
  70. package/dist/lib/integrations/simulation/types.d.ts +101 -0
  71. package/dist/lib/integrations/simulation/types.d.ts.map +1 -0
  72. package/dist/lib/integrations/simulation/types.js +90 -0
  73. package/dist/lib/integrations/simulation/utils/dataset-export.d.ts +31 -0
  74. package/dist/lib/integrations/simulation/utils/dataset-export.d.ts.map +1 -0
  75. package/dist/lib/integrations/simulation/utils/dataset-export.js +146 -0
  76. package/dist/lib/integrations/simulation/utils/extract-json.d.ts +17 -0
  77. package/dist/lib/integrations/simulation/utils/extract-json.d.ts.map +1 -0
  78. package/dist/lib/integrations/simulation/utils/extract-json.js +106 -0
  79. package/dist/lib/integrations/simulation/utils/prompt-builders.d.ts +34 -0
  80. package/dist/lib/integrations/simulation/utils/prompt-builders.d.ts.map +1 -0
  81. package/dist/lib/integrations/simulation/utils/prompt-builders.js +147 -0
  82. package/dist/lib/integrations/simulation/utils/sanitize.d.ts +15 -0
  83. package/dist/lib/integrations/simulation/utils/sanitize.d.ts.map +1 -0
  84. package/dist/lib/integrations/simulation/utils/sanitize.js +20 -0
  85. package/dist/lib/integrations/simulation/wrap-agent.d.ts +65 -0
  86. package/dist/lib/integrations/simulation/wrap-agent.d.ts.map +1 -0
  87. package/dist/lib/integrations/simulation/wrap-agent.js +140 -0
  88. package/dist/lib/send-results.d.ts.map +1 -1
  89. package/dist/lib/send-results.js +17 -2
  90. package/dist/lib/types.d.ts +2 -2
  91. package/dist/lib/types.d.ts.map +1 -1
  92. package/dist/tsconfig.lib.tsbuildinfo +1 -1
  93. package/package.json +24 -2
@@ -0,0 +1,38 @@
1
+ /**
2
+ * First message generator using LLM.
3
+ *
4
+ * Generates contextually appropriate first messages based on persona and scenario.
5
+ */
6
+ import OpenAI from "openai";
7
+ import type { Persona, Scenario } from "../types.js";
8
+ /**
9
+ * Configuration for FirstMessageGenerator.
+ *
+ * Every field is optional; the constructor also accepts no config at all.
+ * Either a ready-made client or an API key (explicit or via environment)
+ * must be available, otherwise construction throws.
10
+ */
11
+ export interface FirstMessageGeneratorConfig {
12
+ model?: string; // model identifier sent with the completion request; the implementation defaults to "azure/gpt-4o-mini"
13
+ client?: OpenAI; // pre-configured OpenAI client; when provided it is used as-is and apiKey is not consulted
14
+ apiKey?: string; // key for the internally created client; the implementation falls back to process.env.ORQ_API_KEY
15
+ }
16
+ /**
17
+ * Generates first messages for simulations.
18
+ *
19
+ * Creates contextually appropriate opening messages based on
20
+ * persona characteristics and scenario context.
+ *
+ * One chat-completion request is issued per generate() call.
21
+ */
22
+ export declare class FirstMessageGenerator {
23
+ private model;
24
+ private client;
25
+ constructor(config?: FirstMessageGeneratorConfig); // throws Error when neither config.client, config.apiKey nor the ORQ_API_KEY env var is available
26
+ /**
27
+ * Generate a first message for a simulation.
28
+ *
29
+ * The persona and scenario are rendered to text by the prompt-builder
30
+ * helpers (buildPersonaSystemPrompt / buildScenarioUserContext) and sent as the user prompt.
31
+ *
+ * Failure handling: errors carrying an HTTP status of 401 or 403 are
+ * re-thrown; any other failure of the request is logged and replaced by
+ * a generic message derived from scenario.goal.
+ *
32
+ * @param persona - User persona
33
+ * @param scenario - Scenario context
34
+ * @returns Generated first message string
+ * @throws The original error when the request fails with status 401 or 403
35
+ */
36
+ generate(persona: Persona, scenario: Scenario): Promise<string>;
37
+ }
38
+ //# sourceMappingURL=first-message-generator.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"first-message-generator.d.ts","sourceRoot":"","sources":["../../../../../src/lib/integrations/simulation/generators/first-message-generator.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,MAAM,MAAM,QAAQ,CAAC;AAE5B,OAAO,KAAK,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAuDrD;;GAEG;AACH,MAAM,WAAW,2BAA2B;IAC1C,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED;;;;;GAKG;AACH,qBAAa,qBAAqB;IAChC,OAAO,CAAC,KAAK,CAAS;IACtB,OAAO,CAAC,MAAM,CAAS;gBAEX,MAAM,CAAC,EAAE,2BAA2B;IAkBhD;;;;;;;;;OASG;IACG,QAAQ,CAAC,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,QAAQ,GAAG,OAAO,CAAC,MAAM,CAAC;CAgDtE"}
@@ -0,0 +1,131 @@
1
+ /**
2
+ * First message generator using LLM.
3
+ *
4
+ * Generates contextually appropriate first messages based on persona and scenario.
5
+ */
6
+ import OpenAI from "openai";
7
+ import { buildPersonaSystemPrompt, buildScenarioUserContext, } from "../utils/prompt-builders.js";
8
+ // Sampling temperature passed to the chat-completion request that generates the first message
9
+ const TEMPERATURE_FIRST_MESSAGE = 0.8;
10
+ const FIRST_MESSAGE_PROMPT = `You are generating the authentic first message a user would type to a support agent.
11
+
12
+ ## Your Task
13
+ Create a realistic opening message that sounds like an ACTUAL customer, not a script.
14
+
15
+ ## Guidelines
16
+
17
+ ### Voice Matching (based on persona traits):
18
+ - **Communication style "terse"**: Short sentences, minimal pleasantries, gets straight to the point
19
+ - **Communication style "verbose"**: Detailed explanations, context, multiple sentences
20
+ - **Communication style "formal"**: Professional language, complete sentences, "Dear", "Sincerely"
21
+ - **Communication style "casual"**: Contractions, slang, emojis if appropriate, friendly tone
22
+
23
+ - **Low patience (0-0.3)**: Frustrated tone, urgency indicators ("I've been waiting", "This is ridiculous")
24
+ - **High patience (0.7-1.0)**: Calm, understanding, may apologize for bothering
25
+
26
+ - **Low politeness (0-0.3)**: Direct, potentially demanding, no pleasantries
27
+ - **High politeness (0.7-1.0)**: "Please", "Thank you", "I appreciate your help"
28
+
29
+ - **Low technical level (0-0.3)**: Simple language, may describe problems in non-technical terms
30
+ - **High technical level (0.7-1.0)**: Technical terminology, specific error codes, detailed descriptions
31
+
32
+ ### Emotional States:
33
+ - **Frustrated**: Caps for emphasis, exclamation marks, expressions of disappointment
34
+ - **Confused**: Questions, uncertainty ("I'm not sure if...", "Am I doing something wrong?")
35
+ - **Urgent**: Time pressure mentioned, immediate action requested
36
+ - **Happy**: Positive tone, compliments, appreciation
37
+ - **Neutral**: Matter-of-fact, balanced
38
+
39
+ ### Message Length:
40
+ - Keep messages 50-200 characters for "terse" style
41
+ - Allow 150-400 characters for "verbose" style
42
+ - Target 80-250 characters for "casual" or "formal"
43
+
44
+ ### DO:
45
+ - Include specific details from the scenario context
46
+ - Sound like a real person typing quickly (minor imperfections are OK)
47
+ - Match the emotional intensity to the starting_emotion
48
+
49
+ ### DON'T:
50
+ - Start with "Dear Support" unless formal style with high politeness
51
+ - Be overly long unless verbose style
52
+ - Use robotic language ("I am writing to inquire about...")
53
+
54
+ Return ONLY the message text. No quotes, no explanations, no labels.`;
55
/**
 * Remove a single pair of matching quotes that wraps the entire message.
 *
 * Only a quote that appears at BOTH ends (same character) is treated as
 * wrapping. A lone leading quote or a trailing apostrophe is part of the
 * message and is left untouched.
 *
 * @param text - Already-trimmed message text
 * @returns The text without its wrapping quotes, trimmed again
 */
function stripWrappingQuotes(text) {
    if (text.length < 2) {
        return text;
    }
    const first = text[0];
    const last = text[text.length - 1];
    const isWrapped = (first === '"' || first === "'") && first === last;
    return isWrapped ? text.slice(1, -1).trim() : text;
}
/**
 * Generates first messages for simulations.
 *
 * Creates contextually appropriate opening messages based on
 * persona characteristics and scenario context.
 */
export class FirstMessageGenerator {
    model;
    client;
    /**
     * @param config - Optional settings. `client` wins over `apiKey`; when
     *   neither is given the key is read from `process.env.ORQ_API_KEY`.
     * @throws Error when no client and no API key can be resolved
     */
    constructor(config) {
        this.model = config?.model ?? "azure/gpt-4o-mini";
        if (config?.client) {
            this.client = config.client;
            return;
        }
        const apiKey = config?.apiKey ?? process.env.ORQ_API_KEY;
        if (!apiKey) {
            throw new Error("ORQ_API_KEY environment variable is not set. Set it or pass apiKey/client in config.");
        }
        this.client = new OpenAI({
            baseURL: process.env.ROUTER_BASE_URL || "https://api.orq.ai/v2/router",
            apiKey,
        });
    }
    /**
     * Generate a first message for a simulation.
     *
     * The persona and scenario are rendered with buildPersonaSystemPrompt()
     * and buildScenarioUserContext() and sent as the user prompt.
     *
     * @param persona - User persona
     * @param scenario - Scenario context
     * @returns The generated message, or a generic goal-based message when the
     *   request fails or the model returns nothing usable
     * @throws The original error when the request fails with status 401/403
     */
    async generate(persona, scenario) {
        const personaContext = buildPersonaSystemPrompt(persona);
        const scenarioContext = buildScenarioUserContext(scenario);
        const userPrompt = `PERSONA:
${personaContext}

SCENARIO:
${scenarioContext}

Generate the FIRST message this user would send to start the conversation.
The message should immediately convey their goal and emotional state.
Keep it natural - this is how they would actually open a conversation.`;
        // Generic opener based on the scenario only (no persona traits applied).
        const fallbackMessage = `Hi, I need help with: ${scenario.goal}`;
        try {
            const response = await this.client.chat.completions.create({
                model: this.model,
                messages: [
                    { role: "system", content: FIRST_MESSAGE_PROMPT },
                    { role: "user", content: userPrompt },
                ],
                temperature: TEMPERATURE_FIRST_MESSAGE,
                max_tokens: 500,
            });
            const raw = response.choices?.[0]?.message?.content ?? "";
            const message = stripWrappingQuotes(raw.trim());
            if (message.length === 0) {
                // An empty opener would start the simulation with nothing to respond to.
                console.warn("FirstMessageGenerator: model returned an empty message, using generic fallback.");
                return fallbackMessage;
            }
            const preview = message.length > 100 ? `${message.substring(0, 100)}...` : message;
            console.debug(`Generated first message: ${preview}`);
            return message;
        }
        catch (e) {
            // Re-throw auth errors — a bad API key should fail fast, not silently
            // produce meaningless results for the entire simulation run.
            const isAuthError = e instanceof Error &&
                "status" in e &&
                (e.status === 401 || e.status === 403);
            if (isAuthError) {
                throw e;
            }
            console.warn(`FirstMessageGenerator: API call failed, using generic fallback. Error: ${e}`);
            return fallbackMessage;
        }
    }
}
@@ -0,0 +1,15 @@
1
+ /**
2
+ * Generator modules for agent simulation.
3
+ *
4
+ * Provides persona, scenario, first message, and datapoint generators
5
+ * that use LLMs to create diverse test data.
6
+ */
7
+ export type { DatapointGeneratorConfig } from "./datapoint-generator.js";
8
+ export { DatapointGenerator } from "./datapoint-generator.js";
9
+ export type { FirstMessageGeneratorConfig } from "./first-message-generator.js";
10
+ export { FirstMessageGenerator } from "./first-message-generator.js";
11
+ export type { PersonaGeneratorConfig } from "./persona-generator.js";
12
+ export { PersonaGenerator } from "./persona-generator.js";
13
+ export type { ScenarioGeneratorConfig } from "./scenario-generator.js";
14
+ export { ScenarioGenerator } from "./scenario-generator.js";
15
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../../src/lib/integrations/simulation/generators/index.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,YAAY,EAAE,wBAAwB,EAAE,MAAM,0BAA0B,CAAC;AACzE,OAAO,EAAE,kBAAkB,EAAE,MAAM,0BAA0B,CAAC;AAC9D,YAAY,EAAE,2BAA2B,EAAE,MAAM,8BAA8B,CAAC;AAChF,OAAO,EAAE,qBAAqB,EAAE,MAAM,8BAA8B,CAAC;AACrE,YAAY,EAAE,sBAAsB,EAAE,MAAM,wBAAwB,CAAC;AACrE,OAAO,EAAE,gBAAgB,EAAE,MAAM,wBAAwB,CAAC;AAC1D,YAAY,EAAE,uBAAuB,EAAE,MAAM,yBAAyB,CAAC;AACvE,OAAO,EAAE,iBAAiB,EAAE,MAAM,yBAAyB,CAAC"}
@@ -0,0 +1,10 @@
1
+ /**
2
+ * Generator modules for agent simulation.
3
+ *
4
+ * Provides persona, scenario, first message, and datapoint generators
5
+ * that use LLMs to create diverse test data.
6
+ */
7
+ export { DatapointGenerator } from "./datapoint-generator.js";
8
+ export { FirstMessageGenerator } from "./first-message-generator.js";
9
+ export { PersonaGenerator } from "./persona-generator.js";
10
+ export { ScenarioGenerator } from "./scenario-generator.js";
@@ -0,0 +1,60 @@
1
+ /**
2
+ * Persona generator using LLM.
3
+ *
4
+ * Generates user personas from agent descriptions and optional context.
5
+ */
6
+ import OpenAI from "openai";
7
+ import type { Persona } from "../types.js";
8
+ /**
9
+ * Configuration for PersonaGenerator.
+ *
+ * Every field is optional; the constructor also accepts no config at all.
+ * Either a ready-made client or an API key (explicit or via environment)
+ * must be available, otherwise construction throws.
10
+ */
11
+ export interface PersonaGeneratorConfig {
12
+ model?: string; // model identifier sent with each completion request; the implementation defaults to "azure/gpt-4o-mini"
13
+ client?: OpenAI; // pre-configured OpenAI client; when provided it is used as-is and apiKey is not consulted
14
+ apiKey?: string; // key for the internally created client; the implementation falls back to process.env.ORQ_API_KEY
15
+ }
16
+ /**
17
+ * Generates personas from agent descriptions.
18
+ *
19
+ * Uses an LLM to create diverse, realistic user personas
20
+ * based on the agent's purpose and context.
+ *
+ * Each public method issues one chat-completion request. A response that
+ * cannot be parsed as a JSON array yields an empty result rather than an error.
21
+ */
22
+ export declare class PersonaGenerator {
23
+ private model;
24
+ private client;
25
+ constructor(config?: PersonaGeneratorConfig); // throws Error when neither config.client, config.apiKey nor the ORQ_API_KEY env var is available
26
+ /**
27
+ * Parse LLM response content into Persona objects.
+ *
+ * Numeric traits are clamped to [0, 1]; missing traits default to 0.5 and
+ * an unrecognised communication_style is replaced by "casual".
28
+ */
29
+ private static parsePersonas;
30
+ /**
31
+ * Generate personas for agent testing.
+ *
+ * The result may contain fewer personas than requested when the model
+ * returns fewer or the response cannot be parsed; a warning is logged then.
+ *
+ * @returns The parsed personas (possibly an empty array)
32
+ */
33
+ generate(params: {
34
+ agentDescription: string;
35
+ context?: string; // extra free-text context; the prompt says "None provided" when empty
36
+ numPersonas?: number; // implementation default: 5
37
+ edgeCasePercentage?: number; // implementation default: 0.2; floor(numPersonas * value) edge cases are requested
38
+ }): Promise<Persona[]>;
39
+ /**
40
+ * Generate personas with communication-style coverage and targeted trait values.
41
+ *
42
+ * The prompt asks the model for explicit style/trait combinations, including
43
+ * extreme values that LLMs tend to avoid. After parsing, only missing
+ * communication styles are filled in by re-styling existing personas; gaps
+ * in trait ranges are logged, not repaired. The result is trimmed to numPersonas.
+ *
+ * @returns At most numPersonas personas (possibly fewer, possibly empty)
44
+ */
45
+ generateWithCoverage(params: {
46
+ agentDescription: string;
47
+ context?: string; // extra free-text context; the prompt says "None provided" when empty
48
+ numPersonas?: number; // implementation default: 8
49
+ edgeCasePercentage?: number; // implementation default: 0.2
50
+ }): Promise<Persona[]>;
51
+ /**
52
+ * Fill in missing communication styles by re-styling existing personas.
+ * No personas are added; with an empty list nothing happens.
53
+ */
54
+ private ensureStyleCoverage;
55
+ /**
56
+ * Log (at debug level) which low/high trait ranges are not represented, without modifying personas.
57
+ */
58
+ private logTraitCoverageGaps;
59
+ }
60
+ //# sourceMappingURL=persona-generator.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"persona-generator.d.ts","sourceRoot":"","sources":["../../../../../src/lib/integrations/simulation/generators/persona-generator.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,MAAM,MAAM,QAAQ,CAAC;AAE5B,OAAO,KAAK,EAAsB,OAAO,EAAE,MAAM,aAAa,CAAC;AAiE/D;;GAEG;AACH,MAAM,WAAW,sBAAsB;IACrC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED;;;;;GAKG;AACH,qBAAa,gBAAgB;IAC3B,OAAO,CAAC,KAAK,CAAS;IACtB,OAAO,CAAC,MAAM,CAAS;gBAEX,MAAM,CAAC,EAAE,sBAAsB;IAkB3C;;OAEG;IACH,OAAO,CAAC,MAAM,CAAC,aAAa;IAsC5B;;OAEG;IACG,QAAQ,CAAC,MAAM,EAAE;QACrB,gBAAgB,EAAE,MAAM,CAAC;QACzB,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,kBAAkB,CAAC,EAAE,MAAM,CAAC;KAC7B,GAAG,OAAO,CAAC,OAAO,EAAE,CAAC;IA0CtB;;;;;OAKG;IACG,oBAAoB,CAAC,MAAM,EAAE;QACjC,gBAAgB,EAAE,MAAM,CAAC;QACzB,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,kBAAkB,CAAC,EAAE,MAAM,CAAC;KAC7B,GAAG,OAAO,CAAC,OAAO,EAAE,CAAC;IAqItB;;OAEG;IACH,OAAO,CAAC,mBAAmB;IA2B3B;;OAEG;IACH,OAAO,CAAC,oBAAoB;CAyB7B"}
@@ -0,0 +1,333 @@
1
+ /**
2
+ * Persona generator using LLM.
3
+ *
4
+ * Generates user personas from agent descriptions and optional context.
5
+ */
6
+ import OpenAI from "openai";
7
+ import { extractJsonFromResponse } from "../utils/extract-json.js";
8
+ import { delimit } from "../utils/sanitize.js";
9
+ // Sampling temperatures for the two generation modes, plus the accepted communication styles
10
+ const TEMPERATURE_CREATIVE = 0.8; // used by PersonaGenerator.generate()
11
+ const TEMPERATURE_BALANCED = 0.7; // used by PersonaGenerator.generateWithCoverage()
12
+ const VALID_STYLES = new Set(["formal", "casual", "terse", "verbose"]); // parsePersonas() replaces any other style with "casual"
13
/**
 * Clamp a number to [0, 1].
 *
 * `NaN` (e.g. from `Number("high")` on a malformed LLM response) would
 * otherwise pass straight through `Math.min`/`Math.max`, so it is replaced
 * by `fallback`. Infinite values still clamp to the nearest bound.
 *
 * @param value - Number to clamp
 * @param fallback - Value returned when `value` is NaN (default 0.5, the
 *   neutral trait value used for missing traits)
 * @returns A number in [0, 1], or `fallback` for NaN input
 */
function clamp01(value, fallback = 0.5) {
    if (Number.isNaN(value)) {
        return fallback;
    }
    return Math.max(0, Math.min(1, value));
}
17
+ const PERSONA_GENERATOR_PROMPT = `You are an expert persona designer for AI agent testing. Create realistic, memorable user personas that feel like real people, not stereotypes.
18
+
19
+ ## Persona Structure
20
+ Each persona must include:
21
+ - **name**: A vivid, specific descriptor (e.g., "Anxious First-Time Buyer", "Retired Engineer Seeking Help")
22
+ - **patience**: 0-1 (0=interrupts constantly, 1=waits indefinitely)
23
+ - **assertiveness**: 0-1 (0=accepts anything, 1=demands specific outcomes)
24
+ - **politeness**: 0-1 (0=rude/demanding, 1=overly polite)
25
+ - **technical_level**: 0-1 (0=never used a computer, 1=software developer)
26
+ - **communication_style**: "formal", "casual", "terse", or "verbose"
27
+ - **background**: DETAILED context (2-3 sentences) explaining WHO they are, WHY they're contacting support, and WHAT their emotional state is
28
+
29
+ ## Quality Guidelines
30
+
31
+ ### DO create personas that are:
32
+ - **Realistic**: Based on actual customer archetypes you'd encounter
33
+ - **Coherent**: Traits that logically fit together (e.g., high technical_level + formal style for an engineer)
34
+ - **Specific**: Unique situations with concrete details (names, specific products, timeframes)
35
+ - **Emotionally grounded**: Clear emotional context that explains their behavior
36
+
37
+ ### DON'T create personas that are:
38
+ - Generic (e.g., "Customer with a problem")
39
+ - Contradictory (e.g., patience=0.1 but described as "patient and understanding")
40
+ - Unrealistic trait combinations (e.g., technical_level=0.9 + communication_style="terse" for a "confused elderly person")
41
+ - All similar - vary trait values across personas, including some with low values and some with high values
42
+
43
+ ## Example HIGH-QUALITY Persona:
44
+ {
45
+ "name": "Overwhelmed Working Parent",
46
+ "patience": 0.3,
47
+ "assertiveness": 0.6,
48
+ "politeness": 0.7,
49
+ "technical_level": 0.4,
50
+ "communication_style": "terse",
51
+ "background": "Sarah is a working mom with two kids under 5. She ordered a birthday gift (a tablet) for her daughter 2 weeks ago but it hasn't arrived. The party is in 3 days. She's stressed, multitasking while on hold, and needs quick answers without lengthy explanations."
52
+ }
53
+
54
+ ## Example LOW-QUALITY Persona (AVOID):
55
+ {
56
+ "name": "Angry Customer",
57
+ "patience": 0.1,
58
+ "assertiveness": 0.9,
59
+ "politeness": 0.2,
60
+ "technical_level": 0.5,
61
+ "communication_style": "casual",
62
+ "background": "A customer who is angry about something" // TOO VAGUE!
63
+ }
64
+
65
+ Return a JSON array of persona objects.`;
66
/**
 * Generates personas from agent descriptions.
 *
 * Uses an LLM to create diverse, realistic user personas
 * based on the agent's purpose and context.
 */
export class PersonaGenerator {
    model;
    client;
    /**
     * @param config - Optional settings. `client` wins over `apiKey`; when
     *   neither is given the key is read from `process.env.ORQ_API_KEY`.
     * @throws Error when no client and no API key can be resolved
     */
    constructor(config) {
        this.model = config?.model ?? "azure/gpt-4o-mini";
        if (config?.client) {
            this.client = config.client;
            return;
        }
        const apiKey = config?.apiKey ?? process.env.ORQ_API_KEY;
        if (!apiKey) {
            throw new Error("ORQ_API_KEY environment variable is not set. Set it or pass apiKey/client in config.");
        }
        this.client = new OpenAI({
            baseURL: process.env.ROUTER_BASE_URL || "https://api.orq.ai/v2/router",
            apiKey,
        });
    }
    /**
     * Parse LLM response content into Persona objects.
     *
     * Entries that are not plain objects are skipped with a warning. Numeric
     * traits are clamped to [0, 1]; missing or non-numeric traits become 0.5.
     * An unrecognised communication_style becomes "casual".
     *
     * @param content - Raw completion text
     * @returns Parsed personas; empty when the content is not a JSON array
     */
    static parsePersonas(content) {
        const extracted = extractJsonFromResponse(content);
        let parsed;
        try {
            parsed = JSON.parse(extracted);
        }
        catch {
            console.warn("Failed to parse personas JSON response");
            return [];
        }
        if (!Array.isArray(parsed)) {
            console.warn("Failed to parse personas: expected JSON array");
            return [];
        }
        // Number("high") is NaN and NaN survives clamping, so map it to the
        // same neutral default that a missing trait gets.
        const toTrait = (raw) => {
            const numeric = Number(raw ?? 0.5);
            return Number.isNaN(numeric) ? 0.5 : clamp01(numeric);
        };
        const personas = [];
        for (const entry of parsed) {
            if (entry === null || typeof entry !== "object" || Array.isArray(entry)) {
                // A bare string/number/array would otherwise become an empty,
                // all-default persona that pollutes the test set.
                console.warn("Failed to parse persona: expected a JSON object");
                continue;
            }
            try {
                const rawStyle = String(entry.communication_style ?? "casual");
                personas.push({
                    name: String(entry.name ?? ""),
                    patience: toTrait(entry.patience),
                    assertiveness: toTrait(entry.assertiveness),
                    politeness: toTrait(entry.politeness),
                    technical_level: toTrait(entry.technical_level),
                    communication_style: VALID_STYLES.has(rawStyle) ? rawStyle : "casual",
                    background: String(entry.background ?? ""),
                });
            }
            catch (e) {
                console.warn(`Failed to parse persona: ${e}`);
            }
        }
        return personas;
    }
    /**
     * Generate personas for agent testing.
     *
     * @param params - `numPersonas` defaults to 5, `edgeCasePercentage` to 0.2,
     *   `context` to an empty string (rendered as "None provided")
     * @returns Parsed personas; may be fewer than requested (a warning is logged)
     */
    async generate(params) {
        const { agentDescription, context = "", numPersonas = 5, edgeCasePercentage = 0.2, } = params;
        const numEdgeCases = Math.floor(numPersonas * edgeCasePercentage);
        const userPrompt = `Agent Description: ${delimit(agentDescription)}

Additional Context: ${delimit(context || "None provided")}

Generate ${numPersonas} diverse personas for testing this agent.
- Include ${numEdgeCases} edge case/challenging personas
- Ensure variety in patience, assertiveness, and technical levels
- Create realistic backgrounds relevant to the agent's domain

Return ONLY a JSON array, no other text.`;
        const response = await this.client.chat.completions.create({
            model: this.model,
            messages: [
                { role: "system", content: PERSONA_GENERATOR_PROMPT },
                { role: "user", content: userPrompt },
            ],
            temperature: TEMPERATURE_CREATIVE,
            max_tokens: 4000,
        });
        const content = response.choices?.[0]?.message?.content ?? "[]";
        const personas = PersonaGenerator.parsePersonas(content);
        if (personas.length < numPersonas) {
            console.warn(`PersonaGenerator: requested ${numPersonas} personas but only ${personas.length} were successfully parsed`);
        }
        return personas;
    }
    /**
     * Generate personas with communication-style coverage and targeted traits.
     *
     * The prompt requests explicit style/trait combinations, including extreme
     * values that LLMs tend to avoid. After parsing, missing communication
     * styles are filled in by re-styling personas whose style is duplicated;
     * trait-range gaps are only logged. The result is trimmed to `numPersonas`.
     *
     * @param params - `numPersonas` defaults to 8, `edgeCasePercentage` to 0.2
     * @returns At most `numPersonas` personas (possibly fewer)
     */
    async generateWithCoverage(params) {
        const { agentDescription, context = "", numPersonas = 8, edgeCasePercentage = 0.2, } = params;
        const styles = ["formal", "casual", "terse", "verbose"];
        // Explicit trait combinations covering the FULL range (0.0-1.0)
        const traitTargets = [
            { patience: 0.1, assertiveness: 0.1, politeness: 0.1, technical_level: 0.1 },
            { patience: 0.9, assertiveness: 0.1, politeness: 0.9, technical_level: 0.9 },
            { patience: 0.1, assertiveness: 0.9, politeness: 0.1, technical_level: 0.5 },
            { patience: 0.5, assertiveness: 0.9, politeness: 0.9, technical_level: 0.1 },
            { patience: 0.5, assertiveness: 0.5, politeness: 0.5, technical_level: 0.5 },
            { patience: 0.3, assertiveness: 0.7, politeness: 0.6, technical_level: 0.3 },
            { patience: 0.7, assertiveness: 0.3, politeness: 0.8, technical_level: 0.7 },
            { patience: 0.2, assertiveness: 0.8, politeness: 0.3, technical_level: 0.8 },
        ];
        const numEdgeCases = Math.floor(numPersonas * edgeCasePercentage);
        const coverageInstructions = Array.from({ length: Math.min(numPersonas, 8) }, (_, i) => {
            const target = traitTargets[i % traitTargets.length];
            return (`- Persona ${i + 1}: communication_style='${styles[i % styles.length]}', ` +
                `patience=${target.patience.toFixed(1)}, ` +
                `assertiveness=${target.assertiveness.toFixed(1)}, ` +
                `politeness=${target.politeness.toFixed(1)}, ` +
                `technical_level=${target.technical_level.toFixed(1)}`);
        }).join("\n");
        const userPrompt = `Agent Description: ${delimit(agentDescription)}

Additional Context: ${delimit(context || "None provided")}

Generate ${numPersonas} personas with EXACT trait values as specified below.
CRITICAL: Use the EXACT numeric values provided - do NOT adjust them to be more "balanced".

${coverageInstructions}

IMPORTANT:
- Use the EXACT trait values shown above (e.g., if it says politeness=0.1, use 0.1, not 0.3 or 0.5)
- Low values (0.1-0.2) represent EXTREME traits - these are intentional, not mistakes
- Include ${numEdgeCases} edge case/challenging personas
- Ensure traits span the full 0-1 range across all personas
- Create realistic backgrounds relevant to the agent's domain

Return ONLY a JSON array, no other text.`;
        const response = await this.client.chat.completions.create({
            model: this.model,
            messages: [
                { role: "system", content: PERSONA_GENERATOR_PROMPT },
                { role: "user", content: userPrompt },
            ],
            temperature: TEMPERATURE_BALANCED,
            max_tokens: 4000,
        });
        const content = response.choices?.[0]?.message?.content ?? "[]";
        let personas = PersonaGenerator.parsePersonas(content);
        // Fill style gaps, then report (but do not repair) trait gaps
        personas = this.ensureStyleCoverage(personas, styles);
        this.logTraitCoverageGaps(personas);
        // Trim to requested count (the model may have returned extras)
        if (personas.length > numPersonas) {
            personas = personas.slice(0, numPersonas);
        }
        if (personas.length < numPersonas) {
            console.warn(`PersonaGenerator: requested ${numPersonas} personas (with coverage) but only ${personas.length} were successfully parsed`);
        }
        return personas;
    }
    /**
     * Ensure all communication styles are covered.
     *
     * Missing styles are assigned to personas whose current style is held by
     * at least one other persona, so filling one gap never opens another.
     * When there are not enough such personas, the remaining styles stay
     * uncovered. The input array and its elements are not modified.
     *
     * @param personas - Parsed personas
     * @param requiredStyles - Styles that should each appear at least once
     * @returns The input array when nothing changes, otherwise an adjusted copy
     */
    ensureStyleCoverage(personas, requiredStyles) {
        const styleCounts = new Map();
        for (const persona of personas) {
            styleCounts.set(persona.communication_style, (styleCounts.get(persona.communication_style) ?? 0) + 1);
        }
        const missingStyles = requiredStyles.filter((s) => !styleCounts.has(s));
        if (missingStyles.length === 0 || personas.length === 0) {
            return personas;
        }
        const adjusted = [...personas];
        let nextMissing = 0;
        for (let i = 0; i < adjusted.length && nextMissing < missingStyles.length; i++) {
            const persona = adjusted[i];
            const current = persona.communication_style;
            const isSoleHolder = requiredStyles.includes(current) && (styleCounts.get(current) ?? 0) <= 1;
            if (isSoleHolder) {
                // Re-styling this persona would drop a required style from the set.
                continue;
            }
            const style = missingStyles[nextMissing++];
            adjusted[i] = { ...persona, communication_style: style };
            styleCounts.set(current, (styleCounts.get(current) ?? 1) - 1);
            styleCounts.set(style, 1);
            console.debug(`Adjusted persona '${persona.name}' to style '${style}' for coverage`);
        }
        return adjusted;
    }
    /**
     * Log (at debug level) which low/high trait ranges are not represented.
     * Personas are not modified. "Low" means <= 0.2 and "high" means >= 0.8.
     *
     * @param personas - Personas to inspect; nothing is logged for an empty list
     */
    logTraitCoverageGaps(personas) {
        if (personas.length === 0) {
            return;
        }
        const traits = [
            ["patience", (p) => p.patience],
            ["assertiveness", (p) => p.assertiveness],
            ["politeness", (p) => p.politeness],
            ["technical_level", (p) => p.technical_level],
        ];
        const gaps = [];
        for (const [label, pick] of traits) {
            const values = personas.map(pick);
            if (!values.some((v) => v <= 0.2)) {
                gaps.push(`low ${label} (<=0.2)`);
            }
            if (!values.some((v) => v >= 0.8)) {
                gaps.push(`high ${label} (>=0.8)`);
            }
        }
        if (gaps.length > 0) {
            console.debug(`Trait coverage gaps: ${gaps.join(", ")}`);
        }
    }
}
@@ -0,0 +1,77 @@
1
+ /**
2
+ * Scenario generator using LLM.
3
+ *
4
+ * Generates test scenarios from agent descriptions and optional context.
5
+ */
6
+ import OpenAI from "openai";
7
+ import type { Scenario } from "../types.js";
8
+ /**
9
+ * Configuration for ScenarioGenerator.
+ *
+ * Every field is optional; the constructor also accepts no config at all.
+ * NOTE(review): the fields mirror the sibling generator configs; defaults and
+ * env-var fallback are presumably the same — confirm against scenario-generator.js.
10
+ */
11
+ export interface ScenarioGeneratorConfig {
12
+ model?: string; // model identifier for completion requests
13
+ client?: OpenAI; // pre-configured OpenAI client
14
+ apiKey?: string; // API key, presumably used only when no client is supplied — confirm
15
+ }
16
+ /**
17
+ * Generates scenarios from agent descriptions.
18
+ *
19
+ * Uses an LLM to create diverse test scenarios
20
+ * based on the agent's purpose and context.
+ *
+ * All public methods are asynchronous and resolve to an array of Scenario.
21
+ */
22
+ export declare class ScenarioGenerator {
23
+ private model;
24
+ private client;
25
+ constructor(config?: ScenarioGeneratorConfig);
26
+ /**
27
+ * Generate scenarios for agent testing.
+ *
+ * @param params - Agent description plus optional context, scenario count and edge-case share
+ * @returns The generated scenarios
28
+ */
29
+ generate(params: {
30
+ agentDescription: string;
31
+ context?: string;
32
+ numScenarios?: number;
33
+ edgeCasePercentage?: number; // presumably a fraction in [0, 1], as in the persona generator — confirm
34
+ }): Promise<Scenario[]>;
35
+ /**
36
+ * Generate scenarios with emotion and criteria coverage applied.
+ *
+ * @param params - Same shape as generate()
+ * @returns The generated scenarios
37
+ */
38
+ generateWithCoverage(params: {
39
+ agentDescription: string;
40
+ context?: string;
41
+ numScenarios?: number;
42
+ edgeCasePercentage?: number; // presumably a fraction in [0, 1] — confirm
43
+ }): Promise<Scenario[]>;
44
+ /**
45
+ * Ensure all starting emotions are covered.
46
+ */
47
+ private ensureEmotionCoverage;
48
+ /**
49
+ * Ensure a must_not_happen criterion exists when none is present.
50
+ */
51
+ private ensureCriteriaCoverage;
52
+ /**
53
+ * Generate edge case scenarios specifically.
+ *
+ * @param params - Agent description, optional already-generated scenarios, optional count
+ * @returns The generated edge-case scenarios
54
+ */
55
+ generateEdgeCases(params: {
56
+ agentDescription: string;
57
+ existingScenarios?: Scenario[]; // presumably used to avoid duplicating known scenarios — confirm
58
+ numEdgeCases?: number;
59
+ }): Promise<Scenario[]>;
60
+ /**
61
+ * Generate boundary/out-of-scope test scenarios.
+ *
+ * @param params - Agent description and optional count
+ * @returns The generated boundary scenarios
62
+ */
63
+ generateBoundaryScenarios(params: {
64
+ agentDescription: string;
65
+ numScenarios?: number;
66
+ }): Promise<Scenario[]>;
67
+ /**
68
+ * Generate security test scenarios inspired by OWASP attack patterns.
+ *
+ * @param params - Agent description plus optional seed examples, category filter and count
+ * @returns The generated security scenarios
69
+ */
70
+ generateSecurityScenarios(params: {
71
+ agentDescription: string;
72
+ seedExamples?: Record<string, unknown>[]; // free-form example records; schema not visible here
73
+ categories?: string[]; // category names; accepted values not visible here
74
+ numScenarios?: number;
75
+ }): Promise<Scenario[]>;
76
+ }
77
+ //# sourceMappingURL=scenario-generator.d.ts.map