@orq-ai/evaluatorq 1.2.2 → 1.2.3-rc.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. package/dist/lib/integrations/ai-sdk/index.d.ts +2 -0
  2. package/dist/lib/integrations/ai-sdk/index.d.ts.map +1 -1
  3. package/dist/lib/integrations/ai-sdk/index.js +1 -0
  4. package/dist/lib/integrations/ai-sdk/simulation-adapter.d.ts +47 -0
  5. package/dist/lib/integrations/ai-sdk/simulation-adapter.d.ts.map +1 -0
  6. package/dist/lib/integrations/ai-sdk/simulation-adapter.js +58 -0
  7. package/dist/lib/integrations/langchain/index.d.ts +2 -0
  8. package/dist/lib/integrations/langchain/index.d.ts.map +1 -1
  9. package/dist/lib/integrations/langchain/index.js +1 -0
  10. package/dist/lib/integrations/langchain/simulation-adapter.d.ts +49 -0
  11. package/dist/lib/integrations/langchain/simulation-adapter.d.ts.map +1 -0
  12. package/dist/lib/integrations/langchain/simulation-adapter.js +110 -0
  13. package/dist/lib/integrations/simulation/adapters.d.ts +57 -0
  14. package/dist/lib/integrations/simulation/adapters.d.ts.map +1 -0
  15. package/dist/lib/integrations/simulation/adapters.js +64 -0
  16. package/dist/lib/integrations/simulation/agents/base.d.ts +90 -0
  17. package/dist/lib/integrations/simulation/agents/base.d.ts.map +1 -0
  18. package/dist/lib/integrations/simulation/agents/base.js +227 -0
  19. package/dist/lib/integrations/simulation/agents/index.d.ts +10 -0
  20. package/dist/lib/integrations/simulation/agents/index.d.ts.map +1 -0
  21. package/dist/lib/integrations/simulation/agents/index.js +6 -0
  22. package/dist/lib/integrations/simulation/agents/judge.d.ts +50 -0
  23. package/dist/lib/integrations/simulation/agents/judge.d.ts.map +1 -0
  24. package/dist/lib/integrations/simulation/agents/judge.js +313 -0
  25. package/dist/lib/integrations/simulation/agents/user-simulator.d.ts +41 -0
  26. package/dist/lib/integrations/simulation/agents/user-simulator.d.ts.map +1 -0
  27. package/dist/lib/integrations/simulation/agents/user-simulator.js +82 -0
  28. package/dist/lib/integrations/simulation/convert.d.ts +22 -0
  29. package/dist/lib/integrations/simulation/convert.d.ts.map +1 -0
  30. package/dist/lib/integrations/simulation/convert.js +124 -0
  31. package/dist/lib/integrations/simulation/evaluators/index.d.ts +50 -0
  32. package/dist/lib/integrations/simulation/evaluators/index.d.ts.map +1 -0
  33. package/dist/lib/integrations/simulation/evaluators/index.js +100 -0
  34. package/dist/lib/integrations/simulation/generators/datapoint-generator.d.ts +60 -0
  35. package/dist/lib/integrations/simulation/generators/datapoint-generator.d.ts.map +1 -0
  36. package/dist/lib/integrations/simulation/generators/datapoint-generator.js +223 -0
  37. package/dist/lib/integrations/simulation/generators/first-message-generator.d.ts +38 -0
  38. package/dist/lib/integrations/simulation/generators/first-message-generator.d.ts.map +1 -0
  39. package/dist/lib/integrations/simulation/generators/first-message-generator.js +131 -0
  40. package/dist/lib/integrations/simulation/generators/index.d.ts +15 -0
  41. package/dist/lib/integrations/simulation/generators/index.d.ts.map +1 -0
  42. package/dist/lib/integrations/simulation/generators/index.js +10 -0
  43. package/dist/lib/integrations/simulation/generators/persona-generator.d.ts +60 -0
  44. package/dist/lib/integrations/simulation/generators/persona-generator.d.ts.map +1 -0
  45. package/dist/lib/integrations/simulation/generators/persona-generator.js +333 -0
  46. package/dist/lib/integrations/simulation/generators/scenario-generator.d.ts +77 -0
  47. package/dist/lib/integrations/simulation/generators/scenario-generator.d.ts.map +1 -0
  48. package/dist/lib/integrations/simulation/generators/scenario-generator.js +545 -0
  49. package/dist/lib/integrations/simulation/index.d.ts +33 -0
  50. package/dist/lib/integrations/simulation/index.d.ts.map +1 -0
  51. package/dist/lib/integrations/simulation/index.js +35 -0
  52. package/dist/lib/integrations/simulation/quality/index.d.ts +5 -0
  53. package/dist/lib/integrations/simulation/quality/index.d.ts.map +1 -0
  54. package/dist/lib/integrations/simulation/quality/index.js +4 -0
  55. package/dist/lib/integrations/simulation/quality/message-perturbation.d.ts +25 -0
  56. package/dist/lib/integrations/simulation/quality/message-perturbation.d.ts.map +1 -0
  57. package/dist/lib/integrations/simulation/quality/message-perturbation.js +150 -0
  58. package/dist/lib/integrations/simulation/runner/index.d.ts +5 -0
  59. package/dist/lib/integrations/simulation/runner/index.d.ts.map +1 -0
  60. package/dist/lib/integrations/simulation/runner/index.js +4 -0
  61. package/dist/lib/integrations/simulation/runner/simulation.d.ts +57 -0
  62. package/dist/lib/integrations/simulation/runner/simulation.d.ts.map +1 -0
  63. package/dist/lib/integrations/simulation/runner/simulation.js +336 -0
  64. package/dist/lib/integrations/simulation/schemas.d.ts +104 -0
  65. package/dist/lib/integrations/simulation/schemas.d.ts.map +1 -0
  66. package/dist/lib/integrations/simulation/schemas.js +76 -0
  67. package/dist/lib/integrations/simulation/simulation/index.d.ts +49 -0
  68. package/dist/lib/integrations/simulation/simulation/index.d.ts.map +1 -0
  69. package/dist/lib/integrations/simulation/simulation/index.js +159 -0
  70. package/dist/lib/integrations/simulation/types.d.ts +101 -0
  71. package/dist/lib/integrations/simulation/types.d.ts.map +1 -0
  72. package/dist/lib/integrations/simulation/types.js +90 -0
  73. package/dist/lib/integrations/simulation/utils/dataset-export.d.ts +31 -0
  74. package/dist/lib/integrations/simulation/utils/dataset-export.d.ts.map +1 -0
  75. package/dist/lib/integrations/simulation/utils/dataset-export.js +146 -0
  76. package/dist/lib/integrations/simulation/utils/extract-json.d.ts +17 -0
  77. package/dist/lib/integrations/simulation/utils/extract-json.d.ts.map +1 -0
  78. package/dist/lib/integrations/simulation/utils/extract-json.js +106 -0
  79. package/dist/lib/integrations/simulation/utils/prompt-builders.d.ts +34 -0
  80. package/dist/lib/integrations/simulation/utils/prompt-builders.d.ts.map +1 -0
  81. package/dist/lib/integrations/simulation/utils/prompt-builders.js +147 -0
  82. package/dist/lib/integrations/simulation/utils/sanitize.d.ts +15 -0
  83. package/dist/lib/integrations/simulation/utils/sanitize.d.ts.map +1 -0
  84. package/dist/lib/integrations/simulation/utils/sanitize.js +20 -0
  85. package/dist/lib/integrations/simulation/wrap-agent.d.ts +65 -0
  86. package/dist/lib/integrations/simulation/wrap-agent.d.ts.map +1 -0
  87. package/dist/lib/integrations/simulation/wrap-agent.js +140 -0
  88. package/dist/lib/send-results.d.ts.map +1 -1
  89. package/dist/lib/send-results.js +17 -2
  90. package/dist/lib/types.d.ts +2 -2
  91. package/dist/lib/types.d.ts.map +1 -1
  92. package/dist/tsconfig.lib.tsbuildinfo +1 -1
  93. package/package.json +24 -2
@@ -0,0 +1,82 @@
1
+ /**
2
+ * User simulator agent.
3
+ *
4
+ * Simulates user behavior based on a persona and scenario,
5
+ * generating realistic user messages in conversations.
6
+ */
7
+ import { BaseAgent } from "./base.js";
8
+ // ---------------------------------------------------------------------------
9
+ // Default user simulator system prompt
10
+ // ---------------------------------------------------------------------------
11
// Base instructions for the simulated user; assembled line by line and
// joined with newlines so each instruction is easy to scan and diff.
export const DEFAULT_USER_SIMULATOR_PROMPT = [
    "You are a user simulator. Your role is to simulate realistic user behavior in a conversation with an AI agent.",
    "",
    "You will be given:",
    "1. A persona describing who you are and how you behave",
    "2. A scenario describing your goal and context",
    "",
    "Your task:",
    "- Generate realistic user messages based on your persona and scenario",
    "- Stay in character throughout the conversation",
    "- Work towards achieving your goal naturally",
    "- React authentically to the agent's responses",
    "- Do not break character or acknowledge that you are a simulation",
    "",
    "Response format:",
    "- Respond only with the user's message",
    "- Do not include any meta-commentary or explanations",
    "- Keep responses natural and conversational",
].join("\n");
28
+ // ---------------------------------------------------------------------------
29
+ // UserSimulatorAgent
30
+ // ---------------------------------------------------------------------------
31
/**
 * Agent that plays the role of the user in a simulated conversation.
 *
 * Its system prompt is the default user-simulator prompt, optionally
 * followed by a custom section (either supplied through the constructor
 * config or built from persona/scenario text via `updateContext`).
 */
export class UserSimulatorAgent extends BaseAgent {
    customSystemPrompt;
    /**
     * @param config - Agent configuration forwarded to BaseAgent; an optional
     *   `systemPrompt` entry becomes the initial custom prompt section
     */
    constructor(config) {
        super(config);
        this.customSystemPrompt = config?.systemPrompt ?? null;
    }
    /** Fixed display name of this agent. */
    get name() {
        return "UserSimulator";
    }
    /**
     * The effective system prompt: the default prompt alone, or the default
     * prompt and the custom section separated by a `---` rule.
     */
    get systemPrompt() {
        return this.customSystemPrompt
            ? `${DEFAULT_USER_SIMULATOR_PROMPT}\n\n---\n\n${this.customSystemPrompt}`
            : DEFAULT_USER_SIMULATOR_PROMPT;
    }
    /**
     * Ask the model for the opening user message of a conversation.
     *
     * The caller's array is never mutated; the instruction message is
     * appended to a copy.
     *
     * @param messages - Optional context messages
     * @returns First user message to start the conversation
     */
    async generateFirstMessage(messages) {
        const kickoff = {
            role: "user",
            content: "Generate your first message to start the conversation. Remember your goal and persona.",
        };
        const promptMessages = [...(messages ?? []), kickoff];
        return this.respondAsync(promptMessages, { temperature: 0.8 });
    }
    /**
     * Replace the custom prompt section with persona and scenario context.
     *
     * Empty/missing values are skipped; if neither is provided the custom
     * section is cleared (set to null).
     *
     * @param personaContext - Persona-specific context
     * @param scenarioContext - Scenario-specific context
     */
    updateContext(personaContext, scenarioContext) {
        const sections = [];
        if (personaContext) {
            sections.push(`PERSONA:\n${personaContext}\n\n`);
        }
        if (scenarioContext) {
            sections.push(`SCENARIO:\n${scenarioContext}`);
        }
        const merged = sections.join("").trim();
        this.customSystemPrompt = merged || null;
    }
}
@@ -0,0 +1,22 @@
1
+ /**
2
+ * Conversion functions from SimulationResult to OpenResponses format.
3
+ */
4
+ import type { ResponseResource } from "../openresponses/index.js";
5
+ import type { SimulationResult } from "./types.js";
6
+ /**
7
+ * Converts a SimulationResult to OpenResponses format.
8
+ *
9
+ * Mapping:
10
+ * - messages with role "user" → input[] as Message with input_text content
11
+ * - messages with role "assistant" → output[] as Message with output_text content
12
+ * - messages with role "system" → input[] as Message with input_text content
13
+ * - token_usage → Usage
14
+ * - terminated_by → status ("judge" → "completed", "error" → "failed", others → "incomplete")
15
+ * - goal_achieved, rules_broken, criteria_results, turn_metrics → metadata
16
+ *
17
+ * @param result - The simulation result to convert
18
+ * @param model - Optional model name to include in the response
19
+ * @returns A ResponseResource in OpenResponses format
20
+ */
21
+ export declare function toOpenResponses(result: SimulationResult, model?: string): ResponseResource;
22
+ //# sourceMappingURL=convert.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"convert.d.ts","sourceRoot":"","sources":["../../../../src/lib/integrations/simulation/convert.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,OAAO,KAAK,EAGV,gBAAgB,EAEjB,MAAM,2BAA2B,CAAC;AACnC,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;AAEnD;;;;;;;;;;;;;;GAcG;AACH,wBAAgB,eAAe,CAC7B,MAAM,EAAE,gBAAgB,EACxB,KAAK,SAAe,GACnB,gBAAgB,CA8GlB"}
@@ -0,0 +1,124 @@
1
+ /**
2
+ * Conversion functions from SimulationResult to OpenResponses format.
3
+ */
4
+ import { generateItemId } from "../common/utils.js";
5
/**
 * Convert a SimulationResult into an OpenResponses-style response object.
 *
 * Field mapping:
 * - "user" and "system" messages → input[] message items with input_text content
 * - "assistant" messages → output[] message items with output_text content
 * - messages with any other role are skipped
 * - token_usage → usage (null unless total_tokens is greater than 0)
 * - terminated_by → status: "judge" → "completed", "error" → "failed",
 *   anything else → "incomplete" (incomplete_details.reason is then filled in)
 * - goal_achieved, goal_completion_score, terminated_by, reason, turn_count,
 *   rules_broken → metadata; criteria_results and turn_metrics are added
 *   only when present / non-empty
 *
 * @param result - The simulation result to convert
 * @param model - Optional model name to include in the response (default "simulation")
 * @returns A response object in OpenResponses format
 */
export function toOpenResponses(result, model = "simulation") {
    const timestamp = Math.floor(Date.now() / 1000);
    const inputItems = [];
    const outputItems = [];
    // Route each message to the input or output list based on its role.
    for (const { role, content } of result.messages) {
        switch (role) {
            case "user":
            case "system":
                inputItems.push({
                    type: "message",
                    id: generateItemId("msg"),
                    role,
                    status: "completed",
                    content: [{ type: "input_text", text: content }],
                });
                break;
            case "assistant":
                outputItems.push({
                    type: "message",
                    id: generateItemId("msg"),
                    role: "assistant",
                    status: "completed",
                    content: [
                        {
                            type: "output_text",
                            text: content,
                            annotations: [],
                            logprobs: [],
                        },
                    ],
                });
                break;
            default:
                break;
        }
    }
    // Derive the response status from how the simulation ended.
    let status = "incomplete";
    if (result.terminated_by === "judge") {
        status = "completed";
    }
    else if (result.terminated_by === "error") {
        status = "failed";
    }
    let incompleteDetails = null;
    if (status === "incomplete") {
        incompleteDetails = { reason: `${result.terminated_by}: ${result.reason}` };
    }
    // Usage is only reported when tokens were actually counted.
    const tokenUsage = result.token_usage;
    const usageData = tokenUsage.total_tokens > 0
        ? {
            input_tokens: tokenUsage.prompt_tokens,
            input_tokens_details: { cached_tokens: 0 },
            output_tokens: tokenUsage.completion_tokens,
            output_tokens_details: { reasoning_tokens: 0 },
            total_tokens: tokenUsage.total_tokens,
        }
        : null;
    return {
        id: generateItemId("resp"),
        object: "response",
        created_at: timestamp,
        completed_at: status === "completed" ? timestamp : null,
        status,
        incomplete_details: incompleteDetails,
        model,
        previous_response_id: null,
        instructions: null,
        input: inputItems,
        output: outputItems,
        error: status === "failed" ? { message: result.reason } : null,
        tools: [],
        tool_choice: "auto",
        truncation: "disabled",
        parallel_tool_calls: false,
        text: {
            format: { type: "text" },
        },
        top_p: 1,
        presence_penalty: 0,
        frequency_penalty: 0,
        top_logprobs: 0,
        temperature: 1,
        reasoning: null,
        user: null,
        usage: usageData,
        max_output_tokens: null,
        max_tool_calls: null,
        store: false,
        background: false,
        service_tier: "default",
        metadata: {
            framework: "simulation",
            goal_achieved: result.goal_achieved,
            goal_completion_score: result.goal_completion_score,
            terminated_by: result.terminated_by,
            reason: result.reason,
            turn_count: result.turn_count,
            rules_broken: result.rules_broken,
            // Optional sections: included only when there is something to report.
            ...(result.criteria_results
                ? { criteria_results: result.criteria_results }
                : {}),
            ...(result.turn_metrics.length > 0
                ? { turn_metrics: result.turn_metrics }
                : {}),
        },
        safety_identifier: null,
        prompt_cache_key: null,
    };
}
@@ -0,0 +1,50 @@
1
+ /**
2
+ * Built-in evaluators for agent simulation.
3
+ *
4
+ * These evaluators assess simulation results using a scorer pattern
5
+ * compatible with evaluatorq integration.
6
+ */
7
+ import type { SimulationResult } from "../types.js";
8
+ export type SimulationScorer = (result: SimulationResult) => number;
9
+ /**
10
+ * Evaluate if the simulation goal was achieved.
11
+ * Returns 1 if achieved, 0 otherwise.
12
+ */
13
+ export declare const goalAchievedScorer: SimulationScorer;
14
+ /**
15
+ * Evaluate how many criteria were met.
16
+ * Returns a value between 0 and 1 based on the ratio of met criteria.
17
+ *
18
+ * Uses result.criteria_results if available; otherwise returns 1.0.
19
+ */
20
+ export declare const criteriaMetScorer: SimulationScorer;
21
+ /**
22
+ * Evaluate conversation efficiency (fewer turns = better).
23
+ * Returns a value between 0 and 1.
24
+ */
25
+ export declare const turnEfficiencyScorer: SimulationScorer;
26
+ /**
27
+ * Evaluate overall conversation quality.
28
+ *
29
+ * Composite score based on:
30
+ * - Goal achievement (40%)
31
+ * - Criteria met (30%)
32
+ * - Turn efficiency (30%)
33
+ */
34
+ export declare const conversationQualityScorer: SimulationScorer;
35
+ export declare const SIMULATION_EVALUATORS: Record<string, SimulationScorer>;
36
+ /**
37
+ * Get a built-in simulation evaluator by name.
38
+ *
39
+ * @param name - Evaluator name (goal_achieved, criteria_met, etc.)
40
+ * @returns The scorer function
41
+ * @throws Error if evaluator not found
42
+ */
43
+ export declare function getEvaluator(name: string): SimulationScorer;
44
+ /**
45
+ * Get all built-in simulation evaluators.
46
+ *
47
+ * @returns Record of evaluator name to scorer function
48
+ */
49
+ export declare function getAllEvaluators(): Record<string, SimulationScorer>;
50
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../../src/lib/integrations/simulation/evaluators/index.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AAMpD,MAAM,MAAM,gBAAgB,GAAG,CAAC,MAAM,EAAE,gBAAgB,KAAK,MAAM,CAAC;AAMpE;;;GAGG;AACH,eAAO,MAAM,kBAAkB,EAAE,gBAEhC,CAAC;AAEF;;;;;GAKG;AACH,eAAO,MAAM,iBAAiB,EAAE,gBAY/B,CAAC;AAEF;;;GAGG;AACH,eAAO,MAAM,oBAAoB,EAAE,gBAqBlC,CAAC;AAEF;;;;;;;GAOG;AACH,eAAO,MAAM,yBAAyB,EAAE,gBAOvC,CAAC;AAMF,eAAO,MAAM,qBAAqB,EAAE,MAAM,CAAC,MAAM,EAAE,gBAAgB,CAKlE,CAAC;AAEF;;;;;;GAMG;AACH,wBAAgB,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,gBAAgB,CAO3D;AAED;;;;GAIG;AACH,wBAAgB,gBAAgB,IAAI,MAAM,CAAC,MAAM,EAAE,gBAAgB,CAAC,CAEnE"}
@@ -0,0 +1,100 @@
1
+ /**
2
+ * Built-in evaluators for agent simulation.
3
+ *
4
+ * These evaluators assess simulation results using a scorer pattern
5
+ * compatible with evaluatorq integration.
6
+ */
7
+ // ---------------------------------------------------------------------------
8
+ // Individual scorers
9
+ // ---------------------------------------------------------------------------
10
/**
 * Binary goal score.
 *
 * @param result - Simulation result to score
 * @returns 1 when `result.goal_achieved` is truthy, otherwise 0
 */
export const goalAchievedScorer = (result) => {
    if (result.goal_achieved) {
        return 1;
    }
    return 0;
};
17
/**
 * Fraction of evaluation criteria that were met.
 *
 * Reads `result.criteria_results` (a name → outcome map) and counts the
 * truthy outcomes. With no criteria recorded the score is 1.0.
 *
 * @param result - Simulation result to score
 * @returns A value between 0 and 1
 */
export const criteriaMetScorer = (result) => {
    const outcomes = Object.values(result.criteria_results ?? {});
    if (outcomes.length === 0) {
        return 1.0;
    }
    let satisfied = 0;
    for (const outcome of outcomes) {
        if (outcome) {
            satisfied += 1;
        }
    }
    return satisfied / outcomes.length;
};
33
/**
 * Evaluate conversation efficiency (fewer turns = better).
 *
 * Scores are 0 when the goal was not achieved. Otherwise the score never
 * increases as the turn count grows:
 *   <= 2 turns → 1.0, <= 4 → 0.9, <= 6 → 0.7,
 *   then 0.1 less per additional turn (7 → 0.6, 8 → 0.5, ...),
 *   with a floor of 0.3.
 *
 * @param result - Simulation result to score (`goal_achieved`, `turn_count`)
 * @returns A value between 0 and 1
 */
export const turnEfficiencyScorer = (result) => {
    if (!result.goal_achieved) {
        return 0.0;
    }
    const totalTurns = result.turn_count;
    if (totalTurns <= 2) {
        return 1.0;
    }
    if (totalTurns <= 4) {
        return 0.9;
    }
    if (totalTurns <= 6) {
        return 0.7;
    }
    // Continue the decay from the 0.7 band so that a 7-turn conversation can
    // never outscore a 6-turn one. The value is computed in tenths to avoid
    // floating-point drift (e.g. 0.7 - 3 * 0.1).
    const extraTurns = totalTurns - 6;
    return Math.max(0.3, (7 - extraTurns) / 10);
};
54
/**
 * Composite conversation-quality score.
 *
 * Weighted sum of the individual scorers, rounded to two decimals:
 * - Goal achievement (40%)
 * - Criteria met (30%)
 * - Turn efficiency (30%)
 *
 * @param result - Simulation result to score
 * @returns A value between 0 and 1
 */
export const conversationQualityScorer = (result) => {
    const weightedScorers = [
        [goalAchievedScorer, 0.4],
        [criteriaMetScorer, 0.3],
        [turnEfficiencyScorer, 0.3],
    ];
    let total = 0;
    for (const [scorer, weight] of weightedScorers) {
        total += scorer(result) * weight;
    }
    return Math.round(total * 100) / 100;
};
69
+ // ---------------------------------------------------------------------------
70
+ // Registry
71
+ // ---------------------------------------------------------------------------
72
// Built-in scorers keyed by their public evaluator name.
export const SIMULATION_EVALUATORS = {
    goal_achieved: goalAchievedScorer,
    criteria_met: criteriaMetScorer,
    turn_efficiency: turnEfficiencyScorer,
    conversation_quality: conversationQualityScorer,
};
/**
 * Get a built-in simulation evaluator by name.
 *
 * Only the registry's own entries are considered, so names that merely
 * exist on Object.prototype (e.g. "toString", "constructor") are rejected
 * instead of being returned as if they were scorers.
 *
 * @param name - Evaluator name (goal_achieved, criteria_met, etc.)
 * @returns The scorer function
 * @throws Error if evaluator not found
 */
export function getEvaluator(name) {
    const evaluator = Object.hasOwn(SIMULATION_EVALUATORS, name)
        ? SIMULATION_EVALUATORS[name]
        : undefined;
    if (!evaluator) {
        const available = Object.keys(SIMULATION_EVALUATORS).join(", ");
        throw new Error(`Unknown evaluator: ${name}. Available: ${available}`);
    }
    return evaluator;
}
93
/**
 * Get all built-in simulation evaluators.
 *
 * A fresh shallow copy is returned on every call, so callers can add or
 * remove entries without affecting the shared registry.
 *
 * @returns Record of evaluator name to scorer function
 */
export function getAllEvaluators() {
    const snapshot = {};
    for (const [evaluatorName, scorer] of Object.entries(SIMULATION_EVALUATORS)) {
        snapshot[evaluatorName] = scorer;
    }
    return snapshot;
}
@@ -0,0 +1,60 @@
1
+ /**
2
+ * Datapoint generator for creating test datasets.
3
+ *
4
+ * Combines personas and scenarios into complete datapoints with first messages.
5
+ */
6
+ import type { Datapoint, Persona, Scenario } from "../types.js";
7
+ /**
8
+ * Configuration for DatapointGenerator.
9
+ */
10
+ export interface DatapointGeneratorConfig {
11
+ model?: string;
12
+ rateLimitDelay?: number;
13
+ maxConcurrentCalls?: number;
14
+ }
15
+ /**
16
+ * Generates complete datapoints for simulation.
17
+ *
18
+ * Orchestrates persona, scenario, and first message generation
19
+ * to produce ready-to-use test datapoints.
20
+ */
21
+ export declare class DatapointGenerator {
22
+ private model;
23
+ private rateLimitDelay;
24
+ private semaphore;
25
+ private personaGenerator;
26
+ private scenarioGenerator;
27
+ private firstMessageGenerator;
28
+ constructor(config?: DatapointGeneratorConfig);
29
+ /**
30
+ * Generate datapoints from agent description.
31
+ *
32
+ * Creates personas and scenarios, then combines them into datapoints.
33
+ * Total datapoints = numPersonas x (numScenarios + boundary + security)
34
+ */
35
+ generateFromDescription(params: {
36
+ agentDescription: string;
37
+ context?: string;
38
+ numPersonas?: number;
39
+ numScenarios?: number;
40
+ edgeCasePercentage?: number;
41
+ perturbationRate?: number;
42
+ includeBoundary?: boolean;
43
+ numBoundary?: number;
44
+ includeSecurity?: boolean;
45
+ numSecurity?: number;
46
+ securitySeedExamples?: Record<string, unknown>[];
47
+ securityCategories?: string[];
48
+ }): Promise<Datapoint[]>;
49
+ /**
50
+ * Generate datapoints from persona-scenario combinations.
51
+ */
52
+ generateFromCombinations(personas: Persona[], scenarios: Scenario[]): Promise<Datapoint[]>;
53
+ /**
54
+ * Apply random input perturbations to first messages for robustness testing.
55
+ *
56
+ * Uses the shared message-perturbation module.
57
+ */
58
+ private static applyPerturbations;
59
+ }
60
+ //# sourceMappingURL=datapoint-generator.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"datapoint-generator.d.ts","sourceRoot":"","sources":["../../../../../src/lib/integrations/simulation/generators/datapoint-generator.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAGH,OAAO,KAAK,EAAE,SAAS,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAgDhE;;GAEG;AACH,MAAM,WAAW,wBAAwB;IACvC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,kBAAkB,CAAC,EAAE,MAAM,CAAC;CAC7B;AAED;;;;;GAKG;AACH,qBAAa,kBAAkB;IAC7B,OAAO,CAAC,KAAK,CAAS;IACtB,OAAO,CAAC,cAAc,CAAS;IAC/B,OAAO,CAAC,SAAS,CAAY;IAC7B,OAAO,CAAC,gBAAgB,CAAmB;IAC3C,OAAO,CAAC,iBAAiB,CAAoB;IAC7C,OAAO,CAAC,qBAAqB,CAAwB;gBAEzC,MAAM,CAAC,EAAE,wBAAwB;IAa7C;;;;;OAKG;IACG,uBAAuB,CAAC,MAAM,EAAE;QACpC,gBAAgB,EAAE,MAAM,CAAC;QACzB,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,YAAY,CAAC,EAAE,MAAM,CAAC;QACtB,kBAAkB,CAAC,EAAE,MAAM,CAAC;QAC5B,gBAAgB,CAAC,EAAE,MAAM,CAAC;QAC1B,eAAe,CAAC,EAAE,OAAO,CAAC;QAC1B,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,eAAe,CAAC,EAAE,OAAO,CAAC;QAC1B,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,oBAAoB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAAE,CAAC;QACjD,kBAAkB,CAAC,EAAE,MAAM,EAAE,CAAC;KAC/B,GAAG,OAAO,CAAC,SAAS,EAAE,CAAC;IAyHxB;;OAEG;IACG,wBAAwB,CAC5B,QAAQ,EAAE,OAAO,EAAE,EACnB,SAAS,EAAE,QAAQ,EAAE,GACpB,OAAO,CAAC,SAAS,EAAE,CAAC;IAuCvB;;;;OAIG;IACH,OAAO,CAAC,MAAM,CAAC,kBAAkB;CA2BlC"}
@@ -0,0 +1,223 @@
1
+ /**
2
+ * Datapoint generator for creating test datasets.
3
+ *
4
+ * Combines personas and scenarios into complete datapoints with first messages.
5
+ */
6
+ import { applyRandomPerturbation } from "../quality/message-perturbation.js";
7
+ import { generateDatapoint } from "../utils/prompt-builders.js";
8
+ import { FirstMessageGenerator } from "./first-message-generator.js";
9
+ import { PersonaGenerator } from "./persona-generator.js";
10
+ import { ScenarioGenerator } from "./scenario-generator.js";
11
// Default rate limit settings
const DEFAULT_RATE_LIMIT_DELAY = 100; // 100ms delay between API calls
const DEFAULT_MAX_CONCURRENT_CALLS = 5;
/**
 * Simple counting semaphore for limiting concurrency.
 *
 * `acquire()` resolves immediately while fewer than `max` holders are
 * active; otherwise the caller waits in FIFO order until a holder calls
 * `release()`.
 */
class Semaphore {
    max;
    queue = [];
    running = 0;
    /**
     * @param max - Maximum number of concurrent holders; must be greater than 0
     * @throws RangeError if `max` is not a positive number (such a limit
     *   would make every acquire() wait forever)
     */
    constructor(max) {
        if (!(max > 0)) {
            throw new RangeError(`Semaphore limit must be a positive number, received: ${max}`);
        }
        this.max = max;
    }
    /**
     * Take a slot, waiting if all slots are in use.
     *
     * @returns Promise that resolves once the slot is held
     */
    async acquire() {
        if (this.running < this.max) {
            this.running++;
            return;
        }
        return new Promise((resolve) => {
            // The slot is claimed synchronously inside release(), before the
            // waiter resumes, so a concurrent acquire() cannot steal it.
            this.queue.push(() => {
                this.running++;
                resolve();
            });
        });
    }
    /**
     * Give a slot back and hand it to the longest-waiting caller, if any.
     */
    release() {
        this.running--;
        const next = this.queue.shift();
        if (next) {
            next();
        }
    }
}
44
/**
 * Pause for a given number of milliseconds.
 *
 * @param ms - Delay in milliseconds
 * @returns Promise that resolves (with no value) after the delay
 */
function sleep(ms) {
    return new Promise((wake) => {
        setTimeout(wake, ms);
    });
}
50
/**
 * Builds complete, ready-to-use simulation datapoints.
 *
 * Coordinates the persona, scenario and first-message generators and
 * pairs every persona with every scenario. First-message generation is
 * throttled through a semaphore plus a fixed delay per call.
 */
export class DatapointGenerator {
    model;
    rateLimitDelay;
    semaphore;
    personaGenerator;
    scenarioGenerator;
    firstMessageGenerator;
    /**
     * @param config - Optional settings: `model` (default "azure/gpt-4o-mini"),
     *   `rateLimitDelay` in ms (default 100) and `maxConcurrentCalls` (default 5)
     */
    constructor(config) {
        const options = config ?? {};
        this.model = options.model ?? "azure/gpt-4o-mini";
        this.rateLimitDelay = options.rateLimitDelay ?? DEFAULT_RATE_LIMIT_DELAY;
        this.semaphore = new Semaphore(options.maxConcurrentCalls ?? DEFAULT_MAX_CONCURRENT_CALLS);
        this.personaGenerator = new PersonaGenerator({ model: this.model });
        this.scenarioGenerator = new ScenarioGenerator({ model: this.model });
        this.firstMessageGenerator = new FirstMessageGenerator({ model: this.model });
    }
    /**
     * Generate datapoints from an agent description.
     *
     * Personas and scenarios (plus optional boundary and security scenarios)
     * are generated in parallel, then combined into one datapoint per
     * persona/scenario pair. If a generator yields nothing, a single default
     * persona or scenario is substituted. When `perturbationRate` is above 0,
     * first messages are randomly perturbed at that rate.
     *
     * @param params - Generation options; `agentDescription` is required
     * @returns The generated datapoints
     */
    async generateFromDescription(params) {
        const {
            agentDescription,
            context = "",
            numPersonas = 3,
            numScenarios = 5,
            edgeCasePercentage = 0.2,
            perturbationRate = 0.0,
            includeBoundary = false,
            numBoundary = 5,
            includeSecurity = false,
            numSecurity = 5,
            securitySeedExamples,
            securityCategories,
        } = params;
        console.log(`Generating ${numPersonas} personas and ${numScenarios} scenarios...`);
        // Start every generation task up front so they run in parallel.
        const pending = [
            [
                "personas",
                this.personaGenerator.generate({
                    agentDescription,
                    context,
                    numPersonas,
                    edgeCasePercentage,
                }),
            ],
            [
                "scenarios",
                this.scenarioGenerator.generate({
                    agentDescription,
                    context,
                    numScenarios,
                    edgeCasePercentage,
                }),
            ],
        ];
        if (includeBoundary) {
            console.log(`Including ${numBoundary} boundary scenarios`);
            pending.push([
                "boundary",
                this.scenarioGenerator.generateBoundaryScenarios({
                    agentDescription,
                    numScenarios: numBoundary,
                }),
            ]);
        }
        if (includeSecurity) {
            console.log(`Including ${numSecurity} security scenarios`);
            pending.push([
                "security",
                this.scenarioGenerator.generateSecurityScenarios({
                    agentDescription,
                    seedExamples: securitySeedExamples,
                    categories: securityCategories,
                    numScenarios: numSecurity,
                }),
            ]);
        }
        const settled = await Promise.all(pending.map(([, task]) => task));
        const results = Object.fromEntries(pending.map(([key], index) => [key, settled[index]]));
        let personas = results.personas;
        let scenarios = results.scenarios;
        // Append the optional scenario families after the regular scenarios.
        for (const family of ["boundary", "security"]) {
            const extra = results[family];
            if (!extra) {
                continue;
            }
            console.log(`Generated ${extra.length} ${family} scenarios`);
            scenarios = [...scenarios, ...extra];
        }
        if (personas.length === 0) {
            console.warn("No personas generated, using defaults");
            personas = [
                {
                    name: "Default User",
                    patience: 0.5,
                    assertiveness: 0.5,
                    politeness: 0.5,
                    technical_level: 0.5,
                    communication_style: "casual",
                    background: "",
                },
            ];
        }
        if (scenarios.length === 0) {
            console.warn("No scenarios generated, using defaults");
            scenarios = [
                {
                    name: "Default Scenario",
                    goal: "Get help",
                    context: "User needs general assistance",
                    starting_emotion: "neutral",
                    criteria: [],
                },
            ];
        }
        const datapoints = await this.generateFromCombinations(personas, scenarios);
        // Perturbation is opt-in: only applied for a positive rate.
        return perturbationRate > 0.0
            ? DatapointGenerator.applyPerturbations(datapoints, perturbationRate)
            : datapoints;
    }
    /**
     * Generate one datapoint for every persona/scenario pair.
     *
     * @param personas - Personas to combine
     * @param scenarios - Scenarios to combine
     * @returns Datapoints ordered persona-major (all scenarios of the first
     *   persona, then the second, ...)
     */
    async generateFromCombinations(personas, scenarios) {
        const pairs = personas.flatMap((persona) => scenarios.map((scenario) => [persona, scenario]));
        console.log(`Generating ${pairs.length} datapoints from ${personas.length} personas x ${scenarios.length} scenarios`);
        // All tasks start immediately; the semaphore bounds how many run at once.
        const tasks = pairs.map(async ([persona, scenario]) => {
            await this.semaphore.acquire();
            try {
                const firstMessage = await this.firstMessageGenerator.generate(persona, scenario);
                // Hold the slot briefly to avoid overwhelming the API.
                await sleep(this.rateLimitDelay);
                return generateDatapoint(persona, scenario, firstMessage);
            }
            finally {
                this.semaphore.release();
            }
        });
        const datapoints = await Promise.all(tasks);
        console.log(`Generated ${datapoints.length} datapoints`);
        return datapoints;
    }
    /**
     * Randomly perturb first messages for robustness testing.
     *
     * Each datapoint with a first message is perturbed with probability
     * `perturbationRate`; perturbed entries are returned as new objects and
     * the input datapoints are left untouched.
     *
     * @param datapoints - Datapoints to process
     * @param perturbationRate - Probability of perturbing each first message
     * @returns A new array of datapoints
     */
    static applyPerturbations(datapoints, perturbationRate) {
        let perturbedCount = 0;
        const result = [];
        for (const dp of datapoints) {
            const shouldPerturb = dp.first_message && Math.random() < perturbationRate;
            if (!shouldPerturb) {
                result.push(dp);
                continue;
            }
            const [perturbedMsg, pType] = applyRandomPerturbation(dp.first_message);
            perturbedCount += 1;
            console.debug(`Applied ${pType} perturbation to: ${dp.scenario.name}`);
            result.push({ ...dp, first_message: perturbedMsg });
        }
        if (perturbedCount > 0) {
            console.log(`Applied perturbations to ${perturbedCount}/${datapoints.length} first messages`);
        }
        return result;
    }
}