@orq-ai/evaluatorq 1.3.1 → 1.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. package/dist/lib/integrations/simulation/adapters.d.ts +28 -5
  2. package/dist/lib/integrations/simulation/adapters.d.ts.map +1 -1
  3. package/dist/lib/integrations/simulation/adapters.js +113 -7
  4. package/dist/lib/integrations/simulation/agents/base.d.ts +3 -0
  5. package/dist/lib/integrations/simulation/agents/base.d.ts.map +1 -1
  6. package/dist/lib/integrations/simulation/agents/base.js +104 -82
  7. package/dist/lib/integrations/simulation/agents/judge.d.ts.map +1 -1
  8. package/dist/lib/integrations/simulation/agents/judge.js +1 -0
  9. package/dist/lib/integrations/simulation/agents/user-simulator.d.ts.map +1 -1
  10. package/dist/lib/integrations/simulation/agents/user-simulator.js +4 -1
  11. package/dist/lib/integrations/simulation/generators/first-message-generator.d.ts.map +1 -1
  12. package/dist/lib/integrations/simulation/generators/first-message-generator.js +51 -28
  13. package/dist/lib/integrations/simulation/generators/persona-generator.d.ts.map +1 -1
  14. package/dist/lib/integrations/simulation/generators/persona-generator.js +144 -102
  15. package/dist/lib/integrations/simulation/generators/scenario-generator.d.ts.map +1 -1
  16. package/dist/lib/integrations/simulation/generators/scenario-generator.js +274 -169
  17. package/dist/lib/integrations/simulation/index.d.ts +1 -1
  18. package/dist/lib/integrations/simulation/index.d.ts.map +1 -1
  19. package/dist/lib/integrations/simulation/index.js +1 -1
  20. package/dist/lib/integrations/simulation/runner/simulation.d.ts.map +1 -1
  21. package/dist/lib/integrations/simulation/runner/simulation.js +147 -85
  22. package/dist/lib/integrations/simulation/simulation/index.d.ts.map +1 -1
  23. package/dist/lib/integrations/simulation/simulation/index.js +81 -27
  24. package/dist/lib/integrations/simulation/tracing.d.ts +111 -0
  25. package/dist/lib/integrations/simulation/tracing.d.ts.map +1 -0
  26. package/dist/lib/integrations/simulation/tracing.js +310 -0
  27. package/dist/lib/integrations/simulation/wrap-agent.js +2 -2
  28. package/dist/tsconfig.lib.tsbuildinfo +1 -1
  29. package/package.json +1 -1
@@ -4,6 +4,7 @@
4
4
  * Generates user personas from agent descriptions and optional context.
5
5
  */
6
6
  import OpenAI from "openai";
7
+ import { getTraceContextHeaders, recordLLMInput, recordLLMResponse, withLLMSpan, withSimulationSpan, } from "../tracing.js";
7
8
  import { extractJsonFromResponse } from "../utils/extract-json.js";
8
9
  import { delimit } from "../utils/sanitize.js";
9
10
  // Temperature settings for different generation modes
@@ -133,9 +134,13 @@ export class PersonaGenerator {
133
134
  * Generate personas for agent testing.
134
135
  */
135
136
  async generate(params) {
136
- const { agentDescription, context = "", numPersonas = 5, edgeCasePercentage = 0.2, } = params;
137
- const numEdgeCases = Math.floor(numPersonas * edgeCasePercentage);
138
- const userPrompt = `Agent Description: ${delimit(agentDescription)}
137
+ return withSimulationSpan("orq.simulation.persona_generation", {
138
+ "orq.simulation.num_personas": params.numPersonas ?? 5,
139
+ "orq.simulation.model": this.model,
140
+ }, async (_span) => {
141
+ const { agentDescription, context = "", numPersonas = 5, edgeCasePercentage = 0.2, } = params;
142
+ const numEdgeCases = Math.floor(numPersonas * edgeCasePercentage);
143
+ const userPrompt = `Agent Description: ${delimit(agentDescription)}
139
144
 
140
145
  Additional Context: ${delimit(context || "None provided")}
141
146
 
@@ -145,21 +150,37 @@ Generate ${numPersonas} diverse personas for testing this agent.
145
150
  - Create realistic backgrounds relevant to the agent's domain
146
151
 
147
152
  Return ONLY a JSON array, no other text.`;
148
- const response = await this.client.chat.completions.create({
149
- model: this.model,
150
- messages: [
153
+ const llmMessages = [
151
154
  { role: "system", content: PERSONA_GENERATOR_PROMPT },
152
155
  { role: "user", content: userPrompt },
153
- ],
154
- temperature: TEMPERATURE_CREATIVE,
155
- max_tokens: 4000,
156
+ ];
157
+ const response = await withLLMSpan({
158
+ model: this.model,
159
+ temperature: TEMPERATURE_CREATIVE,
160
+ maxTokens: 4000,
161
+ purpose: "persona_generation",
162
+ }, async (llmSpan) => {
163
+ recordLLMInput(llmSpan, llmMessages.map((m) => ({
164
+ role: m.role,
165
+ content: typeof m.content === "string" ? m.content : "",
166
+ })));
167
+ const traceHeaders = await getTraceContextHeaders();
168
+ const res = await this.client.chat.completions.create({
169
+ model: this.model,
170
+ messages: llmMessages,
171
+ temperature: TEMPERATURE_CREATIVE,
172
+ max_tokens: 4000,
173
+ }, { headers: traceHeaders });
174
+ recordLLMResponse(llmSpan, res);
175
+ return res;
176
+ });
177
+ const content = response.choices[0]?.message.content ?? "[]";
178
+ const personas = PersonaGenerator.parsePersonas(content);
179
+ if (personas.length < numPersonas) {
180
+ console.warn(`PersonaGenerator: requested ${numPersonas} personas but only ${personas.length} were successfully parsed`);
181
+ }
182
+ return personas;
156
183
  });
157
- const content = response.choices[0]?.message.content ?? "[]";
158
- const personas = PersonaGenerator.parsePersonas(content);
159
- if (personas.length < numPersonas) {
160
- console.warn(`PersonaGenerator: requested ${numPersonas} personas but only ${personas.length} were successfully parsed`);
161
- }
162
- return personas;
163
184
  }
164
185
  /**
165
186
  * Generate personas with guaranteed trait coverage.
@@ -168,74 +189,79 @@ Return ONLY a JSON array, no other text.`;
168
189
  * including extreme values that LLMs tend to avoid.
169
190
  */
170
191
  async generateWithCoverage(params) {
171
- const { agentDescription, context = "", numPersonas = 8, edgeCasePercentage = 0.2, } = params;
172
- const styles = [
173
- "formal",
174
- "casual",
175
- "terse",
176
- "verbose",
177
- ];
178
- // Explicit trait combinations covering the FULL range (0.0-1.0)
179
- const traitTargets = [
180
- {
181
- patience: 0.1,
182
- assertiveness: 0.1,
183
- politeness: 0.1,
184
- technical_level: 0.1,
185
- },
186
- {
187
- patience: 0.9,
188
- assertiveness: 0.1,
189
- politeness: 0.9,
190
- technical_level: 0.9,
191
- },
192
- {
193
- patience: 0.1,
194
- assertiveness: 0.9,
195
- politeness: 0.1,
196
- technical_level: 0.5,
197
- },
198
- {
199
- patience: 0.5,
200
- assertiveness: 0.9,
201
- politeness: 0.9,
202
- technical_level: 0.1,
203
- },
204
- {
205
- patience: 0.5,
206
- assertiveness: 0.5,
207
- politeness: 0.5,
208
- technical_level: 0.5,
209
- },
210
- {
211
- patience: 0.3,
212
- assertiveness: 0.7,
213
- politeness: 0.6,
214
- technical_level: 0.3,
215
- },
216
- {
217
- patience: 0.7,
218
- assertiveness: 0.3,
219
- politeness: 0.8,
220
- technical_level: 0.7,
221
- },
222
- {
223
- patience: 0.2,
224
- assertiveness: 0.8,
225
- politeness: 0.3,
226
- technical_level: 0.8,
227
- },
228
- ];
229
- const numEdgeCases = Math.floor(numPersonas * edgeCasePercentage);
230
- const coverageInstructions = Array.from({ length: Math.min(numPersonas, 8) }, (_, i) => {
231
- const target = traitTargets[i % traitTargets.length];
232
- return (`- Persona ${i + 1}: communication_style='${styles[i % styles.length]}', ` +
233
- `patience=${target.patience.toFixed(1)}, ` +
234
- `assertiveness=${target.assertiveness.toFixed(1)}, ` +
235
- `politeness=${target.politeness.toFixed(1)}, ` +
236
- `technical_level=${target.technical_level.toFixed(1)}`);
237
- }).join("\n");
238
- const userPrompt = `Agent Description: ${delimit(agentDescription)}
192
+ return withSimulationSpan("orq.simulation.persona_generation", {
193
+ "orq.simulation.num_personas": params.numPersonas ?? 8,
194
+ "orq.simulation.mode": "coverage",
195
+ "orq.simulation.model": this.model,
196
+ }, async (_span) => {
197
+ const { agentDescription, context = "", numPersonas = 8, edgeCasePercentage = 0.2, } = params;
198
+ const styles = [
199
+ "formal",
200
+ "casual",
201
+ "terse",
202
+ "verbose",
203
+ ];
204
+ // Explicit trait combinations covering the FULL range (0.0-1.0)
205
+ const traitTargets = [
206
+ {
207
+ patience: 0.1,
208
+ assertiveness: 0.1,
209
+ politeness: 0.1,
210
+ technical_level: 0.1,
211
+ },
212
+ {
213
+ patience: 0.9,
214
+ assertiveness: 0.1,
215
+ politeness: 0.9,
216
+ technical_level: 0.9,
217
+ },
218
+ {
219
+ patience: 0.1,
220
+ assertiveness: 0.9,
221
+ politeness: 0.1,
222
+ technical_level: 0.5,
223
+ },
224
+ {
225
+ patience: 0.5,
226
+ assertiveness: 0.9,
227
+ politeness: 0.9,
228
+ technical_level: 0.1,
229
+ },
230
+ {
231
+ patience: 0.5,
232
+ assertiveness: 0.5,
233
+ politeness: 0.5,
234
+ technical_level: 0.5,
235
+ },
236
+ {
237
+ patience: 0.3,
238
+ assertiveness: 0.7,
239
+ politeness: 0.6,
240
+ technical_level: 0.3,
241
+ },
242
+ {
243
+ patience: 0.7,
244
+ assertiveness: 0.3,
245
+ politeness: 0.8,
246
+ technical_level: 0.7,
247
+ },
248
+ {
249
+ patience: 0.2,
250
+ assertiveness: 0.8,
251
+ politeness: 0.3,
252
+ technical_level: 0.8,
253
+ },
254
+ ];
255
+ const numEdgeCases = Math.floor(numPersonas * edgeCasePercentage);
256
+ const coverageInstructions = Array.from({ length: Math.min(numPersonas, 8) }, (_, i) => {
257
+ const target = traitTargets[i % traitTargets.length];
258
+ return (`- Persona ${i + 1}: communication_style='${styles[i % styles.length]}', ` +
259
+ `patience=${target.patience.toFixed(1)}, ` +
260
+ `assertiveness=${target.assertiveness.toFixed(1)}, ` +
261
+ `politeness=${target.politeness.toFixed(1)}, ` +
262
+ `technical_level=${target.technical_level.toFixed(1)}`);
263
+ }).join("\n");
264
+ const userPrompt = `Agent Description: ${delimit(agentDescription)}
239
265
 
240
266
  Additional Context: ${delimit(context || "None provided")}
241
267
 
@@ -252,28 +278,44 @@ IMPORTANT:
252
278
  - Create realistic backgrounds relevant to the agent's domain
253
279
 
254
280
  Return ONLY a JSON array, no other text.`;
255
- const response = await this.client.chat.completions.create({
256
- model: this.model,
257
- messages: [
281
+ const covMessages = [
258
282
  { role: "system", content: PERSONA_GENERATOR_PROMPT },
259
283
  { role: "user", content: userPrompt },
260
- ],
261
- temperature: TEMPERATURE_BALANCED,
262
- max_tokens: 4000,
284
+ ];
285
+ const response = await withLLMSpan({
286
+ model: this.model,
287
+ temperature: TEMPERATURE_BALANCED,
288
+ maxTokens: 4000,
289
+ purpose: "persona_generation_coverage",
290
+ }, async (llmSpan) => {
291
+ recordLLMInput(llmSpan, covMessages.map((m) => ({
292
+ role: m.role,
293
+ content: typeof m.content === "string" ? m.content : "",
294
+ })));
295
+ const traceHeaders = await getTraceContextHeaders();
296
+ const res = await this.client.chat.completions.create({
297
+ model: this.model,
298
+ messages: covMessages,
299
+ temperature: TEMPERATURE_BALANCED,
300
+ max_tokens: 4000,
301
+ }, { headers: traceHeaders });
302
+ recordLLMResponse(llmSpan, res);
303
+ return res;
304
+ });
305
+ const content = response.choices[0]?.message.content ?? "[]";
306
+ let personas = PersonaGenerator.parsePersonas(content);
307
+ // Validate coverage and fill gaps if needed
308
+ personas = this.ensureStyleCoverage(personas, styles);
309
+ this.logTraitCoverageGaps(personas);
310
+ // Trim to requested count (coverage adjustments may have kept extras)
311
+ if (personas.length > numPersonas) {
312
+ personas = personas.slice(0, numPersonas);
313
+ }
314
+ if (personas.length < numPersonas) {
315
+ console.warn(`PersonaGenerator: requested ${numPersonas} personas (with coverage) but only ${personas.length} were successfully parsed`);
316
+ }
317
+ return personas;
263
318
  });
264
- const content = response.choices[0]?.message.content ?? "[]";
265
- let personas = PersonaGenerator.parsePersonas(content);
266
- // Validate coverage and fill gaps if needed
267
- personas = this.ensureStyleCoverage(personas, styles);
268
- this.logTraitCoverageGaps(personas);
269
- // Trim to requested count (coverage adjustments may have kept extras)
270
- if (personas.length > numPersonas) {
271
- personas = personas.slice(0, numPersonas);
272
- }
273
- if (personas.length < numPersonas) {
274
- console.warn(`PersonaGenerator: requested ${numPersonas} personas (with coverage) but only ${personas.length} were successfully parsed`);
275
- }
276
- return personas;
277
319
  }
278
320
  /**
279
321
  * Ensure all communication styles are covered.
@@ -1 +1 @@
1
- {"version":3,"file":"scenario-generator.d.ts","sourceRoot":"","sources":["../../../../../src/lib/integrations/simulation/generators/scenario-generator.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,MAAM,MAAM,QAAQ,CAAC;AAE5B,OAAO,KAAK,EAGV,QAAQ,EAET,MAAM,aAAa,CAAC;AAsJrB;;GAEG;AACH,MAAM,WAAW,uBAAuB;IACtC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAwDD;;;;;GAKG;AACH,qBAAa,iBAAiB;IAC5B,OAAO,CAAC,KAAK,CAAS;IACtB,OAAO,CAAC,MAAM,CAAS;gBAEX,MAAM,CAAC,EAAE,uBAAuB;IAkB5C;;OAEG;IACG,QAAQ,CAAC,MAAM,EAAE;QACrB,gBAAgB,EAAE,MAAM,CAAC;QACzB,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,YAAY,CAAC,EAAE,MAAM,CAAC;QACtB,kBAAkB,CAAC,EAAE,MAAM,CAAC;KAC7B,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC;IAuDvB;;OAEG;IACG,oBAAoB,CAAC,MAAM,EAAE;QACjC,gBAAgB,EAAE,MAAM,CAAC;QACzB,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,YAAY,CAAC,EAAE,MAAM,CAAC;QACtB,kBAAkB,CAAC,EAAE,MAAM,CAAC;KAC7B,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC;IAoFvB;;OAEG;IACH,OAAO,CAAC,qBAAqB;IA6B7B;;OAEG;IACH,OAAO,CAAC,sBAAsB;IAyB9B;;OAEG;IACG,iBAAiB,CAAC,MAAM,EAAE;QAC9B,gBAAgB,EAAE,MAAM,CAAC;QACzB,iBAAiB,CAAC,EAAE,QAAQ,EAAE,CAAC;QAC/B,YAAY,CAAC,EAAE,MAAM,CAAC;KACvB,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC;IA4DvB;;OAEG;IACG,yBAAyB,CAAC,MAAM,EAAE;QACtC,gBAAgB,EAAE,MAAM,CAAC;QACzB,YAAY,CAAC,EAAE,MAAM,CAAC;KACvB,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC;IAuDvB;;OAEG;IACG,yBAAyB,CAAC,MAAM,EAAE;QACtC,gBAAgB,EAAE,MAAM,CAAC;QACzB,YAAY,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAAE,CAAC;QACzC,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;QACtB,YAAY,CAAC,EAAE,MAAM,CAAC;KACvB,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC;CA2ExB"}
1
+ {"version":3,"file":"scenario-generator.d.ts","sourceRoot":"","sources":["../../../../../src/lib/integrations/simulation/generators/scenario-generator.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,MAAM,MAAM,QAAQ,CAAC;AAS5B,OAAO,KAAK,EAGV,QAAQ,EAET,MAAM,aAAa,CAAC;AAsJrB;;GAEG;AACH,MAAM,WAAW,uBAAuB;IACtC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAwDD;;;;;GAKG;AACH,qBAAa,iBAAiB;IAC5B,OAAO,CAAC,KAAK,CAAS;IACtB,OAAO,CAAC,MAAM,CAAS;gBAEX,MAAM,CAAC,EAAE,uBAAuB;IAkB5C;;OAEG;IACG,QAAQ,CAAC,MAAM,EAAE;QACrB,gBAAgB,EAAE,MAAM,CAAC;QACzB,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,YAAY,CAAC,EAAE,MAAM,CAAC;QACtB,kBAAkB,CAAC,EAAE,MAAM,CAAC;KAC7B,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC;IAyFvB;;OAEG;IACG,oBAAoB,CAAC,MAAM,EAAE;QACjC,gBAAgB,EAAE,MAAM,CAAC;QACzB,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,YAAY,CAAC,EAAE,MAAM,CAAC;QACtB,kBAAkB,CAAC,EAAE,MAAM,CAAC;KAC7B,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC;IAuHvB;;OAEG;IACH,OAAO,CAAC,qBAAqB;IA6B7B;;OAEG;IACH,OAAO,CAAC,sBAAsB;IAyB9B;;OAEG;IACG,iBAAiB,CAAC,MAAM,EAAE;QAC9B,gBAAgB,EAAE,MAAM,CAAC;QACzB,iBAAiB,CAAC,EAAE,QAAQ,EAAE,CAAC;QAC/B,YAAY,CAAC,EAAE,MAAM,CAAC;KACvB,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC;IAmGvB;;OAEG;IACG,yBAAyB,CAAC,MAAM,EAAE;QACtC,gBAAgB,EAAE,MAAM,CAAC;QACzB,YAAY,CAAC,EAAE,MAAM,CAAC;KACvB,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC;IA0FvB;;OAEG;IACG,yBAAyB,CAAC,MAAM,EAAE;QACtC,gBAAgB,EAAE,MAAM,CAAC;QACzB,YAAY,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAAE,CAAC;QACzC,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;QACtB,YAAY,CAAC,EAAE,MAAM,CAAC;KACvB,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC;CA8GxB"}