@orq-ai/evaluatorq 1.3.1 → 1.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/lib/integrations/simulation/adapters.d.ts +28 -5
- package/dist/lib/integrations/simulation/adapters.d.ts.map +1 -1
- package/dist/lib/integrations/simulation/adapters.js +113 -7
- package/dist/lib/integrations/simulation/agents/base.d.ts +3 -0
- package/dist/lib/integrations/simulation/agents/base.d.ts.map +1 -1
- package/dist/lib/integrations/simulation/agents/base.js +104 -82
- package/dist/lib/integrations/simulation/agents/judge.d.ts.map +1 -1
- package/dist/lib/integrations/simulation/agents/judge.js +1 -0
- package/dist/lib/integrations/simulation/agents/user-simulator.d.ts.map +1 -1
- package/dist/lib/integrations/simulation/agents/user-simulator.js +4 -1
- package/dist/lib/integrations/simulation/generators/first-message-generator.d.ts.map +1 -1
- package/dist/lib/integrations/simulation/generators/first-message-generator.js +51 -28
- package/dist/lib/integrations/simulation/generators/persona-generator.d.ts.map +1 -1
- package/dist/lib/integrations/simulation/generators/persona-generator.js +144 -102
- package/dist/lib/integrations/simulation/generators/scenario-generator.d.ts.map +1 -1
- package/dist/lib/integrations/simulation/generators/scenario-generator.js +274 -169
- package/dist/lib/integrations/simulation/index.d.ts +1 -1
- package/dist/lib/integrations/simulation/index.d.ts.map +1 -1
- package/dist/lib/integrations/simulation/index.js +1 -1
- package/dist/lib/integrations/simulation/runner/simulation.d.ts.map +1 -1
- package/dist/lib/integrations/simulation/runner/simulation.js +147 -85
- package/dist/lib/integrations/simulation/simulation/index.d.ts.map +1 -1
- package/dist/lib/integrations/simulation/simulation/index.js +81 -27
- package/dist/lib/integrations/simulation/tracing.d.ts +111 -0
- package/dist/lib/integrations/simulation/tracing.d.ts.map +1 -0
- package/dist/lib/integrations/simulation/tracing.js +310 -0
- package/dist/lib/integrations/simulation/wrap-agent.js +2 -2
- package/dist/tsconfig.lib.tsbuildinfo +1 -1
- package/package.json +1 -1
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
* Generates user personas from agent descriptions and optional context.
|
|
5
5
|
*/
|
|
6
6
|
import OpenAI from "openai";
|
|
7
|
+
import { getTraceContextHeaders, recordLLMInput, recordLLMResponse, withLLMSpan, withSimulationSpan, } from "../tracing.js";
|
|
7
8
|
import { extractJsonFromResponse } from "../utils/extract-json.js";
|
|
8
9
|
import { delimit } from "../utils/sanitize.js";
|
|
9
10
|
// Temperature settings for different generation modes
|
|
@@ -133,9 +134,13 @@ export class PersonaGenerator {
|
|
|
133
134
|
* Generate personas for agent testing.
|
|
134
135
|
*/
|
|
135
136
|
async generate(params) {
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
137
|
+
return withSimulationSpan("orq.simulation.persona_generation", {
|
|
138
|
+
"orq.simulation.num_personas": params.numPersonas ?? 5,
|
|
139
|
+
"orq.simulation.model": this.model,
|
|
140
|
+
}, async (_span) => {
|
|
141
|
+
const { agentDescription, context = "", numPersonas = 5, edgeCasePercentage = 0.2, } = params;
|
|
142
|
+
const numEdgeCases = Math.floor(numPersonas * edgeCasePercentage);
|
|
143
|
+
const userPrompt = `Agent Description: ${delimit(agentDescription)}
|
|
139
144
|
|
|
140
145
|
Additional Context: ${delimit(context || "None provided")}
|
|
141
146
|
|
|
@@ -145,21 +150,37 @@ Generate ${numPersonas} diverse personas for testing this agent.
|
|
|
145
150
|
- Create realistic backgrounds relevant to the agent's domain
|
|
146
151
|
|
|
147
152
|
Return ONLY a JSON array, no other text.`;
|
|
148
|
-
|
|
149
|
-
model: this.model,
|
|
150
|
-
messages: [
|
|
153
|
+
const llmMessages = [
|
|
151
154
|
{ role: "system", content: PERSONA_GENERATOR_PROMPT },
|
|
152
155
|
{ role: "user", content: userPrompt },
|
|
153
|
-
]
|
|
154
|
-
|
|
155
|
-
|
|
156
|
+
];
|
|
157
|
+
const response = await withLLMSpan({
|
|
158
|
+
model: this.model,
|
|
159
|
+
temperature: TEMPERATURE_CREATIVE,
|
|
160
|
+
maxTokens: 4000,
|
|
161
|
+
purpose: "persona_generation",
|
|
162
|
+
}, async (llmSpan) => {
|
|
163
|
+
recordLLMInput(llmSpan, llmMessages.map((m) => ({
|
|
164
|
+
role: m.role,
|
|
165
|
+
content: typeof m.content === "string" ? m.content : "",
|
|
166
|
+
})));
|
|
167
|
+
const traceHeaders = await getTraceContextHeaders();
|
|
168
|
+
const res = await this.client.chat.completions.create({
|
|
169
|
+
model: this.model,
|
|
170
|
+
messages: llmMessages,
|
|
171
|
+
temperature: TEMPERATURE_CREATIVE,
|
|
172
|
+
max_tokens: 4000,
|
|
173
|
+
}, { headers: traceHeaders });
|
|
174
|
+
recordLLMResponse(llmSpan, res);
|
|
175
|
+
return res;
|
|
176
|
+
});
|
|
177
|
+
const content = response.choices[0]?.message.content ?? "[]";
|
|
178
|
+
const personas = PersonaGenerator.parsePersonas(content);
|
|
179
|
+
if (personas.length < numPersonas) {
|
|
180
|
+
console.warn(`PersonaGenerator: requested ${numPersonas} personas but only ${personas.length} were successfully parsed`);
|
|
181
|
+
}
|
|
182
|
+
return personas;
|
|
156
183
|
});
|
|
157
|
-
const content = response.choices[0]?.message.content ?? "[]";
|
|
158
|
-
const personas = PersonaGenerator.parsePersonas(content);
|
|
159
|
-
if (personas.length < numPersonas) {
|
|
160
|
-
console.warn(`PersonaGenerator: requested ${numPersonas} personas but only ${personas.length} were successfully parsed`);
|
|
161
|
-
}
|
|
162
|
-
return personas;
|
|
163
184
|
}
|
|
164
185
|
/**
|
|
165
186
|
* Generate personas with guaranteed trait coverage.
|
|
@@ -168,74 +189,79 @@ Return ONLY a JSON array, no other text.`;
|
|
|
168
189
|
* including extreme values that LLMs tend to avoid.
|
|
169
190
|
*/
|
|
170
191
|
async generateWithCoverage(params) {
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
"
|
|
174
|
-
"
|
|
175
|
-
|
|
176
|
-
"
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
192
|
+
return withSimulationSpan("orq.simulation.persona_generation", {
|
|
193
|
+
"orq.simulation.num_personas": params.numPersonas ?? 8,
|
|
194
|
+
"orq.simulation.mode": "coverage",
|
|
195
|
+
"orq.simulation.model": this.model,
|
|
196
|
+
}, async (_span) => {
|
|
197
|
+
const { agentDescription, context = "", numPersonas = 8, edgeCasePercentage = 0.2, } = params;
|
|
198
|
+
const styles = [
|
|
199
|
+
"formal",
|
|
200
|
+
"casual",
|
|
201
|
+
"terse",
|
|
202
|
+
"verbose",
|
|
203
|
+
];
|
|
204
|
+
// Explicit trait combinations covering the FULL range (0.0-1.0)
|
|
205
|
+
const traitTargets = [
|
|
206
|
+
{
|
|
207
|
+
patience: 0.1,
|
|
208
|
+
assertiveness: 0.1,
|
|
209
|
+
politeness: 0.1,
|
|
210
|
+
technical_level: 0.1,
|
|
211
|
+
},
|
|
212
|
+
{
|
|
213
|
+
patience: 0.9,
|
|
214
|
+
assertiveness: 0.1,
|
|
215
|
+
politeness: 0.9,
|
|
216
|
+
technical_level: 0.9,
|
|
217
|
+
},
|
|
218
|
+
{
|
|
219
|
+
patience: 0.1,
|
|
220
|
+
assertiveness: 0.9,
|
|
221
|
+
politeness: 0.1,
|
|
222
|
+
technical_level: 0.5,
|
|
223
|
+
},
|
|
224
|
+
{
|
|
225
|
+
patience: 0.5,
|
|
226
|
+
assertiveness: 0.9,
|
|
227
|
+
politeness: 0.9,
|
|
228
|
+
technical_level: 0.1,
|
|
229
|
+
},
|
|
230
|
+
{
|
|
231
|
+
patience: 0.5,
|
|
232
|
+
assertiveness: 0.5,
|
|
233
|
+
politeness: 0.5,
|
|
234
|
+
technical_level: 0.5,
|
|
235
|
+
},
|
|
236
|
+
{
|
|
237
|
+
patience: 0.3,
|
|
238
|
+
assertiveness: 0.7,
|
|
239
|
+
politeness: 0.6,
|
|
240
|
+
technical_level: 0.3,
|
|
241
|
+
},
|
|
242
|
+
{
|
|
243
|
+
patience: 0.7,
|
|
244
|
+
assertiveness: 0.3,
|
|
245
|
+
politeness: 0.8,
|
|
246
|
+
technical_level: 0.7,
|
|
247
|
+
},
|
|
248
|
+
{
|
|
249
|
+
patience: 0.2,
|
|
250
|
+
assertiveness: 0.8,
|
|
251
|
+
politeness: 0.3,
|
|
252
|
+
technical_level: 0.8,
|
|
253
|
+
},
|
|
254
|
+
];
|
|
255
|
+
const numEdgeCases = Math.floor(numPersonas * edgeCasePercentage);
|
|
256
|
+
const coverageInstructions = Array.from({ length: Math.min(numPersonas, 8) }, (_, i) => {
|
|
257
|
+
const target = traitTargets[i % traitTargets.length];
|
|
258
|
+
return (`- Persona ${i + 1}: communication_style='${styles[i % styles.length]}', ` +
|
|
259
|
+
`patience=${target.patience.toFixed(1)}, ` +
|
|
260
|
+
`assertiveness=${target.assertiveness.toFixed(1)}, ` +
|
|
261
|
+
`politeness=${target.politeness.toFixed(1)}, ` +
|
|
262
|
+
`technical_level=${target.technical_level.toFixed(1)}`);
|
|
263
|
+
}).join("\n");
|
|
264
|
+
const userPrompt = `Agent Description: ${delimit(agentDescription)}
|
|
239
265
|
|
|
240
266
|
Additional Context: ${delimit(context || "None provided")}
|
|
241
267
|
|
|
@@ -252,28 +278,44 @@ IMPORTANT:
|
|
|
252
278
|
- Create realistic backgrounds relevant to the agent's domain
|
|
253
279
|
|
|
254
280
|
Return ONLY a JSON array, no other text.`;
|
|
255
|
-
|
|
256
|
-
model: this.model,
|
|
257
|
-
messages: [
|
|
281
|
+
const covMessages = [
|
|
258
282
|
{ role: "system", content: PERSONA_GENERATOR_PROMPT },
|
|
259
283
|
{ role: "user", content: userPrompt },
|
|
260
|
-
]
|
|
261
|
-
|
|
262
|
-
|
|
284
|
+
];
|
|
285
|
+
const response = await withLLMSpan({
|
|
286
|
+
model: this.model,
|
|
287
|
+
temperature: TEMPERATURE_BALANCED,
|
|
288
|
+
maxTokens: 4000,
|
|
289
|
+
purpose: "persona_generation_coverage",
|
|
290
|
+
}, async (llmSpan) => {
|
|
291
|
+
recordLLMInput(llmSpan, covMessages.map((m) => ({
|
|
292
|
+
role: m.role,
|
|
293
|
+
content: typeof m.content === "string" ? m.content : "",
|
|
294
|
+
})));
|
|
295
|
+
const traceHeaders = await getTraceContextHeaders();
|
|
296
|
+
const res = await this.client.chat.completions.create({
|
|
297
|
+
model: this.model,
|
|
298
|
+
messages: covMessages,
|
|
299
|
+
temperature: TEMPERATURE_BALANCED,
|
|
300
|
+
max_tokens: 4000,
|
|
301
|
+
}, { headers: traceHeaders });
|
|
302
|
+
recordLLMResponse(llmSpan, res);
|
|
303
|
+
return res;
|
|
304
|
+
});
|
|
305
|
+
const content = response.choices[0]?.message.content ?? "[]";
|
|
306
|
+
let personas = PersonaGenerator.parsePersonas(content);
|
|
307
|
+
// Validate coverage and fill gaps if needed
|
|
308
|
+
personas = this.ensureStyleCoverage(personas, styles);
|
|
309
|
+
this.logTraitCoverageGaps(personas);
|
|
310
|
+
// Trim to requested count (coverage adjustments may have kept extras)
|
|
311
|
+
if (personas.length > numPersonas) {
|
|
312
|
+
personas = personas.slice(0, numPersonas);
|
|
313
|
+
}
|
|
314
|
+
if (personas.length < numPersonas) {
|
|
315
|
+
console.warn(`PersonaGenerator: requested ${numPersonas} personas (with coverage) but only ${personas.length} were successfully parsed`);
|
|
316
|
+
}
|
|
317
|
+
return personas;
|
|
263
318
|
});
|
|
264
|
-
const content = response.choices[0]?.message.content ?? "[]";
|
|
265
|
-
let personas = PersonaGenerator.parsePersonas(content);
|
|
266
|
-
// Validate coverage and fill gaps if needed
|
|
267
|
-
personas = this.ensureStyleCoverage(personas, styles);
|
|
268
|
-
this.logTraitCoverageGaps(personas);
|
|
269
|
-
// Trim to requested count (coverage adjustments may have kept extras)
|
|
270
|
-
if (personas.length > numPersonas) {
|
|
271
|
-
personas = personas.slice(0, numPersonas);
|
|
272
|
-
}
|
|
273
|
-
if (personas.length < numPersonas) {
|
|
274
|
-
console.warn(`PersonaGenerator: requested ${numPersonas} personas (with coverage) but only ${personas.length} were successfully parsed`);
|
|
275
|
-
}
|
|
276
|
-
return personas;
|
|
277
319
|
}
|
|
278
320
|
/**
|
|
279
321
|
* Ensure all communication styles are covered.
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scenario-generator.d.ts","sourceRoot":"","sources":["../../../../../src/lib/integrations/simulation/generators/scenario-generator.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,MAAM,MAAM,QAAQ,CAAC;
|
|
1
|
+
{"version":3,"file":"scenario-generator.d.ts","sourceRoot":"","sources":["../../../../../src/lib/integrations/simulation/generators/scenario-generator.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,MAAM,MAAM,QAAQ,CAAC;AAS5B,OAAO,KAAK,EAGV,QAAQ,EAET,MAAM,aAAa,CAAC;AAsJrB;;GAEG;AACH,MAAM,WAAW,uBAAuB;IACtC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAwDD;;;;;GAKG;AACH,qBAAa,iBAAiB;IAC5B,OAAO,CAAC,KAAK,CAAS;IACtB,OAAO,CAAC,MAAM,CAAS;gBAEX,MAAM,CAAC,EAAE,uBAAuB;IAkB5C;;OAEG;IACG,QAAQ,CAAC,MAAM,EAAE;QACrB,gBAAgB,EAAE,MAAM,CAAC;QACzB,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,YAAY,CAAC,EAAE,MAAM,CAAC;QACtB,kBAAkB,CAAC,EAAE,MAAM,CAAC;KAC7B,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC;IAyFvB;;OAEG;IACG,oBAAoB,CAAC,MAAM,EAAE;QACjC,gBAAgB,EAAE,MAAM,CAAC;QACzB,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,YAAY,CAAC,EAAE,MAAM,CAAC;QACtB,kBAAkB,CAAC,EAAE,MAAM,CAAC;KAC7B,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC;IAuHvB;;OAEG;IACH,OAAO,CAAC,qBAAqB;IA6B7B;;OAEG;IACH,OAAO,CAAC,sBAAsB;IAyB9B;;OAEG;IACG,iBAAiB,CAAC,MAAM,EAAE;QAC9B,gBAAgB,EAAE,MAAM,CAAC;QACzB,iBAAiB,CAAC,EAAE,QAAQ,EAAE,CAAC;QAC/B,YAAY,CAAC,EAAE,MAAM,CAAC;KACvB,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC;IAmGvB;;OAEG;IACG,yBAAyB,CAAC,MAAM,EAAE;QACtC,gBAAgB,EAAE,MAAM,CAAC;QACzB,YAAY,CAAC,EAAE,MAAM,CAAC;KACvB,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC;IA0FvB;;OAEG;IACG,yBAAyB,CAAC,MAAM,EAAE;QACtC,gBAAgB,EAAE,MAAM,CAAC;QACzB,YAAY,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAAE,CAAC;QACzC,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;QACtB,YAAY,CAAC,EAAE,MAAM,CAAC;KACvB,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC;CA8GxB"}
|