@orq-ai/evaluatorq 1.3.1 → 1.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29) hide show
  1. package/dist/lib/integrations/simulation/adapters.d.ts +28 -5
  2. package/dist/lib/integrations/simulation/adapters.d.ts.map +1 -1
  3. package/dist/lib/integrations/simulation/adapters.js +113 -7
  4. package/dist/lib/integrations/simulation/agents/base.d.ts +3 -0
  5. package/dist/lib/integrations/simulation/agents/base.d.ts.map +1 -1
  6. package/dist/lib/integrations/simulation/agents/base.js +104 -82
  7. package/dist/lib/integrations/simulation/agents/judge.d.ts.map +1 -1
  8. package/dist/lib/integrations/simulation/agents/judge.js +1 -0
  9. package/dist/lib/integrations/simulation/agents/user-simulator.d.ts.map +1 -1
  10. package/dist/lib/integrations/simulation/agents/user-simulator.js +4 -1
  11. package/dist/lib/integrations/simulation/generators/first-message-generator.d.ts.map +1 -1
  12. package/dist/lib/integrations/simulation/generators/first-message-generator.js +51 -28
  13. package/dist/lib/integrations/simulation/generators/persona-generator.d.ts.map +1 -1
  14. package/dist/lib/integrations/simulation/generators/persona-generator.js +144 -102
  15. package/dist/lib/integrations/simulation/generators/scenario-generator.d.ts.map +1 -1
  16. package/dist/lib/integrations/simulation/generators/scenario-generator.js +274 -169
  17. package/dist/lib/integrations/simulation/index.d.ts +1 -1
  18. package/dist/lib/integrations/simulation/index.d.ts.map +1 -1
  19. package/dist/lib/integrations/simulation/index.js +1 -1
  20. package/dist/lib/integrations/simulation/runner/simulation.d.ts.map +1 -1
  21. package/dist/lib/integrations/simulation/runner/simulation.js +147 -85
  22. package/dist/lib/integrations/simulation/simulation/index.d.ts.map +1 -1
  23. package/dist/lib/integrations/simulation/simulation/index.js +81 -27
  24. package/dist/lib/integrations/simulation/tracing.d.ts +111 -0
  25. package/dist/lib/integrations/simulation/tracing.d.ts.map +1 -0
  26. package/dist/lib/integrations/simulation/tracing.js +310 -0
  27. package/dist/lib/integrations/simulation/wrap-agent.js +2 -2
  28. package/dist/tsconfig.lib.tsbuildinfo +1 -1
  29. package/package.json +1 -1
@@ -4,6 +4,7 @@
4
4
  * Generates test scenarios from agent descriptions and optional context.
5
5
  */
6
6
  import OpenAI from "openai";
7
+ import { getTraceContextHeaders, recordLLMInput, recordLLMResponse, withLLMSpan, withSimulationSpan, } from "../tracing.js";
7
8
  import { extractJsonFromResponse } from "../utils/extract-json.js";
8
9
  import { delimit } from "../utils/sanitize.js";
9
10
  // Temperature settings for different generation modes
@@ -225,9 +226,13 @@ export class ScenarioGenerator {
225
226
  * Generate scenarios for agent testing.
226
227
  */
227
228
  async generate(params) {
228
- const { agentDescription, context = "", numScenarios = 10, edgeCasePercentage = 0.3, } = params;
229
- const numEdgeCases = Math.floor(numScenarios * edgeCasePercentage);
230
- const userPrompt = `Agent Description: ${delimit(agentDescription)}
229
+ return withSimulationSpan("orq.simulation.scenario_generation", {
230
+ "orq.simulation.num_scenarios": params.numScenarios ?? 10,
231
+ "orq.simulation.model": this.model,
232
+ }, async (_span) => {
233
+ const { agentDescription, context = "", numScenarios = 10, edgeCasePercentage = 0.3, } = params;
234
+ const numEdgeCases = Math.floor(numScenarios * edgeCasePercentage);
235
+ const userPrompt = `Agent Description: ${delimit(agentDescription)}
231
236
 
232
237
  Additional Context: ${delimit(context || "None provided")}
233
238
 
@@ -238,52 +243,73 @@ Generate ${numScenarios} diverse test scenarios for this agent.
238
243
  - Each scenario should have clear success/failure criteria
239
244
 
240
245
  Return ONLY a JSON array, no other text.`;
241
- try {
242
- const response = await this.client.chat.completions.create({
243
- model: this.model,
244
- messages: [
246
+ try {
247
+ const genMessages = [
245
248
  { role: "system", content: SCENARIO_GENERATOR_PROMPT },
246
249
  { role: "user", content: userPrompt },
247
- ],
248
- temperature: TEMPERATURE_CREATIVE,
249
- max_tokens: 6000,
250
- });
251
- const content = response.choices[0]?.message.content ?? "[]";
252
- const extracted = extractJsonFromResponse(content);
253
- const scenarioDicts = parseJsonArray(extracted);
254
- const scenarios = parseScenarios(scenarioDicts);
255
- if (scenarios.length < numScenarios) {
256
- console.warn(`ScenarioGenerator: requested ${numScenarios} scenarios but only ${scenarios.length} were successfully parsed`);
250
+ ];
251
+ const response = await withLLMSpan({
252
+ model: this.model,
253
+ temperature: TEMPERATURE_CREATIVE,
254
+ maxTokens: 6000,
255
+ purpose: "scenario_generation",
256
+ }, async (llmSpan) => {
257
+ recordLLMInput(llmSpan, [
258
+ { role: "system", content: SCENARIO_GENERATOR_PROMPT },
259
+ { role: "user", content: userPrompt },
260
+ ]);
261
+ const traceHeaders = await getTraceContextHeaders();
262
+ const res = await this.client.chat.completions.create({
263
+ model: this.model,
264
+ messages: genMessages,
265
+ temperature: TEMPERATURE_CREATIVE,
266
+ max_tokens: 6000,
267
+ }, { headers: traceHeaders });
268
+ recordLLMResponse(llmSpan, res);
269
+ return res;
270
+ });
271
+ const content = response.choices[0]?.message.content ?? "[]";
272
+ const extracted = extractJsonFromResponse(content);
273
+ const scenarioDicts = parseJsonArray(extracted);
274
+ const scenarios = parseScenarios(scenarioDicts);
275
+ if (scenarios.length < numScenarios) {
276
+ console.warn(`ScenarioGenerator: requested ${numScenarios} scenarios but only ${scenarios.length} were successfully parsed`);
277
+ }
278
+ return scenarios;
257
279
  }
258
- return scenarios;
259
- }
260
- catch (e) {
261
- if (e instanceof SyntaxError) {
262
- console.warn(`ScenarioGenerator: requested ${numScenarios} scenarios but LLM response was not valid JSON — returning empty array`);
263
- return [];
280
+ catch (e) {
281
+ if (e instanceof SyntaxError) {
282
+ console.warn(`ScenarioGenerator: requested ${numScenarios} scenarios but LLM response was not valid JSON — returning empty array`);
283
+ return [];
284
+ }
285
+ throw e;
264
286
  }
265
- throw e;
266
- }
287
+ });
267
288
  }
268
289
  /**
269
290
  * Generate scenarios with guaranteed emotion and criteria coverage.
270
291
  */
271
292
  async generateWithCoverage(params) {
272
- const { agentDescription, context = "", numScenarios = 6, edgeCasePercentage = 0.3, } = params;
273
- const emotions = [
274
- "neutral",
275
- "frustrated",
276
- "confused",
277
- "happy",
278
- "urgent",
279
- ];
280
- const numEdgeCases = Math.floor(numScenarios * edgeCasePercentage);
281
- const coverageInstructions = Array.from({ length: numScenarios }, (_, i) => {
282
- const emotion = emotions[i % emotions.length];
283
- const edgeLabel = i < numEdgeCases ? " (edge case)" : "";
284
- return `- Scenario ${i + 1}: starting_emotion='${emotion}'${edgeLabel}`;
285
- }).join("\n");
286
- const userPrompt = `Agent Description: ${delimit(agentDescription)}
293
+ return withSimulationSpan("orq.simulation.scenario_generation", {
294
+ "orq.simulation.num_scenarios": params.numScenarios ?? 6,
295
+ "orq.simulation.mode": "coverage",
296
+ "orq.simulation.model": this.model,
297
+ }, async (_span) => {
298
+ const { agentDescription, context = "", numScenarios = 6, edgeCasePercentage = 0.3, } = params;
299
+ const emotions = [
300
+ "neutral",
301
+ "frustrated",
302
+ "confused",
303
+ "happy",
304
+ "urgent",
305
+ ];
306
+ const numEdgeCases = Math.floor(numScenarios * edgeCasePercentage);
307
+ const coverageInstructions = Array.from({ length: numScenarios }, (_, i) => {
308
+ const emotion = emotions[i % emotions.length];
309
+ const edgeLabel = i < numEdgeCases ? " (edge case)" : "";
310
+ return `- Scenario ${i + 1}: starting_emotion='${emotion}'${edgeLabel}`;
311
+ }).join("\n");
312
+ const userPrompt = `Agent Description: ${delimit(agentDescription)}
287
313
 
288
314
  Additional Context: ${delimit(context || "None provided")}
289
315
 
@@ -298,39 +324,55 @@ Additional requirements:
298
324
  - Cover different types of user requests
299
325
 
300
326
  Return ONLY a JSON array, no other text.`;
301
- try {
302
- const response = await this.client.chat.completions.create({
303
- model: this.model,
304
- messages: [
327
+ try {
328
+ const covMessages = [
305
329
  { role: "system", content: SCENARIO_GENERATOR_PROMPT },
306
330
  { role: "user", content: userPrompt },
307
- ],
308
- temperature: TEMPERATURE_BALANCED,
309
- max_tokens: 6000,
310
- });
311
- const content = response.choices[0]?.message.content ?? "[]";
312
- const extracted = extractJsonFromResponse(content);
313
- const scenarioDicts = parseJsonArray(extracted);
314
- let scenarios = parseScenarios(scenarioDicts);
315
- // Validate coverage and fill gaps
316
- scenarios = this.ensureEmotionCoverage(scenarios, emotions);
317
- scenarios = this.ensureCriteriaCoverage(scenarios);
318
- // Trim to requested count (coverage adjustments may have kept extras)
319
- if (scenarios.length > numScenarios) {
320
- scenarios = scenarios.slice(0, numScenarios);
321
- }
322
- if (scenarios.length < numScenarios) {
323
- console.warn(`ScenarioGenerator: requested ${numScenarios} scenarios (with coverage) but only ${scenarios.length} were successfully parsed`);
331
+ ];
332
+ const response = await withLLMSpan({
333
+ model: this.model,
334
+ temperature: TEMPERATURE_BALANCED,
335
+ maxTokens: 6000,
336
+ purpose: "scenario_generation_coverage",
337
+ }, async (llmSpan) => {
338
+ recordLLMInput(llmSpan, [
339
+ { role: "system", content: SCENARIO_GENERATOR_PROMPT },
340
+ { role: "user", content: userPrompt },
341
+ ]);
342
+ const traceHeaders = await getTraceContextHeaders();
343
+ const res = await this.client.chat.completions.create({
344
+ model: this.model,
345
+ messages: covMessages,
346
+ temperature: TEMPERATURE_BALANCED,
347
+ max_tokens: 6000,
348
+ }, { headers: traceHeaders });
349
+ recordLLMResponse(llmSpan, res);
350
+ return res;
351
+ });
352
+ const content = response.choices[0]?.message.content ?? "[]";
353
+ const extracted = extractJsonFromResponse(content);
354
+ const scenarioDicts = parseJsonArray(extracted);
355
+ let scenarios = parseScenarios(scenarioDicts);
356
+ // Validate coverage and fill gaps
357
+ scenarios = this.ensureEmotionCoverage(scenarios, emotions);
358
+ scenarios = this.ensureCriteriaCoverage(scenarios);
359
+ // Trim to requested count (coverage adjustments may have kept extras)
360
+ if (scenarios.length > numScenarios) {
361
+ scenarios = scenarios.slice(0, numScenarios);
362
+ }
363
+ if (scenarios.length < numScenarios) {
364
+ console.warn(`ScenarioGenerator: requested ${numScenarios} scenarios (with coverage) but only ${scenarios.length} were successfully parsed`);
365
+ }
366
+ return scenarios;
324
367
  }
325
- return scenarios;
326
- }
327
- catch (e) {
328
- if (e instanceof SyntaxError) {
329
- console.warn(`ScenarioGenerator: requested ${numScenarios} scenarios but LLM response was not valid JSON — returning empty array`);
330
- return [];
368
+ catch (e) {
369
+ if (e instanceof SyntaxError) {
370
+ console.warn(`ScenarioGenerator: requested ${numScenarios} scenarios but LLM response was not valid JSON — returning empty array`);
371
+ return [];
372
+ }
373
+ throw e;
331
374
  }
332
- throw e;
333
- }
375
+ });
334
376
  }
335
377
  /**
336
378
  * Ensure all starting emotions are covered.
@@ -381,11 +423,16 @@ Return ONLY a JSON array, no other text.`;
381
423
  * Generate edge case scenarios specifically.
382
424
  */
383
425
  async generateEdgeCases(params) {
384
- const { agentDescription, existingScenarios, numEdgeCases = 5 } = params;
385
- const existingNames = existingScenarios
386
- ? existingScenarios.map((s) => s.name)
387
- : [];
388
- const userPrompt = `Agent Description: ${delimit(agentDescription)}
426
+ return withSimulationSpan("orq.simulation.scenario_generation", {
427
+ "orq.simulation.num_scenarios": params.numEdgeCases ?? 5,
428
+ "orq.simulation.mode": "edge_cases",
429
+ "orq.simulation.model": this.model,
430
+ }, async (_span) => {
431
+ const { agentDescription, existingScenarios, numEdgeCases = 5, } = params;
432
+ const existingNames = existingScenarios
433
+ ? existingScenarios.map((s) => s.name)
434
+ : [];
435
+ const userPrompt = `Agent Description: ${delimit(agentDescription)}
389
436
 
390
437
  Existing scenarios (avoid duplicating these):
391
438
  ${delimit(JSON.stringify(existingNames, null, 2))}
@@ -399,43 +446,64 @@ Generate ${numEdgeCases} EDGE CASE scenarios that:
399
446
  Each scenario MUST have is_edge_case: true
400
447
 
401
448
  Return ONLY a JSON array, no other text.`;
402
- try {
403
- const response = await this.client.chat.completions.create({
404
- model: this.model,
405
- messages: [
449
+ try {
450
+ const edgeMessages = [
406
451
  { role: "system", content: SCENARIO_GENERATOR_PROMPT },
407
452
  { role: "user", content: userPrompt },
408
- ],
409
- temperature: TEMPERATURE_EDGE_CASE,
410
- max_tokens: 4000,
411
- });
412
- const content = response.choices[0]?.message.content ?? "[]";
413
- const extracted = extractJsonFromResponse(content);
414
- const scenarioDicts = parseJsonArray(extracted);
415
- // Force edge case flag
416
- for (const sDict of scenarioDicts) {
417
- sDict.is_edge_case = true;
418
- }
419
- const scenarios = parseScenarios(scenarioDicts);
420
- if (scenarios.length < numEdgeCases) {
421
- console.warn(`ScenarioGenerator: requested ${numEdgeCases} edge cases but only ${scenarios.length} were successfully parsed`);
453
+ ];
454
+ const response = await withLLMSpan({
455
+ model: this.model,
456
+ temperature: TEMPERATURE_EDGE_CASE,
457
+ maxTokens: 4000,
458
+ purpose: "scenario_edge_cases",
459
+ }, async (llmSpan) => {
460
+ recordLLMInput(llmSpan, [
461
+ { role: "system", content: SCENARIO_GENERATOR_PROMPT },
462
+ { role: "user", content: userPrompt },
463
+ ]);
464
+ const traceHeaders = await getTraceContextHeaders();
465
+ const res = await this.client.chat.completions.create({
466
+ model: this.model,
467
+ messages: edgeMessages,
468
+ temperature: TEMPERATURE_EDGE_CASE,
469
+ max_tokens: 4000,
470
+ }, { headers: traceHeaders });
471
+ recordLLMResponse(llmSpan, res);
472
+ return res;
473
+ });
474
+ const content = response.choices[0]?.message.content ?? "[]";
475
+ const extracted = extractJsonFromResponse(content);
476
+ const scenarioDicts = parseJsonArray(extracted);
477
+ // Force edge case flag
478
+ for (const sDict of scenarioDicts) {
479
+ sDict.is_edge_case = true;
480
+ }
481
+ const scenarios = parseScenarios(scenarioDicts);
482
+ if (scenarios.length < numEdgeCases) {
483
+ console.warn(`ScenarioGenerator: requested ${numEdgeCases} edge cases but only ${scenarios.length} were successfully parsed`);
484
+ }
485
+ return scenarios;
422
486
  }
423
- return scenarios;
424
- }
425
- catch (e) {
426
- if (e instanceof SyntaxError) {
427
- console.warn(`ScenarioGenerator: requested ${numEdgeCases} edge cases but LLM response was not valid JSON — returning empty array`);
428
- return [];
487
+ catch (e) {
488
+ if (e instanceof SyntaxError) {
489
+ console.warn(`ScenarioGenerator: requested ${numEdgeCases} edge cases but LLM response was not valid JSON — returning empty array`);
490
+ return [];
491
+ }
492
+ throw e;
429
493
  }
430
- throw e;
431
- }
494
+ });
432
495
  }
433
496
  /**
434
497
  * Generate boundary/out-of-scope test scenarios.
435
498
  */
436
499
  async generateBoundaryScenarios(params) {
437
- const { agentDescription, numScenarios = 5 } = params;
438
- const userPrompt = `Agent Description: ${delimit(agentDescription)}
500
+ return withSimulationSpan("orq.simulation.scenario_generation", {
501
+ "orq.simulation.num_scenarios": params.numScenarios ?? 5,
502
+ "orq.simulation.mode": "boundary",
503
+ "orq.simulation.model": this.model,
504
+ }, async (_span) => {
505
+ const { agentDescription, numScenarios = 5 } = params;
506
+ const userPrompt = `Agent Description: ${delimit(agentDescription)}
439
507
 
440
508
  Generate ${numScenarios} BOUNDARY TEST scenarios that probe the limits of this agent's scope.
441
509
 
@@ -448,56 +516,77 @@ Include a mix of:
448
516
  Each scenario MUST have is_edge_case: true
449
517
 
450
518
  Return ONLY a JSON array, no other text.`;
451
- try {
452
- const response = await this.client.chat.completions.create({
453
- model: this.model,
454
- messages: [
519
+ try {
520
+ const bndMessages = [
455
521
  { role: "system", content: BOUNDARY_SCENARIO_PROMPT },
456
522
  { role: "user", content: userPrompt },
457
- ],
458
- temperature: TEMPERATURE_EDGE_CASE,
459
- max_tokens: 4000,
460
- });
461
- const content = response.choices[0]?.message.content ?? "[]";
462
- const extracted = extractJsonFromResponse(content);
463
- const scenarioDicts = parseJsonArray(extracted);
464
- // Force edge case flag
465
- for (const sDict of scenarioDicts) {
466
- sDict.is_edge_case = true;
467
- }
468
- const scenarios = parseScenarios(scenarioDicts);
469
- if (scenarios.length < numScenarios) {
470
- console.warn(`ScenarioGenerator: requested ${numScenarios} boundary scenarios but only ${scenarios.length} were successfully parsed`);
523
+ ];
524
+ const response = await withLLMSpan({
525
+ model: this.model,
526
+ temperature: TEMPERATURE_EDGE_CASE,
527
+ maxTokens: 4000,
528
+ purpose: "scenario_boundary",
529
+ }, async (llmSpan) => {
530
+ recordLLMInput(llmSpan, [
531
+ { role: "system", content: BOUNDARY_SCENARIO_PROMPT },
532
+ { role: "user", content: userPrompt },
533
+ ]);
534
+ const traceHeaders = await getTraceContextHeaders();
535
+ const res = await this.client.chat.completions.create({
536
+ model: this.model,
537
+ messages: bndMessages,
538
+ temperature: TEMPERATURE_EDGE_CASE,
539
+ max_tokens: 4000,
540
+ }, { headers: traceHeaders });
541
+ recordLLMResponse(llmSpan, res);
542
+ return res;
543
+ });
544
+ const content = response.choices[0]?.message.content ?? "[]";
545
+ const extracted = extractJsonFromResponse(content);
546
+ const scenarioDicts = parseJsonArray(extracted);
547
+ // Force edge case flag
548
+ for (const sDict of scenarioDicts) {
549
+ sDict.is_edge_case = true;
550
+ }
551
+ const scenarios = parseScenarios(scenarioDicts);
552
+ if (scenarios.length < numScenarios) {
553
+ console.warn(`ScenarioGenerator: requested ${numScenarios} boundary scenarios but only ${scenarios.length} were successfully parsed`);
554
+ }
555
+ return scenarios;
471
556
  }
472
- return scenarios;
473
- }
474
- catch (e) {
475
- if (e instanceof SyntaxError) {
476
- console.warn(`ScenarioGenerator: requested ${numScenarios} boundary scenarios but LLM response was not valid JSON — returning empty array`);
477
- return [];
557
+ catch (e) {
558
+ if (e instanceof SyntaxError) {
559
+ console.warn(`ScenarioGenerator: requested ${numScenarios} boundary scenarios but LLM response was not valid JSON — returning empty array`);
560
+ return [];
561
+ }
562
+ throw e;
478
563
  }
479
- throw e;
480
- }
564
+ });
481
565
  }
482
566
  /**
483
567
  * Generate security test scenarios inspired by OWASP attack patterns.
484
568
  */
485
569
  async generateSecurityScenarios(params) {
486
- const { agentDescription, seedExamples, categories, numScenarios = 10, } = params;
487
- let categoryFocus = "";
488
- if (categories && categories.length > 0) {
489
- const catNames = categories.map((cat) => {
490
- const normalized = cat.toUpperCase().replace("OWASP-", "");
491
- return `OWASP-${normalized}`;
492
- });
493
- categoryFocus = `\nFocus on these OWASP categories: ${delimit(catNames.join(", "))}`;
494
- }
495
- let seedText = "";
496
- if (seedExamples && seedExamples.length > 0) {
497
- const examplesToShow = seedExamples.slice(0, 5);
498
- seedText = `\n\nUse these attack patterns as INSPIRATION (generate NOVEL variations, not copies):\n${delimit(JSON.stringify(examplesToShow, null, 2))}`;
499
- }
500
- const userPrompt = `Agent Description: ${delimit(agentDescription)}
570
+ return withSimulationSpan("orq.simulation.scenario_generation", {
571
+ "orq.simulation.num_scenarios": params.numScenarios ?? 10,
572
+ "orq.simulation.mode": "security",
573
+ "orq.simulation.model": this.model,
574
+ }, async (_span) => {
575
+ const { agentDescription, seedExamples, categories, numScenarios = 10, } = params;
576
+ let categoryFocus = "";
577
+ if (categories && categories.length > 0) {
578
+ const catNames = categories.map((cat) => {
579
+ const normalized = cat.toUpperCase().replace("OWASP-", "");
580
+ return `OWASP-${normalized}`;
581
+ });
582
+ categoryFocus = `\nFocus on these OWASP categories: ${delimit(catNames.join(", "))}`;
583
+ }
584
+ let seedText = "";
585
+ if (seedExamples && seedExamples.length > 0) {
586
+ const examplesToShow = seedExamples.slice(0, 5);
587
+ seedText = `\n\nUse these attack patterns as INSPIRATION (generate NOVEL variations, not copies):\n${delimit(JSON.stringify(examplesToShow, null, 2))}`;
588
+ }
589
+ const userPrompt = `Agent Description: ${delimit(agentDescription)}
501
590
  ${categoryFocus}
502
591
  ${seedText}
503
592
 
@@ -511,35 +600,51 @@ Requirements:
511
600
  - Include conversation_strategy matching the attack style
512
601
 
513
602
  Return ONLY a JSON array, no other text.`;
514
- try {
515
- const response = await this.client.chat.completions.create({
516
- model: this.model,
517
- messages: [
603
+ try {
604
+ const secMessages = [
518
605
  { role: "system", content: SECURITY_SCENARIO_PROMPT },
519
606
  { role: "user", content: userPrompt },
520
- ],
521
- temperature: TEMPERATURE_EDGE_CASE,
522
- max_tokens: 6000,
523
- });
524
- const content = response.choices[0]?.message.content ?? "[]";
525
- const extracted = extractJsonFromResponse(content);
526
- const scenarioDicts = parseJsonArray(extracted);
527
- // Force edge case flag
528
- for (const sDict of scenarioDicts) {
529
- sDict.is_edge_case = true;
530
- }
531
- const scenarios = parseScenarios(scenarioDicts);
532
- if (scenarios.length < numScenarios) {
533
- console.warn(`ScenarioGenerator: requested ${numScenarios} security scenarios but only ${scenarios.length} were successfully parsed`);
607
+ ];
608
+ const response = await withLLMSpan({
609
+ model: this.model,
610
+ temperature: TEMPERATURE_EDGE_CASE,
611
+ maxTokens: 6000,
612
+ purpose: "scenario_security",
613
+ }, async (llmSpan) => {
614
+ recordLLMInput(llmSpan, [
615
+ { role: "system", content: SECURITY_SCENARIO_PROMPT },
616
+ { role: "user", content: userPrompt },
617
+ ]);
618
+ const traceHeaders = await getTraceContextHeaders();
619
+ const res = await this.client.chat.completions.create({
620
+ model: this.model,
621
+ messages: secMessages,
622
+ temperature: TEMPERATURE_EDGE_CASE,
623
+ max_tokens: 6000,
624
+ }, { headers: traceHeaders });
625
+ recordLLMResponse(llmSpan, res);
626
+ return res;
627
+ });
628
+ const content = response.choices[0]?.message.content ?? "[]";
629
+ const extracted = extractJsonFromResponse(content);
630
+ const scenarioDicts = parseJsonArray(extracted);
631
+ // Force edge case flag
632
+ for (const sDict of scenarioDicts) {
633
+ sDict.is_edge_case = true;
634
+ }
635
+ const scenarios = parseScenarios(scenarioDicts);
636
+ if (scenarios.length < numScenarios) {
637
+ console.warn(`ScenarioGenerator: requested ${numScenarios} security scenarios but only ${scenarios.length} were successfully parsed`);
638
+ }
639
+ return scenarios;
534
640
  }
535
- return scenarios;
536
- }
537
- catch (e) {
538
- if (e instanceof SyntaxError) {
539
- console.warn(`ScenarioGenerator: requested ${numScenarios} security scenarios but LLM response was not valid JSON — returning empty array`);
540
- return [];
641
+ catch (e) {
642
+ if (e instanceof SyntaxError) {
643
+ console.warn(`ScenarioGenerator: requested ${numScenarios} security scenarios but LLM response was not valid JSON — returning empty array`);
644
+ return [];
645
+ }
646
+ throw e;
541
647
  }
542
- throw e;
543
- }
648
+ });
544
649
  }
545
650
  }
@@ -10,7 +10,7 @@
10
10
  * import { simulate, wrapSimulationAgent, toOpenResponses } from "@orq-ai/evaluatorq/simulation";
11
11
  * ```
12
12
  */
13
- export { fromChatCompletions, fromOrqDeployment } from "./adapters.js";
13
+ export { fromChatCompletions, fromOrqAgent, fromOrqDeployment, } from "./adapters.js";
14
14
  export type { AgentConfig } from "./agents/base.js";
15
15
  export { BaseAgent } from "./agents/base.js";
16
16
  export { JudgeAgent } from "./agents/judge.js";
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/lib/integrations/simulation/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAGH,OAAO,EAAE,mBAAmB,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AACvE,YAAY,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAEpD,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAC7C,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAC/C,OAAO,EAAE,kBAAkB,EAAE,MAAM,4BAA4B,CAAC;AAEhE,OAAO,EAAE,eAAe,EAAE,MAAM,cAAc,CAAC;AAC/C,YAAY,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAE9D,OAAO,EACL,gBAAgB,EAChB,YAAY,EACZ,qBAAqB,GACtB,MAAM,uBAAuB,CAAC;AAE/B,OAAO,EACL,kBAAkB,EAClB,qBAAqB,EACrB,gBAAgB,EAChB,iBAAiB,GAClB,MAAM,uBAAuB,CAAC;AAC/B,YAAY,EAAE,gBAAgB,EAAE,MAAM,mCAAmC,CAAC;AAE1E,OAAO,EACL,iBAAiB,EACjB,uBAAuB,EACvB,uBAAuB,GACxB,MAAM,mCAAmC,CAAC;AAC3C,YAAY,EACV,cAAc,EACd,SAAS,EACT,sBAAsB,EACtB,WAAW,GACZ,MAAM,wBAAwB,CAAC;AAEhC,OAAO,EAAE,gBAAgB,EAAE,MAAM,wBAAwB,CAAC;AAI1D,YAAY,EACV,yBAAyB,EACzB,cAAc,GACf,MAAM,uBAAuB,CAAC;AAE/B,OAAO,EAAE,mBAAmB,EAAE,QAAQ,EAAE,MAAM,uBAAuB,CAAC;AAEtE,YAAY,EACV,WAAW,EACX,kBAAkB,EAClB,oBAAoB,EACpB,SAAS,EACT,eAAe,EACf,SAAS,EACT,YAAY,EACZ,WAAW,EACX,QAAQ,EACR,OAAO,IAAI,iBAAiB,EAC5B,OAAO,EACP,QAAQ,EACR,gBAAgB,EAChB,eAAe,EACf,YAAY,EACZ,UAAU,EACV,WAAW,GACZ,MAAM,YAAY,CAAC;AAEpB,OAAO,EACL,uBAAuB,EACvB,oBAAoB,EACpB,uBAAuB,EACvB,cAAc,GACf,MAAM,2BAA2B,CAAC;AACnC,OAAO,EACL,0BAA0B,EAC1B,wBAAwB,EACxB,wBAAwB,EACxB,iBAAiB,GAClB,MAAM,4BAA4B,CAAC;AACpC,YAAY,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AAE5D,OAAO,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/lib/integrations/simulation/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAGH,OAAO,EACL,mBAAmB,EACnB,YAAY,EACZ,iBAAiB,GAClB,MAAM,eAAe,CAAC;AACvB,YAAY,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAEpD,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAC7C,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAC/C,OAAO,EAAE,kBAAkB,EAAE,MAAM,4BAA4B,CAAC;AAEhE,OAAO,EAAE,eAAe,EAAE,MAAM,cAAc,CAAC;AAC/C,YAAY,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAE9D,OAAO,EACL,gBAAgB,EAChB,YAAY,EACZ,qBAAqB,GACtB,MAAM,uBAAuB,CAAC;AAE/B,OAAO,EACL,kBAAkB,EAClB,qBAAqB,EACrB,gBAAgB,EAChB,iBAAiB,GAClB,MAAM,uBAAuB,CAAC;AAC/B,YAAY,EAAE,gBAAgB,EAAE,MAAM,mCAAmC,CAAC;AAE1E,OAAO,EACL,iBAAiB,EACjB,uBAAuB,EACvB,uBAAuB,GACxB,MAAM,mCAAmC,CAAC;AAC3C,YAAY,EACV,cAAc,EACd,SAAS,EACT,sBAAsB,EACtB,WAAW,GACZ,MAAM,wBAAwB,CAAC;AAEhC,OAAO,EAAE,gBAAgB,EAAE,MAAM,wBAAwB,CAAC;AAI1D,YAAY,EACV,yBAAyB,EACzB,cAAc,GACf,MAAM,uBAAuB,CAAC;AAE/B,OAAO,EAAE,mBAAmB,EAAE,QAAQ,EAAE,MAAM,uBAAuB,CAAC;AAEtE,YAAY,EACV,WAAW,EACX,kBAAkB,EAClB,oBAAoB,EACpB,SAAS,EACT,eAAe,EACf,SAAS,EACT,YAAY,EACZ,WAAW,EACX,QAAQ,EACR,OAAO,IAAI,iBAAiB,EAC5B,OAAO,EACP,QAAQ,EACR,gBAAgB,EAChB,eAAe,EACf,YAAY,EACZ,UAAU,EACV,WAAW,GACZ,MAAM,YAAY,CAAC;AAEpB,OAAO,EACL,uBAAuB,EACvB,oBAAoB,EACpB,uBAAuB,EACvB,cAAc,GACf,MAAM,2BAA2B,CAAC;AACnC,OAAO,EACL,0BAA0B,EAC1B,wBAAwB,EACxB,wBAAwB,EACxB,iBAAiB,GAClB,MAAM,4BAA4B,CAAC;AACpC,YAAY,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AAE5D,OAAO,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAC"}
@@ -11,7 +11,7 @@
11
11
  * ```
12
12
  */
13
13
  // --- Adapters ---
14
- export { fromChatCompletions, fromOrqDeployment } from "./adapters.js";
14
+ export { fromChatCompletions, fromOrqAgent, fromOrqDeployment, } from "./adapters.js";
15
15
  // --- Agents (advanced usage) ---
16
16
  export { BaseAgent } from "./agents/base.js";
17
17
  export { JudgeAgent } from "./agents/judge.js";
@@ -1 +1 @@
1
- {"version":3,"file":"simulation.d.ts","sourceRoot":"","sources":["../../../../../src/lib/integrations/simulation/runner/simulation.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAMH,OAAO,KAAK,EACV,WAAW,EACX,SAAS,EAGT,OAAO,EACP,QAAQ,EACR,gBAAgB,EAGjB,MAAM,aAAa,CAAC;AAOrB,+CAA+C;AAC/C,MAAM,WAAW,WAAW;IAC1B,OAAO,CAAC,QAAQ,EAAE,WAAW,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;CACnD;AAMD,MAAM,WAAW,sBAAsB;IACrC,WAAW,CAAC,EAAE,WAAW,CAAC;IAC1B,cAAc,CAAC,EAAE,CAAC,QAAQ,EAAE,WAAW,EAAE,KAAK,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;IACvE,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,SAAS;IACxB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,QAAQ,CAAC,EAAE,QAAQ,CAAC;IACpB,SAAS,CAAC,EAAE,SAAS,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,uDAAuD;IACvD,MAAM,CAAC,EAAE,WAAW,CAAC;CACtB;AAED,MAAM,WAAW,cAAc;IAC7B,UAAU,EAAE,SAAS,EAAE,CAAC;IACxB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,wEAAwE;IACxE,oBAAoB,CAAC,EAAE,MAAM,CAAC;IAC9B,mDAAmD;IACnD,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AA8DD,qBAAa,gBAAgB;IAC3B,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAc;IAC3C,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAC,CAEF;IAC9B,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAS;IAC/B,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAClC,OAAO,CAAC,YAAY,CAAuB;gBAE/B,MAAM,EAAE,sBAAsB;IAmB1C,OAAO,CAAC,eAAe;IAgBvB,0FAA0F;IACpF,GAAG,CAAC,MAAM,EAAE,SAAS,GAAG,OAAO,CAAC,gBAAgB,CAAC;IAsLvD,4DAA4D;IACtD,QAAQ,CAAC,MAAM,EAAE,cAAc,GAAG,OAAO,CAAC,gBAAgB,EAAE,CAAC;IA2DnE,4CAA4C;IACtC,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;YAUd,iBAAiB;IAU/B;;;OAGG;IACH,OAAO,CAAC,oBAAoB;YAiBd,cAAc;CAyC7B"}
1
+ {"version":3,"file":"simulation.d.ts","sourceRoot":"","sources":["../../../../../src/lib/integrations/simulation/runner/simulation.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAaH,OAAO,KAAK,EACV,WAAW,EACX,SAAS,EAGT,OAAO,EACP,QAAQ,EACR,gBAAgB,EAGjB,MAAM,aAAa,CAAC;AAOrB,+CAA+C;AAC/C,MAAM,WAAW,WAAW;IAC1B,OAAO,CAAC,QAAQ,EAAE,WAAW,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;CACnD;AAMD,MAAM,WAAW,sBAAsB;IACrC,WAAW,CAAC,EAAE,WAAW,CAAC;IAC1B,cAAc,CAAC,EAAE,CAAC,QAAQ,EAAE,WAAW,EAAE,KAAK,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;IACvE,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,SAAS;IACxB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,QAAQ,CAAC,EAAE,QAAQ,CAAC;IACpB,SAAS,CAAC,EAAE,SAAS,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,uDAAuD;IACvD,MAAM,CAAC,EAAE,WAAW,CAAC;CACtB;AAED,MAAM,WAAW,cAAc;IAC7B,UAAU,EAAE,SAAS,EAAE,CAAC;IACxB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,wEAAwE;IACxE,oBAAoB,CAAC,EAAE,MAAM,CAAC;IAC9B,mDAAmD;IACnD,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AA8DD,qBAAa,gBAAgB;IAC3B,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAc;IAC3C,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAC,CAEF;IAC9B,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAS;IAC/B,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAClC,OAAO,CAAC,YAAY,CAAuB;gBAE/B,MAAM,EAAE,sBAAsB;IAmB1C,OAAO,CAAC,eAAe;IAgBvB,0FAA0F;IACpF,GAAG,CAAC,MAAM,EAAE,SAAS,GAAG,OAAO,CAAC,gBAAgB,CAAC;IAwRvD,4DAA4D;IACtD,QAAQ,CAAC,MAAM,EAAE,cAAc,GAAG,OAAO,CAAC,gBAAgB,EAAE,CAAC;IA2DnE,4CAA4C;IACtC,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;YAUd,iBAAiB;IAU/B;;;OAGG;IACH,OAAO,CAAC,oBAAoB;YAiBd,cAAc;CAyC7B"}