npm - @orq-ai/evaluatorq - Versions diffs - 1.3.1 → 1.3.2 - Mend

@orq-ai/evaluatorq 1.3.1 → 1.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

package/dist/lib/integrations/simulation/runner/simulation.js CHANGED Viewed

@@ -7,6 +7,7 @@
 import OpenAI from "openai";
 import { JudgeAgent } from "../agents/judge.js";
 import { UserSimulatorAgent } from "../agents/user-simulator.js";
+import { recordLLMInput, recordLLMOutput, recordTokenUsage, setSpanAttrs, withSimulationSpan, } from "../tracing.js";
 import { buildDatapointSystemPrompt } from "../utils/prompt-builders.js";
 // ---------------------------------------------------------------------------
 // Helpers: create SimulationResult variants
@@ -111,96 +112,157 @@ export class SimulationRunner {
         // Declare usage helper references — initialized inside try after agents are created
         let getTotalUsage;
         try {
-            // Use stored system prompt if available, otherwise build from persona+scenario
-            const systemPrompt = storedSystemPrompt ??
-                buildDatapointSystemPrompt(persona, scenario);
-            const client = this.getSharedClient();
-            // Always create fresh agents per simulation (no shared state between concurrent runs)
-            const userSimulator = new UserSimulatorAgent({
-                model: this.model,
-                client,
-                systemPrompt: systemPrompt,
-            });
-            const judge = new JudgeAgent({
-                model: this.model,
-                client,
-                goal: scenario?.goal,
-                criteria: scenario?.criteria ?? [],
-                groundTruth: scenario?.ground_truth ?? "",
-            });
-            getTotalUsage = () => {
-                const usage = userSimulator.getUsage();
-                const judgeUsage = judge.getUsage();
-                usage.prompt_tokens += judgeUsage.prompt_tokens;
-                usage.completion_tokens += judgeUsage.completion_tokens;
-                usage.total_tokens += judgeUsage.total_tokens;
-                return usage;
-            };
-            const buildTurnMetrics = (turnNum, judgment, usageBefore) => {
-                const usageAfter = getTotalUsage();
-                return {
-                    turn_number: turnNum,
-                    token_usage: {
-                        prompt_tokens: usageAfter.prompt_tokens - usageBefore.prompt_tokens,
-                        completion_tokens: usageAfter.completion_tokens - usageBefore.completion_tokens,
-                        total_tokens: usageAfter.total_tokens - usageBefore.total_tokens,
-                    },
-                    response_quality: judgment.response_quality ?? null,
-                    hallucination_risk: judgment.hallucination_risk ?? null,
-                    tone_appropriateness: judgment.tone_appropriateness ?? null,
-                    factual_accuracy: judgment.factual_accuracy ?? null,
-                    judge_reason: judgment.reason,
+            return await withSimulationSpan("orq.simulation.run", {
+                "orq.simulation.persona": persona?.name,
+                "orq.simulation.scenario": scenario?.name,
+                "orq.simulation.max_turns": maxTurns,
+                "orq.simulation.model": this.model,
+            }, async (runSpan) => {
+                // Use stored system prompt if available, otherwise build from persona+scenario
+                const systemPrompt = storedSystemPrompt ??
+                    buildDatapointSystemPrompt(persona, scenario);
+                const client = this.getSharedClient();
+                // Always create fresh agents per simulation (no shared state between concurrent runs)
+                const userSimulator = new UserSimulatorAgent({
+                    model: this.model,
+                    client,
+                    systemPrompt: systemPrompt,
+                });
+                const judge = new JudgeAgent({
+                    model: this.model,
+                    client,
+                    goal: scenario?.goal,
+                    criteria: scenario?.criteria ?? [],
+                    groundTruth: scenario?.ground_truth ?? "",
+                });
+                getTotalUsage = () => {
+                    const usage = userSimulator.getUsage();
+                    const judgeUsage = judge.getUsage();
+                    usage.prompt_tokens += judgeUsage.prompt_tokens;
+                    usage.completion_tokens += judgeUsage.completion_tokens;
+                    usage.total_tokens += judgeUsage.total_tokens;
+                    return usage;
                 };
-            };
-            /** Check if this run has been cancelled (timeout). */
-            const checkCancelled = () => {
-                if (signal?.aborted) {
-                    throw new Error("Simulation cancelled");
-                }
-            };
-            checkCancelled();
-            // Generate or use first message
-            const firstMsg = firstMessage
-                ? firstMessage
-                : await userSimulator.generateFirstMessage();
-            messages.push({ role: "user", content: firstMsg });
-            let lastJudgment;
-            for (let turn = 0; turn < maxTurns; turn++) {
-                checkCancelled();
-                const usageBefore = getTotalUsage();
-                // 1. Target agent responds
-                const agentResponse = await this.getTargetResponse(messages.map((m) => ({ role: m.role, content: m.content })));
-                messages.push({ role: "assistant", content: agentResponse });
-                checkCancelled();
-                // 2. Judge evaluates
-                const judgment = await judge.evaluate(messages.map((m) => ({ role: m.role, content: m.content })), { signal });
-                turnMetricsList.push(buildTurnMetrics(turn + 1, judgment, usageBefore));
-                lastJudgment = judgment;
-                if (judgment.should_terminate) {
+                const buildTurnMetrics = (turnNum, judgment, usageBefore) => {
+                    const usageAfter = getTotalUsage();
                     return {
-                        messages,
-                        terminated_by: "judge",
-                        reason: judgment.reason,
-                        goal_achieved: judgment.goal_achieved,
-                        goal_completion_score: judgment.goal_completion_score,
-                        rules_broken: judgment.rules_broken,
-                        turn_count: turn + 1,
-                        turn_metrics: turnMetricsList,
-                        token_usage: getTotalUsage(),
-                        criteria_results: this.buildCriteriaResults(scenario, judgment),
-                        metadata: { persona: persona?.name, scenario: scenario?.name },
+                        turn_number: turnNum,
+                        token_usage: {
+                            prompt_tokens: usageAfter.prompt_tokens - usageBefore.prompt_tokens,
+                            completion_tokens: usageAfter.completion_tokens - usageBefore.completion_tokens,
+                            total_tokens: usageAfter.total_tokens - usageBefore.total_tokens,
+                        },
+                        response_quality: judgment.response_quality ?? null,
+                        hallucination_risk: judgment.hallucination_risk ?? null,
+                        tone_appropriateness: judgment.tone_appropriateness ?? null,
+                        factual_accuracy: judgment.factual_accuracy ?? null,
+                        judge_reason: judgment.reason,
                     };
-                }
-                // 3. User simulator continues (if not last turn)
-                if (turn < maxTurns - 1) {
+                };
+                /** Check if this run has been cancelled (timeout). */
+                const checkCancelled = () => {
+                    if (signal?.aborted) {
+                        throw new Error("Simulation cancelled");
+                    }
+                };
+                checkCancelled();
+                // Generate or use first message
+                const firstMsg = firstMessage
+                    ? firstMessage
+                    : await withSimulationSpan("orq.simulation.first_message_generation", {
+                        "orq.simulation.persona": persona?.name,
+                        "orq.simulation.scenario": scenario?.name,
+                        "orq.simulation.model": this.model,
+                    }, async () => userSimulator.generateFirstMessage());
+                messages.push({ role: "user", content: firstMsg });
+                let lastJudgment;
+                for (let turn = 0; turn < maxTurns; turn++) {
                     checkCancelled();
-                    const userResponse = await userSimulator.respondAsync(messages.map((m) => ({ role: m.role, content: m.content })), { signal });
-                    messages.push({ role: "user", content: userResponse });
+                    const usageBefore = getTotalUsage();
+                    await withSimulationSpan("orq.simulation.turn", {
+                        "orq.simulation.turn": turn + 1,
+                        "orq.simulation.max_turns": maxTurns,
+                    }, async (turnSpan) => {
+                        // 1. Target agent responds
+                        const targetMessages = messages.map((m) => ({
+                            role: m.role,
+                            content: m.content,
+                        }));
+                        const agentResponse = await withSimulationSpan("orq.simulation.target_call", undefined, async (targetSpan) => {
+                            recordLLMInput(targetSpan, targetMessages);
+                            const response = await this.getTargetResponse(targetMessages);
+                            recordLLMOutput(targetSpan, response);
+                            return response;
+                        });
+                        messages.push({ role: "assistant", content: agentResponse });
+                        checkCancelled();
+                        // 2. Judge evaluates
+                        const judgment = await withSimulationSpan("orq.simulation.judge_evaluation", undefined, async () => judge.evaluate(messages.map((m) => ({
+                            role: m.role,
+                            content: m.content,
+                        })), { signal }));
+                        turnMetricsList.push(buildTurnMetrics(turn + 1, judgment, usageBefore));
+                        lastJudgment = judgment;
+                        setSpanAttrs(turnSpan, {
+                            "orq.simulation.goal_achieved": judgment.goal_achieved,
+                            "orq.simulation.goal_completion_score": judgment.goal_completion_score,
+                            "orq.simulation.should_terminate": judgment.should_terminate,
+                        });
+                        if (!judgment.should_terminate && turn < maxTurns - 1) {
+                            // 3. User simulator continues
+                            checkCancelled();
+                            const userResponse = await withSimulationSpan("orq.simulation.user_simulator_call", undefined, async () => userSimulator.respondAsync(messages.map((m) => ({
+                                role: m.role,
+                                content: m.content,
+                            })), { signal, llmPurpose: "user_simulator" }));
+                            messages.push({ role: "user", content: userResponse });
+                        }
+                    });
+                    // Check if judge terminated after the turn span completes
+                    if (lastJudgment?.should_terminate) {
+                        const finalUsage = getTotalUsage();
+                        recordTokenUsage(runSpan, {
+                            promptTokens: finalUsage.prompt_tokens,
+                            completionTokens: finalUsage.completion_tokens,
+                            totalTokens: finalUsage.total_tokens,
+                        });
+                        setSpanAttrs(runSpan, {
+                            "orq.simulation.terminated_by": "judge",
+                            "orq.simulation.goal_achieved": lastJudgment.goal_achieved,
+                            "orq.simulation.turn_count": turn + 1,
+                        });
+                        return {
+                            messages,
+                            terminated_by: "judge",
+                            reason: lastJudgment.reason,
+                            goal_achieved: lastJudgment.goal_achieved,
+                            goal_completion_score: lastJudgment.goal_completion_score,
+                            rules_broken: lastJudgment.rules_broken,
+                            turn_count: turn + 1,
+                            turn_metrics: turnMetricsList,
+                            token_usage: finalUsage,
+                            criteria_results: this.buildCriteriaResults(scenario, lastJudgment),
+                            metadata: {
+                                persona: persona?.name,
+                                scenario: scenario?.name,
+                            },
+                        };
+                    }
                 }
-            }
-            // Max turns reached — preserve the last judge's assessment instead of
-            // hardcoding goal_achieved: false, so the final evaluation is not lost.
-            return maxTurnsResult(maxTurns, messages, turnMetricsList, getTotalUsage(), persona, scenario, lastJudgment);
+                // Max turns reached
+                const finalUsage = getTotalUsage();
+                recordTokenUsage(runSpan, {
+                    promptTokens: finalUsage.prompt_tokens,
+                    completionTokens: finalUsage.completion_tokens,
+                    totalTokens: finalUsage.total_tokens,
+                });
+                setSpanAttrs(runSpan, {
+                    "orq.simulation.terminated_by": "max_turns",
+                    "orq.simulation.goal_achieved": lastJudgment?.goal_achieved ?? false,
+                    "orq.simulation.turn_count": maxTurns,
+                });
+                return maxTurnsResult(maxTurns, messages, turnMetricsList, finalUsage, persona, scenario, lastJudgment);
+            });
         }
         catch (e) {
             console.error("SimulationRunner.run() failed:", e);

package/dist/lib/integrations/simulation/simulation/index.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../../src/lib/integrations/simulation/simulation/index.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAQH,OAAO,KAAK,EACV,WAAW,EACX,SAAS,EACT,OAAO,EACP,QAAQ,EACR,gBAAgB,EACjB,MAAM,aAAa,CAAC;AAOrB,MAAM,WAAW,cAAc;IAC7B,cAAc,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,cAAc,CAAC,EAAE,CAAC,QAAQ,EAAE,WAAW,EAAE,KAAK,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;IACvE,QAAQ,CAAC,EAAE,OAAO,EAAE,CAAC;IACrB,SAAS,CAAC,EAAE,QAAQ,EAAE,CAAC;IACvB,UAAU,CAAC,EAAE,SAAS,EAAE,CAAC;IACzB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;IACtB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;;;;;;;GAQG;AACH,wBAAsB,QAAQ,CAC5B,MAAM,EAAE,cAAc,GACrB,OAAO,CAAC,gBAAgB,EAAE,CAAC,CAkH7B;AAMD,MAAM,WAAW,yBAAyB;IACxC,cAAc,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,gBAAgB,EAAE,MAAM,CAAC;IACzB,cAAc,CAAC,EAAE,CAAC,QAAQ,EAAE,WAAW,EAAE,KAAK,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;IACvE,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;IACtB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;;;GAIG;AACH,wBAAsB,mBAAmB,CACvC,MAAM,EAAE,yBAAyB,GAChC,OAAO,CAAC,gBAAgB,EAAE,CAAC,CAgF7B;AAGD,OAAO,EACL,gBAAgB,EAChB,YAAY,EACZ,qBAAqB,GACtB,MAAM,wBAAwB,CAAC"}
1	+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../../src/lib/integrations/simulation/simulation/index.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAcH,OAAO,KAAK,EACV,WAAW,EACX,SAAS,EACT,OAAO,EACP,QAAQ,EACR,gBAAgB,EACjB,MAAM,aAAa,CAAC;AAOrB,MAAM,WAAW,cAAc;IAC7B,cAAc,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,cAAc,CAAC,EAAE,CAAC,QAAQ,EAAE,WAAW,EAAE,KAAK,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;IACvE,QAAQ,CAAC,EAAE,OAAO,EAAE,CAAC;IACrB,SAAS,CAAC,EAAE,QAAQ,EAAE,CAAC;IACvB,UAAU,CAAC,EAAE,SAAS,EAAE,CAAC;IACzB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;IACtB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;;;;;;;GAQG;AACH,wBAAsB,QAAQ,CAC5B,MAAM,EAAE,cAAc,GACrB,OAAO,CAAC,gBAAgB,EAAE,CAAC,CAkB7B;AA2JD,MAAM,WAAW,yBAAyB;IACxC,cAAc,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,gBAAgB,EAAE,MAAM,CAAC;IACzB,cAAc,CAAC,EAAE,CAAC,QAAQ,EAAE,WAAW,EAAE,KAAK,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;IACvE,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;IACtB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;;;GAIG;AACH,wBAAsB,mBAAmB,CACvC,MAAM,EAAE,yBAAyB,GAChC,OAAO,CAAC,gBAAgB,EAAE,CAAC,CAuG7B;AAGD,OAAO,EACL,gBAAgB,EAChB,YAAY,EACZ,qBAAqB,GACtB,MAAM,wBAAwB,CAAC"}

package/dist/lib/integrations/simulation/simulation/index.js CHANGED Viewed

@@ -5,10 +5,12 @@
  * either standalone or within the evaluatorq framework.
  */
 import OpenAI from "openai";
-import { fromOrqDeployment } from "../adapters.js";
+import { flushTracing, initTracingIfNeeded } from "../../../tracing/setup.js";
+import { fromOrqAgent } from "../adapters.js";
 import { getEvaluator } from "../evaluators/index.js";
 import { FirstMessageGenerator } from "../generators/first-message-generator.js";
 import { SimulationRunner } from "../runner/simulation.js";
+import { recordTokenUsage, setSpanAttrs, withSimulationSpan, } from "../tracing.js";
 import { generateDatapoint } from "../utils/prompt-builders.js";
 /**
  * High-level function to run agent simulations.
@@ -20,6 +22,24 @@ import { generateDatapoint } from "../utils/prompt-builders.js";
  * - Applying evaluators to results
  */
 export async function simulate(params) {
+    // Initialize OTel tracing (no-op if already initialized or not configured)
+    await initTracingIfNeeded();
+    try {
+        return await withSimulationSpan("orq.simulation.pipeline", {
+            "orq.simulation.evaluation_name": params.evaluationName,
+            "orq.simulation.max_turns": params.maxTurns ?? 10,
+            "orq.simulation.parallelism": params.parallelism ?? 5,
+        }, (pipelineSpan) => _simulateCore(params, pipelineSpan));
+    }
+    finally {
+        // Flush pending spans to ensure they're exported before the process exits
+        await flushTracing();
+    }
+}
+// ---------------------------------------------------------------------------
+// Core simulation logic (shared by simulate and generateAndSimulate)
+// ---------------------------------------------------------------------------
+async function _simulateCore(params, pipelineSpan) {
     const { targetCallback, personas, scenarios, maxTurns = 10, model = "azure/gpt-4o-mini", evaluators: evaluatorNames, parallelism = 5, } = params;
     let { datapoints } = params;
     // Validate evaluator names early — throw on unknown names
@@ -69,10 +89,13 @@ export async function simulate(params) {
     if (!datapoints || datapoints.length === 0) {
         throw new Error("No datapoints to simulate — persona or scenario generation may have failed");
     }
+    setSpanAttrs(pipelineSpan, {
+        "orq.simulation.datapoints_count": datapoints.length,
+    });
     // Bridge agentKey to invoke() if no callback is provided
     let resolvedCallback = targetCallback;
     if (!resolvedCallback && params.agentKey) {
-        resolvedCallback = fromOrqDeployment(params.agentKey);
+        resolvedCallback = fromOrqAgent(params.agentKey);
     }
     if (!resolvedCallback) {
         throw new Error("Either targetCallback or agentKey is required");
@@ -98,6 +121,21 @@ export async function simulate(params) {
             }
             result.metadata.evaluator_scores = scores;
         }
+        // Record aggregate token usage on the pipeline span
+        const totalUsage = results.reduce((acc, r) => ({
+            prompt: acc.prompt + (r.token_usage?.prompt_tokens ?? 0),
+            completion: acc.completion + (r.token_usage?.completion_tokens ?? 0),
+            total: acc.total + (r.token_usage?.total_tokens ?? 0),
+        }), { prompt: 0, completion: 0, total: 0 });
+        recordTokenUsage(pipelineSpan, {
+            promptTokens: totalUsage.prompt,
+            completionTokens: totalUsage.completion,
+            totalTokens: totalUsage.total,
+        });
+        setSpanAttrs(pipelineSpan, {
+            "orq.simulation.results_count": results.length,
+            "orq.simulation.goal_achieved_count": results.filter((r) => r.goal_achieved).length,
+        });
         return results;
     }
     finally {
@@ -110,11 +148,13 @@ export async function simulate(params) {
  * Convenience function that combines generation and simulation.
  */
 export async function generateAndSimulate(params) {
+    // Initialize tracing early so generation spans are captured
+    await initTracingIfNeeded();
     const { evaluationName, agentDescription, targetCallback, numPersonas = 5, numScenarios = 5, maxTurns = 10, model = "azure/gpt-4o-mini", evaluators, parallelism = 5, } = params;
     // Bridge agentKey to invoke() if no callback is provided
     let resolvedCallback = targetCallback;
     if (!resolvedCallback && params.agentKey) {
-        resolvedCallback = fromOrqDeployment(params.agentKey);
+        resolvedCallback = fromOrqAgent(params.agentKey);
     }
     if (!resolvedCallback) {
         throw new Error("Either targetCallback or agentKey is required for generateAndSimulate");
@@ -130,30 +170,44 @@ export async function generateAndSimulate(params) {
     catch (err) {
         throw new Error("Generators module not available. Install generators or provide pre-built datapoints using simulate() instead.", { cause: err });
     }
-    // Generate personas and scenarios in parallel
-    const personaGen = new PersonaGenerator({ model });
-    const scenarioGen = new ScenarioGenerator({ model });
-    const [personas, scenarios] = await Promise.all([
-        personaGen.generate({
-            agentDescription,
-            numPersonas,
-        }),
-        scenarioGen.generate({
-            agentDescription,
-            numScenarios,
-        }),
-    ]);
-    // Run simulations
-    return simulate({
-        evaluationName,
-        targetCallback: resolvedCallback,
-        personas,
-        scenarios,
-        maxTurns,
-        model,
-        evaluators,
-        parallelism,
-    });
+    try {
+        return await withSimulationSpan("orq.simulation.pipeline", {
+            "orq.simulation.evaluation_name": evaluationName,
+            "orq.simulation.mode": "generate_and_simulate",
+            "orq.simulation.num_personas": numPersonas,
+            "orq.simulation.num_scenarios": numScenarios,
+            "orq.simulation.max_turns": maxTurns,
+            "orq.simulation.parallelism": parallelism,
+        }, async (pipelineSpan) => {
+            // Generate personas and scenarios in parallel (under the pipeline span)
+            const personaGen = new PersonaGenerator({ model });
+            const scenarioGen = new ScenarioGenerator({ model });
+            const [personas, scenarios] = await Promise.all([
+                personaGen.generate({
+                    agentDescription,
+                    numPersonas,
+                }),
+                scenarioGen.generate({
+                    agentDescription,
+                    numScenarios,
+                }),
+            ]);
+            // Delegate to core logic (no duplicate pipeline span)
+            return _simulateCore({
+                evaluationName,
+                targetCallback: resolvedCallback,
+                personas,
+                scenarios,
+                maxTurns,
+                model,
+                evaluators,
+                parallelism,
+            }, pipelineSpan);
+        });
+    }
+    finally {
+        await flushTracing();
+    }
 }
 // Re-export evaluator utilities for convenience
 export { getAllEvaluators, getEvaluator, SIMULATION_EVALUATORS, } from "../evaluators/index.js";

package/dist/lib/integrations/simulation/tracing.d.ts ADDED Viewed

@@ -0,0 +1,111 @@
+/**
+ * OpenTelemetry tracing utilities for the agent simulation module.
+ *
+ * Provides span creation helpers that mirror the redteam module's tracing
+ * patterns, adapted for the TypeScript simulation module. All functions
+ * gracefully degrade to no-ops when tracing is not enabled.
+ *
+ * Span hierarchy:
+ *   orq.simulation.pipeline (root)
+ *     ├── orq.simulation.persona_generation
+ *     ├── orq.simulation.scenario_generation
+ *     ├── orq.simulation.run (per datapoint)
+ *     │   ├── orq.simulation.first_message_generation
+ *     │   └── orq.simulation.turn (per turn)
+ *     │       ├── orq.simulation.target_call
+ *     │       ├── orq.simulation.judge_evaluation
+ *     │       └── orq.simulation.user_simulator_call
+ */
+import type { Span } from "@opentelemetry/api";
+/**
+ * Execute a function within a simulation span (SpanKind.INTERNAL).
+ *
+ * Gracefully returns `fn(undefined)` when tracing is not enabled.
+ * Automatically records errors and sets span status.
+ */
+export declare function withSimulationSpan<T>(name: string, attributes: Record<string, string | number | boolean | undefined> | undefined, fn: (span: Span | undefined) => Promise<T>): Promise<T>;
+export interface LLMSpanOptions {
+    model: string;
+    operation?: string;
+    provider?: string;
+    temperature?: number;
+    maxTokens?: number;
+    purpose?: string;
+}
+/**
+ * Execute a function within a GenAI LLM span (SpanKind.CLIENT).
+ *
+ * Follows OTel GenAI semantic conventions for client inference spans.
+ * Span name is derived as `"{operation} {model}"`.
+ */
+export declare function withLLMSpan<T>(options: LLMSpanOptions, fn: (span: Span | undefined) => Promise<T>): Promise<T>;
+export interface TokenUsageAttrs {
+    promptTokens?: number;
+    completionTokens?: number;
+    totalTokens?: number;
+    cacheReadInputTokens?: number;
+    cacheCreationInputTokens?: number;
+}
+/**
+ * Record token usage attributes on a span.
+ *
+ * Sets both OTel GenAI names and bare attribute keys for platform
+ * compatibility (matches the redteam module's dual-naming convention).
+ */
+export declare function recordTokenUsage(span: Span | undefined, usage: TokenUsageAttrs): void;
+/**
+ * Record LLM input messages on a span.
+ *
+ * Sets both `gen_ai.input.messages` (OTel GenAI convention) and `input`
+ * (platform fallback), matching the redteam module's dual-attribute pattern.
+ * Suppressed when `OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT=false`.
+ */
+export declare function recordLLMInput(span: Span | undefined, messages: Array<{
+    role: string;
+    content: string;
+}>): void;
+/**
+ * Record a single LLM output string on a span.
+ *
+ * Sets `gen_ai.output.messages` and `output` (platform fallback). Suppressed
+ * when `OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT=false`.
+ */
+export declare function recordLLMOutput(span: Span | undefined, output: string): void;
+/**
+ * Record LLM response attributes on a span from an OpenAI-compatible response.
+ *
+ * Sets `gen_ai.output.messages` and `output` with the response content,
+ * plus token usage, finish reasons, and response metadata.
+ */
+export declare function recordLLMResponse(span: Span | undefined, response: {
+    id?: string;
+    model?: string;
+    usage?: {
+        prompt_tokens: number;
+        completion_tokens: number;
+        total_tokens: number;
+        prompt_tokens_details?: {
+            cached_tokens?: number;
+        } | null;
+    } | null;
+    choices?: Array<{
+        finish_reason?: string | null;
+        message?: {
+            role?: string;
+            content?: string | null;
+        };
+    }>;
+}): void;
+/**
+ * Batch set multiple attributes on a span. Skips undefined values.
+ */
+export declare function setSpanAttrs(span: Span | undefined, attrs: Record<string, string | number | boolean | undefined>): void;
+/**
+ * Get W3C trace context headers (traceparent/tracestate) for the current
+ * active span. Returns an empty object when tracing is not available.
+ *
+ * Used to propagate trace context into outgoing HTTP requests so the
+ * router can create child spans under the current simulation span.
+ */
+export declare function getTraceContextHeaders(): Promise<Record<string, string>>;
+//# sourceMappingURL=tracing.d.ts.map

package/dist/lib/integrations/simulation/tracing.d.ts.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"tracing.d.ts","sourceRoot":"","sources":["../../../../src/lib/integrations/simulation/tracing.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAEH,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,oBAAoB,CAAC;AAQ/C;;;;;GAKG;AACH,wBAAsB,kBAAkB,CAAC,CAAC,EACxC,IAAI,EAAE,MAAM,EACZ,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,GAAG,OAAO,GAAG,SAAS,CAAC,GAAG,SAAS,EAC7E,EAAE,EAAE,CAAC,IAAI,EAAE,IAAI,GAAG,SAAS,KAAK,OAAO,CAAC,CAAC,CAAC,GACzC,OAAO,CAAC,CAAC,CAAC,CAgDZ;AAMD,MAAM,WAAW,cAAc;IAC7B,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED;;;;;GAKG;AACH,wBAAsB,WAAW,CAAC,CAAC,EACjC,OAAO,EAAE,cAAc,EACvB,EAAE,EAAE,CAAC,IAAI,EAAE,IAAI,GAAG,SAAS,KAAK,OAAO,CAAC,CAAC,CAAC,GACzC,OAAO,CAAC,CAAC,CAAC,CA4DZ;AAMD,MAAM,WAAW,eAAe;IAC9B,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,oBAAoB,CAAC,EAAE,MAAM,CAAC;IAC9B,wBAAwB,CAAC,EAAE,MAAM,CAAC;CACnC;AAED;;;;;GAKG;AACH,wBAAgB,gBAAgB,CAC9B,IAAI,EAAE,IAAI,GAAG,SAAS,EACtB,KAAK,EAAE,eAAe,GACrB,IAAI,CAiCN;AAgCD;;;;;;GAMG;AACH,wBAAgB,cAAc,CAC5B,IAAI,EAAE,IAAI,GAAG,SAAS,EACtB,QAAQ,EAAE,KAAK,CAAC;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,CAAC,GACjD,IAAI,CAON;AAED;;;;;GAKG;AACH,wBAAgB,eAAe,CAAC,IAAI,EAAE,IAAI,GAAG,SAAS,EAAE,MAAM,EAAE,MAAM,GAAG,IAAI,CAS5E;AAED;;;;;GAKG;AACH,wBAAgB,iBAAiB,CAC/B,IAAI,EAAE,IAAI,GAAG,SAAS,EACtB,QAAQ,EAAE;IACR,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE;QACN,aAAa,EAAE,MAAM,CAAC;QACtB,iBAAiB,EAAE,MAAM,CAAC;QAC1B,YAAY,EAAE,MAAM,CAAC;QACrB,qBAAqB,CAAC,EAAE;YAAE,aAAa,CAAC,EAAE,MAAM,CAAA;SAAE,GAAG,IAAI,CAAC;KAC3D,GAAG,IAAI,CAAC;IACT,OAAO,CAAC,EAAE,KAAK,CAAC;QACd,aAAa,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;QAC9B,OAAO,CAAC,EAAE;YAAE,IAAI,CAAC,EAAE,MAAM,CAAC;YAAC,OAAO,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;SAAE,CAAC;KACtD,CAAC,CAAC;CACJ,GACA,IAAI,CAwCN;AAMD;;GAEG;AACH,wBAAgB,YAAY,CAC1B,IAAI,EAAE,IAAI,GAAG,SAAS,EACtB,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,GAAG,OAAO,GAAG,SAAS,CAAC,GAC3D,IAAI,CAON;AAED;;;;;;GAMG;AACH,wBAAsB,sBAAsB,IAAI,OAAO,CACrD,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CACvB,CASA"}