@orq-ai/evaluatorq 1.3.0 → 1.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29) hide show
  1. package/dist/lib/integrations/simulation/adapters.d.ts +28 -5
  2. package/dist/lib/integrations/simulation/adapters.d.ts.map +1 -1
  3. package/dist/lib/integrations/simulation/adapters.js +113 -7
  4. package/dist/lib/integrations/simulation/agents/base.d.ts +3 -0
  5. package/dist/lib/integrations/simulation/agents/base.d.ts.map +1 -1
  6. package/dist/lib/integrations/simulation/agents/base.js +104 -82
  7. package/dist/lib/integrations/simulation/agents/judge.d.ts.map +1 -1
  8. package/dist/lib/integrations/simulation/agents/judge.js +1 -0
  9. package/dist/lib/integrations/simulation/agents/user-simulator.d.ts.map +1 -1
  10. package/dist/lib/integrations/simulation/agents/user-simulator.js +4 -1
  11. package/dist/lib/integrations/simulation/generators/first-message-generator.d.ts.map +1 -1
  12. package/dist/lib/integrations/simulation/generators/first-message-generator.js +51 -28
  13. package/dist/lib/integrations/simulation/generators/persona-generator.d.ts.map +1 -1
  14. package/dist/lib/integrations/simulation/generators/persona-generator.js +144 -102
  15. package/dist/lib/integrations/simulation/generators/scenario-generator.d.ts.map +1 -1
  16. package/dist/lib/integrations/simulation/generators/scenario-generator.js +274 -169
  17. package/dist/lib/integrations/simulation/index.d.ts +1 -1
  18. package/dist/lib/integrations/simulation/index.d.ts.map +1 -1
  19. package/dist/lib/integrations/simulation/index.js +1 -1
  20. package/dist/lib/integrations/simulation/runner/simulation.d.ts.map +1 -1
  21. package/dist/lib/integrations/simulation/runner/simulation.js +147 -85
  22. package/dist/lib/integrations/simulation/simulation/index.d.ts.map +1 -1
  23. package/dist/lib/integrations/simulation/simulation/index.js +81 -27
  24. package/dist/lib/integrations/simulation/tracing.d.ts +111 -0
  25. package/dist/lib/integrations/simulation/tracing.d.ts.map +1 -0
  26. package/dist/lib/integrations/simulation/tracing.js +310 -0
  27. package/dist/lib/integrations/simulation/wrap-agent.js +2 -2
  28. package/dist/tsconfig.lib.tsbuildinfo +1 -1
  29. package/package.json +1 -1
@@ -0,0 +1,310 @@
1
+ /**
2
+ * OpenTelemetry tracing utilities for the agent simulation module.
3
+ *
4
+ * Provides span creation helpers that mirror the redteam module's tracing
5
+ * patterns, adapted for the TypeScript simulation module. All functions
6
+ * gracefully degrade to no-ops when tracing is not enabled.
7
+ *
8
+ * Span hierarchy:
9
+ * orq.simulation.pipeline (root)
10
+ * ├── orq.simulation.persona_generation
11
+ * ├── orq.simulation.scenario_generation
12
+ * ├── orq.simulation.run (per datapoint)
13
+ * │ ├── orq.simulation.first_message_generation
14
+ * │ └── orq.simulation.turn (per turn)
15
+ * │ ├── orq.simulation.target_call
16
+ * │ ├── orq.simulation.judge_evaluation
17
+ * │ └── orq.simulation.user_simulator_call
18
+ */
19
+ import { getTracer } from "../../tracing/setup.js";
20
+ // ---------------------------------------------------------------------------
21
+ // Internal span: orq.simulation.*
22
+ // ---------------------------------------------------------------------------
23
/**
 * Run `fn` inside an active simulation span named `name`.
 *
 * No span kind is passed to the tracer, so the tracer's default kind applies.
 * When `getTracer()` returns nothing, or `@opentelemetry/api` cannot be
 * imported, `fn(undefined)` is invoked directly and its result returned.
 *
 * On success the span status is set to OK. On failure the span gets an ERROR
 * status carrying the error message, the exception is recorded, `error.type`
 * is set for `Error` instances, and the original error is rethrown. The span
 * is always ended.
 *
 * @param name - Span name.
 * @param attributes - Optional initial attributes; `undefined` values are dropped.
 * @param fn - Callback receiving the span (or `undefined` when tracing is off).
 * @returns Whatever `fn` resolves to.
 */
export async function withSimulationSpan(name, attributes, fn) {
    const tracer = getTracer();
    if (!tracer) {
        return fn(undefined);
    }
    let otelApi;
    try {
        otelApi = await import("@opentelemetry/api");
    }
    catch {
        // OTEL not available, run without span
        return fn(undefined);
    }
    const { SpanStatusCode } = otelApi;
    // Only forward attributes that actually carry a value.
    const definedAttributes = {};
    for (const key of Object.keys(attributes || {})) {
        const value = attributes[key];
        if (value !== undefined) {
            definedAttributes[key] = value;
        }
    }
    const runInSpan = async (span) => {
        try {
            const outcome = await fn(span);
            span.setStatus({ code: SpanStatusCode.OK });
            return outcome;
        }
        catch (err) {
            const isError = err instanceof Error;
            span.setStatus({
                code: SpanStatusCode.ERROR,
                message: isError ? err.message : String(err),
            });
            // recordException is always handed an Error; other thrown values are wrapped.
            span.recordException(isError ? err : new Error(String(err)));
            if (isError) {
                span.setAttribute("error.type", err.constructor.name);
            }
            throw err;
        }
        finally {
            span.end();
        }
    };
    return tracer.startActiveSpan(name, { attributes: definedAttributes }, runInSpan);
}
72
/**
 * Run `fn` inside an active GenAI LLM span of kind `SpanKind.CLIENT`.
 *
 * The span is named `"{operation} {model}"`, where `operation` defaults to
 * `"chat"`. The provider comes from `options.provider`, falling back to
 * `deriveProvider(options.model)`, and is written to both `gen_ai.system` and
 * `gen_ai.provider.name`. Temperature and max tokens are attached only when
 * defined; `orq.simulation.llm_purpose` only when `options.purpose` is truthy.
 *
 * When `getTracer()` returns nothing, or `@opentelemetry/api` cannot be
 * imported, `fn(undefined)` is invoked directly. Errors thrown by `fn` are
 * recorded on the span and rethrown; the span is always ended.
 *
 * @param options - `{ model, operation?, provider?, temperature?, maxTokens?, purpose? }`.
 * @param fn - Callback receiving the span (or `undefined` when tracing is off).
 * @returns Whatever `fn` resolves to.
 */
export async function withLLMSpan(options, fn) {
    const tracer = getTracer();
    if (!tracer) {
        return fn(undefined);
    }
    let otelApi;
    try {
        otelApi = await import("@opentelemetry/api");
    }
    catch {
        return fn(undefined);
    }
    const { SpanKind, SpanStatusCode } = otelApi;
    const operation = options.operation ?? "chat";
    const provider = options.provider ?? deriveProvider(options.model);
    const requestAttrs = {
        "gen_ai.operation.name": operation,
        "gen_ai.system": provider,
        "gen_ai.provider.name": provider,
        "gen_ai.request.model": options.model,
    };
    const { temperature, maxTokens, purpose } = options;
    if (temperature !== undefined) {
        requestAttrs["gen_ai.request.temperature"] = temperature;
    }
    if (maxTokens !== undefined) {
        requestAttrs["gen_ai.request.max_tokens"] = maxTokens;
    }
    if (purpose) {
        requestAttrs["orq.simulation.llm_purpose"] = purpose;
    }
    const runInSpan = async (span) => {
        try {
            const outcome = await fn(span);
            span.setStatus({ code: SpanStatusCode.OK });
            return outcome;
        }
        catch (err) {
            const isError = err instanceof Error;
            span.setStatus({
                code: SpanStatusCode.ERROR,
                message: isError ? err.message : String(err),
            });
            // recordException is always handed an Error; other thrown values are wrapped.
            span.recordException(isError ? err : new Error(String(err)));
            if (isError) {
                span.setAttribute("error.type", err.constructor.name);
            }
            throw err;
        }
        finally {
            span.end();
        }
    };
    return tracer.startActiveSpan(`${operation} ${options.model}`, { kind: SpanKind.CLIENT, attributes: requestAttrs }, runInSpan);
}
131
/**
 * Write token-usage attributes onto a span. Does nothing when `span` is falsy.
 *
 * Missing prompt/completion counts default to 0, and a missing total defaults
 * to their sum. Each count is written under the `gen_ai.usage.*` names and
 * again under alias keys (prefixed and bare) for platform compatibility.
 * Cache read/creation counts are written only when defined.
 *
 * @param span - Span to annotate, or a falsy value to skip.
 * @param usage - `{ promptTokens?, completionTokens?, totalTokens?,
 *   cacheReadInputTokens?, cacheCreationInputTokens? }`.
 */
export function recordTokenUsage(span, usage) {
    if (!span) {
        return;
    }
    const inputCount = usage.promptTokens ?? 0;
    const outputCount = usage.completionTokens ?? 0;
    const totalCount = usage.totalTokens ?? inputCount + outputCount;
    const { cacheReadInputTokens, cacheCreationInputTokens } = usage;
    // OTel GenAI semantic convention names
    const entries = [
        ["gen_ai.usage.input_tokens", inputCount],
        ["gen_ai.usage.output_tokens", outputCount],
        ["gen_ai.usage.total_tokens", totalCount],
    ];
    if (cacheReadInputTokens !== undefined) {
        entries.push(["gen_ai.usage.cache_read.input_tokens", cacheReadInputTokens]);
    }
    if (cacheCreationInputTokens !== undefined) {
        entries.push(["gen_ai.usage.cache_creation.input_tokens", cacheCreationInputTokens]);
    }
    // Aliases for platform compatibility
    entries.push(
        ["gen_ai.usage.prompt_tokens", inputCount],
        ["gen_ai.usage.completion_tokens", outputCount],
        ["prompt_tokens", inputCount],
        ["completion_tokens", outputCount],
        ["input_tokens", inputCount],
        ["output_tokens", outputCount],
        ["total_tokens", totalCount],
    );
    for (const [key, value] of entries) {
        span.setAttribute(key, value);
    }
}
162
// Max content length per message to avoid oversized spans (matches redteam)
const MAX_CONTENT_LEN = 2000;
/**
 * Cap message content at `MAX_CONTENT_LEN` UTF-16 code units, appending `…`
 * when anything was cut.
 *
 * - `null` / `undefined` content (e.g. an assistant message that only carries
 *   tool calls) becomes `""` instead of throwing.
 * - Non-string content (e.g. an array of content parts) is returned unchanged
 *   and left for the caller's JSON serialization.
 * - The cut never lands between the two halves of a surrogate pair, so the
 *   result does not end in a lone surrogate.
 *
 * @param text - Message content.
 * @returns The possibly shortened content.
 */
function truncate(text) {
    if (text === null || text === undefined) {
        return "";
    }
    if (typeof text !== "string") {
        return text;
    }
    if (text.length <= MAX_CONTENT_LEN) {
        return text;
    }
    let end = MAX_CONTENT_LEN;
    const lastUnit = text.charCodeAt(end - 1);
    // A lead (high) surrogate as the last kept unit would be orphaned: drop it.
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
        end -= 1;
    }
    return `${text.slice(0, end)}…`;
}
169
/**
 * Turn chat messages into a JSON string suitable for a span attribute.
 *
 * Only `role` and `content` are kept from each message, and each `content`
 * is passed through `truncate`.
 *
 * @param messages - Array of `{ role, content }` chat messages.
 * @returns JSON array string.
 */
function serializeMessages(messages) {
    const toCompact = ({ role, content }) => ({ role, content: truncate(content) });
    const compactMessages = messages.map(toCompact);
    return JSON.stringify(compactMessages);
}
175
/**
 * Decide whether message content may be written to spans.
 *
 * The OTel GenAI semconv treats `gen_ai.input.messages` and
 * `gen_ai.output.messages` as opt-in because they may carry PII. This reads
 * `OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT`: when unset, capture is
 * enabled (so the platform UI keeps working); when set, only `"1"` or a
 * case-insensitive `"true"` enables it.
 *
 * @returns `true` when message content should be recorded.
 */
function captureMessageContent() {
    const raw = process.env.OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT;
    return raw === undefined || raw === "1" || raw.toLowerCase() === "true";
}
186
/**
 * Attach the LLM input messages to a span.
 *
 * The serialized messages are written to both `gen_ai.input.messages` (OTel
 * GenAI convention) and `input` (platform fallback). Nothing is written when
 * `span` is falsy, when `messages` is empty, or when
 * `captureMessageContent()` reports that content capture is disabled
 * (e.g. `OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT=false`).
 *
 * @param span - Span to annotate, or a falsy value to skip.
 * @param messages - Array of `{ role, content }` chat messages.
 */
export function recordLLMInput(span, messages) {
    if (span && messages.length !== 0 && captureMessageContent()) {
        const payload = serializeMessages(messages);
        for (const key of ["gen_ai.input.messages", "input"]) {
            span.setAttribute(key, payload);
        }
    }
}
202
/**
 * Attach a single LLM output string to a span as one assistant message.
 *
 * The serialized message is written to both `gen_ai.output.messages` and
 * `output` (platform fallback). Nothing is written when `span` or `output`
 * is falsy, or when `captureMessageContent()` reports that content capture is
 * disabled (e.g. `OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT=false`).
 *
 * @param span - Span to annotate, or a falsy value to skip.
 * @param output - Output text produced by the model.
 */
export function recordLLMOutput(span, output) {
    if (span && output && captureMessageContent()) {
        const assistantMessage = { role: "assistant", content: output };
        const payload = serializeMessages([assistantMessage]);
        for (const key of ["gen_ai.output.messages", "output"]) {
            span.setAttribute(key, payload);
        }
    }
}
219
/**
 * Copy the interesting parts of an OpenAI-compatible response onto a span.
 * Does nothing when `span` is falsy.
 *
 * Written, in order and only when present:
 * - `gen_ai.response.id` and `gen_ai.response.model`;
 * - token usage via `recordTokenUsage` (cached prompt tokens are read from
 *   `usage.prompt_tokens_details.cached_tokens`);
 * - `gen_ai.output.messages` / `output` built from the choices that have
 *   message content, when `captureMessageContent()` allows it;
 * - `gen_ai.response.finish_reasons` with every truthy finish reason.
 *
 * @param span - Span to annotate, or a falsy value to skip.
 * @param response - Response with optional `id`, `model`, `usage`, `choices`.
 */
export function recordLLMResponse(span, response) {
    if (!span) {
        return;
    }
    const { id, model, usage } = response;
    if (id) {
        span.setAttribute("gen_ai.response.id", id);
    }
    if (model) {
        span.setAttribute("gen_ai.response.model", model);
    }
    if (usage) {
        const tokenUsage = {
            promptTokens: usage.prompt_tokens,
            completionTokens: usage.completion_tokens,
            totalTokens: usage.total_tokens,
            cacheReadInputTokens: usage.prompt_tokens_details?.cached_tokens,
        };
        recordTokenUsage(span, tokenUsage);
    }
    // Record output content (dual-attribute pattern). Opt-in per GenAI semconv.
    if (captureMessageContent()) {
        const hasContent = (choice) => choice.message?.content;
        const toMessage = (choice) => ({
            role: choice.message?.role ?? "assistant",
            content: choice.message?.content ?? "",
        });
        const replies = response.choices?.filter(hasContent).map(toMessage);
        if (replies && replies.length > 0) {
            const payload = serializeMessages(replies);
            span.setAttribute("gen_ai.output.messages", payload);
            span.setAttribute("output", payload);
        }
    }
    const reasons = response.choices
        ?.map((choice) => choice.finish_reason)
        .filter(Boolean);
    if (reasons && reasons.length > 0) {
        span.setAttribute("gen_ai.response.finish_reasons", reasons);
    }
}
263
+ // ---------------------------------------------------------------------------
264
+ // Attribute helpers
265
+ // ---------------------------------------------------------------------------
266
/**
 * Set several attributes on a span in one call.
 *
 * Entries whose value is `undefined` are skipped; every other value
 * (including `null`, `0` and `""`) is forwarded to `span.setAttribute`.
 * Does nothing when `span` is falsy.
 *
 * @param span - Span to annotate, or a falsy value to skip.
 * @param attrs - Object mapping attribute keys to values.
 */
export function setSpanAttrs(span, attrs) {
    if (!span) {
        return;
    }
    Object.entries(attrs)
        .filter(([, value]) => value !== undefined)
        .forEach(([key, value]) => {
            span.setAttribute(key, value);
        });
}
278
/**
 * Build the trace-context headers for the currently active context.
 *
 * The active context is injected into a fresh object through the
 * `@opentelemetry/api` propagation API; with a W3C propagator configured this
 * yields `traceparent` / `tracestate`. Callers use the result to propagate
 * trace context into outgoing HTTP requests so the router can create child
 * spans under the current simulation span.
 *
 * @returns The injected headers, or an empty object when the OTel API cannot
 *   be imported or injection throws.
 */
export async function getTraceContextHeaders() {
    const carrier = {};
    try {
        const otelApi = await import("@opentelemetry/api");
        otelApi.propagation.inject(otelApi.context.active(), carrier);
    }
    catch {
        // Best effort: never hand back a partially populated carrier.
        return {};
    }
    return carrier;
}
296
+ // ---------------------------------------------------------------------------
297
+ // Helpers
298
+ // ---------------------------------------------------------------------------
299
// OTel GenAI semconv `gen_ai.system` enum values. The router uses prefixes
// like "azure/" that don't map 1:1 to the spec — translate the known ones.
const PROVIDER_ALIASES = {
    azure: "azure.ai.openai",
};
/**
 * Derive the provider name from a router-style model identifier.
 *
 * For `"prefix/model"` the prefix is returned, translated through
 * `PROVIDER_ALIASES` when a mapping exists. Identifiers without a `/` are
 * attributed to `"openai"`.
 *
 * Only own keys of `PROVIDER_ALIASES` are consulted, so a prefix that happens
 * to match an inherited `Object.prototype` member (`"constructor"`,
 * `"toString"`, `"__proto__"`, …) is returned as a plain string rather than
 * resolving to a function or object.
 *
 * @param model - Model identifier, e.g. `"azure/gpt-4o"` or `"gpt-4o"`.
 * @returns Provider name string.
 */
function deriveProvider(model) {
    if (!model.includes("/")) {
        return "openai";
    }
    const prefix = model.split("/")[0];
    const hasAlias = Object.prototype.hasOwnProperty.call(PROVIDER_ALIASES, prefix);
    return hasAlias ? PROVIDER_ALIASES[prefix] : prefix;
}
@@ -3,7 +3,7 @@
3
3
  *
4
4
  * Follows the same pattern as wrapAISdkAgent() and wrapLangChainAgent().
5
5
  */
6
- import { fromOrqDeployment } from "./adapters.js";
6
+ import { fromOrqAgent } from "./adapters.js";
7
7
  import { toOpenResponses } from "./convert.js";
8
8
  import { simulate } from "./simulation/index.js";
9
9
  /**
@@ -51,7 +51,7 @@ export function wrapSimulationAgent(options) {
51
51
  // Resolve the target callback
52
52
  let resolvedCallback = targetCallback;
53
53
  if (!resolvedCallback && agentKey) {
54
- resolvedCallback = fromOrqDeployment(agentKey);
54
+ resolvedCallback = fromOrqAgent(agentKey);
55
55
  }
56
56
  if (!resolvedCallback) {
57
57
  throw new Error("wrapSimulationAgent requires either targetCallback or agentKey");