@orq-ai/evaluatorq 1.3.1 → 1.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/lib/integrations/simulation/adapters.d.ts +28 -5
- package/dist/lib/integrations/simulation/adapters.d.ts.map +1 -1
- package/dist/lib/integrations/simulation/adapters.js +113 -7
- package/dist/lib/integrations/simulation/agents/base.d.ts +3 -0
- package/dist/lib/integrations/simulation/agents/base.d.ts.map +1 -1
- package/dist/lib/integrations/simulation/agents/base.js +104 -82
- package/dist/lib/integrations/simulation/agents/judge.d.ts.map +1 -1
- package/dist/lib/integrations/simulation/agents/judge.js +1 -0
- package/dist/lib/integrations/simulation/agents/user-simulator.d.ts.map +1 -1
- package/dist/lib/integrations/simulation/agents/user-simulator.js +4 -1
- package/dist/lib/integrations/simulation/generators/first-message-generator.d.ts.map +1 -1
- package/dist/lib/integrations/simulation/generators/first-message-generator.js +51 -28
- package/dist/lib/integrations/simulation/generators/persona-generator.d.ts.map +1 -1
- package/dist/lib/integrations/simulation/generators/persona-generator.js +144 -102
- package/dist/lib/integrations/simulation/generators/scenario-generator.d.ts.map +1 -1
- package/dist/lib/integrations/simulation/generators/scenario-generator.js +274 -169
- package/dist/lib/integrations/simulation/index.d.ts +1 -1
- package/dist/lib/integrations/simulation/index.d.ts.map +1 -1
- package/dist/lib/integrations/simulation/index.js +1 -1
- package/dist/lib/integrations/simulation/runner/simulation.d.ts.map +1 -1
- package/dist/lib/integrations/simulation/runner/simulation.js +147 -85
- package/dist/lib/integrations/simulation/simulation/index.d.ts.map +1 -1
- package/dist/lib/integrations/simulation/simulation/index.js +81 -27
- package/dist/lib/integrations/simulation/tracing.d.ts +111 -0
- package/dist/lib/integrations/simulation/tracing.d.ts.map +1 -0
- package/dist/lib/integrations/simulation/tracing.js +310 -0
- package/dist/lib/integrations/simulation/wrap-agent.js +2 -2
- package/dist/tsconfig.lib.tsbuildinfo +1 -1
- package/package.json +1 -1
|
@@ -8,15 +8,14 @@ import type { ChatMessage } from "./types.js";
|
|
|
8
8
|
/**
|
|
9
9
|
* Creates a simulation `targetCallback` from an Orq deployment key.
|
|
10
10
|
*
|
|
11
|
-
*
|
|
12
|
-
*
|
|
13
|
-
* so you can compose it freely.
|
|
11
|
+
* Uses the deployments API (`client.deployments.invoke()`).
|
|
12
|
+
* For agents, use {@link fromOrqAgent} instead.
|
|
14
13
|
*
|
|
15
14
|
* @example
|
|
16
15
|
* ```typescript
|
|
17
16
|
* import { fromOrqDeployment, simulate } from "@orq-ai/evaluatorq/simulation";
|
|
18
17
|
*
|
|
19
|
-
* const callback = fromOrqDeployment("my-
|
|
18
|
+
* const callback = fromOrqDeployment("my-deployment-key");
|
|
20
19
|
*
|
|
21
20
|
* const results = await simulate({
|
|
22
21
|
* evaluationName: "my-sim",
|
|
@@ -26,7 +25,31 @@ import type { ChatMessage } from "./types.js";
|
|
|
26
25
|
* });
|
|
27
26
|
* ```
|
|
28
27
|
*/
|
|
29
|
-
export declare function fromOrqDeployment(
|
|
28
|
+
export declare function fromOrqDeployment(deploymentKey: string): (messages: ChatMessage[]) => Promise<string>;
|
|
29
|
+
/**
|
|
30
|
+
* Creates a simulation `targetCallback` from an Orq agent key.
|
|
31
|
+
*
|
|
32
|
+
* Uses the agents streaming API to get synchronous responses.
|
|
33
|
+
* Propagates OTel trace context so agent-side LLM spans appear
|
|
34
|
+
* under the simulation's `target_call` span.
|
|
35
|
+
*
|
|
36
|
+
* This is the adapter used internally when you pass `agentKey` to `simulate()`.
|
|
37
|
+
*
|
|
38
|
+
* @example
|
|
39
|
+
* ```typescript
|
|
40
|
+
* import { fromOrqAgent, simulate } from "@orq-ai/evaluatorq/simulation";
|
|
41
|
+
*
|
|
42
|
+
* const callback = fromOrqAgent("my-agent-key");
|
|
43
|
+
*
|
|
44
|
+
* const results = await simulate({
|
|
45
|
+
* evaluationName: "my-sim",
|
|
46
|
+
* targetCallback: callback,
|
|
47
|
+
* personas: [...],
|
|
48
|
+
* scenarios: [...],
|
|
49
|
+
* });
|
|
50
|
+
* ```
|
|
51
|
+
*/
|
|
52
|
+
export declare function fromOrqAgent(agentKey: string): (messages: ChatMessage[]) => Promise<string>;
|
|
30
53
|
/**
|
|
31
54
|
* Creates a simulation `targetCallback` from a plain function that calls
|
|
32
55
|
* an OpenAI-compatible chat completions API.
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"adapters.d.ts","sourceRoot":"","sources":["../../../../src/lib/integrations/simulation/adapters.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;
|
|
1
|
+
{"version":3,"file":"adapters.d.ts","sourceRoot":"","sources":["../../../../src/lib/integrations/simulation/adapters.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAIH,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AAE9C;;;;;;;;;;;;;;;;;;;GAmBG;AACH,wBAAgB,iBAAiB,CAC/B,aAAa,EAAE,MAAM,GACpB,CAAC,QAAQ,EAAE,WAAW,EAAE,KAAK,OAAO,CAAC,MAAM,CAAC,CAS9C;AAED;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,wBAAgB,YAAY,CAC1B,QAAQ,EAAE,MAAM,GACf,CAAC,QAAQ,EAAE,WAAW,EAAE,KAAK,OAAO,CAAC,MAAM,CAAC,CAuG9C;AAED;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,wBAAgB,mBAAmB,CACjC,EAAE,EAAE,CACF,QAAQ,EAAE,KAAK,CAAC;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,CAAC,KAC/C,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,GAC5B,CAAC,QAAQ,EAAE,WAAW,EAAE,KAAK,OAAO,CAAC,MAAM,CAAC,CAI9C"}
|
|
@@ -4,18 +4,18 @@
|
|
|
4
4
|
* These helpers create `targetCallback` functions from common agent sources,
|
|
5
5
|
* so users don't need to wire the plumbing themselves.
|
|
6
6
|
*/
|
|
7
|
+
import { randomUUID } from "node:crypto";
|
|
7
8
|
/**
|
|
8
9
|
* Creates a simulation `targetCallback` from an Orq deployment key.
|
|
9
10
|
*
|
|
10
|
-
*
|
|
11
|
-
*
|
|
12
|
-
* so you can compose it freely.
|
|
11
|
+
* Uses the deployments API (`client.deployments.invoke()`).
|
|
12
|
+
* For agents, use {@link fromOrqAgent} instead.
|
|
13
13
|
*
|
|
14
14
|
* @example
|
|
15
15
|
* ```typescript
|
|
16
16
|
* import { fromOrqDeployment, simulate } from "@orq-ai/evaluatorq/simulation";
|
|
17
17
|
*
|
|
18
|
-
* const callback = fromOrqDeployment("my-
|
|
18
|
+
* const callback = fromOrqDeployment("my-deployment-key");
|
|
19
19
|
*
|
|
20
20
|
* const results = await simulate({
|
|
21
21
|
* evaluationName: "my-sim",
|
|
@@ -25,13 +25,119 @@
|
|
|
25
25
|
* });
|
|
26
26
|
* ```
|
|
27
27
|
*/
|
|
28
|
-
export function fromOrqDeployment(
|
|
28
|
+
export function fromOrqDeployment(deploymentKey) {
|
|
29
|
+
if (!deploymentKey.trim()) {
|
|
30
|
+
throw new Error("deploymentKey must be a non-empty string");
|
|
31
|
+
}
|
|
32
|
+
return async (messages) => {
|
|
33
|
+
const { invoke } = await import("../../deployment-helper.js");
|
|
34
|
+
return invoke(deploymentKey, { messages });
|
|
35
|
+
};
|
|
36
|
+
}
|
|
37
|
+
/**
|
|
38
|
+
* Creates a simulation `targetCallback` from an Orq agent key.
|
|
39
|
+
*
|
|
40
|
+
* Uses the agents streaming API to get synchronous responses.
|
|
41
|
+
* Propagates OTel trace context so agent-side LLM spans appear
|
|
42
|
+
* under the simulation's `target_call` span.
|
|
43
|
+
*
|
|
44
|
+
* This is the adapter used internally when you pass `agentKey` to `simulate()`.
|
|
45
|
+
*
|
|
46
|
+
* @example
|
|
47
|
+
* ```typescript
|
|
48
|
+
* import { fromOrqAgent, simulate } from "@orq-ai/evaluatorq/simulation";
|
|
49
|
+
*
|
|
50
|
+
* const callback = fromOrqAgent("my-agent-key");
|
|
51
|
+
*
|
|
52
|
+
* const results = await simulate({
|
|
53
|
+
* evaluationName: "my-sim",
|
|
54
|
+
* targetCallback: callback,
|
|
55
|
+
* personas: [...],
|
|
56
|
+
* scenarios: [...],
|
|
57
|
+
* });
|
|
58
|
+
* ```
|
|
59
|
+
*/
|
|
60
|
+
export function fromOrqAgent(agentKey) {
|
|
29
61
|
if (!agentKey.trim()) {
|
|
30
62
|
throw new Error("agentKey must be a non-empty string");
|
|
31
63
|
}
|
|
64
|
+
// Cache client across calls to avoid creating a new one per turn
|
|
65
|
+
// biome-ignore lint/suspicious/noExplicitAny: cached client type depends on dynamic import
|
|
66
|
+
let cachedClient = null;
|
|
67
|
+
// Multi-turn continuity: the agent stream API maintains conversation state
|
|
68
|
+
// via task_id. On the first turn, we start a new conversation. On subsequent
|
|
69
|
+
// turns, we pass the task_id from the previous response to continue the
|
|
70
|
+
// conversation. Use first message content to identify conversations.
|
|
71
|
+
const conversationTasks = new Map();
|
|
72
|
+
// Unique per adapter instance — used as map key prefix to prevent cross-simulation
|
|
73
|
+
// key collisions when two simulations share the same opening message content.
|
|
74
|
+
const adapterInstanceId = randomUUID();
|
|
32
75
|
return async (messages) => {
|
|
33
|
-
const
|
|
34
|
-
|
|
76
|
+
const apiKey = process.env.ORQ_API_KEY;
|
|
77
|
+
if (!apiKey) {
|
|
78
|
+
throw new Error("ORQ_API_KEY environment variable must be set to use the agent adapter.");
|
|
79
|
+
}
|
|
80
|
+
if (!cachedClient) {
|
|
81
|
+
const { Orq } = await import("@orq-ai/node");
|
|
82
|
+
const serverURL = process.env.ORQ_BASE_URL || "https://my.orq.ai";
|
|
83
|
+
cachedClient = new Orq({ apiKey, serverURL });
|
|
84
|
+
}
|
|
85
|
+
const firstUserMessage = messages.find((m) => m.role === "user");
|
|
86
|
+
if (!firstUserMessage) {
|
|
87
|
+
throw new Error(`fromOrqAgent: conversation has no user message to send to "${agentKey}".`);
|
|
88
|
+
}
|
|
89
|
+
// Check if this is a continuation of an existing conversation
|
|
90
|
+
// Key prefixed with adapterInstanceId to prevent collisions across simulations
|
|
91
|
+
// with identical opening messages.
|
|
92
|
+
const conversationKey = `${adapterInstanceId}:${firstUserMessage.content}`;
|
|
93
|
+
const existingTaskId = conversationTasks.get(conversationKey);
|
|
94
|
+
// Send only the latest user message; prior turns are reconstructed
|
|
95
|
+
// server-side from the task_id.
|
|
96
|
+
const lastUserMessage = [...messages]
|
|
97
|
+
.reverse()
|
|
98
|
+
.find((m) => m.role === "user");
|
|
99
|
+
const messageText = lastUserMessage?.content ?? "";
|
|
100
|
+
// Propagate OTel trace context so the agent's server-side LLM spans
|
|
101
|
+
// are linked as children of the current simulation span
|
|
102
|
+
let traceHeaders = {};
|
|
103
|
+
try {
|
|
104
|
+
const { getTraceContextHeaders } = await import("./tracing.js");
|
|
105
|
+
traceHeaders = await getTraceContextHeaders();
|
|
106
|
+
}
|
|
107
|
+
catch {
|
|
108
|
+
// Tracing not available — continue without propagation
|
|
109
|
+
}
|
|
110
|
+
// Build request with optional taskId for conversation continuation
|
|
111
|
+
const streamRequest = {
|
|
112
|
+
message: {
|
|
113
|
+
role: "user",
|
|
114
|
+
parts: [{ kind: "text", text: messageText }],
|
|
115
|
+
},
|
|
116
|
+
...(existingTaskId && { taskId: existingTaskId }),
|
|
117
|
+
};
|
|
118
|
+
const stream = await cachedClient.agents.stream(streamRequest, agentKey, {
|
|
119
|
+
headers: traceHeaders,
|
|
120
|
+
});
|
|
121
|
+
// Consume stream and extract the final agent message + task ID
|
|
122
|
+
let lastMessage;
|
|
123
|
+
let taskId;
|
|
124
|
+
for await (const event of stream) {
|
|
125
|
+
const data = event.data;
|
|
126
|
+
if (data?.type === "event.agents.inactive") {
|
|
127
|
+
const innerData = data.data;
|
|
128
|
+
lastMessage = innerData?.lastMessage ?? "";
|
|
129
|
+
taskId = innerData?.taskId ?? undefined;
|
|
130
|
+
}
|
|
131
|
+
}
|
|
132
|
+
if (!lastMessage) {
|
|
133
|
+
throw new Error(`Agent stream for "${agentKey}" ended without an event.agents.inactive event. ` +
|
|
134
|
+
"The agent may have errored out server-side.");
|
|
135
|
+
}
|
|
136
|
+
// Store task ID for conversation continuation
|
|
137
|
+
if (taskId) {
|
|
138
|
+
conversationTasks.set(conversationKey, taskId);
|
|
139
|
+
}
|
|
140
|
+
return lastMessage;
|
|
35
141
|
};
|
|
36
142
|
}
|
|
37
143
|
/**
|
|
@@ -57,6 +57,7 @@ export declare abstract class BaseAgent {
|
|
|
57
57
|
maxTokens?: number;
|
|
58
58
|
timeout?: number;
|
|
59
59
|
signal?: AbortSignal;
|
|
60
|
+
llmPurpose?: string;
|
|
60
61
|
}): Promise<string>;
|
|
61
62
|
/**
|
|
62
63
|
* Get cumulative token usage for this agent.
|
|
@@ -85,6 +86,8 @@ export declare abstract class BaseAgent {
|
|
|
85
86
|
tools?: OpenAI.Chat.Completions.ChatCompletionTool[];
|
|
86
87
|
/** External abort signal — aborts in-flight LLM requests immediately. */
|
|
87
88
|
signal?: AbortSignal;
|
|
89
|
+
/** Semantic purpose for OTel span (e.g. "judge", "user_simulator"). */
|
|
90
|
+
llmPurpose?: string;
|
|
88
91
|
}): Promise<LLMResult>;
|
|
89
92
|
}
|
|
90
93
|
//# sourceMappingURL=base.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"base.d.ts","sourceRoot":"","sources":["../../../../../src/lib/integrations/simulation/agents/base.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,MAAM,MAAM,QAAQ,CAAC;
|
|
1
|
+
{"version":3,"file":"base.d.ts","sourceRoot":"","sources":["../../../../../src/lib/integrations/simulation/agents/base.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,MAAM,MAAM,QAAQ,CAAC;AAQ5B,OAAO,KAAK,EAAE,WAAW,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAQ3D;;GAEG;AACH,MAAM,WAAW,SAAS;IACxB,OAAO,EAAE,MAAM,CAAC;IAChB,UAAU,CAAC,EAAE,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,6BAA6B,EAAE,CAAC;CACtE;AAED;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,uDAAuD;IACvD,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,wFAAwF;IACxF,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,2DAA2D;IAC3D,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAUD;;;;;;;;;GASG;AACH,8BAAsB,SAAS;IAC7B,SAAS,CAAC,KAAK,EAAE,MAAM,CAAC;IACxB,SAAS,CAAC,MAAM,EAAE,MAAM,CAAC;IACzB,OAAO,CAAC,WAAW,CAAU;IAC7B,OAAO,CAAC,KAAK,CAAa;gBAEd,MAAM,CAAC,EAAE,WAAW;IA2BhC,qCAAqC;IACrC,QAAQ,KAAK,IAAI,IAAI,MAAM,CAAC;IAE5B,oCAAoC;IACpC,QAAQ,KAAK,YAAY,IAAI,MAAM,CAAC;IAMpC;;;;;;;OAOG;IACG,YAAY,CAChB,QAAQ,EAAE,WAAW,EAAE,EACvB,OAAO,CAAC,EAAE;QACR,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,MAAM,CAAC,EAAE,WAAW,CAAC;QACrB,UAAU,CAAC,EAAE,MAAM,CAAC;KACrB,GACA,OAAO,CAAC,MAAM,CAAC;IAkBlB;;OAEG;IACH,QAAQ,IAAI,UAAU;IAItB;;OAEG;IACH,UAAU,IAAI,IAAI;IAIlB;;;;OAIG;IACG,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAgB5B;;;;;OAKG;cACa,OAAO,CACrB,QAAQ,EAAE,WAAW,EAAE,EACvB,OAAO,CAAC,EAAE;QACR,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,KAAK,CAAC,EAAE,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,kBAAkB,EAAE,CAAC;QACrD,yEAAyE;QACzE,MAAM,CAAC,EAAE,WAAW,CAAC;QACrB,uEAAuE;QACvE,UAAU,CAAC,EAAE,MAAM,CAAC;KACrB,GACA,OAAO,CAAC,SAAS,CAAC;CAyJtB"}
|
|
@@ -5,6 +5,7 @@
|
|
|
5
5
|
* including LLM interaction with retry logic.
|
|
6
6
|
*/
|
|
7
7
|
import OpenAI from "openai";
|
|
8
|
+
import { getTraceContextHeaders, recordLLMInput, recordLLMResponse, withLLMSpan, } from "../tracing.js";
|
|
8
9
|
// Retry configuration
|
|
9
10
|
const MAX_RETRY_ATTEMPTS = 5;
|
|
10
11
|
const RETRY_MIN_WAIT_MS = 2_000;
|
|
@@ -69,6 +70,7 @@ export class BaseAgent {
|
|
|
69
70
|
maxTokens: options?.maxTokens,
|
|
70
71
|
timeout: options?.timeout,
|
|
71
72
|
signal: options?.signal,
|
|
73
|
+
llmPurpose: options?.llmPurpose,
|
|
72
74
|
});
|
|
73
75
|
if (!result.content) {
|
|
74
76
|
throw new Error(`${this.name}: LLM call failed -- no content in response`);
|
|
@@ -114,95 +116,115 @@ export class BaseAgent {
|
|
|
114
116
|
const temperature = options?.temperature ?? 0.7;
|
|
115
117
|
const maxTokens = options?.maxTokens ?? 2048;
|
|
116
118
|
const timeoutS = options?.timeout ?? DEFAULT_TIMEOUT_S;
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
119
|
+
return withLLMSpan({
|
|
120
|
+
model: this.model,
|
|
121
|
+
temperature,
|
|
122
|
+
maxTokens,
|
|
123
|
+
purpose: options?.llmPurpose,
|
|
124
|
+
}, async (span) => {
|
|
125
|
+
const fullMessages = [
|
|
126
|
+
{ role: "system", content: this.systemPrompt },
|
|
127
|
+
...messages.map((m) => ({
|
|
128
|
+
role: m.role,
|
|
129
|
+
content: m.content,
|
|
130
|
+
})),
|
|
131
|
+
];
|
|
132
|
+
// Record input messages on the span for platform UI display
|
|
133
|
+
recordLLMInput(span, fullMessages.map((m) => ({
|
|
134
|
+
role: String(m.role),
|
|
135
|
+
content: typeof m.content === "string" ? m.content : "",
|
|
136
|
+
})));
|
|
137
|
+
// Inject W3C trace context so the router links its spans to the
|
|
138
|
+
// current simulation trace. The active span and trace context don't
|
|
139
|
+
// change across retries, so compute the headers once.
|
|
140
|
+
const traceHeaders = await getTraceContextHeaders();
|
|
141
|
+
let lastError;
|
|
142
|
+
for (let attempt = 1; attempt <= MAX_RETRY_ATTEMPTS; attempt++) {
|
|
136
143
|
try {
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
temperature,
|
|
141
|
-
max_tokens: maxTokens,
|
|
142
|
-
};
|
|
143
|
-
if (options?.tools && options.tools.length > 0) {
|
|
144
|
-
params.tools = options.tools;
|
|
145
|
-
params.tool_choice = "auto";
|
|
146
|
-
}
|
|
147
|
-
const response = await this.client.chat.completions.create(params, {
|
|
148
|
-
signal: controller.signal,
|
|
149
|
-
});
|
|
150
|
-
clearTimeout(timer);
|
|
151
|
-
const choice = response.choices[0];
|
|
152
|
-
if (!choice) {
|
|
153
|
-
throw new Error(`${this.name}: No choices in response`);
|
|
144
|
+
// Bail immediately if already cancelled
|
|
145
|
+
if (options?.signal?.aborted) {
|
|
146
|
+
throw new Error("Cancelled");
|
|
154
147
|
}
|
|
155
|
-
const
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
148
|
+
const controller = new AbortController();
|
|
149
|
+
const timer = setTimeout(() => controller.abort(), timeoutS * 1000);
|
|
150
|
+
// Link external signal to this request's controller
|
|
151
|
+
const onAbort = () => controller.abort();
|
|
152
|
+
options?.signal?.addEventListener("abort", onAbort, { once: true });
|
|
153
|
+
try {
|
|
154
|
+
const params = {
|
|
155
|
+
model: this.model,
|
|
156
|
+
messages: fullMessages,
|
|
157
|
+
temperature,
|
|
158
|
+
max_tokens: maxTokens,
|
|
159
|
+
};
|
|
160
|
+
if (options?.tools && options.tools.length > 0) {
|
|
161
|
+
params.tools = options.tools;
|
|
162
|
+
params.tool_choice = "auto";
|
|
163
|
+
}
|
|
164
|
+
const response = await this.client.chat.completions.create(params, {
|
|
165
|
+
signal: controller.signal,
|
|
166
|
+
headers: traceHeaders,
|
|
167
|
+
});
|
|
168
|
+
clearTimeout(timer);
|
|
169
|
+
const choice = response.choices[0];
|
|
170
|
+
if (!choice) {
|
|
171
|
+
throw new Error(`${this.name}: No choices in response`);
|
|
172
|
+
}
|
|
173
|
+
const message = choice.message;
|
|
174
|
+
// Record LLM response on the span (token usage, finish reason, etc.)
|
|
175
|
+
recordLLMResponse(span, response);
|
|
176
|
+
// Accumulate token usage
|
|
177
|
+
if (response.usage) {
|
|
178
|
+
this.usage.prompt_tokens += response.usage.prompt_tokens;
|
|
179
|
+
this.usage.completion_tokens +=
|
|
180
|
+
response.usage.completion_tokens;
|
|
181
|
+
this.usage.total_tokens += response.usage.total_tokens;
|
|
182
|
+
}
|
|
183
|
+
const result = {
|
|
184
|
+
content: message.content ?? "",
|
|
185
|
+
};
|
|
186
|
+
if (message.tool_calls && message.tool_calls.length > 0) {
|
|
187
|
+
result.tool_calls = message.tool_calls;
|
|
188
|
+
}
|
|
189
|
+
return result;
|
|
161
190
|
}
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
if (message.tool_calls && message.tool_calls.length > 0) {
|
|
166
|
-
result.tool_calls = message.tool_calls;
|
|
191
|
+
finally {
|
|
192
|
+
clearTimeout(timer);
|
|
193
|
+
options?.signal?.removeEventListener("abort", onAbort);
|
|
167
194
|
}
|
|
168
|
-
return result;
|
|
169
|
-
}
|
|
170
|
-
finally {
|
|
171
|
-
clearTimeout(timer);
|
|
172
|
-
options?.signal?.removeEventListener("abort", onAbort);
|
|
173
|
-
}
|
|
174
|
-
}
|
|
175
|
-
catch (err) {
|
|
176
|
-
lastError = err;
|
|
177
|
-
// Abort errors (from timeout cancellation) should never be retried
|
|
178
|
-
if (err instanceof Error && err.name === "AbortError") {
|
|
179
|
-
throw err;
|
|
180
|
-
}
|
|
181
|
-
// Determine if retryable
|
|
182
|
-
const isApiError = err instanceof OpenAI.APIError;
|
|
183
|
-
const status = isApiError ? err.status : undefined;
|
|
184
|
-
const isNetworkError = !isApiError &&
|
|
185
|
-
err instanceof Error &&
|
|
186
|
-
"code" in err &&
|
|
187
|
-
typeof err.code === "string" &&
|
|
188
|
-
/^E(CONN|TIMEOUT|NOTFOUND|RESET)/.test(err.code ?? "");
|
|
189
|
-
// Re-throw immediately for external cancellation
|
|
190
|
-
if (options?.signal?.aborted)
|
|
191
|
-
throw err;
|
|
192
|
-
if (!isRetryableStatus(status) && !isNetworkError) {
|
|
193
|
-
throw err;
|
|
194
195
|
}
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
196
|
+
catch (err) {
|
|
197
|
+
lastError = err;
|
|
198
|
+
// Abort errors (from timeout cancellation) should never be retried
|
|
199
|
+
if (err instanceof Error && err.name === "AbortError") {
|
|
200
|
+
throw err;
|
|
201
|
+
}
|
|
202
|
+
// Determine if retryable
|
|
203
|
+
const isApiError = err instanceof OpenAI.APIError;
|
|
204
|
+
const status = isApiError ? err.status : undefined;
|
|
205
|
+
const isNetworkError = !isApiError &&
|
|
206
|
+
err instanceof Error &&
|
|
207
|
+
"code" in err &&
|
|
208
|
+
typeof err.code === "string" &&
|
|
209
|
+
/^E(CONN|TIMEOUT|NOTFOUND|RESET)/.test(err.code ?? "");
|
|
210
|
+
// Re-throw immediately for external cancellation
|
|
211
|
+
if (options?.signal?.aborted)
|
|
212
|
+
throw err;
|
|
213
|
+
if (!isRetryableStatus(status) && !isNetworkError) {
|
|
214
|
+
throw err;
|
|
215
|
+
}
|
|
216
|
+
if (attempt < MAX_RETRY_ATTEMPTS) {
|
|
217
|
+
const baseWait = RETRY_MIN_WAIT_MS * 2 ** (attempt - 1);
|
|
218
|
+
const waitMs = Math.min(baseWait, RETRY_MAX_WAIT_MS);
|
|
219
|
+
// Add jitter (0-25% of wait time)
|
|
220
|
+
const jitter = Math.random() * waitMs * 0.25;
|
|
221
|
+
await sleepCancellable(waitMs + jitter, options?.signal);
|
|
222
|
+
}
|
|
201
223
|
}
|
|
202
224
|
}
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
225
|
+
throw (lastError ??
|
|
226
|
+
new Error(`${this.name}: Max retries (${MAX_RETRY_ATTEMPTS}) exceeded`));
|
|
227
|
+
});
|
|
206
228
|
}
|
|
207
229
|
}
|
|
208
230
|
// ---------------------------------------------------------------------------
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"judge.d.ts","sourceRoot":"","sources":["../../../../../src/lib/integrations/simulation/agents/judge.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,MAAM,MAAM,QAAQ,CAAC;AAEjC,OAAO,KAAK,EAAE,WAAW,EAAE,SAAS,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAEpE,OAAO,KAAK,EAAE,WAAW,EAAa,MAAM,WAAW,CAAC;AACxD,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAiCtC,eAAO,MAAM,WAAW,EAAE,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,kBAAkB,EA2DnE,CAAC;AAMF,eAAO,MAAM,oBAAoB,+wCAwB8B,CAAC;AAMhE,MAAM,WAAW,gBAAiB,SAAQ,WAAW;IACnD,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,QAAQ,CAAC,EAAE,SAAS,EAAE,CAAC;IACvB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAmBD;;;;;GAKG;AACH,qBAAa,UAAW,SAAQ,SAAS;IACvC,OAAO,CAAC,IAAI,CAAS;IACrB,OAAO,CAAC,QAAQ,CAAc;IAC9B,OAAO,CAAC,WAAW,CAAS;gBAEhB,MAAM,CAAC,EAAE,gBAAgB;IAOrC,IAAI,IAAI,IAAI,MAAM,CAEjB;IAED,IAAI,YAAY,IAAI,MAAM,CASzB;IAED;;;;;OAKG;IACG,QAAQ,CACZ,QAAQ,EAAE,WAAW,EAAE,EACvB,OAAO,CAAC,EAAE;QAAE,MAAM,CAAC,EAAE,WAAW,CAAA;KAAE,GACjC,OAAO,CAAC,QAAQ,CAAC;
|
|
1
|
+
{"version":3,"file":"judge.d.ts","sourceRoot":"","sources":["../../../../../src/lib/integrations/simulation/agents/judge.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,MAAM,MAAM,QAAQ,CAAC;AAEjC,OAAO,KAAK,EAAE,WAAW,EAAE,SAAS,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAEpE,OAAO,KAAK,EAAE,WAAW,EAAa,MAAM,WAAW,CAAC;AACxD,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAiCtC,eAAO,MAAM,WAAW,EAAE,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,kBAAkB,EA2DnE,CAAC;AAMF,eAAO,MAAM,oBAAoB,+wCAwB8B,CAAC;AAMhE,MAAM,WAAW,gBAAiB,SAAQ,WAAW;IACnD,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,QAAQ,CAAC,EAAE,SAAS,EAAE,CAAC;IACvB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAmBD;;;;;GAKG;AACH,qBAAa,UAAW,SAAQ,SAAS;IACvC,OAAO,CAAC,IAAI,CAAS;IACrB,OAAO,CAAC,QAAQ,CAAc;IAC9B,OAAO,CAAC,WAAW,CAAS;gBAEhB,MAAM,CAAC,EAAE,gBAAgB;IAOrC,IAAI,IAAI,IAAI,MAAM,CAEjB;IAED,IAAI,YAAY,IAAI,MAAM,CASzB;IAED;;;;;OAKG;IACG,QAAQ,CACZ,QAAQ,EAAE,WAAW,EAAE,EACvB,OAAO,CAAC,EAAE;QAAE,MAAM,CAAC,EAAE,WAAW,CAAA;KAAE,GACjC,OAAO,CAAC,QAAQ,CAAC;IAwBpB,OAAO,CAAC,aAAa;IAiGrB;;OAEG;IACH,OAAO,CAAC,MAAM,CAAC,oBAAoB;IAkBnC;;OAEG;IACH,OAAO,CAAC,cAAc;CAuBvB"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"user-simulator.d.ts","sourceRoot":"","sources":["../../../../../src/lib/integrations/simulation/agents/user-simulator.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAC/C,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,WAAW,CAAC;AAC7C,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAMtC,eAAO,MAAM,6BAA6B,urBAgBE,CAAC;AAM7C,MAAM,WAAW,wBAAyB,SAAQ,WAAW;IAC3D,4DAA4D;IAC5D,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAMD;;;;;GAKG;AACH,qBAAa,kBAAmB,SAAQ,SAAS;IAC/C,OAAO,CAAC,kBAAkB,CAAgB;gBAE9B,MAAM,CAAC,EAAE,wBAAwB;IAK7C,IAAI,IAAI,IAAI,MAAM,CAEjB;IAED,IAAI,YAAY,IAAI,MAAM,CAKzB;IAED;;;;;OAKG;IACG,oBAAoB,CAAC,QAAQ,CAAC,EAAE,WAAW,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC;
|
|
1
|
+
{"version":3,"file":"user-simulator.d.ts","sourceRoot":"","sources":["../../../../../src/lib/integrations/simulation/agents/user-simulator.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAC/C,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,WAAW,CAAC;AAC7C,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAMtC,eAAO,MAAM,6BAA6B,urBAgBE,CAAC;AAM7C,MAAM,WAAW,wBAAyB,SAAQ,WAAW;IAC3D,4DAA4D;IAC5D,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAMD;;;;;GAKG;AACH,qBAAa,kBAAmB,SAAQ,SAAS;IAC/C,OAAO,CAAC,kBAAkB,CAAgB;gBAE9B,MAAM,CAAC,EAAE,wBAAwB;IAK7C,IAAI,IAAI,IAAI,MAAM,CAEjB;IAED,IAAI,YAAY,IAAI,MAAM,CAKzB;IAED;;;;;OAKG;IACG,oBAAoB,CAAC,QAAQ,CAAC,EAAE,WAAW,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC;IAcrE;;;;;OAKG;IACH,aAAa,CAAC,cAAc,CAAC,EAAE,MAAM,EAAE,eAAe,CAAC,EAAE,MAAM,GAAG,IAAI;CAWvE"}
|
|
@@ -61,7 +61,10 @@ export class UserSimulatorAgent extends BaseAgent {
|
|
|
61
61
|
role: "user",
|
|
62
62
|
content: "Generate your first message to start the conversation. Remember your goal and persona.",
|
|
63
63
|
});
|
|
64
|
-
return this.respondAsync(promptMessages, {
|
|
64
|
+
return this.respondAsync(promptMessages, {
|
|
65
|
+
temperature: 0.8,
|
|
66
|
+
llmPurpose: "first_message",
|
|
67
|
+
});
|
|
65
68
|
}
|
|
66
69
|
/**
|
|
67
70
|
* Update the persona and scenario context.
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"first-message-generator.d.ts","sourceRoot":"","sources":["../../../../../src/lib/integrations/simulation/generators/first-message-generator.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,MAAM,MAAM,QAAQ,CAAC;
|
|
1
|
+
{"version":3,"file":"first-message-generator.d.ts","sourceRoot":"","sources":["../../../../../src/lib/integrations/simulation/generators/first-message-generator.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,MAAM,MAAM,QAAQ,CAAC;AAS5B,OAAO,KAAK,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAuDrD;;GAEG;AACH,MAAM,WAAW,2BAA2B;IAC1C,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED;;;;;GAKG;AACH,qBAAa,qBAAqB;IAChC,OAAO,CAAC,KAAK,CAAS;IACtB,OAAO,CAAC,MAAM,CAAS;gBAEX,MAAM,CAAC,EAAE,2BAA2B;IAkBhD;;;;;;;;;OASG;IACG,QAAQ,CAAC,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,QAAQ,GAAG,OAAO,CAAC,MAAM,CAAC;CAyFtE"}
|
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
* Generates contextually appropriate first messages based on persona and scenario.
|
|
5
5
|
*/
|
|
6
6
|
import OpenAI from "openai";
|
|
7
|
+
import { getTraceContextHeaders, recordLLMInput, recordLLMResponse, withLLMSpan, withSimulationSpan, } from "../tracing.js";
|
|
7
8
|
import { buildPersonaSystemPrompt, buildScenarioUserContext, } from "../utils/prompt-builders.js";
|
|
8
9
|
// Temperature setting for message generation
|
|
9
10
|
const TEMPERATURE_FIRST_MESSAGE = 0.8;
|
|
@@ -88,9 +89,14 @@ export class FirstMessageGenerator {
|
|
|
88
89
|
* @returns Generated first message string
|
|
89
90
|
*/
|
|
90
91
|
async generate(persona, scenario) {
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
92
|
+
return withSimulationSpan("orq.simulation.first_message_generation", {
|
|
93
|
+
"orq.simulation.persona": persona.name,
|
|
94
|
+
"orq.simulation.scenario": scenario.name,
|
|
95
|
+
"orq.simulation.model": this.model,
|
|
96
|
+
}, async (_span) => {
|
|
97
|
+
const personaContext = buildPersonaSystemPrompt(persona);
|
|
98
|
+
const scenarioContext = buildScenarioUserContext(scenario);
|
|
99
|
+
const userPrompt = `PERSONA:
|
|
94
100
|
${personaContext}
|
|
95
101
|
|
|
96
102
|
SCENARIO:
|
|
@@ -99,33 +105,50 @@ ${scenarioContext}
|
|
|
99
105
|
Generate the FIRST message this user would send to start the conversation.
|
|
100
106
|
The message should immediately convey their goal and emotional state.
|
|
101
107
|
Keep it natural - this is how they would actually open a conversation.`;
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
model: this.model,
|
|
105
|
-
messages: [
|
|
108
|
+
try {
|
|
109
|
+
const llmMessages = [
|
|
106
110
|
{ role: "system", content: FIRST_MESSAGE_PROMPT },
|
|
107
111
|
{ role: "user", content: userPrompt },
|
|
108
|
-
]
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
112
|
+
];
|
|
113
|
+
const response = await withLLMSpan({
|
|
114
|
+
model: this.model,
|
|
115
|
+
temperature: TEMPERATURE_FIRST_MESSAGE,
|
|
116
|
+
maxTokens: 500,
|
|
117
|
+
purpose: "first_message",
|
|
118
|
+
}, async (llmSpan) => {
|
|
119
|
+
recordLLMInput(llmSpan, llmMessages.map((m) => ({
|
|
120
|
+
role: m.role,
|
|
121
|
+
content: typeof m.content === "string" ? m.content : "",
|
|
122
|
+
})));
|
|
123
|
+
const traceHeaders = await getTraceContextHeaders();
|
|
124
|
+
const res = await this.client.chat.completions.create({
|
|
125
|
+
model: this.model,
|
|
126
|
+
messages: llmMessages,
|
|
127
|
+
temperature: TEMPERATURE_FIRST_MESSAGE,
|
|
128
|
+
max_tokens: 500,
|
|
129
|
+
}, { headers: traceHeaders });
|
|
130
|
+
recordLLMResponse(llmSpan, res);
|
|
131
|
+
return res;
|
|
132
|
+
});
|
|
133
|
+
let message = response.choices[0]?.message.content ?? "";
|
|
134
|
+
message = message.trim().replace(/^["']|["']$/g, "");
|
|
135
|
+
console.debug(`Generated first message: ${message.substring(0, 100)}...`);
|
|
136
|
+
return message;
|
|
125
137
|
}
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
138
|
+
catch (e) {
|
|
139
|
+
// Re-throw auth errors — a bad API key should fail fast, not silently
|
|
140
|
+
// produce meaningless results for the entire simulation run.
|
|
141
|
+
if (e instanceof Error &&
|
|
142
|
+
"status" in e &&
|
|
143
|
+
(e.status === 401 ||
|
|
144
|
+
e.status === 403)) {
|
|
145
|
+
throw e;
|
|
146
|
+
}
|
|
147
|
+
console.warn(`FirstMessageGenerator: API call failed, using generic fallback. Error: ${e}`);
|
|
148
|
+
// Fallback to a generic message based on scenario (no persona traits applied)
|
|
149
|
+
_span?.setAttribute("orq.simulation.first_message_fallback", true);
|
|
150
|
+
return `Hi, I need help with: ${scenario.goal}`;
|
|
151
|
+
}
|
|
152
|
+
});
|
|
130
153
|
}
|
|
131
154
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"persona-generator.d.ts","sourceRoot":"","sources":["../../../../../src/lib/integrations/simulation/generators/persona-generator.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,MAAM,MAAM,QAAQ,CAAC;
|
|
1
|
+
{"version":3,"file":"persona-generator.d.ts","sourceRoot":"","sources":["../../../../../src/lib/integrations/simulation/generators/persona-generator.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,MAAM,MAAM,QAAQ,CAAC;AAS5B,OAAO,KAAK,EAAsB,OAAO,EAAE,MAAM,aAAa,CAAC;AAiE/D;;GAEG;AACH,MAAM,WAAW,sBAAsB;IACrC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED;;;;;GAKG;AACH,qBAAa,gBAAgB;IAC3B,OAAO,CAAC,KAAK,CAAS;IACtB,OAAO,CAAC,MAAM,CAAS;gBAEX,MAAM,CAAC,EAAE,sBAAsB;IAkB3C;;OAEG;IACH,OAAO,CAAC,MAAM,CAAC,aAAa;IAsC5B;;OAEG;IACG,QAAQ,CAAC,MAAM,EAAE;QACrB,gBAAgB,EAAE,MAAM,CAAC;QACzB,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,kBAAkB,CAAC,EAAE,MAAM,CAAC;KAC7B,GAAG,OAAO,CAAC,OAAO,EAAE,CAAC;IA+EtB;;;;;OAKG;IACG,oBAAoB,CAAC,MAAM,EAAE;QACjC,gBAAgB,EAAE,MAAM,CAAC;QACzB,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,kBAAkB,CAAC,EAAE,MAAM,CAAC;KAC7B,GAAG,OAAO,CAAC,OAAO,EAAE,CAAC;IA2KtB;;OAEG;IACH,OAAO,CAAC,mBAAmB;IA2B3B;;OAEG;IACH,OAAO,CAAC,oBAAoB;CAyB7B"}
|