@kognitivedev/voice-tracing 0.2.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +2 -0
- package/.turbo/turbo-test.log +288 -0
- package/CHANGELOG.md +10 -0
- package/dist/backend-adapter.d.ts +16 -0
- package/dist/backend-adapter.js +58 -0
- package/dist/index.d.ts +4 -0
- package/dist/index.js +10 -0
- package/dist/kognitive-adapter.d.ts +8 -0
- package/dist/kognitive-adapter.js +17 -0
- package/dist/reporter.d.ts +12 -0
- package/dist/reporter.js +1249 -0
- package/dist/types.d.ts +177 -0
- package/dist/types.js +2 -0
- package/package.json +37 -0
- package/src/__tests__/voice-tracing.test.ts +341 -0
- package/src/backend-adapter.ts +82 -0
- package/src/index.ts +12 -0
- package/src/kognitive-adapter.ts +22 -0
- package/src/reporter.ts +1492 -0
- package/src/types.ts +76 -0
- package/tsconfig.json +17 -0
package/src/reporter.ts
ADDED
|
@@ -0,0 +1,1492 @@
|
|
|
1
|
+
import {
|
|
2
|
+
buildRemoteLogPayload,
|
|
3
|
+
buildRemoteRunPayload,
|
|
4
|
+
buildRemoteTraceFinishPayload,
|
|
5
|
+
buildRemoteTraceStartPayload,
|
|
6
|
+
createRemoteExecutionContext,
|
|
7
|
+
createRemoteRunContext,
|
|
8
|
+
generateId,
|
|
9
|
+
} from "@kognitivedev/shared";
|
|
10
|
+
import type { KognitiveContentPart, KognitiveUIMessage, Metadata, ResourceId } from "@kognitivedev/shared";
|
|
11
|
+
import type {
|
|
12
|
+
PersistedConversationMessage,
|
|
13
|
+
PersistedConversationPart,
|
|
14
|
+
VoiceTracingAdapter,
|
|
15
|
+
VoiceTracingReporterConfig,
|
|
16
|
+
VoiceTracingSessionEvent,
|
|
17
|
+
VoiceTracingTelemetryEvent,
|
|
18
|
+
VoiceTracingUsageSnapshot,
|
|
19
|
+
} from "./types";
|
|
20
|
+
|
|
21
|
+
/**
 * Usage record for one billable voice operation.
 *
 * Built by `usageToCostEvent`; the token counters are only filled in when the
 * source usage snapshot is token based.
 */
type UsageEventInput = {
  /** Usage category, e.g. "voice_transcription" or "voice_chat". */
  eventType: string;
  /** Model the usage is attributed to. */
  modelId?: string;
  /** Total input tokens reported by the snapshot. */
  inputTokens?: number;
  /** Total output tokens reported by the snapshot. */
  outputTokens?: number;
  /** Input tokens that were served from cache. */
  cachedInputTokens?: number;
  /** Locally computed cost in cents; undefined when no price could be derived. */
  costCentsOverride?: number;
};
|
|
29
|
+
|
|
30
|
+
/**
 * A session-level event queued for delivery with a conversation log.
 *
 * `payloadForSessionEvent` produces these from telemetry events.
 */
type SessionEventInput = {
  /** Event name as stored by the backend, e.g. "voice.call.started". */
  eventType: string;
  /** Event-specific data. */
  payload: Record<string, unknown>;
  /** Optional agent name to attribute the event to. */
  agentName?: string;
  /** Optional trace identifier to link the event to. */
  traceId?: string;
  /** Optional agent-run identifier to link the event to. */
  agentRunId?: string;
};
|
|
37
|
+
|
|
38
|
+
/**
 * In-memory state of the voice turn that is currently being traced.
 */
type ActiveTurn = {
  /** Remote execution context created for this turn. */
  execution: ReturnType<typeof createRemoteExecutionContext>;
  /** Zero-based position of the turn within the call. */
  turnIndex: number;
  /** ISO-8601 timestamp taken when the turn started. */
  startedAt: string;
  /** Truncated user transcript that opened the turn. */
  inputPreview: string;
  /** Id of the user message that opened the turn, when known. */
  startMessageId?: string;
  /** Usage recorded for transcribing the user's speech. */
  transcriptionUsage?: UsageEventInput;
  /** Response identifier associated with the turn (presumably the provider's response id; confirm against the event handlers). */
  responseId?: string;
  /** Truncated assistant output, when one has been captured. */
  outputPreview?: string;
  /** ISO-8601 timestamp recorded when the turn completed. */
  completedAt?: string;
  /** Status recorded for the completed response. */
  completionStatus?: string;
  /** Tool-call ids tracked for this turn; starts empty. */
  pendingToolCallIds: Set<string>;
};
|
|
51
|
+
|
|
52
|
+
/**
 * Trace event reconstructed from persisted messages rather than observed live.
 *
 * `extractSyntheticToolEvents` emits these for tool calls and tool results.
 */
type SyntheticTraceEvent = {
  /** Event name, e.g. "tool.started", "tool.completed" or "tool.failed". */
  eventType: string;
  /** Key tying related events together; tool events use the tool call id. */
  spanKey?: string;
  /** Span status, e.g. "active", "completed" or "error". */
  status?: string;
  /** Event-specific data. */
  payload?: Record<string, unknown>;
};
|
|
58
|
+
|
|
59
|
+
/**
 * One conversational turn derived from the message history by
 * `deriveSyntheticVoiceTurns`.
 */
type SyntheticVoiceTurn = {
  /** Zero-based position of the turn among the derived turns. */
  turnIndex: number;
  /** Persisted messages that belong to this turn. */
  messages: PersistedConversationMessage[];
  /** Truncated text of the turn's user messages (may be empty). */
  inputPreview: string;
  /** Truncated text of the turn's assistant messages, or null when empty. */
  outputPreview: string | null;
  /** Tool events reconstructed from the turn's messages. */
  toolEvents: SyntheticTraceEvent[];
  /** True when the turn contains at least one assistant message. */
  hasAssistantOutput: boolean;
};
|
|
67
|
+
|
|
68
|
+
/**
 * Database identifiers returned by the backend, used to link later payloads
 * to the same session and agent run.
 */
type ReportingLinkage = {
  /** Backend row id of the session, or null when the backend did not return one. */
  sessionDbId?: string | null;
  /** Backend row id of the agent run, or null when the backend did not return one. */
  agentRunDbId?: string | null;
};
|
|
72
|
+
|
|
73
|
+
/**
 * Per-model price table. Every rate is in US dollars per one million tokens;
 * a missing rate is treated as free by the cost calculation.
 */
type VoiceModelPricingOverride = {
  /** Non-cached text input. */
  textInputPerMillion?: number;
  /** Text output. */
  textOutputPerMillion?: number;
  /** Cached text input. */
  textCachedInputPerMillion?: number;
  /** Non-cached audio input. */
  audioInputPerMillion?: number;
  /** Audio output. */
  audioOutputPerMillion?: number;
  /** Cached audio input. */
  audioCachedInputPerMillion?: number;
};
|
|
81
|
+
|
|
82
|
+
// Official OpenAI model docs as of 2026-04-06:
// gpt-realtime: $4/$16 text input/output per 1M, $32/$64 audio input/output per 1M, $0.40 cached input per 1M.
// The same rates are listed for gpt-realtime-1.5, so both entries are built from one template.
const GPT_REALTIME_RATES: VoiceModelPricingOverride = {
  textInputPerMillion: 4,
  textOutputPerMillion: 16,
  textCachedInputPerMillion: 0.4,
  audioInputPerMillion: 32,
  audioOutputPerMillion: 64,
  audioCachedInputPerMillion: 0.4,
};

/**
 * Locally maintained price list (US dollars per 1M tokens) keyed by model id.
 * Models that are not listed here get no locally computed cost.
 */
const VOICE_MODEL_PRICING_OVERRIDES: Record<string, VoiceModelPricingOverride> = {
  // Each realtime entry gets its own copy so the two records stay independent objects.
  "gpt-realtime": { ...GPT_REALTIME_RATES },
  "gpt-realtime-1.5": { ...GPT_REALTIME_RATES },
  // GPT-4o Transcribe: $2.50 audio input and $10 text output per 1M.
  "gpt-4o-transcribe": {
    audioInputPerMillion: 2.5,
    textOutputPerMillion: 10,
  },
  "gpt-4o-mini-transcribe": {
    audioInputPerMillion: 1.25,
    textOutputPerMillion: 5,
  },
};
|
|
111
|
+
|
|
112
|
+
/**
 * Resolves the user id to report for a resource.
 *
 * @param resourceId Resource descriptor that may carry a `userId`.
 * @returns The `userId` as a string, or "system" when it is missing or falsy.
 */
function getUserId(resourceId: ResourceId): string {
  const { userId } = resourceId;
  if (!userId) {
    return "system";
  }
  return String(userId);
}
|
|
115
|
+
|
|
116
|
+
/**
 * Writes a reporter diagnostic to the console.
 *
 * @param message Short description, prefixed with "[VoiceTracing:reporter] ".
 * @param payload Structured context logged as the second console argument.
 */
function logVoiceTracing(message: string, payload: Record<string, unknown>) {
  const prefixed = "[VoiceTracing:reporter] " + message;
  console.log(prefixed, payload);
}
|
|
119
|
+
|
|
120
|
+
/**
 * Builds the metadata object attached to the voice session and its runs.
 *
 * The parameter is typed with `VoiceTracingReporterConfig`, the config type
 * this file imports (the previous annotation referenced a name that is not
 * imported anywhere in the module).
 *
 * @param config Reporter configuration supplied by the caller.
 * @returns Voice defaults merged with `config.metadata`. Because the caller's
 *   metadata is spread last, its keys override the defaults of the same name.
 */
function getSessionMetadata(config: VoiceTracingReporterConfig) {
  return {
    kind: "voice",
    agentName: config.agentName,
    // WebRTC is the default transport when the caller does not specify one.
    transport: config.transport ?? "webrtc",
    callId: config.callId,
    modelId: config.modelId,
    transcriptionModelId: config.transcriptionModelId,
    voice: config.voice,
    // Keep the caller's own session id for cross-referencing; the reporter's session id is the call id.
    sourceSessionId: config.resourceId.sessionId ? String(config.resourceId.sessionId) : undefined,
    ...(config.metadata ?? {}),
  };
}
|
|
133
|
+
|
|
134
|
+
/**
 * Pulls backend linkage ids out of an arbitrary adapter response.
 *
 * @param value Whatever the adapter returned; anything that is not a non-null
 *   object yields an empty linkage.
 * @returns `sessionDbId` and `agentRunDbId`, each a string or null. The run id
 *   is read from `runDbId` first and falls back to `agentRunDbId`.
 */
function extractReportingLinkage(value: unknown): ReportingLinkage {
  if (typeof value !== "object" || value === null) {
    return {};
  }

  const fields = value as Record<string, unknown>;
  const stringOrNull = (candidate: unknown): string | null =>
    typeof candidate === "string" ? candidate : null;

  const sessionDbId = stringOrNull(fields.sessionDbId);
  const agentRunDbId = stringOrNull(fields.runDbId) ?? stringOrNull(fields.agentRunDbId);
  return { sessionDbId, agentRunDbId };
}
|
|
146
|
+
|
|
147
|
+
/**
 * Truncates text for use as a short preview.
 *
 * The cut is made on UTF-16 code units, as before, but a cut that would land
 * in the middle of a surrogate pair (e.g. an emoji) is moved back one unit so
 * the preview never ends with a dangling high surrogate.
 *
 * @param text Text to shorten.
 * @param max Maximum number of UTF-16 code units to keep (default 240).
 * @returns The leading part of `text`, at most `max` code units long.
 */
function preview(text: string, max = 240): string {
  const clipped = text.slice(0, max);
  // Nothing was removed, or nothing is left: there is no pair to repair.
  if (clipped.length === 0 || clipped.length === text.length) {
    return clipped;
  }

  const lastKept = clipped.charCodeAt(clipped.length - 1);
  const firstDropped = text.charCodeAt(clipped.length);
  const endsWithHighSurrogate = lastKept >= 0xd800 && lastKept <= 0xdbff;
  const dropsLowSurrogate = firstDropped >= 0xdc00 && firstDropped <= 0xdfff;

  // Only trim when the cut actually separates the two halves of one code point.
  return endsWithHighSurrogate && dropsLowSurrogate ? clipped.slice(0, -1) : clipped;
}
|
|
150
|
+
|
|
151
|
+
/**
 * Rounds a cent amount to six decimal places.
 *
 * @param value Amount in cents.
 * @returns `value` rounded to the nearest millionth of a cent.
 */
function roundCents(value: number): number {
  const scale = 1_000_000;
  const scaled = Math.round(value * scale);
  return scaled / scale;
}
|
|
154
|
+
|
|
155
|
+
/**
 * Converts a token count and a per-million dollar rate into cents.
 *
 * @param tokens Number of tokens; missing or zero means no cost.
 * @param dollarsPerMillion Price in US dollars per one million tokens; missing
 *   or zero means no cost.
 * @returns Unrounded cost in cents.
 */
function toCostCents(tokens: number | undefined, dollarsPerMillion: number | undefined): number {
  const billable = Boolean(tokens) && Boolean(dollarsPerMillion);
  if (!billable) {
    return 0;
  }
  // Non-null assertions are safe: both values were just checked to be truthy.
  const millions = tokens! / 1_000_000;
  return millions * dollarsPerMillion! * 100;
}
|
|
159
|
+
|
|
160
|
+
/**
 * Computes the cost of a token-based usage snapshot from the local price list.
 *
 * Cached input tokens are treated as a subset of the input totals: they are
 * subtracted from the regular input counts (never below zero) and billed at
 * the cached rate instead.
 *
 * @param usage Usage snapshot; only snapshots of type "tokens" are priced.
 * @param modelId Model id used to look up `VOICE_MODEL_PRICING_OVERRIDES`.
 * @returns Cost in cents rounded by `roundCents`, or undefined when the usage
 *   is missing or not token based, the model has no price entry, or every
 *   detailed token counter is zero.
 */
function calculateVoiceUsageCostCents(usage: VoiceTracingUsageSnapshot | undefined, modelId: string | undefined): number | undefined {
  if (!usage || usage.type !== "tokens" || !modelId) {
    return undefined;
  }

  const rates = VOICE_MODEL_PRICING_OVERRIDES[modelId];
  if (!rates) {
    return undefined;
  }

  const audioIn = usage.inputAudioTokens ?? 0;
  const textIn = usage.inputTextTokens ?? 0;
  const audioOut = usage.outputAudioTokens ?? 0;
  const textOut = usage.outputTextTokens ?? 0;
  const audioCached = usage.cachedAudioTokens ?? 0;
  const textCached = usage.cachedTextTokens ?? 0;

  // Without any per-modality breakdown there is nothing to price locally.
  const detailedTotal = audioIn + textIn + audioOut + textOut + audioCached + textCached;
  if (detailedTotal === 0) {
    return undefined;
  }

  // Cached tokens are billed separately, so remove them from the full-price input.
  const audioInFullPrice = Math.max(0, audioIn - audioCached);
  const textInFullPrice = Math.max(0, textIn - textCached);

  let cents = toCostCents(audioInFullPrice, rates.audioInputPerMillion);
  cents += toCostCents(textInFullPrice, rates.textInputPerMillion);
  cents += toCostCents(audioCached, rates.audioCachedInputPerMillion);
  cents += toCostCents(textCached, rates.textCachedInputPerMillion);
  cents += toCostCents(audioOut, rates.audioOutputPerMillion);
  cents += toCostCents(textOut, rates.textOutputPerMillion);

  return roundCents(cents);
}
|
|
198
|
+
|
|
199
|
+
/**
 * Converts an image content part into its persisted form.
 *
 * Resolution order:
 * 1. a non-empty `url` becomes a URL source;
 * 2. an `image` string shaped like `data:<type>;base64,<data>` becomes a
 *    base64 source with the media type taken from the data URL;
 * 3. any other non-empty `image` string is stored as base64 data with
 *    `part.mediaType`, defaulting to "image/png".
 *
 * @param part Image part to normalize.
 * @returns The persisted image part, or null when the part carries neither a
 *   usable `url` nor a usable `image` string.
 */
function normalizeImagePart(part: Extract<KognitiveContentPart, { type: "image" }>): PersistedConversationPart | null {
  const hasUrl = typeof part.url === "string" && part.url.length > 0;
  if (hasUrl) {
    return { type: "image", source: { type: "url", url: part.url } };
  }

  const raw = part.image;
  if (typeof raw !== "string" || raw.length === 0) {
    return null;
  }

  const parsed = /^data:([^;,]+);base64,(.+)$/.exec(raw);
  if (parsed !== null) {
    const [, mediaType, data] = parsed;
    return { type: "image", source: { type: "base64", media_type: mediaType, data } };
  }

  // Not a data URL: treat the string as bare base64 and fall back to PNG.
  return {
    type: "image",
    source: { type: "base64", media_type: part.mediaType ?? "image/png", data: raw },
  };
}
|
|
235
|
+
|
|
236
|
+
/**
 * Converts a file content part into its persisted form.
 *
 * @param part File part to normalize.
 * @returns A persisted "file" part. `url` is kept only when it is a string;
 *   the name prefers `filename` over `name`; the MIME type prefers
 *   `mediaType`, then `mimeType`, then `mime_type`.
 */
function normalizeFilePart(part: Extract<KognitiveContentPart, { type: "file" }>): PersistedConversationPart {
  const url = typeof part.url === "string" ? part.url : undefined;
  const name = part.filename ?? part.name;
  const mime_type = part.mediaType ?? part.mimeType ?? part.mime_type;

  return {
    type: "file",
    file: { url, name, mime_type },
  };
}
|
|
246
|
+
|
|
247
|
+
/**
 * Expands a "dynamic-tool" content part into persisted parts.
 *
 * @param part Dynamic tool part to normalize.
 * @returns In order: a "tool-call" part when `input` is defined, then a
 *   "tool-result" part when `output` is defined or `errorText` is set
 *   (flagged `isError` when the state is "error" or `errorText` is set).
 *   When neither applies, a single text part "<toolName> <state>" is returned.
 */
function normalizeDynamicToolPart(part: Extract<KognitiveContentPart, { type: "dynamic-tool" }>): PersistedConversationPart[] {
  const { toolCallId, toolName } = part;
  const hasInput = part.input !== undefined;
  const hasOutcome = part.output !== undefined || Boolean(part.errorText);

  if (!hasInput && !hasOutcome) {
    // Nothing concrete to persist yet; record the tool's state as plain text.
    return [{ type: "text", text: `${toolName} ${part.state}` }];
  }

  const expanded: PersistedConversationPart[] = [];

  if (hasInput) {
    expanded.push({ type: "tool-call", toolCallId, toolName, input: part.input });
  }

  if (hasOutcome) {
    const failed = part.state === "error" || Boolean(part.errorText);
    expanded.push({
      type: "tool-result",
      toolCallId,
      toolName,
      // The error text stands in for the result when the tool produced no output.
      result: part.output ?? part.errorText,
      ...(failed ? { isError: true } : {}),
    });
  }

  return expanded;
}
|
|
278
|
+
|
|
279
|
+
/**
 * Converts UI content parts into the persisted part format.
 *
 * Consecutive "text" and "reasoning" parts are trimmed and merged into one
 * text part (joined with a single space). Any tool, image, file,
 * dynamic-tool or data part first flushes the pending text so the original
 * ordering is preserved. Parts of an unrecognised type are skipped and do not
 * flush pending text.
 *
 * @param parts Content parts of one UI message.
 * @returns Persisted parts in their original order.
 */
function normalizeConversationParts(parts: KognitiveContentPart[]): PersistedConversationPart[] {
  const result: PersistedConversationPart[] = [];
  let pendingText: string[] = [];

  // Emits the accumulated text as one part and starts a fresh accumulator.
  const emitPendingText = () => {
    const joined = pendingText.join(" ").trim();
    if (joined) {
      result.push({ type: "text", text: joined });
      pendingText = [];
    }
  };

  for (const part of parts) {
    if (part.type === "text" || part.type === "reasoning") {
      const trimmed = part.text.trim();
      if (trimmed) pendingText.push(trimmed);
      continue;
    }

    switch (part.type) {
      case "tool-call": {
        emitPendingText();
        const { toolCallId, toolName, input } = part;
        result.push({ type: "tool-call", toolCallId, toolName, input });
        break;
      }
      case "tool-result": {
        emitPendingText();
        result.push({
          type: "tool-result",
          toolCallId: part.toolCallId,
          toolName: part.toolName,
          output: part.output,
          result: part.result,
          ...(part.isError ? { isError: true } : {}),
        });
        break;
      }
      case "image": {
        emitPendingText();
        // Images without a usable source are dropped.
        const image = normalizeImagePart(part);
        if (image) result.push(image);
        break;
      }
      case "file": {
        emitPendingText();
        result.push(normalizeFilePart(part));
        break;
      }
      case "dynamic-tool": {
        emitPendingText();
        for (const expanded of normalizeDynamicToolPart(part)) {
          result.push(expanded);
        }
        break;
      }
      case "data": {
        emitPendingText();
        // Structured data is persisted as pretty-printed JSON text.
        const text = typeof part.data === "string" ? part.data : JSON.stringify(part.data, null, 2);
        result.push({ type: "text", text });
        break;
      }
      default:
        break;
    }
  }

  emitPendingText();
  return result;
}
|
|
345
|
+
|
|
346
|
+
/**
 * Converts UI messages into persisted conversation messages.
 *
 * Messages whose parts normalize to nothing are dropped. A message that
 * normalizes to exactly one text part is stored with plain string content;
 * every other message keeps the array of normalized parts.
 *
 * @param messages UI messages in conversation order.
 * @returns Persisted messages, each carrying the source role and metadata.
 */
function toPersistedConversationMessages(messages: KognitiveUIMessage[]): PersistedConversationMessage[] {
  const persisted: PersistedConversationMessage[] = [];

  for (const message of messages) {
    const parts = normalizeConversationParts(message.parts);
    if (parts.length === 0) continue;

    const first = parts[0];
    if (parts.length === 1 && first.type === "text" && typeof first.text === "string") {
      persisted.push({ role: message.role, content: first.text, metadata: message.metadata });
    } else {
      persisted.push({ role: message.role, content: parts, metadata: message.metadata });
    }
  }

  return persisted;
}
|
|
368
|
+
|
|
369
|
+
/**
 * Produces a plain-text rendering of a persisted message.
 *
 * @param message Persisted message with string or part-array content.
 * @returns String content unchanged; for part arrays, the non-empty text
 *   parts plus "Called <tool>" / "Received <tool> result" placeholders for
 *   tool parts, joined with spaces and trimmed. Other part types contribute
 *   nothing.
 */
function getPersistedMessageText(message: PersistedConversationMessage): string {
  const { content } = message;
  if (typeof content === "string") {
    return content;
  }

  const fragments: string[] = [];
  for (const part of content) {
    if (part.type === "text" && typeof part.text === "string") {
      // Empty text parts are skipped so they do not add stray separators.
      if (part.text) fragments.push(part.text);
    } else if (part.type === "tool-call" && typeof part.toolName === "string") {
      fragments.push(`Called ${part.toolName}`);
    } else if (part.type === "tool-result" && typeof part.toolName === "string") {
      fragments.push(`Received ${part.toolName} result`);
    }
  }

  return fragments.join(" ").trim();
}
|
|
383
|
+
|
|
384
|
+
/**
 * Finds the text of the most recent message with the given role.
 *
 * @param messages Persisted messages in conversation order.
 * @param role Role to look for.
 * @returns Text of the last message of that role whose rendered text is
 *   non-empty, or null when there is none.
 */
function findLatestMessageText(messages: PersistedConversationMessage[], role: PersistedConversationMessage["role"]): string | null {
  let cursor = messages.length;

  // Walk backwards so the newest matching message wins.
  while (cursor-- > 0) {
    const candidate = messages[cursor];
    if (candidate.role === role) {
      const text = getPersistedMessageText(candidate);
      if (text) return text;
    }
  }

  return null;
}
|
|
394
|
+
|
|
395
|
+
/**
 * Builds a comparison key for a list of persisted messages.
 *
 * @param messages Persisted messages to fingerprint.
 * @returns The JSON serialization of `messages`; two lists share a signature
 *   exactly when they serialize to the same JSON text.
 */
function buildPersistedMessageSignature(messages: PersistedConversationMessage[]): string {
  const serialized = JSON.stringify(messages);
  return serialized;
}
|
|
398
|
+
|
|
399
|
+
/**
 * Extracts the readable text of a UI message.
 *
 * @param message UI message whose parts are inspected.
 * @returns The trimmed, non-empty "text" and "reasoning" parts joined with a
 *   single space; all other part types are ignored.
 */
function getUiMessageText(message: KognitiveUIMessage): string {
  const pieces: string[] = [];

  for (const part of message.parts) {
    if (part.type !== "text" && part.type !== "reasoning") continue;
    const trimmed = part.text.trim();
    if (trimmed) pieces.push(trimmed);
  }

  return pieces.join(" ").trim();
}
|
|
410
|
+
|
|
411
|
+
/**
 * Builds a de-duplication key for an assistant output-completed event.
 *
 * The parameter is narrowed from `VoiceTracingTelemetryEvent`, the telemetry
 * union this file imports (the previous annotation referenced a name that is
 * not imported anywhere in the module).
 *
 * @param event The "voice.response.output.completed" telemetry event.
 * @returns The most specific key available, in this order:
 *   `item:<itemId>`, `response:<responseId>:<outputText>`,
 *   `response:<responseId>`, and finally `text:<outputText>` (empty text
 *   when the event carries none).
 */
function buildAssistantCompletionKey(event: Extract<VoiceTracingTelemetryEvent, { type: "voice.response.output.completed" }>): string {
  // An item id identifies the output on its own.
  if (event.itemId) {
    return `item:${event.itemId}`;
  }

  // Without an item id, the text distinguishes multiple outputs of one response.
  if (event.responseId && event.outputText) {
    return `response:${event.responseId}:${event.outputText}`;
  }

  if (event.responseId) {
    return `response:${event.responseId}`;
  }

  return `text:${event.outputText ?? ""}`;
}
|
|
426
|
+
|
|
427
|
+
/**
 * Builds a truncated preview of everything one role said during a turn.
 *
 * @param messages Persisted messages of the turn.
 * @param role Role whose messages are included.
 * @param maxLength Maximum preview length passed to `preview`.
 * @returns The non-empty message texts of that role joined with newlines,
 *   trimmed and truncated; an empty string when there is nothing to show.
 */
function buildTurnPreview(messages: PersistedConversationMessage[], role: PersistedConversationMessage["role"], maxLength: number): string {
  const lines: string[] = [];

  for (const message of messages) {
    if (message.role !== role) continue;
    const text = getPersistedMessageText(message);
    if (text) lines.push(text);
  }

  const combined = lines.join("\n").trim();
  return preview(combined, maxLength);
}
|
|
437
|
+
|
|
438
|
+
/**
 * Reconstructs tool trace events from persisted messages.
 *
 * Only messages with part-array content are inspected. Each "tool-call" part
 * with string `toolCallId` and `toolName` yields a "tool.started" event; each
 * such "tool-result" part yields "tool.completed", or "tool.failed" when
 * `isError` is true. Every event uses the tool call id as its span key.
 *
 * Previews are limited to 220 characters. Two hardening changes over the
 * plain `JSON.stringify(...).slice(...)` approach:
 * - string error results are used verbatim for `errorMessage` instead of
 *   being JSON-encoded (which wrapped the message in literal quotes);
 * - values that `JSON.stringify` cannot serialize (it throws on circular
 *   structures and BigInt, and returns undefined for functions and symbols)
 *   fall back to `String(value)` instead of crashing.
 *
 * @param messages Persisted messages to scan, in conversation order.
 * @returns Synthetic tool events in the order the parts appear.
 */
function extractSyntheticToolEvents(messages: PersistedConversationMessage[]): SyntheticTraceEvent[] {
  const PREVIEW_LIMIT = 220;

  // JSON preview of an arbitrary value that never throws.
  const toJsonPreview = (value: unknown): string => {
    let serialized: string | undefined;
    try {
      serialized = JSON.stringify(value);
    } catch {
      serialized = undefined;
    }
    return (serialized ?? String(value)).slice(0, PREVIEW_LIMIT);
  };

  // Human-readable error text: strings stay as they are, everything else is JSON.
  const toErrorMessage = (value: unknown): string =>
    typeof value === "string" ? value.slice(0, PREVIEW_LIMIT) : toJsonPreview(value);

  const events: SyntheticTraceEvent[] = [];

  for (const message of messages) {
    if (!Array.isArray(message.content)) continue;

    for (const part of message.content) {
      if (part.type === "tool-call" && typeof part.toolCallId === "string" && typeof part.toolName === "string") {
        events.push({
          eventType: "tool.started",
          spanKey: part.toolCallId,
          status: "active",
          payload: {
            toolCallId: part.toolCallId,
            toolName: part.toolName,
            inputPreview: toJsonPreview(part.input ?? {}),
          },
        });
        continue;
      }

      if (part.type === "tool-result" && typeof part.toolCallId === "string" && typeof part.toolName === "string") {
        const isError = part.isError === true;
        // `result` takes precedence over `output` when both are present.
        const outcome = part.result ?? part.output;
        events.push({
          eventType: isError ? "tool.failed" : "tool.completed",
          spanKey: part.toolCallId,
          status: isError ? "error" : "completed",
          payload: {
            toolCallId: part.toolCallId,
            toolName: part.toolName,
            ...(isError
              ? { errorMessage: toErrorMessage(outcome ?? "Tool execution failed") }
              : { outputPreview: toJsonPreview(outcome ?? {}) }),
          },
        });
      }
    }
  }

  return events;
}
|
|
479
|
+
|
|
480
|
+
/**
 * Groups a message history into conversational turns.
 *
 * A new turn starts whenever a user message arrives after the current group
 * already contains a non-user message. Groups with neither user text nor an
 * assistant message are discarded and do not consume a turn index.
 *
 * @param messages UI messages in conversation order.
 * @returns Derived turns with previews (input capped at 220 characters,
 *   output at 240) and reconstructed tool events; empty when nothing
 *   persistable is found.
 */
function deriveSyntheticVoiceTurns(messages: KognitiveUIMessage[]): SyntheticVoiceTurn[] {
  const persisted = toPersistedConversationMessages(messages);
  if (persisted.length === 0) {
    return [];
  }

  const turns: SyntheticVoiceTurn[] = [];
  let bucket: PersistedConversationMessage[] = [];

  // Converts the open bucket into a turn (when it qualifies) and starts a new bucket.
  const closeBucket = () => {
    const turnMessages = bucket;
    bucket = [];
    if (turnMessages.length === 0) return;

    const inputPreview = buildTurnPreview(turnMessages, "user", 220);
    const hasAssistantOutput = turnMessages.some((entry) => entry.role === "assistant");
    if (!inputPreview && !hasAssistantOutput) return;

    turns.push({
      // Indices are dense: only buckets that become turns are counted.
      turnIndex: turns.length,
      messages: turnMessages,
      inputPreview,
      outputPreview: buildTurnPreview(turnMessages, "assistant", 240) || null,
      toolEvents: extractSyntheticToolEvents(turnMessages),
      hasAssistantOutput,
    });
  };

  for (const message of persisted) {
    const startsNewTurn = message.role === "user" && bucket.some((entry) => entry.role !== "user");
    if (startsNewTurn) {
      closeBucket();
    }
    bucket.push(message);
  }

  closeBucket();
  return turns;
}
|
|
523
|
+
|
|
524
|
+
/**
 * Converts a usage snapshot into a usage event for cost reporting.
 *
 * @param eventType Usage category to record.
 * @param usage Usage snapshot; when missing, no event is produced.
 * @param modelId Model the usage is attributed to, also used for pricing.
 * @returns Undefined without usage. Otherwise an event with the locally
 *   computed cost override; token counters (defaulting to 0) are included
 *   only for snapshots of type "tokens".
 */
function usageToCostEvent(eventType: string, usage?: VoiceTracingUsageSnapshot, modelId?: string): UsageEventInput | undefined {
  if (!usage) {
    return undefined;
  }

  const costCentsOverride = calculateVoiceUsageCostCents(usage, modelId);

  if (usage.type === "tokens") {
    return {
      eventType,
      modelId,
      inputTokens: usage.inputTokens ?? 0,
      outputTokens: usage.outputTokens ?? 0,
      cachedInputTokens: usage.cachedInputTokens ?? 0,
      costCentsOverride,
    };
  }

  // Non-token usage carries no counters to forward.
  return { eventType, modelId, costCentsOverride };
}
|
|
543
|
+
|
|
544
|
+
/**
 * Maps a telemetry event to the session event stored by the backend.
 *
 * Event names are translated where they differ: the `voice.tool.*` events
 * become `tool.*`, "voice.interrupted" becomes "voice.turn.interrupted",
 * "voice.response.done" becomes "voice.turn.completed", and
 * "voice.response.output.completed" becomes
 * "voice.assistant.output.completed".
 *
 * @param event Telemetry event to translate.
 * @returns The session event, or null for event types that have no mapping.
 */
function payloadForSessionEvent(event: VoiceTracingTelemetryEvent): SessionEventInput | null {
  const toSessionEvent = (eventType: string, payload: Record<string, unknown>): SessionEventInput => ({
    eventType,
    payload,
  });

  switch (event.type) {
    case "voice.user.transcribed": {
      const { transcript, itemId } = event;
      return toSessionEvent("voice.user.transcribed", { transcript, itemId });
    }
    case "voice.assistant.started":
      return toSessionEvent("voice.assistant.started", { responseId: event.responseId });
    case "voice.assistant.stopped":
      return toSessionEvent("voice.assistant.stopped", { responseId: event.responseId });
    case "voice.tool.started": {
      const { toolCallId, toolName, input } = event;
      return toSessionEvent("tool.started", { toolCallId, toolName, input });
    }
    case "voice.tool.completed": {
      const { toolCallId, toolName, output } = event;
      return toSessionEvent("tool.completed", { toolCallId, toolName, output });
    }
    case "voice.tool.failed": {
      const { toolCallId, toolName, error } = event;
      return toSessionEvent("tool.failed", { toolCallId, toolName, error });
    }
    case "voice.interrupted": {
      const { responseId, reason } = event;
      return toSessionEvent("voice.turn.interrupted", { responseId, reason });
    }
    case "voice.response.done": {
      const { responseId, status, outputText } = event;
      return toSessionEvent("voice.turn.completed", { responseId, status, outputText });
    }
    case "voice.response.output.completed": {
      const { responseId, itemId, status, outputText } = event;
      return toSessionEvent("voice.assistant.output.completed", { responseId, itemId, status, outputText });
    }
    default:
      return null;
  }
}
|
|
626
|
+
|
|
627
|
+
export function createVoiceTracingReporter(config: VoiceTracingReporterConfig) {
|
|
628
|
+
const sink = config.adapter;
|
|
629
|
+
const sessionId = config.callId;
|
|
630
|
+
const userId = getUserId(config.resourceId);
|
|
631
|
+
const sessionMetadata = getSessionMetadata(config);
|
|
632
|
+
const pendingSessionEvents: SessionEventInput[] = [{
|
|
633
|
+
eventType: "voice.call.started",
|
|
634
|
+
payload: {
|
|
635
|
+
callId: config.callId,
|
|
636
|
+
modelId: config.modelId,
|
|
637
|
+
voice: config.voice,
|
|
638
|
+
transport: config.transport ?? "webrtc",
|
|
639
|
+
},
|
|
640
|
+
}];
|
|
641
|
+
const connectionRun = createRemoteRunContext({
|
|
642
|
+
agentName: config.agentName,
|
|
643
|
+
sessionId,
|
|
644
|
+
requestedRunScope: "session",
|
|
645
|
+
});
|
|
646
|
+
let activeTurn: ActiveTurn | null = null;
|
|
647
|
+
let turnCounter = -1;
|
|
648
|
+
let latestMessages: KognitiveUIMessage[] = [];
|
|
649
|
+
const seenUserMessageIds = new Set<string>();
|
|
650
|
+
const seenAssistantCompletionKeys = new Set<string>();
|
|
651
|
+
let hasReportedRuns = false;
|
|
652
|
+
let reporterQueue = Promise.resolve();
|
|
653
|
+
let linkage: ReportingLinkage = {};
|
|
654
|
+
let lastFlushedMessageSignature: string | null = null;
|
|
655
|
+
logVoiceTracing("initialized", {
|
|
656
|
+
sessionId,
|
|
657
|
+
agentName: config.agentName,
|
|
658
|
+
modelId: config.modelId,
|
|
659
|
+
transcriptionModelId: config.transcriptionModelId ?? null,
|
|
660
|
+
});
|
|
661
|
+
|
|
662
|
+
const enqueueReporterTask = <T>(task: () => Promise<T> | T) => {
|
|
663
|
+
const nextTask = reporterQueue.then(task, task);
|
|
664
|
+
reporterQueue = nextTask.then(() => undefined, () => undefined);
|
|
665
|
+
return nextTask;
|
|
666
|
+
};
|
|
667
|
+
|
|
668
|
+
const ensureSessionBootstrap = async () => {
|
|
669
|
+
if (linkage.sessionDbId || linkage.agentRunDbId) {
|
|
670
|
+
return linkage;
|
|
671
|
+
}
|
|
672
|
+
|
|
673
|
+
const runResult = await sink.reportAgentRun(buildRemoteRunPayload({
|
|
674
|
+
execution: connectionRun,
|
|
675
|
+
userId,
|
|
676
|
+
sessionId,
|
|
677
|
+
modelId: config.modelId,
|
|
678
|
+
status: "running",
|
|
679
|
+
inputPreview: "",
|
|
680
|
+
triggerType: "voice",
|
|
681
|
+
agentType: "voice",
|
|
682
|
+
metadata: sessionMetadata,
|
|
683
|
+
sessionMetadata,
|
|
684
|
+
skipConversationLog: true,
|
|
685
|
+
skipPipelines: true,
|
|
686
|
+
skipCostRecording: true,
|
|
687
|
+
}));
|
|
688
|
+
linkage = {
|
|
689
|
+
...linkage,
|
|
690
|
+
...extractReportingLinkage(runResult),
|
|
691
|
+
};
|
|
692
|
+
|
|
693
|
+
await sink.reportConversationLog(buildRemoteLogPayload({
|
|
694
|
+
execution: createRemoteExecutionContext({
|
|
695
|
+
agentName: config.agentName,
|
|
696
|
+
sessionId,
|
|
697
|
+
requestedRunScope: "session",
|
|
698
|
+
turnId: "voice-call-start",
|
|
699
|
+
turnIndex: 0,
|
|
700
|
+
}),
|
|
701
|
+
userId,
|
|
702
|
+
sessionId,
|
|
703
|
+
messages: [],
|
|
704
|
+
modelId: config.modelId,
|
|
705
|
+
sessionMetadata,
|
|
706
|
+
incrementMessageCount: false,
|
|
707
|
+
skipPipelines: true,
|
|
708
|
+
skipTraceTracking: true,
|
|
709
|
+
sessionEvents: pendingSessionEvents.splice(0, pendingSessionEvents.length),
|
|
710
|
+
metadata: { source: "voice", phase: "call-start" },
|
|
711
|
+
}));
|
|
712
|
+
|
|
713
|
+
return linkage;
|
|
714
|
+
};
|
|
715
|
+
|
|
716
|
+
const withLinkage = <T extends Record<string, unknown>>(payload: T): T & ReportingLinkage => ({
|
|
717
|
+
...payload,
|
|
718
|
+
...(linkage.sessionDbId ? { sessionDbId: linkage.sessionDbId } : {}),
|
|
719
|
+
...(linkage.agentRunDbId ? { agentRunDbId: linkage.agentRunDbId } : {}),
|
|
720
|
+
});
|
|
721
|
+
|
|
722
|
+
const reportConnectionRunCompletion = async (reason?: string) => {
|
|
723
|
+
await ensureSessionBootstrap();
|
|
724
|
+
await sink.reportAgentRun(withLinkage(buildRemoteRunPayload({
|
|
725
|
+
execution: connectionRun,
|
|
726
|
+
finalizeRun: true,
|
|
727
|
+
userId,
|
|
728
|
+
sessionId,
|
|
729
|
+
modelId: config.modelId,
|
|
730
|
+
status: "completed",
|
|
731
|
+
triggerType: "voice",
|
|
732
|
+
agentType: "voice",
|
|
733
|
+
messages: toPersistedConversationMessages(latestMessages),
|
|
734
|
+
metadata: { ...sessionMetadata, endReason: reason ?? "disconnect" },
|
|
735
|
+
sessionMetadata,
|
|
736
|
+
skipConversationLog: true,
|
|
737
|
+
skipPipelines: true,
|
|
738
|
+
skipCostRecording: true,
|
|
739
|
+
incrementSessionMessageCount: false,
|
|
740
|
+
completedAt: new Date().toISOString(),
|
|
741
|
+
})));
|
|
742
|
+
};
|
|
743
|
+
|
|
744
|
+
const startTurn = async (transcript: string, usage?: VoiceTracingUsageSnapshot, startMessageId?: string) => {
|
|
745
|
+
turnCounter += 1;
|
|
746
|
+
const turnId = generateId();
|
|
747
|
+
const startedAt = new Date().toISOString();
|
|
748
|
+
const execution = createRemoteExecutionContext({
|
|
749
|
+
agentName: config.agentName,
|
|
750
|
+
sessionId,
|
|
751
|
+
requestedRunScope: "session",
|
|
752
|
+
turnId,
|
|
753
|
+
turnIndex: turnCounter,
|
|
754
|
+
});
|
|
755
|
+
|
|
756
|
+
activeTurn = {
|
|
757
|
+
execution,
|
|
758
|
+
turnIndex: turnCounter,
|
|
759
|
+
startedAt,
|
|
760
|
+
inputPreview: preview(transcript, 220),
|
|
761
|
+
startMessageId,
|
|
762
|
+
transcriptionUsage: usageToCostEvent("voice_transcription", usage, config.transcriptionModelId ?? config.modelId),
|
|
763
|
+
pendingToolCallIds: new Set(),
|
|
764
|
+
};
|
|
765
|
+
if (startMessageId) {
|
|
766
|
+
seenUserMessageIds.add(startMessageId);
|
|
767
|
+
}
|
|
768
|
+
logVoiceTracing("turn started", {
|
|
769
|
+
sessionId,
|
|
770
|
+
runId: execution.runId,
|
|
771
|
+
turnId,
|
|
772
|
+
traceId: execution.traceId,
|
|
773
|
+
turnIndex: activeTurn.turnIndex,
|
|
774
|
+
transcriptPreview: activeTurn.inputPreview,
|
|
775
|
+
startMessageId: startMessageId ?? null,
|
|
776
|
+
hasUsage: !!usage,
|
|
777
|
+
transcriptionCostCentsOverride: activeTurn.transcriptionUsage?.costCentsOverride ?? null,
|
|
778
|
+
});
|
|
779
|
+
|
|
780
|
+
await ensureSessionBootstrap();
|
|
781
|
+
const traceStartPayload = buildRemoteTraceStartPayload({
|
|
782
|
+
execution,
|
|
783
|
+
userId,
|
|
784
|
+
sessionId,
|
|
785
|
+
requestPreview: activeTurn.inputPreview,
|
|
786
|
+
modelId: config.modelId,
|
|
787
|
+
metadata: {
|
|
788
|
+
source: "voice",
|
|
789
|
+
callId: config.callId,
|
|
790
|
+
},
|
|
791
|
+
startedAt,
|
|
792
|
+
});
|
|
793
|
+
await sink.reportTraceEvents(withLinkage({
|
|
794
|
+
...traceStartPayload,
|
|
795
|
+
start: {
|
|
796
|
+
...traceStartPayload.start,
|
|
797
|
+
...(linkage.sessionDbId ? { sessionDbId: linkage.sessionDbId } : {}),
|
|
798
|
+
...(linkage.agentRunDbId ? { agentRunDbId: linkage.agentRunDbId } : {}),
|
|
799
|
+
},
|
|
800
|
+
}));
|
|
801
|
+
};
|
|
802
|
+
|
|
803
|
+
const getPendingUserMessagesFromState = () => latestMessages
|
|
804
|
+
.filter((message) => message.role === "user" && !seenUserMessageIds.has(message.id))
|
|
805
|
+
.map((message) => ({
|
|
806
|
+
id: message.id,
|
|
807
|
+
transcript: getUiMessageText(message),
|
|
808
|
+
}))
|
|
809
|
+
.filter((message) => message.transcript.length > 0);
|
|
810
|
+
|
|
811
|
+
/**
 * Returns the window of `latestMessages` that belongs to `turn`.
 *
 * The window starts at the turn's start message (or at index 0 when the turn has
 * no start message id or it cannot be found) and ends just before the next user
 * message. When `nextUserMessageId` is missing, not found, or not located after
 * the window start, the window runs to the end of the list.
 *
 * @param turn The turn whose messages are wanted.
 * @param nextUserMessageId Id of the user message that opens the following turn, if any.
 */
const getTurnMessages = (turn: ActiveTurn, nextUserMessageId?: string) => {
  const positionOf = (messageId: string | null | undefined): number => {
    if (!messageId) return -1;
    return latestMessages.findIndex((candidate) => candidate.id === messageId);
  };

  const firstIndex = Math.max(positionOf(turn.startMessageId), 0);
  const nextUserPosition = positionOf(nextUserMessageId);
  const endExclusive = nextUserPosition > firstIndex ? nextUserPosition : latestMessages.length;

  return latestMessages.slice(firstIndex, endExclusive);
};
|
|
823
|
+
|
|
824
|
+
/**
 * Closes the currently active turn, if there is one.
 *
 * In order, this reports the agent run, reports the trace finish event, flushes
 * the conversation log for the turn's execution, and finally clears `activeTurn`.
 *
 * @param args.completedAt Completion timestamp; falls back to the turn's own
 *   `completedAt`, then to the current time.
 * @param args.responseStatus Status to report; falls back to the turn's
 *   `completionStatus`, then to "completed"/"interrupted" depending on whether
 *   any assistant output preview is available.
 * @param args.usage Usage snapshot converted into the "voice_chat" cost event.
 * @param args.nextUserMessageId Id of the user message that starts the next turn;
 *   bounds the messages attributed to this turn.
 * @param args.forceInterrupt When true the status is always "interrupted".
 * @param args.reason Free-form end reason recorded in run metadata and trace payload.
 * @returns `false` when there was no active turn, `true` after the turn was reported.
 */
const finalizeActiveTurn = async (args: {
  completedAt?: string;
  responseStatus?: string;
  usage?: VoiceTracingUsageSnapshot;
  nextUserMessageId?: string;
  forceInterrupt?: boolean;
  reason?: string;
} = {}) => {
  const turn = activeTurn;
  if (!turn) {
    return false;
  }

  const { nextUserMessageId } = args;
  const endReason = args.reason ?? null;

  const messagesInTurn = getTurnMessages(turn, nextUserMessageId);
  const responseUsage = usageToCostEvent("voice_chat", args.usage, config.modelId);
  const assistantTextFromMessages = findLatestMessageText(
    toPersistedConversationMessages(messagesInTurn),
    "assistant",
  );

  // The persisted transcript is cumulative: everything up to the end of this turn,
  // but nothing from the next user turn onwards.
  const nextUserPosition = nextUserMessageId
    ? latestMessages.findIndex(({ id }) => id === nextUserMessageId)
    : -1;
  const cutoff = nextUserPosition > 0 ? nextUserPosition : latestMessages.length;
  const persistedMessages = toPersistedConversationMessages(latestMessages.slice(0, cutoff));

  const outputPreview = turn.outputPreview ?? assistantTextFromMessages ?? undefined;
  const completedAt = args.completedAt ?? turn.completedAt ?? new Date().toISOString();
  const elapsedMs = new Date(completedAt).getTime() - new Date(turn.startedAt).getTime();
  const durationMs = Math.max(0, elapsedMs);

  const resolveStatus = () => {
    if (args.forceInterrupt) return "interrupted";
    return args.responseStatus
      ?? turn.completionStatus
      ?? (outputPreview ? "completed" : "interrupted");
  };
  const inferredStatus = resolveStatus();
  // Anything other than a clean completion is reported as an error on the trace.
  const finishState = inferredStatus !== "completed" ? "error" : "completed";

  const tokenUsage = {
    inputTokens: responseUsage?.inputTokens,
    outputTokens: responseUsage?.outputTokens,
    cachedInputTokens: responseUsage?.cachedInputTokens,
  };

  logVoiceTracing("turn finalized", {
    sessionId,
    traceId: turn.execution.traceId,
    runId: turn.execution.runId,
    status: inferredStatus,
    nextUserMessageId: nextUserMessageId ?? null,
    messageCount: persistedMessages.length,
    outputPreview: outputPreview ?? null,
  });

  await ensureSessionBootstrap();

  const runPayload = buildRemoteRunPayload({
    execution: turn.execution,
    userId,
    sessionId,
    modelId: config.modelId,
    status: inferredStatus,
    inputPreview: turn.inputPreview,
    outputPreview,
    triggerType: "voice",
    agentType: "voice",
    durationMs,
    startedAt: turn.startedAt,
    completedAt,
    ...tokenUsage,
    costCentsOverride: responseUsage?.costCentsOverride,
    costEventType: responseUsage ? "voice_chat" : undefined,
    additionalCostEvents: turn.transcriptionUsage ? [turn.transcriptionUsage] : [],
    messages: persistedMessages,
    metadata: {
      ...sessionMetadata,
      responseId: turn.responseId ?? null,
      turnIndex: turn.turnIndex,
      responseStatus: inferredStatus,
      endReason,
    },
  });
  await sink.reportAgentRun(withLinkage(runPayload));
  hasReportedRuns = true;

  const traceFinishPayload = buildRemoteTraceFinishPayload({
    execution: turn.execution,
    state: finishState,
    responsePreview: outputPreview ?? undefined,
    durationMs,
    usage: tokenUsage,
    payload: {
      responseStatus: inferredStatus,
      responseId: turn.responseId ?? null,
      reason: endReason,
    },
    errorMessage: finishState === "error" ? inferredStatus : undefined,
  });
  await sink.reportTraceEvents(withLinkage(traceFinishPayload));

  await flushConversationLog({ execution: turn.execution, messages: persistedMessages });

  // Only released once every report above has gone through.
  activeTurn = null;
  return true;
};
|
|
921
|
+
|
|
922
|
+
/**
 * Reconciles turn tracking with the latest session snapshot.
 *
 * For every pending user message (see `getPendingUserMessagesFromState`) the
 * currently active turn, if any, is finalized with reason "next_user_turn" and a
 * new turn is started from that message.
 *
 * @param usage Optional usage snapshot forwarded to `startTurn` for each recovered turn.
 */
const syncTurnsFromState = async (usage?: VoiceTracingUsageSnapshot) => {
  const unseenUserMessages = getPendingUserMessagesFromState();

  if (unseenUserMessages.length === 0) {
    logVoiceTracing("no active turn found from state", {
      sessionId,
      latestMessageCount: latestMessages.length,
      seenUserMessageCount: seenUserMessageIds.size,
    });
    return;
  }

  for (const { id, transcript } of unseenUserMessages) {
    const previousTurn = activeTurn;
    if (previousTurn) {
      // A turn that never completed and produced no output was cut off by the user.
      const wasCutOff = !(previousTurn.completedAt || previousTurn.outputPreview);
      await finalizeActiveTurn({
        nextUserMessageId: id,
        forceInterrupt: wasCutOff,
        reason: "next_user_turn",
      });
    }

    logVoiceTracing("turn recovered from state", {
      sessionId,
      messageId: id,
      transcriptPreview: preview(transcript, 120),
    });
    await startTurn(transcript, usage, id);
  }
};
|
|
950
|
+
|
|
951
|
+
/**
 * Fills in missing linkage fields on a session event.
 *
 * Values already present on the event win; otherwise the agent name comes from
 * the reporter config and the trace/run ids come from the active turn, if any.
 */
const enrichSessionEvent = (event: SessionEventInput): SessionEventInput => {
  const currentExecution = activeTurn?.execution;
  return {
    ...event,
    agentName: event.agentName ?? config.agentName,
    traceId: event.traceId ?? currentExecution?.traceId,
    agentRunId: event.agentRunId ?? currentExecution?.runId,
  };
};
|
|
957
|
+
|
|
958
|
+
/**
 * Sends the conversation transcript and any queued session events to the sink.
 *
 * The message list is skipped when its signature equals the last flushed one, and
 * the whole call is a no-op when, in addition, no session events are queued.
 *
 * Queued session events are removed from `pendingSessionEvents` for the duration
 * of the report. If building or sending the payload throws, they are put back at
 * the front of the queue (so a later flush can deliver them) and the error is
 * rethrown.
 *
 * @param args.execution Execution context to attribute the log to; when omitted a
 *   session-scoped "voice-session-flush" context is created.
 * @param args.messages Messages to persist; defaults to the full latest snapshot.
 */
const flushConversationLog = async (args: {
  execution?: ReturnType<typeof createRemoteExecutionContext>;
  messages?: PersistedConversationMessage[];
} = {}) => {
  await ensureSessionBootstrap();
  const execution = args.execution ?? createRemoteExecutionContext({
    agentName: config.agentName,
    sessionId,
    requestedRunScope: "session",
    turnId: "voice-session-flush",
    turnIndex: turnCounter >= 0 ? turnCounter : 0,
  });
  const messages = args.messages ?? toPersistedConversationMessages(latestMessages);
  const messageSignature = buildPersistedMessageSignature(messages);
  const hasPendingSessionEvents = pendingSessionEvents.length > 0;
  const shouldSkipReplayMessages = messageSignature === lastFlushedMessageSignature;
  if (shouldSkipReplayMessages && !hasPendingSessionEvents) {
    logVoiceTracing("conversation flush skipped", {
      sessionId,
      traceId: execution.traceId,
      agentRunId: execution.runId,
      reason: "duplicate_messages",
    });
    return;
  }

  // Events can still be flushed on their own when the messages are unchanged.
  const messagesToPersist = shouldSkipReplayMessages ? [] : messages;
  const sessionEvents = pendingSessionEvents.splice(0, pendingSessionEvents.length);
  try {
    await sink.reportConversationLog(withLinkage(buildRemoteLogPayload({
      execution,
      userId,
      sessionId,
      messages: messagesToPersist,
      modelId: config.modelId,
      incrementMessageCount: false,
      skipPipelines: true,
      skipTraceTracking: true,
      sessionMetadata,
      sessionEvents,
      metadata: { source: "voice", phase: "session-flush" },
    })));
  } catch (error: unknown) {
    // Do not lose the drained events when the report fails; keep their order
    // ahead of anything queued afterwards.
    pendingSessionEvents.unshift(...sessionEvents);
    throw error;
  }
  if (!shouldSkipReplayMessages) {
    lastFlushedMessageSignature = messageSignature;
  }
  logVoiceTracing("conversation flushed", {
    sessionId,
    traceId: execution.traceId,
    agentRunId: execution.runId,
    messageCount: messagesToPersist.length,
    replaySkipped: shouldSkipReplayMessages,
  });
};
|
|
1009
|
+
|
|
1010
|
+
/**
 * Fallback path: when no run has been reported yet, derives turns from the
 * conversation snapshot and reports one synthetic agent run plus one complete
 * trace (start, tool events, finish) per derived turn.
 *
 * Synthetic timestamps are spaced one second apart, ending at `args.responseAt`
 * (or the current time when it is missing or cannot be parsed). Usage, response
 * id and response status are attributed to the last derived turn only.
 *
 * @param args.reason Why recovery was triggered; recorded in logs and run metadata.
 * @param args.responseAt Timestamp used as the completion time of the last turn.
 * @param args.responseId Response id recorded on the last turn.
 * @param args.responseStatus Raw response status; "cancelled"/"incomplete" map to
 *   "interrupted", "failed" stays "failed", anything else maps to "completed".
 * @param args.usage Usage snapshot applied to the last turn.
 * @returns `true` when synthetic turns were reported, `false` when recovery was
 *   skipped (runs already reported) or no turns could be derived.
 */
const emitSyntheticTurnsFromConversation = async (args: {
  reason?: string;
  responseAt?: string;
  responseId?: string;
  responseStatus?: string;
  usage?: VoiceTracingUsageSnapshot;
} = {}) => {
  if (hasReportedRuns) {
    logVoiceTracing("synthetic turn recovery skipped", {
      sessionId,
      reason: args.reason ?? null,
      hasReportedRuns,
    });
    return false;
  }

  const turns = deriveSyntheticVoiceTurns(latestMessages);
  if (turns.length === 0) {
    logVoiceTracing("synthetic turn recovery found no turns", {
      sessionId,
      reason: args.reason ?? null,
      messageCount: latestMessages.length,
    });
    return false;
  }

  // An unparseable timestamp yields NaN, and `new Date(NaN).toISOString()` throws
  // a RangeError; fall back to "now" instead of aborting the recovery.
  const parsedResponseAt = args.responseAt ? new Date(args.responseAt).getTime() : Number.NaN;
  const baseCompletedAt = Number.isFinite(parsedResponseAt) ? parsedResponseAt : Date.now();
  logVoiceTracing("synthetic turns recovered", {
    sessionId,
    reason: args.reason ?? null,
    turnCount: turns.length,
    messageCount: latestMessages.length,
    responseStatus: args.responseStatus ?? null,
  });

  // Each run carries the transcript accumulated up to and including its turn.
  let accumulatedSyntheticMessages: PersistedConversationMessage[] = [];
  for (const [index, turn] of turns.entries()) {
    const syntheticTurnId = generateId();
    const execution = createRemoteExecutionContext({
      agentName: config.agentName,
      sessionId,
      requestedRunScope: "session",
      turnId: syntheticTurnId,
      turnIndex: turn.turnIndex,
    });
    // Earlier turns are back-dated by one second per remaining turn.
    const syntheticCompletedAtMs = baseCompletedAt - Math.max(0, turns.length - index - 1) * 1000;
    const syntheticCompletedAt = new Date(syntheticCompletedAtMs).toISOString();
    const syntheticStartedAt = new Date(syntheticCompletedAtMs - 1000).toISOString();
    const isLastTurn = index === turns.length - 1;
    const responseUsage = isLastTurn ? usageToCostEvent("voice_chat", args.usage, config.modelId) : undefined;
    const status = isLastTurn && args.responseStatus
      ? (args.responseStatus === "cancelled" || args.responseStatus === "incomplete"
        ? "interrupted"
        : args.responseStatus === "failed"
          ? "failed"
          : "completed")
      : turn.hasAssistantOutput
        ? "completed"
        : "interrupted";
    const finishState = status === "completed" ? "completed" : "error";
    accumulatedSyntheticMessages = [...accumulatedSyntheticMessages, ...turn.messages];

    await ensureSessionBootstrap();
    await sink.reportAgentRun(withLinkage(buildRemoteRunPayload({
      execution,
      userId,
      sessionId,
      modelId: config.modelId,
      status,
      inputPreview: turn.inputPreview,
      outputPreview: turn.outputPreview ?? undefined,
      triggerType: "voice",
      agentType: "voice",
      startedAt: syntheticStartedAt,
      completedAt: syntheticCompletedAt,
      durationMs: 1000,
      inputTokens: responseUsage?.inputTokens,
      outputTokens: responseUsage?.outputTokens,
      cachedInputTokens: responseUsage?.cachedInputTokens,
      costCentsOverride: responseUsage?.costCentsOverride,
      // NOTE(review): set unconditionally here, whereas finalizeActiveTurn only
      // sets it when usage is available — confirm this difference is intended.
      costEventType: "voice_chat",
      messages: accumulatedSyntheticMessages,
      incrementSessionMessageCount: false,
      skipConversationLog: true,
      skipPipelines: true,
      metadata: {
        ...sessionMetadata,
        responseId: isLastTurn ? args.responseId ?? null : null,
        turnIndex: turn.turnIndex,
        responseStatus: status,
        recovery: "conversation_fallback",
        recoveryReason: args.reason ?? null,
      },
    })));
    hasReportedRuns = true;

    const traceStartPayload = buildRemoteTraceStartPayload({
      execution,
      userId,
      sessionId,
      requestPreview: turn.inputPreview,
      modelId: config.modelId,
      metadata: {
        source: "voice",
        callId: config.callId,
        synthetic: true,
        recovery: "conversation_fallback",
      },
      startedAt: syntheticStartedAt,
    });
    // Start, tool events and finish are sent together in a single trace report.
    await sink.reportTraceEvents(withLinkage({
      ...traceStartPayload,
      start: {
        ...traceStartPayload.start,
        ...(linkage.sessionDbId ? { sessionDbId: linkage.sessionDbId } : {}),
        ...(linkage.agentRunDbId ? { agentRunDbId: linkage.agentRunDbId } : {}),
      },
      events: turn.toolEvents,
      finish: buildRemoteTraceFinishPayload({
        execution,
        state: finishState,
        responsePreview: turn.outputPreview ?? undefined,
        durationMs: 1000,
        usage: {
          inputTokens: responseUsage?.inputTokens,
          outputTokens: responseUsage?.outputTokens,
          cachedInputTokens: responseUsage?.cachedInputTokens,
        },
        errorMessage: finishState === "error" ? status : undefined,
      }).finish,
    }));
  }

  return true;
};
|
|
1144
|
+
|
|
1145
|
+
return {
|
|
1146
|
+
/**
 * Queues a task that adopts the event's message list as the latest snapshot and
 * then synchronizes turn tracking with it. Fire-and-forget: the queued task's
 * promise is intentionally not returned.
 */
handleSessionEvent(event: VoiceTracingSessionEvent) {
  const applySessionSnapshot = async () => {
    const { state } = event;
    latestMessages = state.messages;
    logVoiceTracing("session state updated", {
      sessionId,
      messageCount: latestMessages.length,
      toolInvocationCount: state.toolInvocations.length,
      connectionStatus: state.connectionStatus,
      agentState: state.agentState,
      speechState: state.speechState,
      transcriptionStatus: state.transcriptionStatus,
      hasActiveTurn: !!activeTurn,
    });
    await syncTurnsFromState();
  };

  void enqueueReporterTask(applySessionSnapshot);
},
|
|
1162
|
+
/**
 * Queues processing of one telemetry event and returns the queued task's promise.
 *
 * Handling by event type:
 * - `voice.user.transcribed`: ignores already-seen items, otherwise syncs turns
 *   from state, starts a turn if none is active, and queues the session event.
 * - every other type: syncs turns from state and queues the session event, then
 *   - `voice.response.created`: links the response id to the active turn;
 *   - `voice.response.output.completed`: records the assistant output on the turn
 *     (deduplicated by completion key) and reports an `assistant.completed` event;
 *   - `voice.tool.started` / `.completed` / `.failed`: tracks pending tool calls,
 *     reports the tool trace event, and finalizes the turn once no tool call is
 *     pending and the response has already completed;
 *   - `voice.assistant.started` / `.stopped`, `voice.interrupted`: reports a trace event;
 *   - `voice.response.done`: records completion on the turn and finalizes it,
 *     unless tool calls are still pending; without an active turn it falls back
 *     to synthetic turn recovery.
 */
async handleTelemetry(event: VoiceTracingTelemetryEvent) {
  return enqueueReporterTask(async () => {
    // Builds the 220-character JSON preview used in tool trace payloads. Unlike a
    // bare `JSON.stringify(...).slice(...)`, it cannot throw: JSON.stringify
    // returns undefined for functions/symbols and throws on circular structures
    // and BigInt values, none of which should abort telemetry processing.
    const previewJson = (value: unknown): string => {
      try {
        const serialized: string | undefined = JSON.stringify(value ?? {});
        return (serialized ?? String(value)).slice(0, 220);
      } catch {
        return "[unserializable]";
      }
    };

    logVoiceTracing("telemetry received", {
      sessionId,
      type: event.type,
      responseId: "responseId" in event ? event.responseId ?? null : null,
      itemId: "itemId" in event ? event.itemId ?? null : null,
      toolCallId: "toolCallId" in event ? event.toolCallId ?? null : null,
      hasActiveTurn: !!activeTurn,
    });
    if (event.type === "voice.user.transcribed") {
      if (event.itemId && seenUserMessageIds.has(event.itemId)) {
        logVoiceTracing("duplicate user turn ignored", {
          sessionId,
          itemId: event.itemId,
        });
        return;
      }

      await syncTurnsFromState(event.usage);
      // The snapshot may not contain this user message yet; start the turn directly.
      if (!activeTurn) {
        await startTurn(event.transcript, event.usage, event.itemId);
      }
      const sessionEvent = payloadForSessionEvent(event);
      if (sessionEvent) pendingSessionEvents.push(enrichSessionEvent(sessionEvent));
      return;
    }

    await syncTurnsFromState();

    const sessionEvent = payloadForSessionEvent(event);
    if (sessionEvent) pendingSessionEvents.push(enrichSessionEvent(sessionEvent));

    if (event.type === "voice.response.created" && activeTurn) {
      activeTurn.responseId = event.responseId;
      logVoiceTracing("response linked to turn", {
        sessionId,
        traceId: activeTurn.execution.traceId,
        runId: activeTurn.execution.runId,
        responseId: event.responseId,
      });
      return;
    }

    if (event.type === "voice.response.output.completed" && activeTurn) {
      const completionKey = buildAssistantCompletionKey(event);
      if (seenAssistantCompletionKeys.has(completionKey)) {
        logVoiceTracing("duplicate assistant completion ignored", {
          sessionId,
          responseId: event.responseId ?? null,
          itemId: event.itemId ?? null,
        });
        return;
      }
      seenAssistantCompletionKeys.add(completionKey);
      activeTurn.responseId = event.responseId ?? activeTurn.responseId;
      // Prefer the text on the event; otherwise fall back to the latest assistant
      // message in the snapshot, then to whatever preview the turn already had.
      activeTurn.outputPreview = event.outputText
        ? preview(event.outputText, 240)
        : findLatestMessageText(toPersistedConversationMessages(latestMessages), "assistant") ?? activeTurn.outputPreview;
      activeTurn.completedAt = event.at;
      activeTurn.completionStatus = event.status ?? "completed";
      logVoiceTracing("assistant output completed", {
        sessionId,
        traceId: activeTurn.execution.traceId,
        runId: activeTurn.execution.runId,
        responseId: activeTurn.responseId ?? null,
        itemId: event.itemId ?? null,
        outputPreview: activeTurn.outputPreview ?? null,
      });
      await ensureSessionBootstrap();
      await sink.reportTraceEvents(withLinkage({
        traceId: activeTurn.execution.traceId,
        events: [{
          eventType: "assistant.completed",
          status: "completed",
          payload: {
            preview: activeTurn.outputPreview ?? null,
            responseId: activeTurn.responseId ?? null,
            itemId: event.itemId ?? null,
          },
        }],
      }));
      return;
    }

    if (event.type === "voice.tool.started" && activeTurn) {
      activeTurn.pendingToolCallIds.add(event.toolCallId);
      logVoiceTracing("tool started", {
        sessionId,
        traceId: activeTurn.execution.traceId,
        toolCallId: event.toolCallId,
        toolName: event.toolName,
        pendingCount: activeTurn.pendingToolCallIds.size,
      });
      await ensureSessionBootstrap();
      await sink.reportTraceEvents(withLinkage({
        traceId: activeTurn.execution.traceId,
        events: [{
          eventType: "tool.started",
          spanKey: event.toolCallId,
          status: "active",
          payload: {
            toolCallId: event.toolCallId,
            toolName: event.toolName,
            inputPreview: previewJson(event.input),
          },
        }],
      }));
      return;
    }

    if (event.type === "voice.tool.completed" && activeTurn) {
      activeTurn.pendingToolCallIds.delete(event.toolCallId);
      logVoiceTracing("tool completed", {
        sessionId,
        traceId: activeTurn.execution.traceId,
        toolCallId: event.toolCallId,
        toolName: event.toolName,
        pendingCount: activeTurn.pendingToolCallIds.size,
        hasCompletedAt: !!activeTurn.completedAt,
      });
      await ensureSessionBootstrap();
      await sink.reportTraceEvents(withLinkage({
        traceId: activeTurn.execution.traceId,
        events: [{
          eventType: "tool.completed",
          spanKey: event.toolCallId,
          status: "completed",
          payload: {
            toolCallId: event.toolCallId,
            toolName: event.toolName,
            outputPreview: previewJson(event.output),
          },
        }],
      }));
      // All tool calls done and response.done already fired — finalize the turn now.
      // This handles the case where there is no second response.done (single-response turns).
      // NOTE(review): no usage is passed here; the usage from the deferred
      // response.done event is not retained — confirm whether that is intended.
      if (activeTurn.pendingToolCallIds.size === 0 && activeTurn.completedAt) {
        logVoiceTracing("finalizing after last tool completed", {
          sessionId,
          traceId: activeTurn.execution.traceId,
          runId: activeTurn.execution.runId,
        });
        await finalizeActiveTurn({
          completedAt: activeTurn.completedAt,
          responseStatus: activeTurn.completionStatus,
        });
      }
      return;
    }

    if (event.type === "voice.tool.failed" && activeTurn) {
      activeTurn.pendingToolCallIds.delete(event.toolCallId);
      await ensureSessionBootstrap();
      await sink.reportTraceEvents(withLinkage({
        traceId: activeTurn.execution.traceId,
        events: [{
          eventType: "tool.failed",
          spanKey: event.toolCallId,
          status: "error",
          payload: {
            toolCallId: event.toolCallId,
            toolName: event.toolName,
            errorMessage: event.error,
          },
        }],
      }));
      // Same deferred-finalization rule as for completed tool calls.
      if (activeTurn.pendingToolCallIds.size === 0 && activeTurn.completedAt) {
        await finalizeActiveTurn({
          completedAt: activeTurn.completedAt,
          responseStatus: activeTurn.completionStatus,
        });
      }
      return;
    }

    if ((event.type === "voice.assistant.started" || event.type === "voice.assistant.stopped") && activeTurn) {
      await ensureSessionBootstrap();
      await sink.reportTraceEvents(withLinkage({
        traceId: activeTurn.execution.traceId,
        events: [{
          eventType: event.type,
          status: "completed",
          payload: {
            responseId: event.responseId,
          },
        }],
      }));
      return;
    }

    if (event.type === "voice.interrupted" && activeTurn) {
      await ensureSessionBootstrap();
      await sink.reportTraceEvents(withLinkage({
        traceId: activeTurn.execution.traceId,
        events: [{
          eventType: "trace.interrupted",
          status: "cancelled",
          payload: {
            reason: event.reason,
            responseId: event.responseId,
          },
        }],
      }));
      return;
    }

    if (event.type === "voice.response.done" && activeTurn) {
      activeTurn.responseId = event.responseId;
      activeTurn.outputPreview = event.outputText
        ? preview(event.outputText, 240)
        : activeTurn.outputPreview;
      activeTurn.completedAt = event.at;
      activeTurn.completionStatus = event.status === "cancelled" || event.status === "incomplete"
        ? "interrupted"
        : event.status === "failed"
          ? "failed"
          : "completed";

      // The browser-session SDK fires agent_tool_start AFTER response.done, so
      // pendingToolCallIds may not yet be populated via voice.tool.started.
      // Pre-populate from rawEvent output items so we can defer finalization.
      // Use call_id first (matches voice.tool.completed.toolCallId), fall back to id.
      const rawResponse = typeof event.rawEvent?.response === "object" && event.rawEvent.response !== null
        ? event.rawEvent.response as Record<string, unknown>
        : null;
      const rawOutputs = Array.isArray(rawResponse?.output) ? rawResponse.output as unknown[] : [];
      for (const output of rawOutputs) {
        if (typeof output === "object" && output !== null) {
          const item = output as Record<string, unknown>;
          if (item.type === "function_call") {
            if (typeof item.call_id === "string") activeTurn.pendingToolCallIds.add(item.call_id);
            else if (typeof item.id === "string") activeTurn.pendingToolCallIds.add(item.id);
          }
        }
      }
      logVoiceTracing("response done rawEvent check", {
        sessionId,
        traceId: activeTurn.execution.traceId,
        hasRawResponse: !!rawResponse,
        rawOutputCount: rawOutputs.length,
        functionCallCount: rawOutputs.filter((o) => typeof o === "object" && o !== null && (o as Record<string, unknown>).type === "function_call").length,
        pendingCount: activeTurn.pendingToolCallIds.size,
      });

      if (activeTurn.pendingToolCallIds.size > 0) {
        // Tool calls detected — keep activeTurn alive so voice.tool.started and
        // voice.tool.completed can be properly traced against this turn.
        // NOTE(review): event.usage is not stored on the turn here, so the later
        // tool-driven finalization reports no usage — confirm.
        logVoiceTracing("response done deferred — tool calls in output", {
          sessionId,
          traceId: activeTurn.execution.traceId,
          runId: activeTurn.execution.runId,
          status: event.status,
          pendingCount: activeTurn.pendingToolCallIds.size,
        });
        return;
      }

      logVoiceTracing("response completed", {
        sessionId,
        traceId: activeTurn.execution.traceId,
        runId: activeTurn.execution.runId,
        status: event.status,
        outputPreview: activeTurn.outputPreview ?? null,
      });
      await finalizeActiveTurn({
        completedAt: event.at,
        responseStatus: activeTurn.completionStatus,
        usage: event.usage,
      });
      return;
    }

    if (event.type === "voice.response.done" && !activeTurn) {
      logVoiceTracing("response completed without active turn", {
        sessionId,
        responseId: event.responseId,
        status: event.status,
        latestMessageCount: latestMessages.length,
      });
      // No turn was tracked for this response; try to rebuild turns from the transcript.
      await emitSyntheticTurnsFromConversation({
        reason: "response.done_without_active_turn",
        responseAt: event.at,
        responseId: event.responseId,
        responseStatus: event.status,
        usage: event.usage,
      });
    }
  });
},
|
|
1452
|
+
/**
 * Queues the end-of-call flush. Fire-and-forget: the queued task's promise is
 * intentionally not returned.
 *
 * The task queues a "voice.call.ended" session event, then either finalizes the
 * active turn or, when no turn is active, attempts synthetic turn recovery
 * followed by a conversation-log flush. In both cases the connection run
 * completion is reported last.
 *
 * @param reason Optional end reason; "disconnect" is used for turn finalization
 *   and recovery when it is omitted.
 */
flushCallEnd(reason?: string) {
  const finishCall = async () => {
    logVoiceTracing("flush call end", {
      sessionId,
      reason: reason ?? null,
      hasActiveTurn: !!activeTurn,
    });
    pendingSessionEvents.push({
      eventType: "voice.call.ended",
      payload: { callId: config.callId, reason },
    });

    const endReason = reason ?? "disconnect";
    const turnInFlight = activeTurn;

    if (turnInFlight) {
      // A turn with neither a completion time nor any output was cut short by the hang-up.
      const wasCutOff = !(turnInFlight.completedAt || turnInFlight.outputPreview);
      await finalizeActiveTurn({
        completedAt: turnInFlight.completedAt ?? new Date().toISOString(),
        responseStatus: turnInFlight.completionStatus,
        forceInterrupt: wasCutOff,
        reason: endReason,
      });
    } else {
      await emitSyntheticTurnsFromConversation({ reason: endReason });
      await flushConversationLog();
    }

    await reportConnectionRunCompletion(reason);
  };

  void enqueueReporterTask(finishCall);
},
|
|
1484
|
+
};
|
|
1485
|
+
}
|
|
1486
|
+
|
|
1487
|
+
/** Alias of `createVoiceTracingReporter` (presumably kept so the older "telemetry" name keeps working — confirm). */
export const createVoiceTelemetryReporter = createVoiceTracingReporter;
|
|
1488
|
+
/** Alias of `VoiceTracingAdapter`. */
export type VoiceReportingSink = VoiceTracingAdapter;
|
|
1489
|
+
/** Alias of `VoiceTracingReporterConfig`. */
export type VoiceTelemetryReporterConfig = VoiceTracingReporterConfig;
|
|
1490
|
+
/** Alias of `VoiceTracingSessionEvent`. */
export type VoiceSessionEvent = VoiceTracingSessionEvent;
|
|
1491
|
+
/** Alias of `VoiceTracingTelemetryEvent`. */
export type VoiceTelemetryEvent = VoiceTracingTelemetryEvent;
|
|
1492
|
+
/** Alias of `VoiceTracingUsageSnapshot`. */
export type VoiceUsageSnapshot = VoiceTracingUsageSnapshot;
|