@kognitivedev/voice-tracing 0.2.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1249 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.createVoiceTelemetryReporter = void 0;
4
+ exports.createVoiceTracingReporter = createVoiceTracingReporter;
5
+ const shared_1 = require("@kognitivedev/shared");
6
/**
 * Price table consulted when computing voice cost overrides.
 * Every rate is expressed in US dollars per one million tokens.
 */
const VOICE_MODEL_PRICING_OVERRIDES = (() => {
    // Official OpenAI model docs as of 2026-04-06:
    // gpt-realtime: $4/$16 text input/output per 1M, $32/$64 audio input/output per 1M, $0.40 cached input per 1M.
    // Both realtime model ids carry the same rates; the factory returns a fresh
    // object per call so the two entries never share a reference.
    const realtimeRates = () => ({
        textInputPerMillion: 4,
        textOutputPerMillion: 16,
        textCachedInputPerMillion: 0.4,
        audioInputPerMillion: 32,
        audioOutputPerMillion: 64,
        audioCachedInputPerMillion: 0.4,
    });
    return {
        "gpt-realtime": realtimeRates(),
        "gpt-realtime-1.5": realtimeRates(),
        // GPT-4o Transcribe: $2.50 audio input and $10 text output per 1M.
        "gpt-4o-transcribe": {
            audioInputPerMillion: 2.5,
            textOutputPerMillion: 10,
        },
        "gpt-4o-mini-transcribe": {
            audioInputPerMillion: 1.25,
            textOutputPerMillion: 5,
        },
    };
})();
35
/**
 * Resolves the user identifier reported for a voice session.
 *
 * @param {{ userId?: unknown }} resourceId - Resource descriptor of the call.
 * @returns {string} The stringified `userId` when it is truthy, otherwise "system".
 */
function getUserId(resourceId) {
    const { userId } = resourceId;
    if (!userId) {
        return "system";
    }
    return String(userId);
}
38
/**
 * Writes a reporter diagnostic line to the console.
 *
 * @param {string} message - Short label, emitted after the "[VoiceTracing:reporter]" prefix.
 * @param {unknown} payload - Structured details passed to `console.log` as the second argument.
 * @returns {void}
 */
function logVoiceTracing(message, payload) {
    const line = `[VoiceTracing:reporter] ${message}`;
    console.log(line, payload);
}
41
/**
 * Builds the metadata object attached to the voice session.
 *
 * Entries of `config.metadata` are merged last, so they override the built-in
 * fields on key collisions.
 *
 * @param {object} config - Reporter configuration; `config.resourceId` must be an object.
 * @returns {object} Metadata with `kind: "voice"`, call/model identifiers and any caller metadata.
 */
function getSessionMetadata(config) {
    const base = {
        kind: "voice",
        agentName: config.agentName,
        // Transport falls back to "webrtc" only when it is null/undefined.
        transport: config.transport != null ? config.transport : "webrtc",
        callId: config.callId,
        modelId: config.modelId,
        transcriptionModelId: config.transcriptionModelId,
        voice: config.voice,
        sourceSessionId: config.resourceId.sessionId
            ? String(config.resourceId.sessionId)
            : undefined,
    };
    const callerMetadata = config.metadata != null ? config.metadata : {};
    return Object.assign(base, callerMetadata);
}
45
/**
 * Pulls database linkage ids out of a reporting response.
 *
 * @param {unknown} value - Value returned by the reporting sink.
 * @returns {{ sessionDbId?: string|null, agentRunDbId?: string|null }}
 *   An empty object when `value` is not an object; otherwise both keys, each
 *   holding a string or `null`. `runDbId` takes precedence over `agentRunDbId`.
 */
function extractReportingLinkage(value) {
    const isObjectLike = Boolean(value) && typeof value === "object";
    if (!isObjectLike) {
        return {};
    }
    const stringOrNull = (candidate) => (typeof candidate === "string" ? candidate : null);
    const sessionDbId = stringOrNull(value.sessionDbId);
    let agentRunDbId = stringOrNull(value.runDbId);
    if (agentRunDbId === null) {
        agentRunDbId = stringOrNull(value.agentRunDbId);
    }
    return { sessionDbId, agentRunDbId };
}
58
/**
 * Truncates text for use as a preview.
 *
 * @param {string} text - Source text.
 * @param {number} [max=240] - End index handed to `String.prototype.slice`.
 * @returns {string} `text.slice(0, max)`.
 */
function preview(text, max = 240) {
    const clipped = text.slice(0, max);
    return clipped;
}
61
/**
 * Rounds a cent amount to six decimal places.
 *
 * @param {number} value - Amount in cents.
 * @returns {number} The amount rounded to the nearest millionth.
 */
function roundCents(value) {
    const scale = 1000000;
    const scaled = Math.round(value * scale);
    return scaled / scale;
}
64
/**
 * Converts a token count and a per-million dollar rate into cents.
 *
 * @param {number} [tokens] - Number of tokens billed.
 * @param {number} [dollarsPerMillion] - Price in dollars per one million tokens.
 * @returns {number} Cost in cents; 0 when either argument is falsy.
 */
function toCostCents(tokens, dollarsPerMillion) {
    const isBillable = Boolean(tokens) && Boolean(dollarsPerMillion);
    if (!isBillable) {
        return 0;
    }
    const millions = tokens / 1000000;
    return millions * dollarsPerMillion * 100;
}
69
/**
 * Computes a cost override, in cents, for token-based voice usage.
 *
 * Cached input tokens are billed at the cached rate and subtracted from the
 * regular input counts (never below zero) before the regular rate is applied.
 *
 * @param {object} [usage] - Usage record; only `type === "tokens"` is priced.
 * @param {string} [modelId] - Model id looked up in VOICE_MODEL_PRICING_OVERRIDES.
 * @returns {number|undefined} Cost in cents rounded via `roundCents`, or
 *   `undefined` when usage is missing or not token based, the model has no
 *   pricing entry, or every detailed token counter is zero.
 */
function calculateVoiceUsageCostCents(usage, modelId) {
    if (!usage || usage.type !== "tokens" || !modelId) {
        return undefined;
    }
    // Only own entries count as pricing. A plain bracket lookup would also
    // resolve inherited members (e.g. "toString", "constructor", "__proto__"),
    // which made unknown model ids look priced and yield a bogus 0-cent override.
    const hasPricing = Object.prototype.hasOwnProperty.call(VOICE_MODEL_PRICING_OVERRIDES, modelId);
    const pricing = hasPricing ? VOICE_MODEL_PRICING_OVERRIDES[modelId] : undefined;
    if (!pricing) {
        return undefined;
    }
    const count = (value) => (value != null ? value : 0);
    const inputAudioTokens = count(usage.inputAudioTokens);
    const inputTextTokens = count(usage.inputTextTokens);
    const cachedAudioTokens = count(usage.cachedAudioTokens);
    const cachedTextTokens = count(usage.cachedTextTokens);
    const outputAudioTokens = count(usage.outputAudioTokens);
    const outputTextTokens = count(usage.outputTextTokens);
    // Input tokens that were not served from cache; clamped so that a cached
    // count larger than the input count cannot produce a negative charge.
    const regularAudioTokens = Math.max(0, inputAudioTokens - cachedAudioTokens);
    const regularTextTokens = Math.max(0, inputTextTokens - cachedTextTokens);
    const detailedTokenCount = inputAudioTokens
        + inputTextTokens
        + outputAudioTokens
        + outputTextTokens
        + cachedAudioTokens
        + cachedTextTokens;
    // Without any detailed counters there is nothing to price; let the caller
    // fall back to its default costing instead of reporting 0.
    if (detailedTokenCount === 0) {
        return undefined;
    }
    const totalCostCents = toCostCents(regularAudioTokens, pricing.audioInputPerMillion)
        + toCostCents(regularTextTokens, pricing.textInputPerMillion)
        + toCostCents(cachedAudioTokens, pricing.audioCachedInputPerMillion)
        + toCostCents(cachedTextTokens, pricing.textCachedInputPerMillion)
        + toCostCents(outputAudioTokens, pricing.audioOutputPerMillion)
        + toCostCents(outputTextTokens, pricing.textOutputPerMillion);
    return roundCents(totalCostCents);
}
101
/**
 * Converts an image message part into the persisted image shape.
 *
 * @param {object} part - Part with an optional `url`, `image` and `mediaType`.
 * @returns {object|null} A `{ type: "image", source }` object, or `null` when
 *   the part carries neither a non-empty `url` nor a non-empty `image` string.
 */
function normalizeImagePart(part) {
    const isNonEmptyString = (candidate) => typeof candidate === "string" && candidate.length > 0;
    const wrap = (source) => ({ type: "image", source });
    // A URL wins over inline image data.
    if (isNonEmptyString(part.url)) {
        return wrap({ type: "url", url: part.url });
    }
    if (!isNonEmptyString(part.image)) {
        return null;
    }
    // Data URLs carry their own media type; split it from the base64 payload.
    const dataUrl = /^data:([^;,]+);base64,(.+)$/.exec(part.image);
    if (dataUrl) {
        return wrap({
            type: "base64",
            media_type: dataUrl[1],
            data: dataUrl[2],
        });
    }
    // Anything else is passed through as base64, defaulting to PNG.
    return wrap({
        type: "base64",
        media_type: part.mediaType != null ? part.mediaType : "image/png",
        data: part.image,
    });
}
135
/**
 * Converts a file message part into the persisted file shape.
 *
 * @param {object} part - Part that may carry `url`, `filename`/`name` and
 *   `mediaType`/`mimeType`/`mime_type`.
 * @returns {{ type: "file", file: { url: (string|undefined), name: *, mime_type: * } }}
 */
function normalizeFilePart(part) {
    // First non-nullish alias wins: filename, then name.
    let name = part.filename;
    if (name == null) {
        name = part.name;
    }
    // First non-nullish alias wins: mediaType, then mimeType, then mime_type.
    let mimeType = part.mediaType;
    if (mimeType == null) {
        mimeType = part.mimeType;
    }
    if (mimeType == null) {
        mimeType = part.mime_type;
    }
    const url = typeof part.url === "string" ? part.url : undefined;
    return {
        type: "file",
        file: { url, name, mime_type: mimeType },
    };
}
146
/**
 * Expands a "dynamic-tool" part into persisted tool-call / tool-result parts.
 *
 * @param {object} part - Part with `toolCallId`, `toolName`, `state` and
 *   optionally `input`, `output` and `errorText`.
 * @returns {object[]} A tool-call part when `input` is defined, a tool-result
 *   part when `output` is defined or `errorText` is truthy, or a single text
 *   part naming the tool and its state when neither applies.
 */
function normalizeDynamicToolPart(part) {
    const { toolCallId, toolName } = part;
    const emitted = [];
    if (part.input !== undefined) {
        emitted.push({
            type: "tool-call",
            toolCallId,
            toolName,
            input: part.input,
        });
    }
    const hasOutcome = part.output !== undefined || part.errorText;
    if (hasOutcome) {
        const resultPart = {
            type: "tool-result",
            toolCallId,
            toolName,
            // A nullish output falls back to the error text.
            result: part.output != null ? part.output : part.errorText,
        };
        if (part.state === "error" || part.errorText) {
            resultPart.isError = true;
        }
        emitted.push(resultPart);
    }
    if (emitted.length === 0) {
        emitted.push({
            type: "text",
            text: `${part.toolName} ${part.state}`,
        });
    }
    return emitted;
}
168
/**
 * Normalizes UI message parts into the persisted part list.
 *
 * Consecutive text/reasoning parts are trimmed and merged into one text part
 * (joined with single spaces). The merged text is emitted whenever a
 * non-text part of a handled type is reached, and once more at the end.
 * Parts of an unhandled type are dropped without interrupting text merging.
 *
 * @param {Iterable<object>} parts - Message parts, each with a `type`.
 * @returns {object[]} Persisted parts in their original relative order.
 */
function normalizeConversationParts(parts) {
    const result = [];
    let pendingText = [];
    const flushText = () => {
        const merged = pendingText.join(" ").trim();
        if (!merged) {
            return;
        }
        result.push({ type: "text", text: merged });
        pendingText = [];
    };
    for (const part of parts) {
        const kind = part.type;
        if (kind === "text" || kind === "reasoning") {
            const trimmed = part.text.trim();
            if (trimmed) {
                pendingText.push(trimmed);
            }
        }
        else if (kind === "tool-call") {
            flushText();
            result.push({
                type: "tool-call",
                toolCallId: part.toolCallId,
                toolName: part.toolName,
                input: part.input,
            });
        }
        else if (kind === "tool-result") {
            flushText();
            const toolResult = {
                type: "tool-result",
                toolCallId: part.toolCallId,
                toolName: part.toolName,
                output: part.output,
                result: part.result,
            };
            if (part.isError) {
                toolResult.isError = true;
            }
            result.push(toolResult);
        }
        else if (kind === "image") {
            flushText();
            // Images without usable content normalize to null and are skipped.
            const imagePart = normalizeImagePart(part);
            if (imagePart) {
                result.push(imagePart);
            }
        }
        else if (kind === "file") {
            flushText();
            result.push(normalizeFilePart(part));
        }
        else if (kind === "dynamic-tool") {
            flushText();
            result.push(...normalizeDynamicToolPart(part));
        }
        else if (kind === "data") {
            flushText();
            // Non-string data is rendered as pretty-printed JSON text.
            result.push({
                type: "text",
                text: typeof part.data === "string" ? part.data : JSON.stringify(part.data, null, 2),
            });
        }
    }
    flushText();
    return result;
}
227
/**
 * Converts UI messages into persisted conversation messages.
 *
 * Messages whose parts normalize to nothing are dropped. A message that
 * normalizes to a single string text part is stored with plain string
 * content; every other message keeps the normalized part array.
 *
 * @param {object[]} messages - Messages with `role`, `parts` and optional `metadata`.
 * @returns {object[]} Entries of the form `{ role, content, metadata }`.
 */
function toPersistedConversationMessages(messages) {
    const persisted = [];
    messages.forEach((message) => {
        const parts = normalizeConversationParts(message.parts);
        if (parts.length === 0) {
            return;
        }
        const first = parts[0];
        const isPlainText = parts.length === 1
            && first.type === "text"
            && typeof first.text === "string";
        persisted.push({
            role: message.role,
            content: isPlainText ? first.text : parts,
            metadata: message.metadata,
        });
    });
    return persisted;
}
248
/**
 * Produces a plain-text rendering of a persisted message.
 *
 * @param {{ content: (string|object[]) }} message - Persisted message.
 * @returns {string} String content as-is; for part arrays, the text parts plus
 *   short "Called …" / "Received … result" markers for named tool parts,
 *   joined with single spaces and trimmed.
 */
function getPersistedMessageText(message) {
    const { content } = message;
    if (typeof content === "string") {
        return content;
    }
    const describePart = (part) => {
        const hasToolName = typeof part.toolName === "string";
        switch (part.type) {
            case "text":
                return typeof part.text === "string" ? part.text : "";
            case "tool-call":
                return hasToolName ? `Called ${part.toolName}` : "";
            case "tool-result":
                return hasToolName ? `Received ${part.toolName} result` : "";
            default:
                return "";
        }
    };
    const fragments = content
        .map((part) => describePart(part))
        .filter((fragment) => fragment.length > 0);
    return fragments.join(" ").trim();
}
265
/**
 * Finds the text of the most recent message with the given role.
 *
 * @param {object[]} messages - Persisted messages, oldest first.
 * @param {string} role - Role to look for.
 * @returns {string|null} Text of the last matching message that renders to
 *   non-empty text, or `null` when there is none.
 */
function findLatestMessageText(messages, role) {
    let cursor = messages.length;
    // Walk backwards so the newest match is found first.
    while (cursor > 0) {
        cursor -= 1;
        const candidate = messages[cursor];
        if (candidate.role === role) {
            const text = getPersistedMessageText(candidate);
            if (text) {
                return text;
            }
        }
    }
    return null;
}
276
/**
 * Serializes persisted messages into a string used to compare message lists.
 *
 * @param {object[]} messages - Persisted messages.
 * @returns {string} `JSON.stringify(messages)`.
 */
function buildPersistedMessageSignature(messages) {
    const signature = JSON.stringify(messages);
    return signature;
}
279
/**
 * Collects the text of a UI message.
 *
 * @param {{ parts: object[] }} message - UI message.
 * @returns {string} The trimmed, non-empty texts of all text/reasoning parts,
 *   joined with single spaces.
 */
function getUiMessageText(message) {
    const pieces = [];
    message.parts.forEach((part) => {
        const isTextual = part.type === "text" || part.type === "reasoning";
        if (!isTextual) {
            return;
        }
        const trimmed = part.text.trim();
        if (trimmed) {
            pieces.push(trimmed);
        }
    });
    return pieces.join(" ").trim();
}
290
/**
 * Derives the de-duplication key for an assistant completion event.
 *
 * Preference order: item id, then response id (qualified by the output text
 * when there is one), then the output text alone.
 *
 * @param {{ itemId?: string, responseId?: string, outputText?: string }} event
 * @returns {string} Key prefixed with "item:", "response:" or "text:".
 */
function buildAssistantCompletionKey(event) {
    const { itemId, responseId, outputText } = event;
    if (itemId) {
        return `item:${itemId}`;
    }
    if (responseId) {
        return outputText
            ? `response:${responseId}:${outputText}`
            : `response:${responseId}`;
    }
    return `text:${outputText != null ? outputText : ""}`;
}
303
/**
 * Builds a preview of everything one role said within a turn.
 *
 * @param {object[]} messages - Persisted messages of the turn.
 * @param {string} role - Role whose messages are included.
 * @param {number} [maxLength] - Length limit forwarded to `preview`.
 * @returns {string} Non-empty message texts joined by newlines, trimmed and truncated.
 */
function buildTurnPreview(messages, role, maxLength) {
    const roleTexts = [];
    messages.forEach((message) => {
        if (message.role !== role) {
            return;
        }
        const text = getPersistedMessageText(message);
        if (text) {
            roleTexts.push(text);
        }
    });
    const combined = roleTexts.join("\n").trim();
    return preview(combined, maxLength);
}
312
/**
 * Derives tool session events from the tool parts of persisted messages.
 *
 * Only parts with string `toolCallId` and `toolName` produce events. Messages
 * whose content is not an array are ignored.
 *
 * @param {object[]} messages - Persisted messages.
 * @returns {object[]} "tool.started" events for tool-call parts and
 *   "tool.completed" / "tool.failed" events for tool-result parts, in order.
 */
function extractSyntheticToolEvents(messages) {
    // JSON previews are capped at 220 characters.
    const clip = (value) => JSON.stringify(value).slice(0, 220);
    // First non-nullish of result, output, fallback.
    const outcomeOf = (part, fallback) => {
        if (part.result != null) {
            return part.result;
        }
        if (part.output != null) {
            return part.output;
        }
        return fallback;
    };
    const events = [];
    for (const message of messages) {
        if (!Array.isArray(message.content)) {
            continue;
        }
        for (const part of message.content) {
            const isIdentified = typeof part.toolCallId === "string" && typeof part.toolName === "string";
            if (!isIdentified) {
                continue;
            }
            const { toolCallId, toolName } = part;
            if (part.type === "tool-call") {
                events.push({
                    eventType: "tool.started",
                    spanKey: toolCallId,
                    status: "active",
                    payload: {
                        toolCallId,
                        toolName,
                        inputPreview: clip(part.input != null ? part.input : {}),
                    },
                });
            }
            else if (part.type === "tool-result") {
                const failed = part.isError === true;
                const payload = { toolCallId, toolName };
                if (failed) {
                    payload.errorMessage = clip(outcomeOf(part, "Tool execution failed"));
                }
                else {
                    payload.outputPreview = clip(outcomeOf(part, {}));
                }
                events.push({
                    eventType: failed ? "tool.failed" : "tool.completed",
                    spanKey: toolCallId,
                    status: failed ? "error" : "completed",
                    payload,
                });
            }
        }
    }
    return events;
}
347
/**
 * Reconstructs conversation turns from a message history.
 *
 * A new turn begins at a user message that follows at least one non-user
 * message of the current group, so consecutive user messages stay in one turn.
 * Groups with neither user text nor an assistant message are discarded and do
 * not consume a turn index.
 *
 * @param {object[]} messages - UI messages of the whole conversation.
 * @returns {object[]} Turns of the form
 *   `{ turnIndex, messages, inputPreview, outputPreview, toolEvents, hasAssistantOutput }`.
 */
function deriveSyntheticVoiceTurns(messages) {
    const persisted = toPersistedConversationMessages(messages);
    if (persisted.length === 0) {
        return [];
    }
    // Pass 1: split the history into per-turn message groups.
    const groups = [];
    let openGroup = [];
    for (const message of persisted) {
        const startsNewTurn = message.role === "user"
            && openGroup.some((current) => current.role !== "user");
        if (startsNewTurn) {
            groups.push(openGroup);
            openGroup = [];
        }
        openGroup.push(message);
    }
    groups.push(openGroup);
    // Pass 2: describe each group, skipping the ones with nothing to report.
    const turns = [];
    for (const group of groups) {
        if (group.length === 0) {
            continue;
        }
        const inputPreview = buildTurnPreview(group, "user", 220);
        const outputPreview = buildTurnPreview(group, "assistant", 240) || null;
        const hasAssistantOutput = group.some((message) => message.role === "assistant");
        if (!inputPreview && !hasAssistantOutput) {
            continue;
        }
        turns.push({
            // Indexes are dense: only retained turns are counted.
            turnIndex: turns.length,
            messages: group,
            inputPreview,
            outputPreview,
            toolEvents: extractSyntheticToolEvents(group),
            hasAssistantOutput,
        });
    }
    return turns;
}
384
/**
 * Converts a usage record into a cost event.
 *
 * @param {string} eventType - Cost event type to report.
 * @param {object} [usage] - Usage record; token counts are copied only when
 *   `usage.type === "tokens"`.
 * @param {string} [modelId] - Model the usage is attributed to.
 * @returns {object|undefined} `undefined` without usage; otherwise an event
 *   with `eventType`, `modelId`, `costCentsOverride` and, for token usage,
 *   `inputTokens` / `outputTokens` / `cachedInputTokens` (each defaulting to 0).
 */
function usageToCostEvent(eventType, usage, modelId) {
    if (!usage) {
        return undefined;
    }
    const costCentsOverride = calculateVoiceUsageCostCents(usage, modelId);
    const costEvent = { eventType, modelId };
    if (usage.type === "tokens") {
        const orZero = (value) => (value != null ? value : 0);
        costEvent.inputTokens = orZero(usage.inputTokens);
        costEvent.outputTokens = orZero(usage.outputTokens);
        costEvent.cachedInputTokens = orZero(usage.cachedInputTokens);
    }
    costEvent.costCentsOverride = costCentsOverride;
    return costEvent;
}
405
/**
 * Mapping from incoming voice event type to the reported session event type
 * and the event fields copied into its payload (in payload key order).
 */
const SESSION_EVENT_PAYLOAD_SPECS = new Map([
    ["voice.user.transcribed", { eventType: "voice.user.transcribed", fields: ["transcript", "itemId"] }],
    ["voice.assistant.started", { eventType: "voice.assistant.started", fields: ["responseId"] }],
    ["voice.assistant.stopped", { eventType: "voice.assistant.stopped", fields: ["responseId"] }],
    ["voice.tool.started", { eventType: "tool.started", fields: ["toolCallId", "toolName", "input"] }],
    ["voice.tool.completed", { eventType: "tool.completed", fields: ["toolCallId", "toolName", "output"] }],
    ["voice.tool.failed", { eventType: "tool.failed", fields: ["toolCallId", "toolName", "error"] }],
    ["voice.interrupted", { eventType: "voice.turn.interrupted", fields: ["responseId", "reason"] }],
    ["voice.response.done", { eventType: "voice.turn.completed", fields: ["responseId", "status", "outputText"] }],
    ["voice.response.output.completed", { eventType: "voice.assistant.output.completed", fields: ["responseId", "itemId", "status", "outputText"] }],
]);
/**
 * Translates a voice event into a session event.
 *
 * @param {{ type: string }} event - Incoming voice event.
 * @returns {{ eventType: string, payload: object }|null} The session event
 *   (a fresh payload object per call), or `null` for unrecognized event types.
 */
function payloadForSessionEvent(event) {
    const spec = SESSION_EVENT_PAYLOAD_SPECS.get(event.type);
    if (!spec) {
        return null;
    }
    const payload = {};
    for (const field of spec.fields) {
        // Fields are copied even when undefined, so every payload key is always present.
        payload[field] = event[field];
    }
    return { eventType: spec.eventType, payload };
}
487
+ function createVoiceTracingReporter(config) {
488
+ var _a, _b;
489
+ const sink = config.adapter;
490
+ const sessionId = config.callId;
491
+ const userId = getUserId(config.resourceId);
492
+ const sessionMetadata = getSessionMetadata(config);
493
+ const pendingSessionEvents = [{
494
+ eventType: "voice.call.started",
495
+ payload: {
496
+ callId: config.callId,
497
+ modelId: config.modelId,
498
+ voice: config.voice,
499
+ transport: (_a = config.transport) !== null && _a !== void 0 ? _a : "webrtc",
500
+ },
501
+ }];
502
+ const connectionRun = (0, shared_1.createRemoteRunContext)({
503
+ agentName: config.agentName,
504
+ sessionId,
505
+ requestedRunScope: "session",
506
+ });
507
+ let activeTurn = null;
508
+ let turnCounter = -1;
509
+ let latestMessages = [];
510
+ const seenUserMessageIds = new Set();
511
+ const seenAssistantCompletionKeys = new Set();
512
+ let hasReportedRuns = false;
513
+ let reporterQueue = Promise.resolve();
514
+ let linkage = {};
515
+ let lastFlushedMessageSignature = null;
516
+ logVoiceTracing("initialized", {
517
+ sessionId,
518
+ agentName: config.agentName,
519
+ modelId: config.modelId,
520
+ transcriptionModelId: (_b = config.transcriptionModelId) !== null && _b !== void 0 ? _b : null,
521
+ });
522
+ const enqueueReporterTask = (task) => {
523
+ const nextTask = reporterQueue.then(task, task);
524
+ reporterQueue = nextTask.then(() => undefined, () => undefined);
525
+ return nextTask;
526
+ };
527
+ const ensureSessionBootstrap = async () => {
528
+ if (linkage.sessionDbId || linkage.agentRunDbId) {
529
+ return linkage;
530
+ }
531
+ const runResult = await sink.reportAgentRun((0, shared_1.buildRemoteRunPayload)({
532
+ execution: connectionRun,
533
+ userId,
534
+ sessionId,
535
+ modelId: config.modelId,
536
+ status: "running",
537
+ inputPreview: "",
538
+ triggerType: "voice",
539
+ agentType: "voice",
540
+ metadata: sessionMetadata,
541
+ sessionMetadata,
542
+ skipConversationLog: true,
543
+ skipPipelines: true,
544
+ skipCostRecording: true,
545
+ }));
546
+ linkage = Object.assign(Object.assign({}, linkage), extractReportingLinkage(runResult));
547
+ await sink.reportConversationLog((0, shared_1.buildRemoteLogPayload)({
548
+ execution: (0, shared_1.createRemoteExecutionContext)({
549
+ agentName: config.agentName,
550
+ sessionId,
551
+ requestedRunScope: "session",
552
+ turnId: "voice-call-start",
553
+ turnIndex: 0,
554
+ }),
555
+ userId,
556
+ sessionId,
557
+ messages: [],
558
+ modelId: config.modelId,
559
+ sessionMetadata,
560
+ incrementMessageCount: false,
561
+ skipPipelines: true,
562
+ skipTraceTracking: true,
563
+ sessionEvents: pendingSessionEvents.splice(0, pendingSessionEvents.length),
564
+ metadata: { source: "voice", phase: "call-start" },
565
+ }));
566
+ return linkage;
567
+ };
568
+ const withLinkage = (payload) => (Object.assign(Object.assign(Object.assign({}, payload), (linkage.sessionDbId ? { sessionDbId: linkage.sessionDbId } : {})), (linkage.agentRunDbId ? { agentRunDbId: linkage.agentRunDbId } : {})));
569
+ const reportConnectionRunCompletion = async (reason) => {
570
+ await ensureSessionBootstrap();
571
+ await sink.reportAgentRun(withLinkage((0, shared_1.buildRemoteRunPayload)({
572
+ execution: connectionRun,
573
+ finalizeRun: true,
574
+ userId,
575
+ sessionId,
576
+ modelId: config.modelId,
577
+ status: "completed",
578
+ triggerType: "voice",
579
+ agentType: "voice",
580
+ messages: toPersistedConversationMessages(latestMessages),
581
+ metadata: Object.assign(Object.assign({}, sessionMetadata), { endReason: reason !== null && reason !== void 0 ? reason : "disconnect" }),
582
+ sessionMetadata,
583
+ skipConversationLog: true,
584
+ skipPipelines: true,
585
+ skipCostRecording: true,
586
+ incrementSessionMessageCount: false,
587
+ completedAt: new Date().toISOString(),
588
+ })));
589
+ };
590
+ const startTurn = async (transcript, usage, startMessageId) => {
591
+ var _a, _b, _c;
592
+ turnCounter += 1;
593
+ const turnId = (0, shared_1.generateId)();
594
+ const startedAt = new Date().toISOString();
595
+ const execution = (0, shared_1.createRemoteExecutionContext)({
596
+ agentName: config.agentName,
597
+ sessionId,
598
+ requestedRunScope: "session",
599
+ turnId,
600
+ turnIndex: turnCounter,
601
+ });
602
+ activeTurn = {
603
+ execution,
604
+ turnIndex: turnCounter,
605
+ startedAt,
606
+ inputPreview: preview(transcript, 220),
607
+ startMessageId,
608
+ transcriptionUsage: usageToCostEvent("voice_transcription", usage, (_a = config.transcriptionModelId) !== null && _a !== void 0 ? _a : config.modelId),
609
+ pendingToolCallIds: new Set(),
610
+ };
611
+ if (startMessageId) {
612
+ seenUserMessageIds.add(startMessageId);
613
+ }
614
+ logVoiceTracing("turn started", {
615
+ sessionId,
616
+ runId: execution.runId,
617
+ turnId,
618
+ traceId: execution.traceId,
619
+ turnIndex: activeTurn.turnIndex,
620
+ transcriptPreview: activeTurn.inputPreview,
621
+ startMessageId: startMessageId !== null && startMessageId !== void 0 ? startMessageId : null,
622
+ hasUsage: !!usage,
623
+ transcriptionCostCentsOverride: (_c = (_b = activeTurn.transcriptionUsage) === null || _b === void 0 ? void 0 : _b.costCentsOverride) !== null && _c !== void 0 ? _c : null,
624
+ });
625
+ await ensureSessionBootstrap();
626
+ const traceStartPayload = (0, shared_1.buildRemoteTraceStartPayload)({
627
+ execution,
628
+ userId,
629
+ sessionId,
630
+ requestPreview: activeTurn.inputPreview,
631
+ modelId: config.modelId,
632
+ metadata: {
633
+ source: "voice",
634
+ callId: config.callId,
635
+ },
636
+ startedAt,
637
+ });
638
+ await sink.reportTraceEvents(withLinkage(Object.assign(Object.assign({}, traceStartPayload), { start: Object.assign(Object.assign(Object.assign({}, traceStartPayload.start), (linkage.sessionDbId ? { sessionDbId: linkage.sessionDbId } : {})), (linkage.agentRunDbId ? { agentRunDbId: linkage.agentRunDbId } : {})) })));
639
+ };
640
+ const getPendingUserMessagesFromState = () => latestMessages
641
+ .filter((message) => message.role === "user" && !seenUserMessageIds.has(message.id))
642
+ .map((message) => ({
643
+ id: message.id,
644
+ transcript: getUiMessageText(message),
645
+ }))
646
+ .filter((message) => message.transcript.length > 0);
647
+ const getTurnMessages = (turn, nextUserMessageId) => {
648
+ const startIndex = turn.startMessageId
649
+ ? latestMessages.findIndex((message) => message.id === turn.startMessageId)
650
+ : -1;
651
+ const nextUserIndex = nextUserMessageId
652
+ ? latestMessages.findIndex((message) => message.id === nextUserMessageId)
653
+ : -1;
654
+ const sliceStart = startIndex >= 0 ? startIndex : 0;
655
+ const sliceEnd = nextUserIndex > sliceStart ? nextUserIndex : latestMessages.length;
656
+ return latestMessages.slice(sliceStart, sliceEnd);
657
+ };
658
+ const finalizeActiveTurn = async (args = {}) => {
659
+ var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l;
660
+ if (!activeTurn)
661
+ return false;
662
+ const turn = activeTurn;
663
+ const turnMessages = getTurnMessages(turn, args.nextUserMessageId);
664
+ const responseUsage = usageToCostEvent("voice_chat", args.usage, config.modelId);
665
+ const fallbackAssistantPreview = findLatestMessageText(toPersistedConversationMessages(turnMessages), "assistant");
666
+ // Send cumulative messages up to the end of this turn (not beyond the next user turn's start).
667
+ const nextUserIndex = args.nextUserMessageId
668
+ ? latestMessages.findIndex((message) => message.id === args.nextUserMessageId)
669
+ : -1;
670
+ const turnEndIndex = nextUserIndex > 0 ? nextUserIndex : latestMessages.length;
671
+ const persistedMessages = toPersistedConversationMessages(latestMessages.slice(0, turnEndIndex));
672
+ const outputPreview = (_b = (_a = turn.outputPreview) !== null && _a !== void 0 ? _a : fallbackAssistantPreview) !== null && _b !== void 0 ? _b : undefined;
673
+ const completedAt = (_d = (_c = args.completedAt) !== null && _c !== void 0 ? _c : turn.completedAt) !== null && _d !== void 0 ? _d : new Date().toISOString();
674
+ const durationMs = Math.max(0, new Date(completedAt).getTime() - new Date(turn.startedAt).getTime());
675
+ const inferredStatus = args.forceInterrupt
676
+ ? "interrupted"
677
+ : (_f = (_e = args.responseStatus) !== null && _e !== void 0 ? _e : turn.completionStatus) !== null && _f !== void 0 ? _f : (outputPreview ? "completed" : "interrupted");
678
+ const finishState = inferredStatus === "completed" ? "completed" : "error";
679
+ logVoiceTracing("turn finalized", {
680
+ sessionId,
681
+ traceId: turn.execution.traceId,
682
+ runId: turn.execution.runId,
683
+ status: inferredStatus,
684
+ nextUserMessageId: (_g = args.nextUserMessageId) !== null && _g !== void 0 ? _g : null,
685
+ messageCount: persistedMessages.length,
686
+ outputPreview: outputPreview !== null && outputPreview !== void 0 ? outputPreview : null,
687
+ });
688
+ await ensureSessionBootstrap();
689
+ await sink.reportAgentRun(withLinkage((0, shared_1.buildRemoteRunPayload)({
690
+ execution: turn.execution,
691
+ userId,
692
+ sessionId,
693
+ modelId: config.modelId,
694
+ status: inferredStatus,
695
+ inputPreview: turn.inputPreview,
696
+ outputPreview,
697
+ triggerType: "voice",
698
+ agentType: "voice",
699
+ durationMs,
700
+ startedAt: turn.startedAt,
701
+ completedAt,
702
+ inputTokens: responseUsage === null || responseUsage === void 0 ? void 0 : responseUsage.inputTokens,
703
+ outputTokens: responseUsage === null || responseUsage === void 0 ? void 0 : responseUsage.outputTokens,
704
+ cachedInputTokens: responseUsage === null || responseUsage === void 0 ? void 0 : responseUsage.cachedInputTokens,
705
+ costCentsOverride: responseUsage === null || responseUsage === void 0 ? void 0 : responseUsage.costCentsOverride,
706
+ costEventType: responseUsage ? "voice_chat" : undefined,
707
+ additionalCostEvents: turn.transcriptionUsage ? [turn.transcriptionUsage] : [],
708
+ messages: persistedMessages,
709
+ metadata: Object.assign(Object.assign({}, sessionMetadata), { responseId: (_h = turn.responseId) !== null && _h !== void 0 ? _h : null, turnIndex: turn.turnIndex, responseStatus: inferredStatus, endReason: (_j = args.reason) !== null && _j !== void 0 ? _j : null }),
710
+ })));
711
+ hasReportedRuns = true;
712
+ await sink.reportTraceEvents(withLinkage((0, shared_1.buildRemoteTraceFinishPayload)({
713
+ execution: turn.execution,
714
+ state: finishState,
715
+ responsePreview: outputPreview !== null && outputPreview !== void 0 ? outputPreview : undefined,
716
+ durationMs,
717
+ usage: {
718
+ inputTokens: responseUsage === null || responseUsage === void 0 ? void 0 : responseUsage.inputTokens,
719
+ outputTokens: responseUsage === null || responseUsage === void 0 ? void 0 : responseUsage.outputTokens,
720
+ cachedInputTokens: responseUsage === null || responseUsage === void 0 ? void 0 : responseUsage.cachedInputTokens,
721
+ },
722
+ payload: {
723
+ responseStatus: inferredStatus,
724
+ responseId: (_k = turn.responseId) !== null && _k !== void 0 ? _k : null,
725
+ reason: (_l = args.reason) !== null && _l !== void 0 ? _l : null,
726
+ },
727
+ errorMessage: finishState === "error" ? inferredStatus : undefined,
728
+ })));
729
+ await flushConversationLog({
730
+ execution: turn.execution,
731
+ messages: persistedMessages,
732
+ });
733
+ activeTurn = null;
734
+ return true;
735
+ };
736
+ const syncTurnsFromState = async (usage) => {
737
+ const pending = getPendingUserMessagesFromState();
738
+ if (pending.length === 0) {
739
+ logVoiceTracing("no active turn found from state", {
740
+ sessionId,
741
+ latestMessageCount: latestMessages.length,
742
+ seenUserMessageCount: seenUserMessageIds.size,
743
+ });
744
+ return;
745
+ }
746
+ for (const pendingUser of pending) {
747
+ if (activeTurn) {
748
+ await finalizeActiveTurn({
749
+ nextUserMessageId: pendingUser.id,
750
+ forceInterrupt: !activeTurn.completedAt && !activeTurn.outputPreview,
751
+ reason: "next_user_turn",
752
+ });
753
+ }
754
+ logVoiceTracing("turn recovered from state", {
755
+ sessionId,
756
+ messageId: pendingUser.id,
757
+ transcriptPreview: preview(pendingUser.transcript, 120),
758
+ });
759
+ await startTurn(pendingUser.transcript, usage, pendingUser.id);
760
+ }
761
+ };
762
+ const enrichSessionEvent = (event) => {
763
+ var _a, _b, _c;
764
+ return (Object.assign(Object.assign({}, event), { agentName: (_a = event.agentName) !== null && _a !== void 0 ? _a : config.agentName, traceId: (_b = event.traceId) !== null && _b !== void 0 ? _b : activeTurn === null || activeTurn === void 0 ? void 0 : activeTurn.execution.traceId, agentRunId: (_c = event.agentRunId) !== null && _c !== void 0 ? _c : activeTurn === null || activeTurn === void 0 ? void 0 : activeTurn.execution.runId }));
765
+ };
766
+ const flushConversationLog = async (args = {}) => {
767
+ var _a, _b;
768
+ await ensureSessionBootstrap();
769
+ const execution = (_a = args.execution) !== null && _a !== void 0 ? _a : (0, shared_1.createRemoteExecutionContext)({
770
+ agentName: config.agentName,
771
+ sessionId,
772
+ requestedRunScope: "session",
773
+ turnId: "voice-session-flush",
774
+ turnIndex: turnCounter >= 0 ? turnCounter : 0,
775
+ });
776
+ const messages = (_b = args.messages) !== null && _b !== void 0 ? _b : toPersistedConversationMessages(latestMessages);
777
+ const messageSignature = buildPersistedMessageSignature(messages);
778
+ const hasPendingSessionEvents = pendingSessionEvents.length > 0;
779
+ const shouldSkipReplayMessages = messageSignature === lastFlushedMessageSignature;
780
+ if (shouldSkipReplayMessages && !hasPendingSessionEvents) {
781
+ logVoiceTracing("conversation flush skipped", {
782
+ sessionId,
783
+ traceId: execution.traceId,
784
+ agentRunId: execution.runId,
785
+ reason: "duplicate_messages",
786
+ });
787
+ return;
788
+ }
789
+ const messagesToPersist = shouldSkipReplayMessages ? [] : messages;
790
+ await sink.reportConversationLog(withLinkage((0, shared_1.buildRemoteLogPayload)({
791
+ execution,
792
+ userId,
793
+ sessionId,
794
+ messages: messagesToPersist,
795
+ modelId: config.modelId,
796
+ incrementMessageCount: false,
797
+ skipPipelines: true,
798
+ skipTraceTracking: true,
799
+ sessionMetadata,
800
+ sessionEvents: pendingSessionEvents.splice(0, pendingSessionEvents.length),
801
+ metadata: { source: "voice", phase: "session-flush" },
802
+ })));
803
+ if (!shouldSkipReplayMessages) {
804
+ lastFlushedMessageSignature = messageSignature;
805
+ }
806
+ logVoiceTracing("conversation flushed", {
807
+ sessionId,
808
+ traceId: execution.traceId,
809
+ agentRunId: execution.runId,
810
+ messageCount: messagesToPersist.length,
811
+ replaySkipped: shouldSkipReplayMessages,
812
+ });
813
+ };
814
/**
 * Fallback reporting path: derives turns from `latestMessages` via
 * `deriveSyntheticVoiceTurns` and reports one agent run plus one trace
 * (start, tool events, finish) per derived turn.
 *
 * Does nothing when a run has already been reported (`hasReportedRuns`) or
 * when no turns can be derived. Sets `hasReportedRuns` to `true` after the
 * first run is reported.
 *
 * Each synthetic turn is given a fixed 1000 ms duration; turns are laid out
 * one second apart, ending at `args.responseAt` (or "now").
 *
 * @param {object} [args]
 * @param {string} [args.reason] - Why recovery was triggered (logged and stored in run metadata).
 * @param {string|number|Date} [args.responseAt] - Completion time of the last turn.
 *   Falls back to the current time when missing or unparseable.
 * @param {string} [args.responseId] - Attached to the last turn's metadata only.
 * @param {string} [args.responseStatus] - Raw response status; applied to the last turn only.
 * @param {object} [args.usage] - Passed to `usageToCostEvent` for the last turn only.
 * @returns {Promise<boolean>} `true` when synthetic turns were reported, otherwise `false`.
 */
const emitSyntheticTurnsFromConversation = async (args = {}) => {
    const SYNTHETIC_TURN_DURATION_MS = 1000;
    const orNull = (value) => (value === null || value === undefined ? null : value);
    const orUndefined = (value) => (value === null || value === undefined ? undefined : value);
    const recoveryReason = orNull(args.reason);

    if (hasReportedRuns) {
        logVoiceTracing("synthetic turn recovery skipped", {
            sessionId,
            reason: recoveryReason,
            hasReportedRuns,
        });
        return false;
    }

    const turns = deriveSyntheticVoiceTurns(latestMessages);
    if (turns.length === 0) {
        logVoiceTracing("synthetic turn recovery found no turns", {
            sessionId,
            reason: recoveryReason,
            messageCount: latestMessages.length,
        });
        return false;
    }

    // An unparseable responseAt yields NaN, and new Date(NaN).toISOString()
    // throws RangeError, which would abort the whole recovery. Use "now" instead.
    const parsedResponseAt = args.responseAt ? new Date(args.responseAt).getTime() : Number.NaN;
    const baseCompletedAt = Number.isFinite(parsedResponseAt) ? parsedResponseAt : Date.now();

    logVoiceTracing("synthetic turns recovered", {
        sessionId,
        reason: recoveryReason,
        turnCount: turns.length,
        messageCount: latestMessages.length,
        responseStatus: orNull(args.responseStatus),
    });

    // The raw response status only applies to the last turn; earlier turns are
    // judged by whether they produced assistant output.
    const resolveStatus = (turn, isLastTurn) => {
        if (isLastTurn && args.responseStatus) {
            if (args.responseStatus === "cancelled" || args.responseStatus === "incomplete") {
                return "interrupted";
            }
            return args.responseStatus === "failed" ? "failed" : "completed";
        }
        return turn.hasAssistantOutput ? "completed" : "interrupted";
    };

    let accumulatedSyntheticMessages = [];
    for (const [index, turn] of turns.entries()) {
        const syntheticTurnId = (0, shared_1.generateId)();
        const execution = (0, shared_1.createRemoteExecutionContext)({
            agentName: config.agentName,
            sessionId,
            requestedRunScope: "session",
            turnId: syntheticTurnId,
            turnIndex: turn.turnIndex,
        });

        // Space turns one second apart, counting back from the last turn.
        const turnsAfterThisOne = Math.max(0, turns.length - index - 1);
        const completedAtMs = baseCompletedAt - turnsAfterThisOne * 1000;
        const syntheticCompletedAt = new Date(completedAtMs).toISOString();
        const syntheticStartedAt = new Date(completedAtMs - SYNTHETIC_TURN_DURATION_MS).toISOString();

        const isLastTurn = index === turns.length - 1;
        const responseUsage = isLastTurn ? usageToCostEvent("voice_chat", args.usage, config.modelId) : undefined;
        const usageFields = responseUsage === null || responseUsage === undefined ? {} : responseUsage;
        const status = resolveStatus(turn, isLastTurn);
        const finishState = status === "completed" ? "completed" : "error";

        // Fresh array per turn so each reported payload keeps its own snapshot.
        accumulatedSyntheticMessages = [...accumulatedSyntheticMessages, ...turn.messages];

        await ensureSessionBootstrap();
        await sink.reportAgentRun(withLinkage((0, shared_1.buildRemoteRunPayload)({
            execution,
            userId,
            sessionId,
            modelId: config.modelId,
            status,
            inputPreview: turn.inputPreview,
            outputPreview: orUndefined(turn.outputPreview),
            triggerType: "voice",
            agentType: "voice",
            startedAt: syntheticStartedAt,
            completedAt: syntheticCompletedAt,
            durationMs: SYNTHETIC_TURN_DURATION_MS,
            inputTokens: usageFields.inputTokens,
            outputTokens: usageFields.outputTokens,
            cachedInputTokens: usageFields.cachedInputTokens,
            costCentsOverride: usageFields.costCentsOverride,
            costEventType: "voice_chat",
            messages: accumulatedSyntheticMessages,
            incrementSessionMessageCount: false,
            skipConversationLog: true,
            skipPipelines: true,
            metadata: Object.assign({}, sessionMetadata, {
                responseId: isLastTurn ? orNull(args.responseId) : null,
                turnIndex: turn.turnIndex,
                responseStatus: status,
                recovery: "conversation_fallback",
                recoveryReason,
            }),
        })));
        hasReportedRuns = true;

        const traceStartPayload = (0, shared_1.buildRemoteTraceStartPayload)({
            execution,
            userId,
            sessionId,
            requestPreview: turn.inputPreview,
            modelId: config.modelId,
            metadata: {
                source: "voice",
                callId: config.callId,
                synthetic: true,
                recovery: "conversation_fallback",
            },
            startedAt: syntheticStartedAt,
        });
        const traceStart = Object.assign(
            {},
            traceStartPayload.start,
            linkage.sessionDbId ? { sessionDbId: linkage.sessionDbId } : {},
            linkage.agentRunDbId ? { agentRunDbId: linkage.agentRunDbId } : {},
        );
        const traceFinish = (0, shared_1.buildRemoteTraceFinishPayload)({
            execution,
            state: finishState,
            responsePreview: orUndefined(turn.outputPreview),
            durationMs: SYNTHETIC_TURN_DURATION_MS,
            usage: {
                inputTokens: usageFields.inputTokens,
                outputTokens: usageFields.outputTokens,
                cachedInputTokens: usageFields.cachedInputTokens,
            },
            errorMessage: finishState === "error" ? status : undefined,
        }).finish;
        await sink.reportTraceEvents(withLinkage(Object.assign({}, traceStartPayload, {
            start: traceStart,
            events: turn.toolEvents,
            finish: traceFinish,
        })));
    }
    return true;
};
921
+ return {
922
+ handleSessionEvent(event) {
923
+ void enqueueReporterTask(async () => {
924
+ latestMessages = event.state.messages;
925
+ logVoiceTracing("session state updated", {
926
+ sessionId,
927
+ messageCount: latestMessages.length,
928
+ toolInvocationCount: event.state.toolInvocations.length,
929
+ connectionStatus: event.state.connectionStatus,
930
+ agentState: event.state.agentState,
931
+ speechState: event.state.speechState,
932
+ transcriptionStatus: event.state.transcriptionStatus,
933
+ hasActiveTurn: !!activeTurn,
934
+ });
935
+ await syncTurnsFromState();
936
+ });
937
+ },
938
+ async handleTelemetry(event) {
939
+ return enqueueReporterTask(async () => {
940
+ var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m, _o, _p, _q, _r, _s, _t;
941
+ logVoiceTracing("telemetry received", {
942
+ sessionId,
943
+ type: event.type,
944
+ responseId: "responseId" in event ? (_a = event.responseId) !== null && _a !== void 0 ? _a : null : null,
945
+ itemId: "itemId" in event ? (_b = event.itemId) !== null && _b !== void 0 ? _b : null : null,
946
+ toolCallId: "toolCallId" in event ? (_c = event.toolCallId) !== null && _c !== void 0 ? _c : null : null,
947
+ hasActiveTurn: !!activeTurn,
948
+ });
949
+ if (event.type === "voice.user.transcribed") {
950
+ if (event.itemId && seenUserMessageIds.has(event.itemId)) {
951
+ logVoiceTracing("duplicate user turn ignored", {
952
+ sessionId,
953
+ itemId: event.itemId,
954
+ });
955
+ return;
956
+ }
957
+ await syncTurnsFromState(event.usage);
958
+ if (!activeTurn) {
959
+ await startTurn(event.transcript, event.usage, event.itemId);
960
+ }
961
+ const sessionEvent = payloadForSessionEvent(event);
962
+ if (sessionEvent)
963
+ pendingSessionEvents.push(enrichSessionEvent(sessionEvent));
964
+ return;
965
+ }
966
+ await syncTurnsFromState();
967
+ const sessionEvent = payloadForSessionEvent(event);
968
+ if (sessionEvent)
969
+ pendingSessionEvents.push(enrichSessionEvent(sessionEvent));
970
+ if (event.type === "voice.response.created" && activeTurn) {
971
+ activeTurn.responseId = event.responseId;
972
+ logVoiceTracing("response linked to turn", {
973
+ sessionId,
974
+ traceId: activeTurn.execution.traceId,
975
+ runId: activeTurn.execution.runId,
976
+ responseId: event.responseId,
977
+ });
978
+ return;
979
+ }
980
+ if (event.type === "voice.response.output.completed" && activeTurn) {
981
+ const completionKey = buildAssistantCompletionKey(event);
982
+ if (seenAssistantCompletionKeys.has(completionKey)) {
983
+ logVoiceTracing("duplicate assistant completion ignored", {
984
+ sessionId,
985
+ responseId: (_d = event.responseId) !== null && _d !== void 0 ? _d : null,
986
+ itemId: (_e = event.itemId) !== null && _e !== void 0 ? _e : null,
987
+ });
988
+ return;
989
+ }
990
+ seenAssistantCompletionKeys.add(completionKey);
991
+ activeTurn.responseId = (_f = event.responseId) !== null && _f !== void 0 ? _f : activeTurn.responseId;
992
+ activeTurn.outputPreview = event.outputText
993
+ ? preview(event.outputText, 240)
994
+ : (_g = findLatestMessageText(toPersistedConversationMessages(latestMessages), "assistant")) !== null && _g !== void 0 ? _g : activeTurn.outputPreview;
995
+ activeTurn.completedAt = event.at;
996
+ activeTurn.completionStatus = (_h = event.status) !== null && _h !== void 0 ? _h : "completed";
997
+ logVoiceTracing("assistant output completed", {
998
+ sessionId,
999
+ traceId: activeTurn.execution.traceId,
1000
+ runId: activeTurn.execution.runId,
1001
+ responseId: (_j = activeTurn.responseId) !== null && _j !== void 0 ? _j : null,
1002
+ itemId: (_k = event.itemId) !== null && _k !== void 0 ? _k : null,
1003
+ outputPreview: (_l = activeTurn.outputPreview) !== null && _l !== void 0 ? _l : null,
1004
+ });
1005
+ await ensureSessionBootstrap();
1006
+ await sink.reportTraceEvents(withLinkage({
1007
+ traceId: activeTurn.execution.traceId,
1008
+ events: [{
1009
+ eventType: "assistant.completed",
1010
+ status: "completed",
1011
+ payload: {
1012
+ preview: (_m = activeTurn.outputPreview) !== null && _m !== void 0 ? _m : null,
1013
+ responseId: (_o = activeTurn.responseId) !== null && _o !== void 0 ? _o : null,
1014
+ itemId: (_p = event.itemId) !== null && _p !== void 0 ? _p : null,
1015
+ },
1016
+ }],
1017
+ }));
1018
+ return;
1019
+ }
1020
+ if (event.type === "voice.tool.started" && activeTurn) {
1021
+ activeTurn.pendingToolCallIds.add(event.toolCallId);
1022
+ logVoiceTracing("tool started", {
1023
+ sessionId,
1024
+ traceId: activeTurn.execution.traceId,
1025
+ toolCallId: event.toolCallId,
1026
+ toolName: event.toolName,
1027
+ pendingCount: activeTurn.pendingToolCallIds.size,
1028
+ });
1029
+ await ensureSessionBootstrap();
1030
+ await sink.reportTraceEvents(withLinkage({
1031
+ traceId: activeTurn.execution.traceId,
1032
+ events: [{
1033
+ eventType: "tool.started",
1034
+ spanKey: event.toolCallId,
1035
+ status: "active",
1036
+ payload: {
1037
+ toolCallId: event.toolCallId,
1038
+ toolName: event.toolName,
1039
+ inputPreview: JSON.stringify((_q = event.input) !== null && _q !== void 0 ? _q : {}).slice(0, 220),
1040
+ },
1041
+ }],
1042
+ }));
1043
+ return;
1044
+ }
1045
+ if (event.type === "voice.tool.completed" && activeTurn) {
1046
+ activeTurn.pendingToolCallIds.delete(event.toolCallId);
1047
+ logVoiceTracing("tool completed", {
1048
+ sessionId,
1049
+ traceId: activeTurn.execution.traceId,
1050
+ toolCallId: event.toolCallId,
1051
+ toolName: event.toolName,
1052
+ pendingCount: activeTurn.pendingToolCallIds.size,
1053
+ hasCompletedAt: !!activeTurn.completedAt,
1054
+ });
1055
+ await ensureSessionBootstrap();
1056
+ await sink.reportTraceEvents(withLinkage({
1057
+ traceId: activeTurn.execution.traceId,
1058
+ events: [{
1059
+ eventType: "tool.completed",
1060
+ spanKey: event.toolCallId,
1061
+ status: "completed",
1062
+ payload: {
1063
+ toolCallId: event.toolCallId,
1064
+ toolName: event.toolName,
1065
+ outputPreview: JSON.stringify((_r = event.output) !== null && _r !== void 0 ? _r : {}).slice(0, 220),
1066
+ },
1067
+ }],
1068
+ }));
1069
+ // All tool calls done and response.done already fired — finalize the turn now.
1070
+ // This handles the case where there is no second response.done (single-response turns).
1071
+ if (activeTurn.pendingToolCallIds.size === 0 && activeTurn.completedAt) {
1072
+ logVoiceTracing("finalizing after last tool completed", {
1073
+ sessionId,
1074
+ traceId: activeTurn.execution.traceId,
1075
+ runId: activeTurn.execution.runId,
1076
+ });
1077
+ await finalizeActiveTurn({
1078
+ completedAt: activeTurn.completedAt,
1079
+ responseStatus: activeTurn.completionStatus,
1080
+ });
1081
+ }
1082
+ return;
1083
+ }
1084
+ if (event.type === "voice.tool.failed" && activeTurn) {
1085
+ activeTurn.pendingToolCallIds.delete(event.toolCallId);
1086
+ await ensureSessionBootstrap();
1087
+ await sink.reportTraceEvents(withLinkage({
1088
+ traceId: activeTurn.execution.traceId,
1089
+ events: [{
1090
+ eventType: "tool.failed",
1091
+ spanKey: event.toolCallId,
1092
+ status: "error",
1093
+ payload: {
1094
+ toolCallId: event.toolCallId,
1095
+ toolName: event.toolName,
1096
+ errorMessage: event.error,
1097
+ },
1098
+ }],
1099
+ }));
1100
+ if (activeTurn.pendingToolCallIds.size === 0 && activeTurn.completedAt) {
1101
+ await finalizeActiveTurn({
1102
+ completedAt: activeTurn.completedAt,
1103
+ responseStatus: activeTurn.completionStatus,
1104
+ });
1105
+ }
1106
+ return;
1107
+ }
1108
+ if ((event.type === "voice.assistant.started" || event.type === "voice.assistant.stopped") && activeTurn) {
1109
+ await ensureSessionBootstrap();
1110
+ await sink.reportTraceEvents(withLinkage({
1111
+ traceId: activeTurn.execution.traceId,
1112
+ events: [{
1113
+ eventType: event.type,
1114
+ status: "completed",
1115
+ payload: {
1116
+ responseId: event.responseId,
1117
+ },
1118
+ }],
1119
+ }));
1120
+ return;
1121
+ }
1122
+ if (event.type === "voice.interrupted" && activeTurn) {
1123
+ await ensureSessionBootstrap();
1124
+ await sink.reportTraceEvents(withLinkage({
1125
+ traceId: activeTurn.execution.traceId,
1126
+ events: [{
1127
+ eventType: "trace.interrupted",
1128
+ status: "cancelled",
1129
+ payload: {
1130
+ reason: event.reason,
1131
+ responseId: event.responseId,
1132
+ },
1133
+ }],
1134
+ }));
1135
+ return;
1136
+ }
1137
+ if (event.type === "voice.response.done" && activeTurn) {
1138
+ activeTurn.responseId = event.responseId;
1139
+ activeTurn.outputPreview = event.outputText
1140
+ ? preview(event.outputText, 240)
1141
+ : activeTurn.outputPreview;
1142
+ activeTurn.completedAt = event.at;
1143
+ activeTurn.completionStatus = event.status === "cancelled" || event.status === "incomplete"
1144
+ ? "interrupted"
1145
+ : event.status === "failed"
1146
+ ? "failed"
1147
+ : "completed";
1148
+ // The browser-session SDK fires agent_tool_start AFTER response.done, so
1149
+ // pendingToolCallIds may not yet be populated via voice.tool.started.
1150
+ // Pre-populate from rawEvent output items so we can defer finalization.
1151
+ // Use call_id first (matches voice.tool.completed.toolCallId), fall back to id.
1152
+ const rawResponse = typeof ((_s = event.rawEvent) === null || _s === void 0 ? void 0 : _s.response) === "object" && event.rawEvent.response !== null
1153
+ ? event.rawEvent.response
1154
+ : null;
1155
+ const rawOutputs = Array.isArray(rawResponse === null || rawResponse === void 0 ? void 0 : rawResponse.output) ? rawResponse.output : [];
1156
+ for (const output of rawOutputs) {
1157
+ if (typeof output === "object" && output !== null) {
1158
+ const item = output;
1159
+ if (item.type === "function_call") {
1160
+ if (typeof item.call_id === "string")
1161
+ activeTurn.pendingToolCallIds.add(item.call_id);
1162
+ else if (typeof item.id === "string")
1163
+ activeTurn.pendingToolCallIds.add(item.id);
1164
+ }
1165
+ }
1166
+ }
1167
+ logVoiceTracing("response done rawEvent check", {
1168
+ sessionId,
1169
+ traceId: activeTurn.execution.traceId,
1170
+ hasRawResponse: !!rawResponse,
1171
+ rawOutputCount: rawOutputs.length,
1172
+ functionCallCount: rawOutputs.filter((o) => typeof o === "object" && o !== null && o.type === "function_call").length,
1173
+ pendingCount: activeTurn.pendingToolCallIds.size,
1174
+ });
1175
+ if (activeTurn.pendingToolCallIds.size > 0) {
1176
+ // Tool calls detected — keep activeTurn alive so voice.tool.started and
1177
+ // voice.tool.completed can be properly traced against this turn.
1178
+ logVoiceTracing("response done deferred — tool calls in output", {
1179
+ sessionId,
1180
+ traceId: activeTurn.execution.traceId,
1181
+ runId: activeTurn.execution.runId,
1182
+ status: event.status,
1183
+ pendingCount: activeTurn.pendingToolCallIds.size,
1184
+ });
1185
+ return;
1186
+ }
1187
+ logVoiceTracing("response completed", {
1188
+ sessionId,
1189
+ traceId: activeTurn.execution.traceId,
1190
+ runId: activeTurn.execution.runId,
1191
+ status: event.status,
1192
+ outputPreview: (_t = activeTurn.outputPreview) !== null && _t !== void 0 ? _t : null,
1193
+ });
1194
+ await finalizeActiveTurn({
1195
+ completedAt: event.at,
1196
+ responseStatus: activeTurn.completionStatus,
1197
+ usage: event.usage,
1198
+ });
1199
+ return;
1200
+ }
1201
+ if (event.type === "voice.response.done" && !activeTurn) {
1202
+ logVoiceTracing("response completed without active turn", {
1203
+ sessionId,
1204
+ responseId: event.responseId,
1205
+ status: event.status,
1206
+ latestMessageCount: latestMessages.length,
1207
+ });
1208
+ await emitSyntheticTurnsFromConversation({
1209
+ reason: "response.done_without_active_turn",
1210
+ responseAt: event.at,
1211
+ responseId: event.responseId,
1212
+ responseStatus: event.status,
1213
+ usage: event.usage,
1214
+ });
1215
+ }
1216
+ });
1217
+ },
1218
+ flushCallEnd(reason) {
1219
+ void enqueueReporterTask(async () => {
1220
+ var _a;
1221
+ logVoiceTracing("flush call end", {
1222
+ sessionId,
1223
+ reason: reason !== null && reason !== void 0 ? reason : null,
1224
+ hasActiveTurn: !!activeTurn,
1225
+ });
1226
+ pendingSessionEvents.push({
1227
+ eventType: "voice.call.ended",
1228
+ payload: { callId: config.callId, reason },
1229
+ });
1230
+ if (activeTurn) {
1231
+ await finalizeActiveTurn({
1232
+ completedAt: (_a = activeTurn.completedAt) !== null && _a !== void 0 ? _a : new Date().toISOString(),
1233
+ responseStatus: activeTurn.completionStatus,
1234
+ forceInterrupt: !activeTurn.completedAt && !activeTurn.outputPreview,
1235
+ reason: reason !== null && reason !== void 0 ? reason : "disconnect",
1236
+ });
1237
+ await reportConnectionRunCompletion(reason);
1238
+ return;
1239
+ }
1240
+ await emitSyntheticTurnsFromConversation({
1241
+ reason: reason !== null && reason !== void 0 ? reason : "disconnect",
1242
+ });
1243
+ await flushConversationLog();
1244
+ await reportConnectionRunCompletion(reason);
1245
+ });
1246
+ },
1247
+ };
1248
+ }
1249
+ exports.createVoiceTelemetryReporter = createVoiceTracingReporter;