@livekit/agents 1.0.22 → 1.0.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/inference/api_protos.cjs +2 -2
- package/dist/inference/api_protos.cjs.map +1 -1
- package/dist/inference/api_protos.d.cts +16 -16
- package/dist/inference/api_protos.d.ts +16 -16
- package/dist/inference/api_protos.js +2 -2
- package/dist/inference/api_protos.js.map +1 -1
- package/dist/ipc/job_proc_lazy_main.cjs +35 -1
- package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
- package/dist/ipc/job_proc_lazy_main.js +13 -1
- package/dist/ipc/job_proc_lazy_main.js.map +1 -1
- package/dist/job.cjs +52 -6
- package/dist/job.cjs.map +1 -1
- package/dist/job.d.cts +2 -0
- package/dist/job.d.ts +2 -0
- package/dist/job.d.ts.map +1 -1
- package/dist/job.js +52 -6
- package/dist/job.js.map +1 -1
- package/dist/llm/llm.cjs +38 -3
- package/dist/llm/llm.cjs.map +1 -1
- package/dist/llm/llm.d.cts +1 -0
- package/dist/llm/llm.d.ts +1 -0
- package/dist/llm/llm.d.ts.map +1 -1
- package/dist/llm/llm.js +38 -3
- package/dist/llm/llm.js.map +1 -1
- package/dist/log.cjs +34 -10
- package/dist/log.cjs.map +1 -1
- package/dist/log.d.cts +7 -0
- package/dist/log.d.ts +7 -0
- package/dist/log.d.ts.map +1 -1
- package/dist/log.js +34 -11
- package/dist/log.js.map +1 -1
- package/dist/telemetry/index.cjs +23 -2
- package/dist/telemetry/index.cjs.map +1 -1
- package/dist/telemetry/index.d.cts +4 -1
- package/dist/telemetry/index.d.ts +4 -1
- package/dist/telemetry/index.d.ts.map +1 -1
- package/dist/telemetry/index.js +27 -2
- package/dist/telemetry/index.js.map +1 -1
- package/dist/telemetry/logging.cjs +65 -0
- package/dist/telemetry/logging.cjs.map +1 -0
- package/dist/telemetry/logging.d.cts +21 -0
- package/dist/telemetry/logging.d.ts +21 -0
- package/dist/telemetry/logging.d.ts.map +1 -0
- package/dist/telemetry/logging.js +40 -0
- package/dist/telemetry/logging.js.map +1 -0
- package/dist/telemetry/otel_http_exporter.cjs +144 -0
- package/dist/telemetry/otel_http_exporter.cjs.map +1 -0
- package/dist/telemetry/otel_http_exporter.d.cts +62 -0
- package/dist/telemetry/otel_http_exporter.d.ts +62 -0
- package/dist/telemetry/otel_http_exporter.d.ts.map +1 -0
- package/dist/telemetry/otel_http_exporter.js +120 -0
- package/dist/telemetry/otel_http_exporter.js.map +1 -0
- package/dist/telemetry/pino_otel_transport.cjs +217 -0
- package/dist/telemetry/pino_otel_transport.cjs.map +1 -0
- package/dist/telemetry/pino_otel_transport.d.cts +58 -0
- package/dist/telemetry/pino_otel_transport.d.ts +58 -0
- package/dist/telemetry/pino_otel_transport.d.ts.map +1 -0
- package/dist/telemetry/pino_otel_transport.js +189 -0
- package/dist/telemetry/pino_otel_transport.js.map +1 -0
- package/dist/telemetry/traces.cjs +225 -16
- package/dist/telemetry/traces.cjs.map +1 -1
- package/dist/telemetry/traces.d.cts +17 -0
- package/dist/telemetry/traces.d.ts +17 -0
- package/dist/telemetry/traces.d.ts.map +1 -1
- package/dist/telemetry/traces.js +211 -14
- package/dist/telemetry/traces.js.map +1 -1
- package/dist/tts/tts.cjs +62 -5
- package/dist/tts/tts.cjs.map +1 -1
- package/dist/tts/tts.d.cts +2 -0
- package/dist/tts/tts.d.ts +2 -0
- package/dist/tts/tts.d.ts.map +1 -1
- package/dist/tts/tts.js +62 -5
- package/dist/tts/tts.js.map +1 -1
- package/dist/utils.cjs +6 -0
- package/dist/utils.cjs.map +1 -1
- package/dist/utils.d.cts +1 -0
- package/dist/utils.d.ts +1 -0
- package/dist/utils.d.ts.map +1 -1
- package/dist/utils.js +5 -0
- package/dist/utils.js.map +1 -1
- package/dist/voice/agent_activity.cjs +93 -7
- package/dist/voice/agent_activity.cjs.map +1 -1
- package/dist/voice/agent_activity.d.cts +3 -0
- package/dist/voice/agent_activity.d.ts +3 -0
- package/dist/voice/agent_activity.d.ts.map +1 -1
- package/dist/voice/agent_activity.js +93 -7
- package/dist/voice/agent_activity.js.map +1 -1
- package/dist/voice/agent_session.cjs +122 -27
- package/dist/voice/agent_session.cjs.map +1 -1
- package/dist/voice/agent_session.d.cts +15 -0
- package/dist/voice/agent_session.d.ts +15 -0
- package/dist/voice/agent_session.d.ts.map +1 -1
- package/dist/voice/agent_session.js +122 -27
- package/dist/voice/agent_session.js.map +1 -1
- package/dist/voice/audio_recognition.cjs +69 -22
- package/dist/voice/audio_recognition.cjs.map +1 -1
- package/dist/voice/audio_recognition.d.cts +5 -0
- package/dist/voice/audio_recognition.d.ts +5 -0
- package/dist/voice/audio_recognition.d.ts.map +1 -1
- package/dist/voice/audio_recognition.js +69 -22
- package/dist/voice/audio_recognition.js.map +1 -1
- package/dist/voice/generation.cjs +43 -3
- package/dist/voice/generation.cjs.map +1 -1
- package/dist/voice/generation.d.ts.map +1 -1
- package/dist/voice/generation.js +43 -3
- package/dist/voice/generation.js.map +1 -1
- package/dist/voice/report.cjs +3 -2
- package/dist/voice/report.cjs.map +1 -1
- package/dist/voice/report.d.cts +7 -1
- package/dist/voice/report.d.ts +7 -1
- package/dist/voice/report.d.ts.map +1 -1
- package/dist/voice/report.js +3 -2
- package/dist/voice/report.js.map +1 -1
- package/package.json +8 -2
- package/src/inference/api_protos.ts +2 -2
- package/src/ipc/job_proc_lazy_main.ts +12 -1
- package/src/job.ts +59 -10
- package/src/llm/llm.ts +48 -5
- package/src/log.ts +52 -15
- package/src/telemetry/index.ts +22 -4
- package/src/telemetry/logging.ts +55 -0
- package/src/telemetry/otel_http_exporter.ts +191 -0
- package/src/telemetry/pino_otel_transport.ts +265 -0
- package/src/telemetry/traces.ts +320 -20
- package/src/tts/tts.ts +71 -9
- package/src/utils.ts +5 -0
- package/src/voice/agent_activity.ts +140 -22
- package/src/voice/agent_session.ts +174 -34
- package/src/voice/audio_recognition.ts +85 -26
- package/src/voice/generation.ts +59 -7
- package/src/voice/report.ts +10 -4
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
//
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
import { AudioFrame } from '@livekit/rtc-node';
|
|
5
|
+
import type { Context, Span } from '@opentelemetry/api';
|
|
5
6
|
import type { WritableStreamDefaultWriter } from 'node:stream/web';
|
|
6
7
|
import { ReadableStream } from 'node:stream/web';
|
|
7
8
|
import { type ChatContext } from '../llm/chat_context.js';
|
|
@@ -10,6 +11,7 @@ import { DeferredReadableStream, isStreamReaderReleaseError } from '../stream/de
|
|
|
10
11
|
import { IdentityTransform } from '../stream/identity_transform.js';
|
|
11
12
|
import { mergeReadableStreams } from '../stream/merge_readable_streams.js';
|
|
12
13
|
import { type SpeechEvent, SpeechEventType } from '../stt/stt.js';
|
|
14
|
+
import { traceTypes, tracer } from '../telemetry/index.js';
|
|
13
15
|
import { Task, delay } from '../utils.js';
|
|
14
16
|
import { type VAD, type VADEvent, VADEventType } from '../vad.js';
|
|
15
17
|
import type { TurnDetectionMode } from './agent_session.js';
|
|
@@ -55,10 +57,9 @@ export interface AudioRecognitionOptions {
|
|
|
55
57
|
turnDetectionMode?: Exclude<TurnDetectionMode, _TurnDetector>;
|
|
56
58
|
minEndpointingDelay: number;
|
|
57
59
|
maxEndpointingDelay: number;
|
|
60
|
+
rootSpanContext?: Context;
|
|
58
61
|
}
|
|
59
62
|
|
|
60
|
-
// TODO(brian): PR3 - Add span: private _userTurnSpan?: Span, create lazily in _ensureUserTurnSpan() method (tracer.startSpan('user_turn') with participant attributes)
|
|
61
|
-
// TODO(brian): PR3 - Add span: 'eou_detection' span when running EOU detection (in runEOUDetection method)
|
|
62
63
|
export class AudioRecognition {
|
|
63
64
|
private hooks: RecognitionHooks;
|
|
64
65
|
private stt?: STTNode;
|
|
@@ -68,6 +69,7 @@ export class AudioRecognition {
|
|
|
68
69
|
private minEndpointingDelay: number;
|
|
69
70
|
private maxEndpointingDelay: number;
|
|
70
71
|
private lastLanguage?: string;
|
|
72
|
+
private rootSpanContext?: Context;
|
|
71
73
|
|
|
72
74
|
private deferredInputStream: DeferredReadableStream<AudioFrame>;
|
|
73
75
|
private logger = log();
|
|
@@ -82,6 +84,8 @@ export class AudioRecognition {
|
|
|
82
84
|
private speaking = false;
|
|
83
85
|
private sampleRate?: number;
|
|
84
86
|
|
|
87
|
+
private userTurnSpan?: Span;
|
|
88
|
+
|
|
85
89
|
private vadInputStream: ReadableStream<AudioFrame>;
|
|
86
90
|
private sttInputStream: ReadableStream<AudioFrame>;
|
|
87
91
|
private silenceAudioTransform = new IdentityTransform<AudioFrame>();
|
|
@@ -102,6 +106,7 @@ export class AudioRecognition {
|
|
|
102
106
|
this.minEndpointingDelay = opts.minEndpointingDelay;
|
|
103
107
|
this.maxEndpointingDelay = opts.maxEndpointingDelay;
|
|
104
108
|
this.lastLanguage = undefined;
|
|
109
|
+
this.rootSpanContext = opts.rootSpanContext;
|
|
105
110
|
|
|
106
111
|
this.deferredInputStream = new DeferredReadableStream<AudioFrame>();
|
|
107
112
|
const [vadInputStream, sttInputStream] = this.deferredInputStream.stream.tee();
|
|
@@ -357,31 +362,47 @@ export class AudioRecognition {
|
|
|
357
362
|
let endpointingDelay = this.minEndpointingDelay;
|
|
358
363
|
|
|
359
364
|
if (turnDetector) {
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
365
|
+
await tracer.startActiveSpan(
|
|
366
|
+
async (span) => {
|
|
367
|
+
this.logger.debug('Running turn detector model');
|
|
368
|
+
|
|
369
|
+
let endOfTurnProbability = 0.0;
|
|
370
|
+
let unlikelyThreshold: number | undefined;
|
|
371
|
+
|
|
372
|
+
if (!(await turnDetector.supportsLanguage(this.lastLanguage))) {
|
|
373
|
+
this.logger.debug(`Turn detector does not support language ${this.lastLanguage}`);
|
|
374
|
+
} else {
|
|
375
|
+
try {
|
|
376
|
+
endOfTurnProbability = await turnDetector.predictEndOfTurn(chatCtx);
|
|
377
|
+
unlikelyThreshold = await turnDetector.unlikelyThreshold(this.lastLanguage);
|
|
378
|
+
|
|
379
|
+
this.logger.debug(
|
|
380
|
+
{ endOfTurnProbability, unlikelyThreshold, language: this.lastLanguage },
|
|
381
|
+
'end of turn probability',
|
|
382
|
+
);
|
|
383
|
+
|
|
384
|
+
if (unlikelyThreshold && endOfTurnProbability < unlikelyThreshold) {
|
|
385
|
+
endpointingDelay = this.maxEndpointingDelay;
|
|
386
|
+
}
|
|
387
|
+
} catch (error) {
|
|
388
|
+
this.logger.error(error, 'Error predicting end of turn');
|
|
389
|
+
}
|
|
390
|
+
}
|
|
380
391
|
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
392
|
+
span.setAttribute(
|
|
393
|
+
traceTypes.ATTR_CHAT_CTX,
|
|
394
|
+
JSON.stringify(chatCtx.toJSON({ excludeTimestamp: false })),
|
|
395
|
+
);
|
|
396
|
+
span.setAttribute(traceTypes.ATTR_EOU_PROBABILITY, endOfTurnProbability);
|
|
397
|
+
span.setAttribute(traceTypes.ATTR_EOU_UNLIKELY_THRESHOLD, unlikelyThreshold ?? 0);
|
|
398
|
+
span.setAttribute(traceTypes.ATTR_EOU_DELAY, endpointingDelay);
|
|
399
|
+
span.setAttribute(traceTypes.ATTR_EOU_LANGUAGE, this.lastLanguage ?? '');
|
|
400
|
+
},
|
|
401
|
+
{
|
|
402
|
+
name: 'eou_detection',
|
|
403
|
+
context: this.rootSpanContext,
|
|
404
|
+
},
|
|
405
|
+
);
|
|
385
406
|
}
|
|
386
407
|
|
|
387
408
|
let extraSleep = endpointingDelay;
|
|
@@ -430,6 +451,13 @@ export class AudioRecognition {
|
|
|
430
451
|
});
|
|
431
452
|
|
|
432
453
|
if (committed) {
|
|
454
|
+
this._endUserTurnSpan({
|
|
455
|
+
transcript: this.audioTranscript,
|
|
456
|
+
confidence: confidenceAvg,
|
|
457
|
+
transcriptionDelay: transcriptionDelay ?? 0,
|
|
458
|
+
endOfUtteranceDelay: endOfUtteranceDelay ?? 0,
|
|
459
|
+
});
|
|
460
|
+
|
|
433
461
|
// clear the transcript if the user turn was committed
|
|
434
462
|
this.audioTranscript = '';
|
|
435
463
|
this.finalTranscriptConfidence = [];
|
|
@@ -537,6 +565,13 @@ export class AudioRecognition {
|
|
|
537
565
|
this.hooks.onStartOfSpeech(ev);
|
|
538
566
|
this.speaking = true;
|
|
539
567
|
|
|
568
|
+
if (!this.userTurnSpan) {
|
|
569
|
+
this.userTurnSpan = tracer.startSpan({
|
|
570
|
+
name: 'user_turn',
|
|
571
|
+
context: this.rootSpanContext,
|
|
572
|
+
});
|
|
573
|
+
}
|
|
574
|
+
|
|
540
575
|
// Capture sample rate from the first VAD event if not already set
|
|
541
576
|
if (ev.frames.length > 0 && ev.frames[0]) {
|
|
542
577
|
this.sampleRate = ev.frames[0].sampleRate;
|
|
@@ -646,12 +681,36 @@ export class AudioRecognition {
|
|
|
646
681
|
|
|
647
682
|
async close() {
|
|
648
683
|
this.detachInputAudioStream();
|
|
684
|
+
this.silenceAudioWriter.releaseLock();
|
|
649
685
|
await this.commitUserTurnTask?.cancelAndWait();
|
|
650
686
|
await this.sttTask?.cancelAndWait();
|
|
651
687
|
await this.vadTask?.cancelAndWait();
|
|
652
688
|
await this.bounceEOUTask?.cancelAndWait();
|
|
653
689
|
}
|
|
654
690
|
|
|
691
|
+
private _endUserTurnSpan({
|
|
692
|
+
transcript,
|
|
693
|
+
confidence,
|
|
694
|
+
transcriptionDelay,
|
|
695
|
+
endOfUtteranceDelay,
|
|
696
|
+
}: {
|
|
697
|
+
transcript: string;
|
|
698
|
+
confidence: number;
|
|
699
|
+
transcriptionDelay: number;
|
|
700
|
+
endOfUtteranceDelay: number;
|
|
701
|
+
}): void {
|
|
702
|
+
if (this.userTurnSpan) {
|
|
703
|
+
this.userTurnSpan.setAttributes({
|
|
704
|
+
[traceTypes.ATTR_USER_TRANSCRIPT]: transcript,
|
|
705
|
+
[traceTypes.ATTR_TRANSCRIPT_CONFIDENCE]: confidence,
|
|
706
|
+
[traceTypes.ATTR_TRANSCRIPTION_DELAY]: transcriptionDelay,
|
|
707
|
+
[traceTypes.ATTR_END_OF_TURN_DELAY]: endOfUtteranceDelay,
|
|
708
|
+
});
|
|
709
|
+
this.userTurnSpan.end();
|
|
710
|
+
this.userTurnSpan = undefined;
|
|
711
|
+
}
|
|
712
|
+
}
|
|
713
|
+
|
|
655
714
|
private get vadBaseTurnDetection() {
|
|
656
715
|
return ['vad', undefined].includes(this.turnDetectionMode);
|
|
657
716
|
}
|
package/src/voice/generation.ts
CHANGED
|
@@ -3,6 +3,8 @@
|
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
import type { AudioFrame } from '@livekit/rtc-node';
|
|
5
5
|
import { AudioResampler } from '@livekit/rtc-node';
|
|
6
|
+
import type { Span } from '@opentelemetry/api';
|
|
7
|
+
import { context as otelContext } from '@opentelemetry/api';
|
|
6
8
|
import type { ReadableStream, ReadableStreamDefaultReader } from 'stream/web';
|
|
7
9
|
import {
|
|
8
10
|
type ChatContext,
|
|
@@ -21,6 +23,7 @@ import {
|
|
|
21
23
|
import { isZodSchema, parseZodSchema } from '../llm/zod-utils.js';
|
|
22
24
|
import { log } from '../log.js';
|
|
23
25
|
import { IdentityTransform } from '../stream/identity_transform.js';
|
|
26
|
+
import { traceTypes, tracer } from '../telemetry/index.js';
|
|
24
27
|
import { Future, Task, shortuuid, toError } from '../utils.js';
|
|
25
28
|
import { type Agent, type ModelSettings, asyncLocalStorage, isStopResponse } from './agent.js';
|
|
26
29
|
import type { AgentSession } from './agent_session.js';
|
|
@@ -377,7 +380,6 @@ export function updateInstructions(options: {
|
|
|
377
380
|
}
|
|
378
381
|
}
|
|
379
382
|
|
|
380
|
-
// TODO(brian): PR3 - Add @tracer.startActiveSpan('llm_node') decorator/wrapper
|
|
381
383
|
export function performLLMInference(
|
|
382
384
|
node: LLMNode,
|
|
383
385
|
chatCtx: ChatContext,
|
|
@@ -392,7 +394,13 @@ export function performLLMInference(
|
|
|
392
394
|
const toolCallWriter = toolCallStream.writable.getWriter();
|
|
393
395
|
const data = new _LLMGenerationData(textStream.readable, toolCallStream.readable);
|
|
394
396
|
|
|
395
|
-
const
|
|
397
|
+
const _performLLMInferenceImpl = async (signal: AbortSignal, span: Span) => {
|
|
398
|
+
span.setAttribute(
|
|
399
|
+
traceTypes.ATTR_CHAT_CTX,
|
|
400
|
+
JSON.stringify(chatCtx.toJSON({ excludeTimestamp: false })),
|
|
401
|
+
);
|
|
402
|
+
span.setAttribute(traceTypes.ATTR_FUNCTION_TOOLS, JSON.stringify(Object.keys(toolCtx)));
|
|
403
|
+
|
|
396
404
|
let llmStreamReader: ReadableStreamDefaultReader<string | ChatChunk> | null = null;
|
|
397
405
|
let llmStream: ReadableStream<string | ChatChunk> | null = null;
|
|
398
406
|
|
|
@@ -448,6 +456,8 @@ export function performLLMInference(
|
|
|
448
456
|
// No need to check if chunk is of type other than ChatChunk or string like in
|
|
449
457
|
// Python since chunk is defined in the type ChatChunk | string in TypeScript
|
|
450
458
|
}
|
|
459
|
+
|
|
460
|
+
span.setAttribute(traceTypes.ATTR_RESPONSE_TEXT, data.generatedText);
|
|
451
461
|
} catch (error) {
|
|
452
462
|
if (error instanceof DOMException && error.name === 'AbortError') {
|
|
453
463
|
// Abort signal was triggered, handle gracefully
|
|
@@ -462,13 +472,21 @@ export function performLLMInference(
|
|
|
462
472
|
}
|
|
463
473
|
};
|
|
464
474
|
|
|
475
|
+
// Capture the current context (agent_turn) to ensure llm_node is properly parented
|
|
476
|
+
const currentContext = otelContext.active();
|
|
477
|
+
|
|
478
|
+
const inferenceTask = async (signal: AbortSignal) =>
|
|
479
|
+
tracer.startActiveSpan(async (span) => _performLLMInferenceImpl(signal, span), {
|
|
480
|
+
name: 'llm_node',
|
|
481
|
+
context: currentContext,
|
|
482
|
+
});
|
|
483
|
+
|
|
465
484
|
return [
|
|
466
485
|
Task.from((controller) => inferenceTask(controller.signal), controller, 'performLLMInference'),
|
|
467
486
|
data,
|
|
468
487
|
];
|
|
469
488
|
}
|
|
470
489
|
|
|
471
|
-
// TODO(brian): PR3 - Add @tracer.startActiveSpan('tts_node') decorator/wrapper
|
|
472
490
|
export function performTTSInference(
|
|
473
491
|
node: TTSNode,
|
|
474
492
|
text: ReadableStream<string>,
|
|
@@ -479,7 +497,7 @@ export function performTTSInference(
|
|
|
479
497
|
const outputWriter = audioStream.writable.getWriter();
|
|
480
498
|
const audioOutputStream = audioStream.readable;
|
|
481
499
|
|
|
482
|
-
const
|
|
500
|
+
const _performTTSInferenceImpl = async (signal: AbortSignal) => {
|
|
483
501
|
let ttsStreamReader: ReadableStreamDefaultReader<AudioFrame> | null = null;
|
|
484
502
|
let ttsStream: ReadableStream<AudioFrame> | null = null;
|
|
485
503
|
|
|
@@ -514,6 +532,15 @@ export function performTTSInference(
|
|
|
514
532
|
}
|
|
515
533
|
};
|
|
516
534
|
|
|
535
|
+
// Capture the current context (agent_turn) to ensure tts_node is properly parented
|
|
536
|
+
const currentContext = otelContext.active();
|
|
537
|
+
|
|
538
|
+
const inferenceTask = async (signal: AbortSignal) =>
|
|
539
|
+
tracer.startActiveSpan(async () => _performTTSInferenceImpl(signal), {
|
|
540
|
+
name: 'tts_node',
|
|
541
|
+
context: currentContext,
|
|
542
|
+
});
|
|
543
|
+
|
|
517
544
|
return [
|
|
518
545
|
Task.from((controller) => inferenceTask(controller.signal), controller, 'performTTSInference'),
|
|
519
546
|
audioOutputStream,
|
|
@@ -652,7 +679,7 @@ export function performAudioForwarding(
|
|
|
652
679
|
];
|
|
653
680
|
}
|
|
654
681
|
|
|
655
|
-
//
|
|
682
|
+
// function_tool span is already implemented in tracableToolExecution below
|
|
656
683
|
export function performToolExecutions({
|
|
657
684
|
session,
|
|
658
685
|
speechHandle,
|
|
@@ -788,8 +815,9 @@ export function performToolExecutions({
|
|
|
788
815
|
});
|
|
789
816
|
});
|
|
790
817
|
|
|
791
|
-
const
|
|
792
|
-
|
|
818
|
+
const _tracableToolExecutionImpl = async (toolExecTask: Promise<unknown>, span: Span) => {
|
|
819
|
+
span.setAttribute(traceTypes.ATTR_FUNCTION_TOOL_NAME, toolCall.name);
|
|
820
|
+
span.setAttribute(traceTypes.ATTR_FUNCTION_TOOL_ARGS, toolCall.args);
|
|
793
821
|
|
|
794
822
|
// await for task to complete, if task is aborted, set exception
|
|
795
823
|
let toolOutput: ToolExecutionOutput | undefined;
|
|
@@ -800,6 +828,17 @@ export function performToolExecutions({
|
|
|
800
828
|
exception: isAborted ? new Error('tool call was aborted') : undefined,
|
|
801
829
|
output: isAborted ? undefined : result,
|
|
802
830
|
});
|
|
831
|
+
|
|
832
|
+
if (toolOutput.toolCallOutput) {
|
|
833
|
+
span.setAttribute(
|
|
834
|
+
traceTypes.ATTR_FUNCTION_TOOL_OUTPUT,
|
|
835
|
+
toolOutput.toolCallOutput.output,
|
|
836
|
+
);
|
|
837
|
+
span.setAttribute(
|
|
838
|
+
traceTypes.ATTR_FUNCTION_TOOL_IS_ERROR,
|
|
839
|
+
toolOutput.toolCallOutput.isError,
|
|
840
|
+
);
|
|
841
|
+
}
|
|
803
842
|
} catch (rawError) {
|
|
804
843
|
logger.error(
|
|
805
844
|
{
|
|
@@ -813,12 +852,25 @@ export function performToolExecutions({
|
|
|
813
852
|
toolCall,
|
|
814
853
|
exception: toError(rawError),
|
|
815
854
|
});
|
|
855
|
+
|
|
856
|
+
if (toolOutput.toolCallOutput) {
|
|
857
|
+
span.setAttribute(
|
|
858
|
+
traceTypes.ATTR_FUNCTION_TOOL_OUTPUT,
|
|
859
|
+
toolOutput.toolCallOutput.output,
|
|
860
|
+
);
|
|
861
|
+
span.setAttribute(traceTypes.ATTR_FUNCTION_TOOL_IS_ERROR, true);
|
|
862
|
+
}
|
|
816
863
|
} finally {
|
|
817
864
|
if (!toolOutput) throw new Error('toolOutput is undefined');
|
|
818
865
|
toolCompleted(toolOutput);
|
|
819
866
|
}
|
|
820
867
|
};
|
|
821
868
|
|
|
869
|
+
const tracableToolExecution = (toolExecTask: Promise<unknown>) =>
|
|
870
|
+
tracer.startActiveSpan(async (span) => _tracableToolExecutionImpl(toolExecTask, span), {
|
|
871
|
+
name: 'function_tool',
|
|
872
|
+
});
|
|
873
|
+
|
|
822
874
|
// wait, not cancelling all tool calling tasks
|
|
823
875
|
tasks.push(tracableToolExecution(toolExecution));
|
|
824
876
|
}
|
package/src/voice/report.ts
CHANGED
|
@@ -12,7 +12,10 @@ export interface SessionReport {
|
|
|
12
12
|
options: VoiceOptions;
|
|
13
13
|
events: AgentEvent[];
|
|
14
14
|
chatHistory: ChatContext;
|
|
15
|
-
|
|
15
|
+
enableRecording: boolean;
|
|
16
|
+
/** Timestamp when the session started (milliseconds) */
|
|
17
|
+
startedAt: number;
|
|
18
|
+
/** Timestamp when the session report was created (milliseconds), typically at the end of the session */
|
|
16
19
|
timestamp: number;
|
|
17
20
|
}
|
|
18
21
|
|
|
@@ -24,6 +27,9 @@ export interface SessionReportOptions {
|
|
|
24
27
|
events: AgentEvent[];
|
|
25
28
|
chatHistory: ChatContext;
|
|
26
29
|
enableUserDataTraining?: boolean;
|
|
30
|
+
/** Timestamp when the session started (milliseconds) */
|
|
31
|
+
startedAt?: number;
|
|
32
|
+
/** Timestamp when the session report was created (milliseconds) */
|
|
27
33
|
timestamp?: number;
|
|
28
34
|
}
|
|
29
35
|
|
|
@@ -35,12 +41,12 @@ export function createSessionReport(opts: SessionReportOptions): SessionReport {
|
|
|
35
41
|
options: opts.options,
|
|
36
42
|
events: opts.events,
|
|
37
43
|
chatHistory: opts.chatHistory,
|
|
38
|
-
|
|
44
|
+
enableRecording: opts.enableUserDataTraining ?? false,
|
|
45
|
+
startedAt: opts.startedAt ?? Date.now(),
|
|
39
46
|
timestamp: opts.timestamp ?? Date.now(),
|
|
40
47
|
};
|
|
41
48
|
}
|
|
42
49
|
|
|
43
|
-
// TODO(brian): PR5 - Add uploadSessionReport() function that creates multipart form with:
|
|
44
50
|
// - header: protobuf MetricsRecordingHeader (room_id, duration, start_time)
|
|
45
51
|
// - chat_history: JSON serialized chat history (use sessionReportToJSON)
|
|
46
52
|
// - audio: audio recording file if available (ogg format)
|
|
@@ -71,7 +77,7 @@ export function sessionReportToJSON(report: SessionReport): Record<string, unkno
|
|
|
71
77
|
max_tool_steps: report.options.maxToolSteps,
|
|
72
78
|
},
|
|
73
79
|
chat_history: report.chatHistory.toJSON({ excludeTimestamp: false }),
|
|
74
|
-
enable_user_data_training: report.
|
|
80
|
+
enable_user_data_training: report.enableRecording,
|
|
75
81
|
timestamp: report.timestamp,
|
|
76
82
|
};
|
|
77
83
|
}
|