@livekit/agents 0.7.3 → 0.7.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/audio.cjs +1 -1
- package/dist/audio.cjs.map +1 -1
- package/dist/audio.js +1 -1
- package/dist/audio.js.map +1 -1
- package/dist/constants.cjs +38 -0
- package/dist/constants.cjs.map +1 -0
- package/dist/constants.d.ts +5 -0
- package/dist/constants.d.ts.map +1 -0
- package/dist/constants.js +11 -0
- package/dist/constants.js.map +1 -0
- package/dist/ipc/inference_proc_lazy_main.cjs +14 -27
- package/dist/ipc/inference_proc_lazy_main.cjs.map +1 -1
- package/dist/ipc/inference_proc_lazy_main.js +14 -5
- package/dist/ipc/inference_proc_lazy_main.js.map +1 -1
- package/dist/ipc/job_proc_lazy_main.cjs +23 -10
- package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
- package/dist/ipc/job_proc_lazy_main.js +23 -10
- package/dist/ipc/job_proc_lazy_main.js.map +1 -1
- package/dist/ipc/supervised_proc.cjs +4 -5
- package/dist/ipc/supervised_proc.cjs.map +1 -1
- package/dist/ipc/supervised_proc.d.ts.map +1 -1
- package/dist/ipc/supervised_proc.js +4 -5
- package/dist/ipc/supervised_proc.js.map +1 -1
- package/dist/multimodal/multimodal_agent.cjs +26 -9
- package/dist/multimodal/multimodal_agent.cjs.map +1 -1
- package/dist/multimodal/multimodal_agent.d.ts.map +1 -1
- package/dist/multimodal/multimodal_agent.js +30 -9
- package/dist/multimodal/multimodal_agent.js.map +1 -1
- package/dist/pipeline/agent_playout.cjs +1 -1
- package/dist/pipeline/agent_playout.cjs.map +1 -1
- package/dist/pipeline/agent_playout.d.ts.map +1 -1
- package/dist/pipeline/agent_playout.js +1 -1
- package/dist/pipeline/agent_playout.js.map +1 -1
- package/dist/pipeline/pipeline_agent.cjs +52 -36
- package/dist/pipeline/pipeline_agent.cjs.map +1 -1
- package/dist/pipeline/pipeline_agent.d.ts.map +1 -1
- package/dist/pipeline/pipeline_agent.js +56 -36
- package/dist/pipeline/pipeline_agent.js.map +1 -1
- package/package.json +1 -1
- package/src/audio.ts +1 -1
- package/src/constants.ts +7 -0
- package/src/ipc/inference_proc_lazy_main.ts +21 -6
- package/src/ipc/job_proc_lazy_main.ts +27 -9
- package/src/ipc/supervised_proc.ts +5 -6
- package/src/multimodal/multimodal_agent.ts +32 -10
- package/src/pipeline/agent_playout.ts +1 -7
- package/src/pipeline/pipeline_agent.ts +64 -36
|
@@ -17,6 +17,11 @@ import {
|
|
|
17
17
|
import type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';
|
|
18
18
|
import { randomUUID } from 'node:crypto';
|
|
19
19
|
import EventEmitter from 'node:events';
|
|
20
|
+
import {
|
|
21
|
+
ATTRIBUTE_TRANSCRIPTION_FINAL,
|
|
22
|
+
ATTRIBUTE_TRANSCRIPTION_TRACK_ID,
|
|
23
|
+
TOPIC_TRANSCRIPTION,
|
|
24
|
+
} from '../constants.js';
|
|
20
25
|
import type {
|
|
21
26
|
CallableFunctionResult,
|
|
22
27
|
FunctionCallInfo,
|
|
@@ -518,28 +523,21 @@ export class VoicePipelineAgent extends (EventEmitter as new () => TypedEmitter<
|
|
|
518
523
|
this.emit(VPAEvent.USER_STOPPED_SPEAKING);
|
|
519
524
|
this.#deferredValidation.onHumanEndOfSpeech(event);
|
|
520
525
|
});
|
|
521
|
-
this.#humanInput.on(HumanInputEvent.INTERIM_TRANSCRIPT, (event) => {
|
|
526
|
+
this.#humanInput.on(HumanInputEvent.INTERIM_TRANSCRIPT, async (event) => {
|
|
522
527
|
if (!this.#transcriptionId) {
|
|
523
528
|
this.#transcriptionId = randomUUID();
|
|
524
529
|
}
|
|
525
530
|
this.#transcribedInterimText = event.alternatives![0].text;
|
|
526
531
|
|
|
527
|
-
this.#
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
final: true,
|
|
535
|
-
startTime: BigInt(0),
|
|
536
|
-
endTime: BigInt(0),
|
|
537
|
-
language: '',
|
|
538
|
-
},
|
|
539
|
-
],
|
|
540
|
-
});
|
|
532
|
+
await this.#publishTranscription(
|
|
533
|
+
this.#humanInput!.participant.identity,
|
|
534
|
+
this.#humanInput!.subscribedTrack!.sid!,
|
|
535
|
+
this.#transcribedInterimText,
|
|
536
|
+
false,
|
|
537
|
+
this.#transcriptionId,
|
|
538
|
+
);
|
|
541
539
|
});
|
|
542
|
-
this.#humanInput.on(HumanInputEvent.FINAL_TRANSCRIPT, (event) => {
|
|
540
|
+
this.#humanInput.on(HumanInputEvent.FINAL_TRANSCRIPT, async (event) => {
|
|
543
541
|
const newTranscript = event.alternatives![0].text;
|
|
544
542
|
if (!newTranscript) return;
|
|
545
543
|
|
|
@@ -550,20 +548,14 @@ export class VoicePipelineAgent extends (EventEmitter as new () => TypedEmitter<
|
|
|
550
548
|
this.#lastFinalTranscriptTime = Date.now();
|
|
551
549
|
this.transcribedText += (this.transcribedText ? ' ' : '') + newTranscript;
|
|
552
550
|
|
|
553
|
-
this.#
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
startTime: BigInt(0),
|
|
562
|
-
endTime: BigInt(0),
|
|
563
|
-
language: '',
|
|
564
|
-
},
|
|
565
|
-
],
|
|
566
|
-
});
|
|
551
|
+
await this.#publishTranscription(
|
|
552
|
+
this.#humanInput!.participant.identity,
|
|
553
|
+
this.#humanInput!.subscribedTrack!.sid!,
|
|
554
|
+
this.transcribedText,
|
|
555
|
+
true,
|
|
556
|
+
this.#transcriptionId,
|
|
557
|
+
);
|
|
558
|
+
|
|
567
559
|
this.#transcriptionId = undefined;
|
|
568
560
|
|
|
569
561
|
if (
|
|
@@ -894,18 +886,54 @@ export class VoicePipelineAgent extends (EventEmitter as new () => TypedEmitter<
|
|
|
894
886
|
handle.setDone();
|
|
895
887
|
}
|
|
896
888
|
|
|
889
|
+
/**
 * Publishes one transcription segment for a participant's track over both
 * channels used by this agent: the room's `publishTranscription` call and a
 * text stream sent on `TOPIC_TRANSCRIPTION`.
 *
 * The segment is always sent with zeroed start/end times and an empty
 * language, as no timing or language information is available here.
 *
 * @param participantIdentity - identity the transcription is attributed to
 * @param trackSid - SID of the track the transcription belongs to
 * @param text - full text of the segment (not a delta)
 * @param isFinal - whether this segment is final or still interim
 * @param id - segment id; reused across interim updates of the same segment
 * @returns a promise that settles once both publishes have finished; it
 *   rejects if either of them fails
 */
async #publishTranscription(
  participantIdentity: string,
  trackSid: string,
  text: string,
  isFinal: boolean,
  id: string,
) {
  const localParticipant = this.#room!.localParticipant!;

  // Kick off the segment publish first (same call order as before) but keep
  // its result so a failure is not left as a floating, unhandled rejection.
  // NOTE(review): assumes publishTranscription returns a promise, as it does
  // in @livekit/rtc-node; Promise.all below also tolerates a plain value.
  const segmentPublish = localParticipant.publishTranscription({
    participantIdentity: participantIdentity,
    trackSid: trackSid,
    segments: [
      {
        text: text,
        final: isFinal,
        id: id,
        startTime: BigInt(0),
        endTime: BigInt(0),
        language: '',
      },
    ],
  });

  const streamPublish = (async () => {
    const stream = await localParticipant.streamText({
      senderIdentity: participantIdentity,
      topic: TOPIC_TRANSCRIPTION,
      attributes: {
        [ATTRIBUTE_TRANSCRIPTION_TRACK_ID]: trackSid,
        [ATTRIBUTE_TRANSCRIPTION_FINAL]: isFinal.toString(),
      },
    });
    try {
      await stream.write(text);
    } finally {
      // Always close the stream, even if the write failed, so it is not leaked.
      await stream.close();
    }
  })();

  // Both publishes run concurrently; Promise.all observes both results so
  // neither rejection goes unhandled, and the first failure reaches the caller.
  await Promise.all([segmentPublish, streamPublish]);
}
|
|
921
|
+
|
|
897
922
|
#synthesizeAgentSpeech(
|
|
898
923
|
speechId: string,
|
|
899
924
|
source: string | LLMStream | AsyncIterable<string>,
|
|
900
925
|
): SynthesisHandle {
|
|
901
926
|
const synchronizer = new TextAudioSynchronizer(defaultTextSyncOptions);
|
|
902
|
-
|
|
927
|
+
// TODO: where possible we would want to use deltas instead of full text segments, esp for LLM streams over the streamText API
|
|
928
|
+
synchronizer.on('textUpdated', async (text) => {
|
|
903
929
|
this.#agentTranscribedText = text.text;
|
|
904
|
-
this.#
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
|
|
930
|
+
await this.#publishTranscription(
|
|
931
|
+
this.#room!.localParticipant!.identity!,
|
|
932
|
+
this.#agentPublication?.sid ?? '',
|
|
933
|
+
text.text,
|
|
934
|
+
text.final,
|
|
935
|
+
text.id,
|
|
936
|
+
);
|
|
909
937
|
});
|
|
910
938
|
|
|
911
939
|
if (!this.#agentOutput) {
|