@livekit/agents 0.7.2 → 0.7.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/audio.cjs +1 -1
- package/dist/audio.cjs.map +1 -1
- package/dist/audio.js +1 -1
- package/dist/audio.js.map +1 -1
- package/dist/cli.cjs.map +1 -1
- package/dist/constants.cjs +38 -0
- package/dist/constants.cjs.map +1 -0
- package/dist/constants.d.ts +5 -0
- package/dist/constants.d.ts.map +1 -0
- package/dist/constants.js +11 -0
- package/dist/constants.js.map +1 -0
- package/dist/index.cjs +14 -14
- package/dist/index.cjs.map +1 -1
- package/dist/ipc/inference_proc_lazy_main.cjs +14 -27
- package/dist/ipc/inference_proc_lazy_main.cjs.map +1 -1
- package/dist/ipc/inference_proc_lazy_main.js +14 -5
- package/dist/ipc/inference_proc_lazy_main.js.map +1 -1
- package/dist/ipc/job_proc_lazy_main.cjs +23 -10
- package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
- package/dist/ipc/job_proc_lazy_main.js +23 -10
- package/dist/ipc/job_proc_lazy_main.js.map +1 -1
- package/dist/ipc/supervised_proc.cjs +4 -5
- package/dist/ipc/supervised_proc.cjs.map +1 -1
- package/dist/ipc/supervised_proc.d.ts.map +1 -1
- package/dist/ipc/supervised_proc.js +4 -5
- package/dist/ipc/supervised_proc.js.map +1 -1
- package/dist/multimodal/agent_playout.cjs +1 -0
- package/dist/multimodal/agent_playout.cjs.map +1 -1
- package/dist/multimodal/agent_playout.js +1 -0
- package/dist/multimodal/agent_playout.js.map +1 -1
- package/dist/multimodal/multimodal_agent.cjs +36 -11
- package/dist/multimodal/multimodal_agent.cjs.map +1 -1
- package/dist/multimodal/multimodal_agent.d.ts +3 -2
- package/dist/multimodal/multimodal_agent.d.ts.map +1 -1
- package/dist/multimodal/multimodal_agent.js +40 -11
- package/dist/multimodal/multimodal_agent.js.map +1 -1
- package/dist/pipeline/agent_playout.cjs +1 -1
- package/dist/pipeline/agent_playout.cjs.map +1 -1
- package/dist/pipeline/agent_playout.d.ts.map +1 -1
- package/dist/pipeline/agent_playout.js +1 -1
- package/dist/pipeline/agent_playout.js.map +1 -1
- package/dist/pipeline/human_input.cjs +9 -2
- package/dist/pipeline/human_input.cjs.map +1 -1
- package/dist/pipeline/human_input.d.ts +2 -2
- package/dist/pipeline/human_input.d.ts.map +1 -1
- package/dist/pipeline/human_input.js +9 -2
- package/dist/pipeline/human_input.js.map +1 -1
- package/dist/pipeline/pipeline_agent.cjs +59 -37
- package/dist/pipeline/pipeline_agent.cjs.map +1 -1
- package/dist/pipeline/pipeline_agent.d.ts +3 -1
- package/dist/pipeline/pipeline_agent.d.ts.map +1 -1
- package/dist/pipeline/pipeline_agent.js +63 -37
- package/dist/pipeline/pipeline_agent.js.map +1 -1
- package/dist/worker.cjs +1 -1
- package/dist/worker.cjs.map +1 -1
- package/dist/worker.d.ts.map +1 -1
- package/dist/worker.js +1 -1
- package/dist/worker.js.map +1 -1
- package/package.json +4 -4
- package/src/audio.ts +1 -1
- package/src/constants.ts +7 -0
- package/src/ipc/inference_proc_lazy_main.ts +21 -6
- package/src/ipc/job_proc_lazy_main.ts +27 -9
- package/src/ipc/supervised_proc.ts +5 -6
- package/src/multimodal/multimodal_agent.ts +43 -13
- package/src/pipeline/agent_playout.ts +1 -7
- package/src/pipeline/human_input.ts +17 -3
- package/src/pipeline/pipeline_agent.ts +79 -38
- package/src/worker.ts +1 -1
- package/dist/llm/function_context.test.d.ts +0 -2
- package/dist/llm/function_context.test.d.ts.map +0 -1
- package/dist/tokenize/tokenizer.test.d.ts +0 -2
- package/dist/tokenize/tokenizer.test.d.ts.map +0 -1
|
@@ -1,7 +1,12 @@
|
|
|
1
1
|
// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
|
|
2
2
|
//
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
-
import type {
|
|
4
|
+
import type {
|
|
5
|
+
LocalTrackPublication,
|
|
6
|
+
NoiseCancellationOptions,
|
|
7
|
+
RemoteParticipant,
|
|
8
|
+
Room,
|
|
9
|
+
} from '@livekit/rtc-node';
|
|
5
10
|
import {
|
|
6
11
|
AudioSource,
|
|
7
12
|
LocalAudioTrack,
|
|
@@ -12,6 +17,11 @@ import {
|
|
|
12
17
|
import type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';
|
|
13
18
|
import { randomUUID } from 'node:crypto';
|
|
14
19
|
import EventEmitter from 'node:events';
|
|
20
|
+
import {
|
|
21
|
+
ATTRIBUTE_TRANSCRIPTION_FINAL,
|
|
22
|
+
ATTRIBUTE_TRANSCRIPTION_TRACK_ID,
|
|
23
|
+
TOPIC_TRANSCRIPTION,
|
|
24
|
+
} from '../constants.js';
|
|
15
25
|
import type {
|
|
16
26
|
CallableFunctionResult,
|
|
17
27
|
FunctionCallInfo,
|
|
@@ -216,6 +226,8 @@ export interface VPAOptions {
|
|
|
216
226
|
transcription: AgentTranscriptionOptions;
|
|
217
227
|
/** Turn detection model to use. */
|
|
218
228
|
turnDetector?: TurnDetector;
|
|
229
|
+
/** Noise cancellation options. */
|
|
230
|
+
noiseCancellation?: NoiseCancellationOptions;
|
|
219
231
|
}
|
|
220
232
|
|
|
221
233
|
const defaultVPAOptions: VPAOptions = {
|
|
@@ -474,7 +486,13 @@ export class VoicePipelineAgent extends (EventEmitter as new () => TypedEmitter<
|
|
|
474
486
|
return;
|
|
475
487
|
}
|
|
476
488
|
|
|
477
|
-
this.#humanInput = new HumanInput(
|
|
489
|
+
this.#humanInput = new HumanInput(
|
|
490
|
+
this.#room,
|
|
491
|
+
this.#vad,
|
|
492
|
+
this.#stt,
|
|
493
|
+
this.#participant,
|
|
494
|
+
this.#opts.noiseCancellation,
|
|
495
|
+
);
|
|
478
496
|
this.#humanInput.on(HumanInputEvent.START_OF_SPEECH, (event) => {
|
|
479
497
|
this.emit(VPAEvent.USER_STARTED_SPEAKING);
|
|
480
498
|
this.#deferredValidation.onHumanStartOfSpeech(event);
|
|
@@ -505,28 +523,21 @@ export class VoicePipelineAgent extends (EventEmitter as new () => TypedEmitter<
|
|
|
505
523
|
this.emit(VPAEvent.USER_STOPPED_SPEAKING);
|
|
506
524
|
this.#deferredValidation.onHumanEndOfSpeech(event);
|
|
507
525
|
});
|
|
508
|
-
this.#humanInput.on(HumanInputEvent.INTERIM_TRANSCRIPT, (event) => {
|
|
526
|
+
this.#humanInput.on(HumanInputEvent.INTERIM_TRANSCRIPT, async (event) => {
|
|
509
527
|
if (!this.#transcriptionId) {
|
|
510
528
|
this.#transcriptionId = randomUUID();
|
|
511
529
|
}
|
|
512
530
|
this.#transcribedInterimText = event.alternatives![0].text;
|
|
513
531
|
|
|
514
|
-
this.#
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
final: true,
|
|
522
|
-
startTime: BigInt(0),
|
|
523
|
-
endTime: BigInt(0),
|
|
524
|
-
language: '',
|
|
525
|
-
},
|
|
526
|
-
],
|
|
527
|
-
});
|
|
532
|
+
await this.#publishTranscription(
|
|
533
|
+
this.#humanInput!.participant.identity,
|
|
534
|
+
this.#humanInput!.subscribedTrack!.sid!,
|
|
535
|
+
this.#transcribedInterimText,
|
|
536
|
+
false,
|
|
537
|
+
this.#transcriptionId,
|
|
538
|
+
);
|
|
528
539
|
});
|
|
529
|
-
this.#humanInput.on(HumanInputEvent.FINAL_TRANSCRIPT, (event) => {
|
|
540
|
+
this.#humanInput.on(HumanInputEvent.FINAL_TRANSCRIPT, async (event) => {
|
|
530
541
|
const newTranscript = event.alternatives![0].text;
|
|
531
542
|
if (!newTranscript) return;
|
|
532
543
|
|
|
@@ -537,20 +548,14 @@ export class VoicePipelineAgent extends (EventEmitter as new () => TypedEmitter<
|
|
|
537
548
|
this.#lastFinalTranscriptTime = Date.now();
|
|
538
549
|
this.transcribedText += (this.transcribedText ? ' ' : '') + newTranscript;
|
|
539
550
|
|
|
540
|
-
this.#
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
startTime: BigInt(0),
|
|
549
|
-
endTime: BigInt(0),
|
|
550
|
-
language: '',
|
|
551
|
-
},
|
|
552
|
-
],
|
|
553
|
-
});
|
|
551
|
+
await this.#publishTranscription(
|
|
552
|
+
this.#humanInput!.participant.identity,
|
|
553
|
+
this.#humanInput!.subscribedTrack!.sid!,
|
|
554
|
+
this.transcribedText,
|
|
555
|
+
true,
|
|
556
|
+
this.#transcriptionId,
|
|
557
|
+
);
|
|
558
|
+
|
|
554
559
|
this.#transcriptionId = undefined;
|
|
555
560
|
|
|
556
561
|
if (
|
|
@@ -881,18 +886,54 @@ export class VoicePipelineAgent extends (EventEmitter as new () => TypedEmitter<
|
|
|
881
886
|
handle.setDone();
|
|
882
887
|
}
|
|
883
888
|
|
|
889
|
+
async #publishTranscription(
|
|
890
|
+
participantIdentity: string,
|
|
891
|
+
trackSid: string,
|
|
892
|
+
text: string,
|
|
893
|
+
isFinal: boolean,
|
|
894
|
+
id: string,
|
|
895
|
+
) {
|
|
896
|
+
this.#room!.localParticipant!.publishTranscription({
|
|
897
|
+
participantIdentity: participantIdentity,
|
|
898
|
+
trackSid: trackSid,
|
|
899
|
+
segments: [
|
|
900
|
+
{
|
|
901
|
+
text: text,
|
|
902
|
+
final: isFinal,
|
|
903
|
+
id: id,
|
|
904
|
+
startTime: BigInt(0),
|
|
905
|
+
endTime: BigInt(0),
|
|
906
|
+
language: '',
|
|
907
|
+
},
|
|
908
|
+
],
|
|
909
|
+
});
|
|
910
|
+
const stream = await this.#room!.localParticipant!.streamText({
|
|
911
|
+
senderIdentity: participantIdentity,
|
|
912
|
+
topic: TOPIC_TRANSCRIPTION,
|
|
913
|
+
attributes: {
|
|
914
|
+
[ATTRIBUTE_TRANSCRIPTION_TRACK_ID]: trackSid,
|
|
915
|
+
[ATTRIBUTE_TRANSCRIPTION_FINAL]: isFinal.toString(),
|
|
916
|
+
},
|
|
917
|
+
});
|
|
918
|
+
await stream.write(text);
|
|
919
|
+
await stream.close();
|
|
920
|
+
}
|
|
921
|
+
|
|
884
922
|
#synthesizeAgentSpeech(
|
|
885
923
|
speechId: string,
|
|
886
924
|
source: string | LLMStream | AsyncIterable<string>,
|
|
887
925
|
): SynthesisHandle {
|
|
888
926
|
const synchronizer = new TextAudioSynchronizer(defaultTextSyncOptions);
|
|
889
|
-
|
|
927
|
+
// TODO: where possible we would want to use deltas instead of full text segments, esp for LLM streams over the streamText API
|
|
928
|
+
synchronizer.on('textUpdated', async (text) => {
|
|
890
929
|
this.#agentTranscribedText = text.text;
|
|
891
|
-
this.#
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
930
|
+
await this.#publishTranscription(
|
|
931
|
+
this.#room!.localParticipant!.identity!,
|
|
932
|
+
this.#agentPublication?.sid ?? '',
|
|
933
|
+
text.text,
|
|
934
|
+
text.final,
|
|
935
|
+
text.id,
|
|
936
|
+
);
|
|
896
937
|
});
|
|
897
938
|
|
|
898
939
|
if (!this.#agentOutput) {
|
package/src/worker.ts
CHANGED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"function_context.test.d.ts","sourceRoot":"","sources":["../../src/llm/function_context.test.ts"],"names":[],"mappings":""}
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"tokenizer.test.d.ts","sourceRoot":"","sources":["../../src/tokenize/tokenizer.test.ts"],"names":[],"mappings":""}
|