@livekit/agents 0.7.2 → 0.7.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. package/dist/audio.cjs +1 -1
  2. package/dist/audio.cjs.map +1 -1
  3. package/dist/audio.js +1 -1
  4. package/dist/audio.js.map +1 -1
  5. package/dist/cli.cjs.map +1 -1
  6. package/dist/constants.cjs +38 -0
  7. package/dist/constants.cjs.map +1 -0
  8. package/dist/constants.d.ts +5 -0
  9. package/dist/constants.d.ts.map +1 -0
  10. package/dist/constants.js +11 -0
  11. package/dist/constants.js.map +1 -0
  12. package/dist/index.cjs +14 -14
  13. package/dist/index.cjs.map +1 -1
  14. package/dist/ipc/inference_proc_lazy_main.cjs +14 -27
  15. package/dist/ipc/inference_proc_lazy_main.cjs.map +1 -1
  16. package/dist/ipc/inference_proc_lazy_main.js +14 -5
  17. package/dist/ipc/inference_proc_lazy_main.js.map +1 -1
  18. package/dist/ipc/job_proc_lazy_main.cjs +23 -10
  19. package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
  20. package/dist/ipc/job_proc_lazy_main.js +23 -10
  21. package/dist/ipc/job_proc_lazy_main.js.map +1 -1
  22. package/dist/ipc/supervised_proc.cjs +4 -5
  23. package/dist/ipc/supervised_proc.cjs.map +1 -1
  24. package/dist/ipc/supervised_proc.d.ts.map +1 -1
  25. package/dist/ipc/supervised_proc.js +4 -5
  26. package/dist/ipc/supervised_proc.js.map +1 -1
  27. package/dist/multimodal/agent_playout.cjs +1 -0
  28. package/dist/multimodal/agent_playout.cjs.map +1 -1
  29. package/dist/multimodal/agent_playout.js +1 -0
  30. package/dist/multimodal/agent_playout.js.map +1 -1
  31. package/dist/multimodal/multimodal_agent.cjs +36 -11
  32. package/dist/multimodal/multimodal_agent.cjs.map +1 -1
  33. package/dist/multimodal/multimodal_agent.d.ts +3 -2
  34. package/dist/multimodal/multimodal_agent.d.ts.map +1 -1
  35. package/dist/multimodal/multimodal_agent.js +40 -11
  36. package/dist/multimodal/multimodal_agent.js.map +1 -1
  37. package/dist/pipeline/agent_playout.cjs +1 -1
  38. package/dist/pipeline/agent_playout.cjs.map +1 -1
  39. package/dist/pipeline/agent_playout.d.ts.map +1 -1
  40. package/dist/pipeline/agent_playout.js +1 -1
  41. package/dist/pipeline/agent_playout.js.map +1 -1
  42. package/dist/pipeline/human_input.cjs +9 -2
  43. package/dist/pipeline/human_input.cjs.map +1 -1
  44. package/dist/pipeline/human_input.d.ts +2 -2
  45. package/dist/pipeline/human_input.d.ts.map +1 -1
  46. package/dist/pipeline/human_input.js +9 -2
  47. package/dist/pipeline/human_input.js.map +1 -1
  48. package/dist/pipeline/pipeline_agent.cjs +59 -37
  49. package/dist/pipeline/pipeline_agent.cjs.map +1 -1
  50. package/dist/pipeline/pipeline_agent.d.ts +3 -1
  51. package/dist/pipeline/pipeline_agent.d.ts.map +1 -1
  52. package/dist/pipeline/pipeline_agent.js +63 -37
  53. package/dist/pipeline/pipeline_agent.js.map +1 -1
  54. package/dist/worker.cjs +1 -1
  55. package/dist/worker.cjs.map +1 -1
  56. package/dist/worker.d.ts.map +1 -1
  57. package/dist/worker.js +1 -1
  58. package/dist/worker.js.map +1 -1
  59. package/package.json +4 -4
  60. package/src/audio.ts +1 -1
  61. package/src/constants.ts +7 -0
  62. package/src/ipc/inference_proc_lazy_main.ts +21 -6
  63. package/src/ipc/job_proc_lazy_main.ts +27 -9
  64. package/src/ipc/supervised_proc.ts +5 -6
  65. package/src/multimodal/multimodal_agent.ts +43 -13
  66. package/src/pipeline/agent_playout.ts +1 -7
  67. package/src/pipeline/human_input.ts +17 -3
  68. package/src/pipeline/pipeline_agent.ts +79 -38
  69. package/src/worker.ts +1 -1
  70. package/dist/llm/function_context.test.d.ts +0 -2
  71. package/dist/llm/function_context.test.d.ts.map +0 -1
  72. package/dist/tokenize/tokenizer.test.d.ts +0 -2
  73. package/dist/tokenize/tokenizer.test.d.ts.map +0 -1
@@ -1,7 +1,12 @@
1
1
  // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
2
2
  //
3
3
  // SPDX-License-Identifier: Apache-2.0
4
- import type { LocalTrackPublication, RemoteParticipant, Room } from '@livekit/rtc-node';
4
+ import type {
5
+ LocalTrackPublication,
6
+ NoiseCancellationOptions,
7
+ RemoteParticipant,
8
+ Room,
9
+ } from '@livekit/rtc-node';
5
10
  import {
6
11
  AudioSource,
7
12
  LocalAudioTrack,
@@ -12,6 +17,11 @@ import {
12
17
  import type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';
13
18
  import { randomUUID } from 'node:crypto';
14
19
  import EventEmitter from 'node:events';
20
+ import {
21
+ ATTRIBUTE_TRANSCRIPTION_FINAL,
22
+ ATTRIBUTE_TRANSCRIPTION_TRACK_ID,
23
+ TOPIC_TRANSCRIPTION,
24
+ } from '../constants.js';
15
25
  import type {
16
26
  CallableFunctionResult,
17
27
  FunctionCallInfo,
@@ -216,6 +226,8 @@ export interface VPAOptions {
216
226
  transcription: AgentTranscriptionOptions;
217
227
  /** Turn detection model to use. */
218
228
  turnDetector?: TurnDetector;
229
+ /** Noise cancellation options. */
230
+ noiseCancellation?: NoiseCancellationOptions;
219
231
  }
220
232
 
221
233
  const defaultVPAOptions: VPAOptions = {
@@ -474,7 +486,13 @@ export class VoicePipelineAgent extends (EventEmitter as new () => TypedEmitter<
474
486
  return;
475
487
  }
476
488
 
477
- this.#humanInput = new HumanInput(this.#room, this.#vad, this.#stt, this.#participant);
489
+ this.#humanInput = new HumanInput(
490
+ this.#room,
491
+ this.#vad,
492
+ this.#stt,
493
+ this.#participant,
494
+ this.#opts.noiseCancellation,
495
+ );
478
496
  this.#humanInput.on(HumanInputEvent.START_OF_SPEECH, (event) => {
479
497
  this.emit(VPAEvent.USER_STARTED_SPEAKING);
480
498
  this.#deferredValidation.onHumanStartOfSpeech(event);
@@ -505,28 +523,21 @@ export class VoicePipelineAgent extends (EventEmitter as new () => TypedEmitter<
505
523
  this.emit(VPAEvent.USER_STOPPED_SPEAKING);
506
524
  this.#deferredValidation.onHumanEndOfSpeech(event);
507
525
  });
508
- this.#humanInput.on(HumanInputEvent.INTERIM_TRANSCRIPT, (event) => {
526
+ this.#humanInput.on(HumanInputEvent.INTERIM_TRANSCRIPT, async (event) => {
509
527
  if (!this.#transcriptionId) {
510
528
  this.#transcriptionId = randomUUID();
511
529
  }
512
530
  this.#transcribedInterimText = event.alternatives![0].text;
513
531
 
514
- this.#room!.localParticipant!.publishTranscription({
515
- participantIdentity: this.#humanInput!.participant.identity,
516
- trackSid: this.#humanInput!.subscribedTrack!.sid!,
517
- segments: [
518
- {
519
- text: this.#transcribedInterimText,
520
- id: this.#transcriptionId,
521
- final: true,
522
- startTime: BigInt(0),
523
- endTime: BigInt(0),
524
- language: '',
525
- },
526
- ],
527
- });
532
+ await this.#publishTranscription(
533
+ this.#humanInput!.participant.identity,
534
+ this.#humanInput!.subscribedTrack!.sid!,
535
+ this.#transcribedInterimText,
536
+ false,
537
+ this.#transcriptionId,
538
+ );
528
539
  });
529
- this.#humanInput.on(HumanInputEvent.FINAL_TRANSCRIPT, (event) => {
540
+ this.#humanInput.on(HumanInputEvent.FINAL_TRANSCRIPT, async (event) => {
530
541
  const newTranscript = event.alternatives![0].text;
531
542
  if (!newTranscript) return;
532
543
 
@@ -537,20 +548,14 @@ export class VoicePipelineAgent extends (EventEmitter as new () => TypedEmitter<
537
548
  this.#lastFinalTranscriptTime = Date.now();
538
549
  this.transcribedText += (this.transcribedText ? ' ' : '') + newTranscript;
539
550
 
540
- this.#room!.localParticipant!.publishTranscription({
541
- participantIdentity: this.#humanInput!.participant.identity,
542
- trackSid: this.#humanInput!.subscribedTrack!.sid!,
543
- segments: [
544
- {
545
- text: this.transcribedText,
546
- id: this.#transcriptionId,
547
- final: true,
548
- startTime: BigInt(0),
549
- endTime: BigInt(0),
550
- language: '',
551
- },
552
- ],
553
- });
551
+ await this.#publishTranscription(
552
+ this.#humanInput!.participant.identity,
553
+ this.#humanInput!.subscribedTrack!.sid!,
554
+ this.transcribedText,
555
+ true,
556
+ this.#transcriptionId,
557
+ );
558
+
554
559
  this.#transcriptionId = undefined;
555
560
 
556
561
  if (
@@ -881,18 +886,54 @@ export class VoicePipelineAgent extends (EventEmitter as new () => TypedEmitter<
881
886
  handle.setDone();
882
887
  }
883
888
 
889
+ async #publishTranscription(
890
+ participantIdentity: string,
891
+ trackSid: string,
892
+ text: string,
893
+ isFinal: boolean,
894
+ id: string,
895
+ ) {
896
+ this.#room!.localParticipant!.publishTranscription({
897
+ participantIdentity: participantIdentity,
898
+ trackSid: trackSid,
899
+ segments: [
900
+ {
901
+ text: text,
902
+ final: isFinal,
903
+ id: id,
904
+ startTime: BigInt(0),
905
+ endTime: BigInt(0),
906
+ language: '',
907
+ },
908
+ ],
909
+ });
910
+ const stream = await this.#room!.localParticipant!.streamText({
911
+ senderIdentity: participantIdentity,
912
+ topic: TOPIC_TRANSCRIPTION,
913
+ attributes: {
914
+ [ATTRIBUTE_TRANSCRIPTION_TRACK_ID]: trackSid,
915
+ [ATTRIBUTE_TRANSCRIPTION_FINAL]: isFinal.toString(),
916
+ },
917
+ });
918
+ await stream.write(text);
919
+ await stream.close();
920
+ }
921
+
884
922
  #synthesizeAgentSpeech(
885
923
  speechId: string,
886
924
  source: string | LLMStream | AsyncIterable<string>,
887
925
  ): SynthesisHandle {
888
926
  const synchronizer = new TextAudioSynchronizer(defaultTextSyncOptions);
889
- synchronizer.on('textUpdated', (text) => {
927
+ // TODO: where possible we would want to use deltas instead of full text segments, esp for LLM streams over the streamText API
928
+ synchronizer.on('textUpdated', async (text) => {
890
929
  this.#agentTranscribedText = text.text;
891
- this.#room!.localParticipant!.publishTranscription({
892
- participantIdentity: this.#room!.localParticipant!.identity,
893
- trackSid: this.#agentPublication!.sid!,
894
- segments: [text],
895
- });
930
+ await this.#publishTranscription(
931
+ this.#room!.localParticipant!.identity!,
932
+ this.#agentPublication?.sid ?? '',
933
+ text.text,
934
+ text.final,
935
+ text.id,
936
+ );
896
937
  });
897
938
 
898
939
  if (!this.#agentOutput) {
package/src/worker.ts CHANGED
@@ -180,7 +180,7 @@ export class WorkerOptions {
180
180
  wsURL = 'ws://localhost:7880',
181
181
  apiKey = undefined,
182
182
  apiSecret = undefined,
183
- host = 'localhost',
183
+ host = '0.0.0.0',
184
184
  port = undefined,
185
185
  logLevel = 'info',
186
186
  production = false,
@@ -1,2 +0,0 @@
1
- export {};
2
- //# sourceMappingURL=function_context.test.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"function_context.test.d.ts","sourceRoot":"","sources":["../../src/llm/function_context.test.ts"],"names":[],"mappings":""}
@@ -1,2 +0,0 @@
1
- export {};
2
- //# sourceMappingURL=tokenizer.test.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"tokenizer.test.d.ts","sourceRoot":"","sources":["../../src/tokenize/tokenizer.test.ts"],"names":[],"mappings":""}