@pipecat-ai/gemini-live-websocket-transport 0.3.4 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -17,7 +17,7 @@ npm install \
17
17
 
18
18
  ## Overview
19
19
 
20
- The `GeminiLiveWebsocketTransport` class extends the `RealTimeWebsocketTransport` to implement a fully functional [RTVI `Transport`](https://docs.pipecat.ai/client/js/transports/transport). It provides a framework for implementing real-time communication directly with the [Gemini Multimodal Live](https://ai.google.dev/api/multimodal-live) voice-to-voice service. It handles media device management, audio/video streams, and state management for the connection.
20
+ The `GeminiLiveWebsocketTransport` class extends the `DirectToLLMBaseWebSocketTransport` to implement a fully functional [RTVI `Transport`](https://docs.pipecat.ai/client/js/transports/transport). It provides a framework for implementing real-time communication directly with the [Gemini Multimodal Live](https://ai.google.dev/api/multimodal-live) voice-to-voice service. It handles media device management, audio/video streams, and state management for the connection.
21
21
 
22
22
  ## Features
23
23
 
package/dist/index.d.ts CHANGED
@@ -1,12 +1,113 @@
1
- import { LLMServiceOptions, MediaManager, RealTimeWebsocketTransport } from "@pipecat-ai/realtime-websocket-transport";
2
- import { RTVIMessage } from "@pipecat-ai/client-js";
1
+ import { RTVIClientOptions, RTVIEventCallbacks, Tracks, BotTTSTextData, RTVIMessage, TranscriptData, Transport, TransportState } from "@pipecat-ai/client-js";
2
+ declare abstract class MediaManager {
3
+ protected _userAudioCallback: (data: ArrayBuffer) => void;
4
+ protected _options: RTVIClientOptions;
5
+ protected _callbacks: RTVIEventCallbacks;
6
+ protected _micEnabled: boolean;
7
+ protected _camEnabled: boolean;
8
+ constructor();
9
+ setUserAudioCallback(userAudioCallback: (data: ArrayBuffer) => void): void;
10
+ setRTVIOptions(options: RTVIClientOptions, override?: boolean): void;
11
+ abstract initialize(): Promise<void>;
12
+ abstract connect(): Promise<void>;
13
+ abstract disconnect(): Promise<void>;
14
+ abstract userStartedSpeaking(): Promise<unknown>;
15
+ abstract bufferBotAudio(data: ArrayBuffer | Int16Array, id?: string): Int16Array | undefined;
16
+ abstract getAllMics(): Promise<MediaDeviceInfo[]>;
17
+ abstract getAllCams(): Promise<MediaDeviceInfo[]>;
18
+ abstract getAllSpeakers(): Promise<MediaDeviceInfo[]>;
19
+ abstract updateMic(micId: string): void;
20
+ abstract updateCam(camId: string): void;
21
+ abstract updateSpeaker(speakerId: string): void;
22
+ abstract get selectedMic(): MediaDeviceInfo | Record<string, never>;
23
+ abstract get selectedCam(): MediaDeviceInfo | Record<string, never>;
24
+ abstract get selectedSpeaker(): MediaDeviceInfo | Record<string, never>;
25
+ abstract enableMic(enable: boolean): void;
26
+ abstract enableCam(enable: boolean): void;
27
+ abstract get isCamEnabled(): boolean;
28
+ abstract get isMicEnabled(): boolean;
29
+ abstract tracks(): Tracks;
30
+ }
31
+ interface LLMServiceOptions {
32
+ api_key?: string;
33
+ initial_messages?: Array<unknown>;
34
+ model?: string;
35
+ settings?: Record<string, unknown>;
36
+ }
37
+ /**
38
+ * DirectToLLMBaseWebSocketTransport is an abstract class that provides a client-side
39
+ * interface for connecting to a real-time AI service. It is intended to
40
+ * connect directly to the service. (No Pipecat server is involved.)
41
+ */
42
+ declare abstract class DirectToLLMBaseWebSocketTransport extends Transport {
43
+ protected _service_options: LLMServiceOptions;
44
+ protected _botIsSpeaking: boolean;
45
+ constructor(service_options: LLMServiceOptions, manager: MediaManager);
46
+ /**
47
+ * This method will be called from initialize()
48
+ * Subclasses should initialize the LLM client and media player/recorder
49
+ * and call initializeAudio() from within this method.
50
+ */
51
+ abstract initializeLLM(): void;
52
+ /**
53
+ * This method will be called from initialize()
54
+ * Subclasses should set up listeners for LLM events from within this method
55
+ */
56
+ abstract attachLLMListeners(): void;
57
+ /**
58
+ * This method will be called from connect()
59
+ * Subclasses should connect to the LLM and pass along the initial messages
60
+ * @param initial_messages
61
+ */
62
+ abstract connectLLM(): Promise<void>;
63
+ /**
64
+ * This method will be called from disconnect()
65
+ * Subclasses should disconnect from the LLM
66
+ */
67
+ abstract disconnectLLM(): Promise<void>;
68
+ /**
69
+ * This method will be called regularly with audio data from the user
70
+ * Subclasses should handle this data and pass it along to the LLM
71
+ * @param data ArrayBuffer of audio data
72
+ */
73
+ abstract handleUserAudioStream(data: ArrayBuffer): void;
74
+ initialize(options: RTVIClientOptions, messageHandler: (ev: RTVIMessage) => void): void;
75
+ initDevices(): Promise<void>;
76
+ connect(authBundle: unknown, abortController: AbortController): Promise<void>;
77
+ disconnect(): Promise<void>;
78
+ getAllMics(): Promise<MediaDeviceInfo[]>;
79
+ getAllCams(): Promise<MediaDeviceInfo[]>;
80
+ getAllSpeakers(): Promise<MediaDeviceInfo[]>;
81
+ updateMic(micId: string): Promise<void>;
82
+ updateCam(camId: string): void;
83
+ updateSpeaker(speakerId: string): void;
84
+ get selectedMic(): MediaDeviceInfo | Record<string, never>;
85
+ get selectedCam(): MediaDeviceInfo | Record<string, never>;
86
+ get selectedSpeaker(): MediaDeviceInfo | Record<string, never>;
87
+ enableMic(enable: boolean): void;
88
+ enableCam(enable: boolean): void;
89
+ get isCamEnabled(): boolean;
90
+ get isMicEnabled(): boolean;
91
+ get state(): TransportState;
92
+ set state(state: TransportState);
93
+ get expiry(): number | undefined;
94
+ tracks(): Tracks;
95
+ userStartedSpeaking(): Promise<unknown>;
96
+ userStoppedSpeaking(): void;
97
+ userTranscript(transcript: TranscriptData): void;
98
+ botStartedSpeaking(): void;
99
+ botStoppedSpeaking(): void;
100
+ botTtsText(data: BotTTSTextData): void;
101
+ bufferBotAudio(audio: ArrayBuffer, id?: string): void;
102
+ connectionError(errorMsg: string): void;
103
+ }
3
104
  export interface GeminiLLMServiceOptions extends LLMServiceOptions {
4
105
  initial_messages?: Array<{
5
106
  content: string;
6
107
  role: string;
7
108
  }>;
8
109
  api_key: string;
9
- generation_config?: {
110
+ settings?: {
10
111
  candidate_count?: number;
11
112
  maxOutput_tokens?: number;
12
113
  temperature?: number;
@@ -24,7 +125,7 @@ export interface GeminiLLMServiceOptions extends LLMServiceOptions {
24
125
  };
25
126
  };
26
127
  }
27
- export class GeminiLiveWebsocketTransport extends RealTimeWebsocketTransport {
128
+ export class GeminiLiveWebsocketTransport extends DirectToLLMBaseWebSocketTransport {
28
129
  constructor(service_options: GeminiLLMServiceOptions, manager?: MediaManager);
29
130
  initializeLLM(): void;
30
131
  attachLLMListeners(): void;
@@ -1 +1 @@
1
- {"mappings":";;ACoBA,wCAAyC,SAAQ,iBAAiB;IAChE,gBAAgB,CAAC,EAAE,KAAK,CAAC;QAAE,OAAO,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IAC5D,OAAO,EAAE,MAAM,CAAC;IAChB,iBAAiB,CAAC,EAAE;QAClB,eAAe,CAAC,EAAE,MAAM,CAAC;QACzB,gBAAgB,CAAC,EAAE,MAAM,CAAC;QAC1B,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,gBAAgB,CAAC,EAAE,MAAM,CAAC;QAC1B,iBAAiB,CAAC,EAAE,MAAM,CAAC;QAC3B,mBAAmB,CAAC,EAAE,MAAM,CAAC;QAC7B,aAAa,CAAC,EAAE;YACd,YAAY,CAAC,EAAE;gBACb,qBAAqB,CAAC,EAAE;oBACtB,UAAU,EAAE,MAAM,GAAG,QAAQ,GAAG,MAAM,GAAG,QAAQ,GAAG,OAAO,CAAC;iBAE7D,CAAC;aACH,CAAC;SACH,CAAC;KACH,CAAC;CACH;AAED,yCAA0C,SAAQ,0BAA0B;gBAQxE,eAAe,EAAE,uBAAuB,EACxC,OAAO,CAAC,EAAE,YAAY;IAYxB,aAAa,IAAI,IAAI;IAcrB,kBAAkB,IAAI,IAAI;IA8DpB,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAiC3B,aAAa,IAAI,OAAO,CAAC,IAAI,CAAC;IAI9B,gBAAgB,IAAI,OAAO,CAAC,IAAI,CAAC;IAevC,qBAAqB,CAAC,IAAI,EAAE,WAAW,GAAG,IAAI;IAa9C,WAAW,CAAC,OAAO,EAAE,WAAW,GAAG,IAAI;IA0BjC,eAAe,CAAC,IAAI,EAAE,WAAW,GAAG,OAAO,CAAC,IAAI,CAAC;IAgBjD,cAAc,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAezD,QAAQ,CAAC,GAAG,EAAE,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC;IAqB3C,iBAAiB,CAAC,MAAM,EAAE,OAAO,GAAG,IAAI;IAOxC,IAAW,eAAe,IAAI,OAAO,CAKpC;CACF","sources":["transports/lib/websocket-utils/reconnectingWebSocket.ts","transports/gemini-live-websocket-transport/src/src/geminiLiveWebSocketTransport.ts","transports/gemini-live-websocket-transport/src/src/index.ts","transports/gemini-live-websocket-transport/src/index.ts"],"sourcesContent":[null,null,null,"export * from \"./geminiLiveWebSocketTransport\";\n"],"names":[],"version":3,"file":"index.d.ts.map"}
1
+ {"mappings":";ASQA,QAAO,QAAQ;IACb,UAAkB,kBAAkB,EAAE,CAAC,IAAI,EAAE,WAAW,KAAK,IAAI,CAAC;IAClE,UAAkB,QAAQ,EAAE,iBAAiB,CAAC;IAC9C,SAAS,CAAC,UAAU,EAAE,kBAAkB,CAAM;IAE9C,SAAS,CAAC,WAAW,EAAE,OAAO,CAAC;IAC/B,SAAS,CAAC,WAAW,EAAE,OAAO,CAAC;;IAO/B,oBAAoB,CAAC,iBAAiB,EAAE,CAAC,IAAI,EAAE,WAAW,KAAK,IAAI;IAGnE,cAAc,CAAC,OAAO,EAAE,iBAAiB,EAAE,QAAQ,GAAE,OAAe;IAQpE,QAAQ,CAAC,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IACpC,QAAQ,CAAC,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;IACjC,QAAQ,CAAC,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAEpC,QAAQ,CAAC,mBAAmB,IAAI,OAAO,CAAC,OAAO,CAAC;IAChD,QAAQ,CAAC,cAAc,CACrB,IAAI,EAAE,WAAW,GAAG,UAAU,EAC9B,EAAE,CAAC,EAAE,MAAM,GACV,UAAU,GAAG,SAAS;IAEzB,QAAQ,CAAC,UAAU,IAAI,OAAO,CAAC,eAAe,EAAE,CAAC;IACjD,QAAQ,CAAC,UAAU,IAAI,OAAO,CAAC,eAAe,EAAE,CAAC;IACjD,QAAQ,CAAC,cAAc,IAAI,OAAO,CAAC,eAAe,EAAE,CAAC;IAErD,QAAQ,CAAC,SAAS,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI;IACvC,QAAQ,CAAC,SAAS,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI;IACvC,QAAQ,CAAC,aAAa,CAAC,SAAS,EAAE,MAAM,GAAG,IAAI;IAE/C,QAAQ,KAAK,WAAW,IAAI,eAAe,GAAG,MAAM,CAAC,MAAM,EAAE,KAAK,CAAC,CAAC;IACpE,QAAQ,KAAK,WAAW,IAAI,eAAe,GAAG,MAAM,CAAC,MAAM,EAAE,KAAK,CAAC,CAAC;IACpE,QAAQ,KAAK,eAAe,IAAI,eAAe,GAAG,MAAM,CAAC,MAAM,EAAE,KAAK,CAAC,CAAC;IAExE,QAAQ,CAAC,SAAS,CAAC,MAAM,EAAE,OAAO,GAAG,IAAI;IACzC,QAAQ,CAAC,SAAS,CAAC,MAAM,EAAE,OAAO,GAAG,IAAI;IAEzC,QAAQ,KAAK,YAAY,IAAI,OAAO,CAAC;IACrC,QAAQ,KAAK,YAAY,IAAI,OAAO,CAAC;IAErC,QAAQ,CAAC,MAAM,IAAI,MAAM;CAC1B;AGjDD;IACE,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,gBAAgB,CAAC,EAAE,KAAK,CAAC,OAAO,CAAC,CAAC;IAClC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACpC;AAED;;;;GAIG;AACH,QAAO,QAAQ,wCAAyC,SAAQ,SAAS;IAGvE,SAAS,CAAC,gBAAgB,EAAE,iBAAiB,CAAC;IAE9C,SAAS,CAAC,cAAc,UAAS;gBAErB,eAAe,EAAE,iBAAiB,EAAE,OAAO,EAAE,YAAY;IASrE;;;;OAIG;IACH,QAAQ,CAAC,aAAa,IAAI,IAAI;IAC9B;;;OAGG;IACH,QAAQ,CAAC,kBAAkB,IAAI,IAAI;IACnC;;;;OAIG;IACH,QAAQ,CAAC,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IACpC;;;OAGG;IACH,QAAQ,CAAC,aAAa,IAAI,OAAO,CAAC,IAAI,CAAC;IACvC;;;;OAIG;IACH,QAAQ,CAAC,qBAAqB,CAAC,IAAI,EAAE,WAAW,GAAG,IAAI;IAIvD,UAAU,CACR,OAAO,EAAE,iBAAiB,EAC
1B,cAAc,EAAE,CAAC,EAAE,EAAE,WAAW,KAAK,IAAI,GACxC,IAAI;IAeD,WAAW,IAAI,OAAO,CAAC,IAAI,CAAC;IAM5B,OAAO,CACX,UAAU,EAAE,OAAO,EACnB,eAAe,EAAE,eAAe,GAC/B,OAAO,CAAC,IAAI,CAAC;IAWV,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAQjC,UAAU,IAAI,OAAO,CAAC,eAAe,EAAE,CAAC;IAGxC,UAAU,IAAI,OAAO,CAAC,eAAe,EAAE,CAAC;IAGxC,cAAc,IAAI,OAAO,CAAC,eAAe,EAAE,CAAC;IAItC,SAAS,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAG7C,SAAS,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI;IAG9B,aAAa,CAAC,SAAS,EAAE,MAAM,GAAG,IAAI;IAItC,IAAI,WAAW,IAAI,eAAe,GAAG,MAAM,CAAC,MAAM,EAAE,KAAK,CAAC,CAEzD;IACD,IAAI,WAAW,IAAI,eAAe,GAAG,MAAM,CAAC,MAAM,EAAE,KAAK,CAAC,CAEzD;IACD,IAAI,eAAe,IAAI,eAAe,GAAG,MAAM,CAAC,MAAM,EAAE,KAAK,CAAC,CAE7D;IAED,SAAS,CAAC,MAAM,EAAE,OAAO,GAAG,IAAI;IAGhC,SAAS,CAAC,MAAM,EAAE,OAAO,GAAG,IAAI;IAIhC,IAAI,YAAY,IAAI,OAAO,CAE1B;IACD,IAAI,YAAY,IAAI,OAAO,CAE1B;IAED,IAAI,KAAK,IAAI,cAAc,CAE1B;IAED,IAAI,KAAK,CAAC,KAAK,EAAE,cAAc,EAK9B;IAED,IAAI,MAAM,IAAI,MAAM,GAAG,SAAS,CAE/B;IAED,MAAM,IAAI,MAAM;IAKV,mBAAmB,IAAI,OAAO,CAAC,OAAO,CAAC;IAO7C,mBAAmB,IAAI,IAAI;IAI3B,cAAc,CAAC,UAAU,EAAE,cAAc,GAAG,IAAI;IAIhD,kBAAkB,IAAI,IAAI;IAO1B,kBAAkB,IAAI,IAAI;IAO1B,UAAU,CAAC,IAAI,EAAE,cAAc,GAAG,IAAI;IAItC,cAAc,CAAC,KAAK,EAAE,WAAW,EAAE,EAAE,CAAC,EAAE,MAAM,GAAG,IAAI;IAIrD,eAAe,CAAC,QAAQ,EAAE,MAAM,GAAG,IAAI;CAOxC;AC7MD,wCAAyC,SAAQ,iBAAiB;IAChE,gBAAgB,CAAC,EAAE,KAAK,CAAC;QAAE,OAAO,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IAC5D,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE;QACT,eAAe,CAAC,EAAE,MAAM,CAAC;QACzB,gBAAgB,CAAC,EAAE,MAAM,CAAC;QAC1B,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,gBAAgB,CAAC,EAAE,MAAM,CAAC;QAC1B,iBAAiB,CAAC,EAAE,MAAM,CAAC;QAC3B,mBAAmB,CAAC,EAAE,MAAM,CAAC;QAC7B,aAAa,CAAC,EAAE;YACd,YAAY,CAAC,EAAE;gBACb,qBAAqB,CAAC,EAAE;oBACtB,UAAU,EAAE,MAAM,GAAG,QAAQ,GAAG,MAAM,GAAG,QAAQ,GAAG,OAAO,CAAC;iBAE7D,CAAC;aACH,CAAC;SACH,CAAC;KACH,CAAC;CACH;AAED,yCAA0C,SAAQ,iCAAiC;gBAQ/E,eAAe,EAAE,uBAAuB,EACxC,OAAO,CAAC,EAAE,YAAY;IAYxB,aAAa,IAAI,IAAI;IAcrB,kBAAkB,IAAI,IAAI;IA8DpB,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAiC
3B,aAAa,IAAI,OAAO,CAAC,IAAI,CAAC;IAI9B,gBAAgB,IAAI,OAAO,CAAC,IAAI,CAAC;IAevC,qBAAqB,CAAC,IAAI,EAAE,WAAW,GAAG,IAAI;IAa9C,WAAW,CAAC,OAAO,EAAE,WAAW,GAAG,IAAI;IA+BjC,eAAe,CAAC,IAAI,EAAE,WAAW,GAAG,OAAO,CAAC,IAAI,CAAC;IAgBjD,cAAc,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAezD,QAAQ,CAAC,GAAG,EAAE,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC;IAqB3C,iBAAiB,CAAC,MAAM,EAAE,OAAO,GAAG,IAAI;IAOxC,IAAW,eAAe,IAAI,OAAO,CAKpC;CACF","sources":["transports/lib/wavtools/lib/wav_packer.js","transports/lib/wavtools/lib/analysis/constants.js","transports/lib/wavtools/lib/analysis/audio_analysis.js","transports/lib/wavtools/lib/worklets/stream_processor.js","transports/lib/wavtools/lib/wav_stream_player.js","transports/lib/wavtools/lib/worklets/audio_processor.js","transports/lib/wavtools/lib/wav_recorder.js","transports/lib/wavtools/lib/mediastream_recorder.js","transports/lib/wavtools/index.js","transports/lib/media-mgmt/mediaManager.ts","transports/lib/media-mgmt/dailyMediaManager.ts","transports/lib/websocket-utils/reconnectingWebSocket.ts","transports/gemini-live-websocket-transport/src/src/directToLLMBaseWebSocketTransport.ts","transports/gemini-live-websocket-transport/src/src/geminiLiveWebSocketTransport.ts","transports/gemini-live-websocket-transport/src/src/index.ts","transports/gemini-live-websocket-transport/src/index.ts"],"sourcesContent":[null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,"export * from \"./geminiLiveWebSocketTransport\";\n"],"names":[],"version":3,"file":"index.d.ts.map"}