@pipecat-ai/gemini-live-websocket-transport 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,24 @@
1
+ BSD 2-Clause License
2
+
3
+ Copyright (c) 2024, Daily
4
+
5
+ Redistribution and use in source and binary forms, with or without
6
+ modification, are permitted provided that the following conditions are met:
7
+
8
+ 1. Redistributions of source code must retain the above copyright notice, this
9
+ list of conditions and the following disclaimer.
10
+
11
+ 2. Redistributions in binary form must reproduce the above copyright notice,
12
+ this list of conditions and the following disclaimer in the documentation
13
+ and/or other materials provided with the distribution.
14
+
15
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
19
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
22
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
23
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package/README.md ADDED
@@ -0,0 +1,124 @@
1
+ # Gemini Live Websocket Transport
2
+
3
+ A real-time websocket transport implementation for interacting with Google's Gemini Multimodal Live API, supporting bidirectional audio and unidirectional text communication.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ npm install @pipecat-ai/client-js @pipecat-ai/realtime-websocket-transport @pipecat-ai/gemini-live-websocket-transport
9
+ ```
10
+
11
+ ## Overview
12
+
13
+ The `GeminiLiveWebsocketTransport` class extends the `RealTimeWebsocketTransport` to implement a fully functional [RTVI `Transport`](https://docs.pipecat.ai/client/reference/js/transports/transport). It provides a framework for implementing real-time communication directly with the [Gemini Multimodal Live](https://ai.google.dev/api/multimodal-live) voice-to-voice service. It handles media device management, audio/video streams, and state management for the connection.
14
+
15
+ ## Features
16
+
17
+ - Real-time bidirectional communication with Gemini Multimodal Live
18
+ - Audio streaming support
19
+ - Text message support
20
+ - Automatic reconnection handling
21
+ - Configurable generation parameters
22
+ - Support for initial conversation context
23
+
24
+ ## Usage
25
+
26
+ ### Basic Setup
27
+
28
+ ```typescript
29
+ import { GeminiLiveWebsocketTransport, GeminiLLMServiceOptions } from '@pipecat-ai/gemini-live-websocket-transport';
30
+
31
+ const options: GeminiLLMServiceOptions = {
32
+ api_key: 'YOUR_API_KEY',
33
+ generation_config: {
34
+ temperature: 0.7,
35
+ maxOutput_tokens: 1000
36
+ }
37
+ };
38
+
39
+ const transport = new GeminiLiveWebsocketTransport(options);
40
+ let RTVIConfig: RTVIClientOptions = {
41
+ transport,
42
+ ...
43
+ };
44
+
45
+ ```
46
+
47
+ ### Configuration Options
48
+
49
+ ```typescript
50
+ interface GeminiLLMServiceOptions {
51
+ api_key: string; // Required: Your Gemini API key
52
+ initial_messages?: Array<{ // Optional: Initial conversation context
53
+ content: string;
54
+ role: string;
55
+ }>;
56
+ generation_config?: { // Optional: Generation parameters
57
+ candidate_count?: number;
58
+ maxOutput_tokens?: number;
59
+ temperature?: number;
60
+ top_p?: number;
61
+ top_k?: number;
62
+ presence_penalty?: number;
63
+ frequency_penalty?: number;
64
+ response_modalities?: string;
65
+ speech_config?: {
66
+ voice_config?: {
67
+ prebuilt_voice_config?: {
68
+ voice_name: "Puck" | "Charon" | "Kore" | "Fenrir" | "Aoede";
69
+ };
70
+ };
71
+ };
72
+ };
73
+ }
74
+ ```
75
+
76
+ ### Sending Messages
77
+
78
+ ```javascript
79
+ // Send text prompt message
80
+ rtviClient.sendMessage({
81
+ type: 'send-text',
82
+ data: 'Hello, Gemini!'
83
+ });
84
+ ```
85
+
86
+ ### Handling Events
87
+
88
+ The transport implements the various [RTVI event handlers](https://docs.pipecat.ai/client/reference/js/callbacks). Check out the docs or samples for more info.
89
+
90
+ ## API Reference
91
+
92
+ ### Methods
93
+
94
+ - `initialize()`: Set up the transport and establish connection
95
+ - `sendMessage(message)`: Send a text message
96
+ - `handleUserAudioStream(data)`: Stream audio data to the model
97
+ - `disconnectLLM()`: Close the connection
98
+ - `sendReadyMessage()`: Signal ready state
99
+
100
+ ### States
101
+
102
+ The transport can be in one of the following states:
103
+ - "disconnected"
104
+ - "initializing"
105
+ - "initialized"
106
+ - "connecting"
107
+ - "connected"
108
+ - "ready"
109
+ - "disconnecting"
110
+ - "error"
111
+
112
+ ## Error Handling
113
+
114
+ The transport includes comprehensive error handling for:
115
+ - Connection failures
116
+ - Websocket errors
117
+ - API key validation
118
+ - Message transmission errors
119
+
120
+ ## License
121
+ BSD 2-Clause
122
+
123
+ ## Contributing
124
+ Feel free to submit issues and pull requests for improvements or bug fixes. Be nice :)
@@ -0,0 +1,41 @@
1
+ import { MediaManager, RealTimeWebsocketTransport, LLMServiceOptions } from "@pipecat-ai/realtime-websocket-transport";
2
+ import { RTVIMessage } from "@pipecat-ai/client-js";
3
+ export interface GeminiLLMServiceOptions extends LLMServiceOptions { // Options object passed to the GeminiLiveWebsocketTransport constructor
4
+ initial_messages?: Array<{ // Optional: initial conversation context
5
+ content: string;
6
+ role: string;
7
+ }>;
8
+ api_key: string; // Required: your Gemini API key
9
+ generation_config?: { // Optional: generation parameters (every field below is optional)
10
+ candidate_count?: number;
11
+ maxOutput_tokens?: number; // NOTE(review): mixed camel/snake casing — confirm this is the key the service expects
12
+ temperature?: number;
13
+ top_p?: number;
14
+ top_k?: number;
15
+ presence_penalty?: number;
16
+ frequency_penalty?: number;
17
+ response_modalities?: string;
18
+ speech_config?: {
19
+ voice_config?: {
20
+ prebuilt_voice_config?: {
21
+ voice_name: "Puck" | "Charon" | "Kore" | "Fenrir" | "Aoede"; // restricted to these five literal names
22
+ };
23
+ };
24
+ };
25
+ };
26
+ }
27
+ export class GeminiLiveWebsocketTransport extends RealTimeWebsocketTransport { // Websocket transport for the Gemini Multimodal Live service
28
+ constructor(service_options: GeminiLLMServiceOptions, manager?: MediaManager); // manager is optional
29
+ initializeLLM(): void;
30
+ attachLLMListeners(): void;
31
+ connectLLM(): Promise<void>; // presumably opens the connection to the service — implementation not visible here
32
+ disconnectLLM(): Promise<void>; // Close the connection
33
+ sendReadyMessage(): Promise<void>; // Signal ready state
34
+ handleUserAudioStream(data: ArrayBuffer): void; // Stream audio data to the model
35
+ sendMessage(message: RTVIMessage): void; // Send a text message
36
+ _sendAudioInput(data: ArrayBuffer): Promise<void>; // NOTE(review): underscore prefix suggests internal use — confirm before calling directly
37
+ _sendTextInput(text: string, role: string): Promise<void>; // NOTE(review): underscore prefix suggests internal use — confirm before calling directly
38
+ _sendMsg(msg: unknown): Promise<void>; // NOTE(review): underscore prefix suggests internal use — confirm before calling directly
39
+ }
40
+
41
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"mappings":";;ACkBA,wCAAyC,SAAQ,iBAAiB;IAChE,gBAAgB,CAAC,EAAE,KAAK,CAAC;QAAE,OAAO,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IAC5D,OAAO,EAAE,MAAM,CAAC;IAChB,iBAAiB,CAAC,EAAE;QAClB,eAAe,CAAC,EAAE,MAAM,CAAC;QACzB,gBAAgB,CAAC,EAAE,MAAM,CAAC;QAC1B,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,gBAAgB,CAAC,EAAE,MAAM,CAAC;QAC1B,iBAAiB,CAAC,EAAE,MAAM,CAAC;QAC3B,mBAAmB,CAAC,EAAE,MAAM,CAAC;QAC7B,aAAa,CAAC,EAAE;YACd,YAAY,CAAC,EAAE;gBACb,qBAAqB,CAAC,EAAE;oBACtB,UAAU,EAAE,MAAM,GAAG,QAAQ,GAAG,MAAM,GAAG,QAAQ,GAAG,OAAO,CAAC;iBAE7D,CAAC;aACH,CAAC;SACH,CAAC;KACH,CAAC;CACH;AAED,yCAA0C,SAAQ,0BAA0B;gBAQxE,eAAe,EAAE,uBAAuB,EACxC,OAAO,CAAC,EAAE,YAAY;IAYxB,aAAa,IAAI,IAAI;IAcrB,kBAAkB,IAAI,IAAI;IA8DpB,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAiC3B,aAAa,IAAI,OAAO,CAAC,IAAI,CAAC;IAI9B,gBAAgB,IAAI,OAAO,CAAC,IAAI,CAAC;IAevC,qBAAqB,CAAC,IAAI,EAAE,WAAW,GAAG,IAAI;IAa9C,WAAW,CAAC,OAAO,EAAE,WAAW,GAAG,IAAI;IAMjC,eAAe,CAAC,IAAI,EAAE,WAAW,GAAG,OAAO,CAAC,IAAI,CAAC;IAgBjD,cAAc,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAezD,QAAQ,CAAC,GAAG,EAAE,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC;CAmB5C","sources":["transports/lib/websocket-utils/reconnectingWebSocket.ts","transports/gemini-live-websocket-transport/src/src/geminiLiveWebSocketTransport.ts","transports/gemini-live-websocket-transport/src/src/index.ts","transports/gemini-live-websocket-transport/src/index.ts"],"sourcesContent":[null,null,null,"export * from \"./geminiLiveWebSocketTransport\";\n"],"names":[],"version":3,"file":"index.d.ts.map"}