@pipecat-ai/gemini-live-websocket-transport 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +24 -0
- package/README.md +124 -0
- package/dist/index.d.ts +41 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +1006 -0
- package/dist/index.js.map +1 -0
- package/dist/index.module.js +989 -0
- package/dist/index.module.js.map +1 -0
- package/package.json +43 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
BSD 2-Clause License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024, Daily
|
|
4
|
+
|
|
5
|
+
Redistribution and use in source and binary forms, with or without
|
|
6
|
+
modification, are permitted provided that the following conditions are met:
|
|
7
|
+
|
|
8
|
+
1. Redistributions of source code must retain the above copyright notice, this
|
|
9
|
+
list of conditions and the following disclaimer.
|
|
10
|
+
|
|
11
|
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
|
12
|
+
this list of conditions and the following disclaimer in the documentation
|
|
13
|
+
and/or other materials provided with the distribution.
|
|
14
|
+
|
|
15
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
16
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
17
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
18
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
19
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
20
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
21
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
22
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
23
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
24
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
package/README.md
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
# Gemini Live Websocket Transport
|
|
2
|
+
|
|
3
|
+
A real-time websocket transport implementation for interacting with Google's Gemini Multimodal Live API, supporting bidirectional audio and unidirectional text communication.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
npm install @pipecat-ai/client-js @pipecat-ai/realtime-websocket-transport @pipecat-ai/gemini-live-websocket-transport
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Overview
|
|
12
|
+
|
|
13
|
+
The `GeminiLiveWebsocketTransport` class extends the `RealTimeWebsocketTransport` to implement a fully functional [RTVI `Transport`](https://docs.pipecat.ai/client/reference/js/transports/transport). It provides a framework for implementing real-time communication directly with the [Gemini Multimodal Live](https://ai.google.dev/api/multimodal-live) voice-to-voice service. It handles media device management, audio/video streams, and state management for the connection.
|
|
14
|
+
|
|
15
|
+
## Features
|
|
16
|
+
|
|
17
|
+
- Real-time bidirectional communication with Gemini Multimodal Live
|
|
18
|
+
- Audio streaming support
|
|
19
|
+
- Text message support
|
|
20
|
+
- Automatic reconnection handling
|
|
21
|
+
- Configurable generation parameters
|
|
22
|
+
- Support for initial conversation context
|
|
23
|
+
|
|
24
|
+
## Usage
|
|
25
|
+
|
|
26
|
+
### Basic Setup
|
|
27
|
+
|
|
28
|
+
```typescript
|
|
29
|
+
import { GeminiLiveWebsocketTransport, GeminiLLMServiceOptions } from '@pipecat-ai/gemini-live-websocket-transport';
|
|
30
|
+
|
|
31
|
+
const options: GeminiLLMServiceOptions = {
|
|
32
|
+
api_key: 'YOUR_API_KEY',
|
|
33
|
+
generation_config: {
|
|
34
|
+
temperature: 0.7,
|
|
35
|
+
maxOutput_tokens: 1000
|
|
36
|
+
}
|
|
37
|
+
};
|
|
38
|
+
|
|
39
|
+
const transport = new GeminiLiveWebsocketTransport(options);
|
|
40
|
+
let RTVIConfig: RTVIClientOptions = {
|
|
41
|
+
transport,
|
|
42
|
+
...
|
|
43
|
+
};
|
|
44
|
+
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
### Configuration Options
|
|
48
|
+
|
|
49
|
+
```typescript
|
|
50
|
+
interface GeminiLLMServiceOptions {
|
|
51
|
+
api_key: string; // Required: Your Gemini API key
|
|
52
|
+
initial_messages?: Array<{ // Optional: Initial conversation context
|
|
53
|
+
content: string;
|
|
54
|
+
role: string;
|
|
55
|
+
}>;
|
|
56
|
+
generation_config?: { // Optional: Generation parameters
|
|
57
|
+
candidate_count?: number;
|
|
58
|
+
maxOutput_tokens?: number;
|
|
59
|
+
temperature?: number;
|
|
60
|
+
top_p?: number;
|
|
61
|
+
top_k?: number;
|
|
62
|
+
presence_penalty?: number;
|
|
63
|
+
frequency_penalty?: number;
|
|
64
|
+
response_modalities?: string;
|
|
65
|
+
speech_config?: {
|
|
66
|
+
voice_config?: {
|
|
67
|
+
prebuilt_voice_config?: {
|
|
68
|
+
voice_name: "Puck" | "Charon" | "Kore" | "Fenrir" | "Aoede";
|
|
69
|
+
};
|
|
70
|
+
};
|
|
71
|
+
};
|
|
72
|
+
};
|
|
73
|
+
}
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
### Sending Messages
|
|
77
|
+
|
|
78
|
+
```javascript
|
|
79
|
+
// Send text prompt message
|
|
80
|
+
rtviClient.sendMessage({
|
|
81
|
+
type: 'send-text',
|
|
82
|
+
data: 'Hello, Gemini!'
|
|
83
|
+
});
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
### Handling Events
|
|
87
|
+
|
|
88
|
+
The transport implements the various [RTVI event handlers](https://docs.pipecat.ai/client/reference/js/callbacks). Check out the docs or samples for more info.
|
|
89
|
+
|
|
90
|
+
## API Reference
|
|
91
|
+
|
|
92
|
+
### Methods
|
|
93
|
+
|
|
94
|
+
- `initialize()`: Set up the transport and establish connection
|
|
95
|
+
- `sendMessage(message)`: Send a text message
|
|
96
|
+
- `handleUserAudioStream(data)`: Stream audio data to the model
|
|
97
|
+
- `disconnectLLM()`: Close the connection
|
|
98
|
+
- `sendReadyMessage()`: Signal ready state
|
|
99
|
+
|
|
100
|
+
### States
|
|
101
|
+
|
|
102
|
+
The transport can be in one of the following states:
|
|
103
|
+
- "disconnected"
|
|
104
|
+
- "initializing"
|
|
105
|
+
- "initialized"
|
|
106
|
+
- "connecting"
|
|
107
|
+
- "connected"
|
|
108
|
+
- "ready"
|
|
109
|
+
- "disconnecting"
|
|
110
|
+
- "error"
|
|
111
|
+
|
|
112
|
+
## Error Handling
|
|
113
|
+
|
|
114
|
+
The transport includes comprehensive error handling for:
|
|
115
|
+
- Connection failures
|
|
116
|
+
- Websocket errors
|
|
117
|
+
- API key validation
|
|
118
|
+
- Message transmission errors
|
|
119
|
+
|
|
120
|
+
## License
|
|
121
|
+
BSD 2-Clause
|
|
122
|
+
|
|
123
|
+
## Contributing
|
|
124
|
+
Feel free to submit issues and pull requests for improvements or bug fixes. Be nice :)
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import { MediaManager, RealTimeWebsocketTransport, LLMServiceOptions } from "@pipecat-ai/realtime-websocket-transport";
|
|
2
|
+
import { RTVIMessage } from "@pipecat-ai/client-js";
|
|
3
|
+
export interface GeminiLLMServiceOptions extends LLMServiceOptions {
|
|
4
|
+
initial_messages?: Array<{
|
|
5
|
+
content: string;
|
|
6
|
+
role: string;
|
|
7
|
+
}>;
|
|
8
|
+
api_key: string;
|
|
9
|
+
generation_config?: {
|
|
10
|
+
candidate_count?: number;
|
|
11
|
+
maxOutput_tokens?: number;
|
|
12
|
+
temperature?: number;
|
|
13
|
+
top_p?: number;
|
|
14
|
+
top_k?: number;
|
|
15
|
+
presence_penalty?: number;
|
|
16
|
+
frequency_penalty?: number;
|
|
17
|
+
response_modalities?: string;
|
|
18
|
+
speech_config?: {
|
|
19
|
+
voice_config?: {
|
|
20
|
+
prebuilt_voice_config?: {
|
|
21
|
+
voice_name: "Puck" | "Charon" | "Kore" | "Fenrir" | "Aoede";
|
|
22
|
+
};
|
|
23
|
+
};
|
|
24
|
+
};
|
|
25
|
+
};
|
|
26
|
+
}
|
|
27
|
+
export class GeminiLiveWebsocketTransport extends RealTimeWebsocketTransport {
|
|
28
|
+
constructor(service_options: GeminiLLMServiceOptions, manager?: MediaManager);
|
|
29
|
+
initializeLLM(): void;
|
|
30
|
+
attachLLMListeners(): void;
|
|
31
|
+
connectLLM(): Promise<void>;
|
|
32
|
+
disconnectLLM(): Promise<void>;
|
|
33
|
+
sendReadyMessage(): Promise<void>;
|
|
34
|
+
handleUserAudioStream(data: ArrayBuffer): void;
|
|
35
|
+
sendMessage(message: RTVIMessage): void;
|
|
36
|
+
_sendAudioInput(data: ArrayBuffer): Promise<void>;
|
|
37
|
+
_sendTextInput(text: string, role: string): Promise<void>;
|
|
38
|
+
_sendMsg(msg: unknown): Promise<void>;
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"mappings":";;ACkBA,wCAAyC,SAAQ,iBAAiB;IAChE,gBAAgB,CAAC,EAAE,KAAK,CAAC;QAAE,OAAO,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IAC5D,OAAO,EAAE,MAAM,CAAC;IAChB,iBAAiB,CAAC,EAAE;QAClB,eAAe,CAAC,EAAE,MAAM,CAAC;QACzB,gBAAgB,CAAC,EAAE,MAAM,CAAC;QAC1B,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,gBAAgB,CAAC,EAAE,MAAM,CAAC;QAC1B,iBAAiB,CAAC,EAAE,MAAM,CAAC;QAC3B,mBAAmB,CAAC,EAAE,MAAM,CAAC;QAC7B,aAAa,CAAC,EAAE;YACd,YAAY,CAAC,EAAE;gBACb,qBAAqB,CAAC,EAAE;oBACtB,UAAU,EAAE,MAAM,GAAG,QAAQ,GAAG,MAAM,GAAG,QAAQ,GAAG,OAAO,CAAC;iBAE7D,CAAC;aACH,CAAC;SACH,CAAC;KACH,CAAC;CACH;AAED,yCAA0C,SAAQ,0BAA0B;gBAQxE,eAAe,EAAE,uBAAuB,EACxC,OAAO,CAAC,EAAE,YAAY;IAYxB,aAAa,IAAI,IAAI;IAcrB,kBAAkB,IAAI,IAAI;IA8DpB,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAiC3B,aAAa,IAAI,OAAO,CAAC,IAAI,CAAC;IAI9B,gBAAgB,IAAI,OAAO,CAAC,IAAI,CAAC;IAevC,qBAAqB,CAAC,IAAI,EAAE,WAAW,GAAG,IAAI;IAa9C,WAAW,CAAC,OAAO,EAAE,WAAW,GAAG,IAAI;IAMjC,eAAe,CAAC,IAAI,EAAE,WAAW,GAAG,OAAO,CAAC,IAAI,CAAC;IAgBjD,cAAc,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAezD,QAAQ,CAAC,GAAG,EAAE,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC;CAmB5C","sources":["transports/lib/websocket-utils/reconnectingWebSocket.ts","transports/gemini-live-websocket-transport/src/src/geminiLiveWebSocketTransport.ts","transports/gemini-live-websocket-transport/src/src/index.ts","transports/gemini-live-websocket-transport/src/index.ts"],"sourcesContent":[null,null,null,"export * from \"./geminiLiveWebSocketTransport\";\n"],"names":[],"version":3,"file":"index.d.ts.map"}
|