deepslate-core 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepslate_core-0.1.0/.gitignore +25 -0
- deepslate_core-0.1.0/PKG-INFO +319 -0
- deepslate_core-0.1.0/README.md +299 -0
- deepslate_core-0.1.0/pyproject.toml +32 -0
- deepslate_core-0.1.0/src/deepslate/core/__init__.py +55 -0
- deepslate_core-0.1.0/src/deepslate/core/_types.py +128 -0
- deepslate_core-0.1.0/src/deepslate/core/_utils.py +206 -0
- deepslate_core-0.1.0/src/deepslate/core/client.py +111 -0
- deepslate_core-0.1.0/src/deepslate/core/options.py +205 -0
- deepslate_core-0.1.0/src/deepslate/core/proto/__init__.py +4 -0
- deepslate_core-0.1.0/src/deepslate/core/proto/realtime.proto +707 -0
- deepslate_core-0.1.0/src/deepslate/core/proto/realtime_pb2.py +123 -0
- deepslate_core-0.1.0/src/deepslate/core/proto/realtime_pb2.pyi +432 -0
- deepslate_core-0.1.0/src/deepslate/core/session.py +553 -0
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# Python-generated files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[oc]
|
|
4
|
+
build/
|
|
5
|
+
dist/
|
|
6
|
+
wheels/
|
|
7
|
+
*.egg-info
|
|
8
|
+
|
|
9
|
+
# NodeJS stuff
|
|
10
|
+
node_modules/
|
|
11
|
+
|
|
12
|
+
# Virtual environments
|
|
13
|
+
.venv
|
|
14
|
+
|
|
15
|
+
# IDE files
|
|
16
|
+
.idea/
|
|
17
|
+
|
|
18
|
+
# Development files
|
|
19
|
+
.env.local
|
|
20
|
+
.no-update
|
|
21
|
+
|
|
22
|
+
# Local settings
|
|
23
|
+
.claude/
|
|
24
|
+
CLAUDE.local.md
|
|
25
|
+
|
|
@@ -0,0 +1,319 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: deepslate-core
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Shared core library for Deepslate SDK integrations
|
|
5
|
+
Project-URL: Documentation, https://docs.deepslate.eu/
|
|
6
|
+
Project-URL: Website, https://deepslate.eu/
|
|
7
|
+
Project-URL: Source, https://github.com/deepslate-labs/deepslate-sdks
|
|
8
|
+
Keywords: ai,audio,deepslate,realtime,voice
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Topic :: Multimedia :: Sound/Audio
|
|
15
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
16
|
+
Requires-Python: >=3.11
|
|
17
|
+
Requires-Dist: aiohttp>=3.10.0
|
|
18
|
+
Requires-Dist: protobuf>=5.26.0
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
|
|
21
|
+
# deepslate-core
|
|
22
|
+
|
|
23
|
+
[License: Apache 2.0](https://opensource.org/licenses/Apache-2.0)
|
|
24
|
+
[Documentation](https://docs.deepslate.eu/)
|
|
25
|
+
[Python 3.11+](https://www.python.org/downloads/)
|
|
26
|
+
|
|
27
|
+
Shared core library for [Deepslate's](https://deepslate.eu/) realtime voice AI SDKs.
|
|
28
|
+
|
|
29
|
+
> **You probably don't need to install this directly.**
|
|
30
|
+
>
|
|
31
|
+
> Install [`deepslate-livekit`](../livekit/README.md) or [`deepslate-pipecat`](../pipecat/README.md) instead — they pull in `deepslate-core` automatically. This package is for developers building **custom Deepslate integrations** outside of LiveKit or Pipecat.
|
|
32
|
+
|
|
33
|
+
---
|
|
34
|
+
|
|
35
|
+
## What's Inside
|
|
36
|
+
|
|
37
|
+
`deepslate-core` provides everything needed to connect to the Deepslate Realtime API from any async Python application:
|
|
38
|
+
|
|
39
|
+
- **`DeepslateSession`** — High-level session that manages the full WebSocket lifecycle, protobuf framing, session initialization, reconnection, and callback dispatch. The primary building block for custom integrations.
|
|
40
|
+
- **`DeepslateOptions`** — API credentials and connection configuration
|
|
41
|
+
- **`VadConfig`** — Server-side Voice Activity Detection parameters
|
|
42
|
+
- **`ElevenLabsTtsConfig` / `ElevenLabsLocation`** — ElevenLabs TTS configuration
|
|
43
|
+
- **`BaseDeepslateClient`** — Low-level async WebSocket client with exponential-backoff reconnection (used internally by `DeepslateSession`)
|
|
44
|
+
- **Protobuf definitions** — Compiled `.proto` bindings for the Deepslate realtime wire protocol
|
|
45
|
+
|
|
46
|
+
---
|
|
47
|
+
|
|
48
|
+
## Installation
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
pip install deepslate-core
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
### Requirements
|
|
55
|
+
|
|
56
|
+
- Python 3.11 or higher
|
|
57
|
+
|
|
58
|
+
### Dependencies
|
|
59
|
+
|
|
60
|
+
- `aiohttp>=3.10.0` — Async HTTP and WebSocket client
|
|
61
|
+
- `protobuf>=5.26.0` — Protocol buffer serialization
|
|
62
|
+
|
|
63
|
+
---
|
|
64
|
+
|
|
65
|
+
## Usage
|
|
66
|
+
|
|
67
|
+
> This section is for custom integration authors. For standard usage see the [LiveKit](../livekit/README.md) or [Pipecat](../pipecat/README.md) packages.
|
|
68
|
+
|
|
69
|
+
### Credentials
|
|
70
|
+
|
|
71
|
+
```python
|
|
72
|
+
from deepslate.core import DeepslateOptions
|
|
73
|
+
|
|
74
|
+
# Load from environment variables (recommended)
|
|
75
|
+
opts = DeepslateOptions.from_env()
|
|
76
|
+
|
|
77
|
+
# Or configure manually
|
|
78
|
+
opts = DeepslateOptions(
|
|
79
|
+
vendor_id="your_vendor_id",
|
|
80
|
+
organization_id="your_org_id",
|
|
81
|
+
api_key="your_api_key",
|
|
82
|
+
system_prompt="You are a helpful assistant.",
|
|
83
|
+
)
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
Required environment variables:
|
|
87
|
+
|
|
88
|
+
```bash
|
|
89
|
+
DEEPSLATE_VENDOR_ID=your_vendor_id
|
|
90
|
+
DEEPSLATE_ORGANIZATION_ID=your_organization_id
|
|
91
|
+
DEEPSLATE_API_KEY=your_api_key
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
### VAD Configuration
|
|
95
|
+
|
|
96
|
+
```python
|
|
97
|
+
from deepslate.core import VadConfig
|
|
98
|
+
|
|
99
|
+
vad = VadConfig(
|
|
100
|
+
confidence_threshold=0.5, # 0.0–1.0
|
|
101
|
+
min_volume=0.01, # 0.0–1.0
|
|
102
|
+
start_duration_ms=200,
|
|
103
|
+
stop_duration_ms=500,
|
|
104
|
+
backbuffer_duration_ms=1000,
|
|
105
|
+
)
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
### ElevenLabs TTS Configuration
|
|
109
|
+
|
|
110
|
+
```python
|
|
111
|
+
from deepslate.core import ElevenLabsTtsConfig, ElevenLabsLocation
|
|
112
|
+
|
|
113
|
+
# Load from environment variables
|
|
114
|
+
tts = ElevenLabsTtsConfig.from_env()
|
|
115
|
+
|
|
116
|
+
# Or configure manually
|
|
117
|
+
tts = ElevenLabsTtsConfig(
|
|
118
|
+
api_key="your_elevenlabs_key",
|
|
119
|
+
voice_id="21m00Tcm4TlvDq8ikWAM",
|
|
120
|
+
model_id="eleven_turbo_v2",
|
|
121
|
+
location=ElevenLabsLocation.US, # US (default), EU, or INDIA
|
|
122
|
+
)
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
### `DeepslateSession`
|
|
126
|
+
|
|
127
|
+
`DeepslateSession` is the recommended entry point for custom integrations. It handles the full protocol lifecycle — session initialization, protobuf serialization, reconnection, and server-event routing — delivering events to a `DeepslateSessionListener` so your code only deals with application logic.
|
|
128
|
+
|
|
129
|
+
Subclass `DeepslateSessionListener` and override only the methods you care about, then pass the instance to `DeepslateSession.create()`. Use `create()` when you want the session to manage its own connection — the factory creates a dedicated `BaseDeepslateClient`, and the session closes it automatically when `session.close()` is called:
|
|
130
|
+
|
|
131
|
+
```python
|
|
132
|
+
import asyncio
|
|
133
|
+
from deepslate.core import (
|
|
134
|
+
DeepslateOptions,
|
|
135
|
+
DeepslateSession,
|
|
136
|
+
DeepslateSessionListener,
|
|
137
|
+
ElevenLabsTtsConfig,
|
|
138
|
+
)
|
|
139
|
+
|
|
140
|
+
class MyListener(DeepslateSessionListener):
|
|
141
|
+
def __init__(self) -> None:
|
|
142
|
+
self.session: DeepslateSession | None = None
|
|
143
|
+
|
|
144
|
+
async def on_text_fragment(self, text: str) -> None:
|
|
145
|
+
print(text, end="", flush=True)
|
|
146
|
+
|
|
147
|
+
async def on_audio_chunk(
|
|
148
|
+
self, pcm_bytes: bytes, sample_rate: int, channels: int, transcript: str | None
|
|
149
|
+
) -> None:
|
|
150
|
+
# Forward PCM audio to your output device / transport
|
|
151
|
+
...
|
|
152
|
+
|
|
153
|
+
async def on_tool_call(self, call_id: str, name: str, params: dict) -> None:
|
|
154
|
+
result = await dispatch_tool(name, params)
|
|
155
|
+
if self.session:
|
|
156
|
+
await self.session.send_tool_response(call_id, result)
|
|
157
|
+
|
|
158
|
+
async def on_error(self, category: str, message: str, trace_id: str | None) -> None:
|
|
159
|
+
print(f"[{category}] {message}")
|
|
160
|
+
|
|
161
|
+
async def on_response_begin(self) -> None:
|
|
162
|
+
print("\n--- response start ---")
|
|
163
|
+
|
|
164
|
+
async def on_response_end(self) -> None:
|
|
165
|
+
print("--- response end ---\n")
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
async def main():
|
|
169
|
+
opts = DeepslateOptions.from_env(
|
|
170
|
+
system_prompt="You are a helpful assistant."
|
|
171
|
+
)
|
|
172
|
+
tts = ElevenLabsTtsConfig.from_env()
|
|
173
|
+
listener = MyListener()
|
|
174
|
+
|
|
175
|
+
session = DeepslateSession.create(
|
|
176
|
+
opts,
|
|
177
|
+
tts_config=tts,
|
|
178
|
+
user_agent="MyApp/1.0",
|
|
179
|
+
listener=listener,
|
|
180
|
+
)
|
|
181
|
+
listener.session = session
|
|
182
|
+
session.start()
|
|
183
|
+
|
|
184
|
+
# Initialize for text-only interaction (audio sessions initialize automatically)
|
|
185
|
+
await session.initialize(sample_rate=24000, channels=1)
|
|
186
|
+
|
|
187
|
+
# Send a text prompt and trigger a reply
|
|
188
|
+
await session.send_text("What is the capital of France?")
|
|
189
|
+
|
|
190
|
+
await asyncio.sleep(5) # Wait for response
|
|
191
|
+
await session.close()
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
asyncio.run(main())
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
#### Sending audio
|
|
198
|
+
|
|
199
|
+
Audio sessions initialize automatically on the first `send_audio()` call. The session also sends a `ReconfigureSessionRequest` transparently if the audio format changes mid-session:
|
|
200
|
+
|
|
201
|
+
```python
|
|
202
|
+
# pcm_bytes: raw signed 16-bit PCM
|
|
203
|
+
await session.send_audio(pcm_bytes, sample_rate=16000, channels=1)
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
#### Updating tools at runtime
|
|
207
|
+
|
|
208
|
+
```python
|
|
209
|
+
tools = [
|
|
210
|
+
{
|
|
211
|
+
"type": "function",
|
|
212
|
+
"function": {
|
|
213
|
+
"name": "get_weather",
|
|
214
|
+
"description": "Get the weather for a city.",
|
|
215
|
+
"parameters": {
|
|
216
|
+
"type": "object",
|
|
217
|
+
"properties": {
|
|
218
|
+
"city": {"type": "string"}
|
|
219
|
+
},
|
|
220
|
+
"required": ["city"],
|
|
221
|
+
},
|
|
222
|
+
},
|
|
223
|
+
}
|
|
224
|
+
]
|
|
225
|
+
|
|
226
|
+
await session.update_tools(tools)
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
Tool definitions are re-synced automatically after every reconnect.
|
|
230
|
+
|
|
231
|
+
#### Reconnection
|
|
232
|
+
|
|
233
|
+
`start()` drives `BaseDeepslateClient.run_with_retry()` internally. On a dropped connection the session resets its state, re-sends `InitializeSessionRequest`, re-syncs tool definitions, and resumes — all without any action required from your code. Call `close()` to stop the retry loop permanently.
|
|
234
|
+
|
|
235
|
+
---
|
|
236
|
+
|
|
237
|
+
## API Reference
|
|
238
|
+
|
|
239
|
+
### `DeepslateSession.create()`
|
|
240
|
+
|
|
241
|
+
Factory that creates a session together with its own `BaseDeepslateClient`. The session owns the client and closes it when `close()` is called.
|
|
242
|
+
|
|
243
|
+
| Parameter | Type | Default | Description |
|
|
244
|
+
|---|---|---|---|
|
|
245
|
+
| `options` | `DeepslateOptions` | required | API credentials and settings |
|
|
246
|
+
| `vad_config` | `VadConfig \| None` | `None` | VAD settings (uses defaults if omitted) |
|
|
247
|
+
| `tts_config` | `ElevenLabsTtsConfig \| None` | `None` | Enables server-side TTS audio output |
|
|
248
|
+
| `user_agent` | `str` | `"DeepslateCore"` | HTTP `User-Agent` header sent on connect |
|
|
249
|
+
| `http_session` | `aiohttp.ClientSession \| None` | `None` | Shared aiohttp session supplied by the caller (not closed by `DeepslateSession`) |
|
|
250
|
+
| `listener` | `DeepslateSessionListener \| None` | `None` | Receives all session events; defaults to a no-op base instance |
|
|
251
|
+
|
|
252
|
+
### `DeepslateSession` — send methods
|
|
253
|
+
|
|
254
|
+
| Method | Description |
|
|
255
|
+
|---|---|
|
|
256
|
+
| `await session.send_audio(pcm, sr, ch)` | Send PCM audio; auto-initializes and auto-reconfigures on format change |
|
|
257
|
+
| `await session.send_text(text, trigger=IMMEDIATE)` | Send a text `UserInput` |
|
|
258
|
+
| `await session.initialize(sample_rate=24000, channels=1)` | Explicitly initialize (for text-only sessions) |
|
|
259
|
+
| `await session.trigger_inference(instructions=None)` | Manually trigger a model reply |
|
|
260
|
+
| `await session.send_tool_response(call_id, result)` | Return a tool result to the server |
|
|
261
|
+
| `await session.update_tools(tools)` | Sync tool definitions (persisted across reconnects) |
|
|
262
|
+
| `await session.reconfigure(system_prompt=None, temperature=None)` | Live-update inference settings |
|
|
263
|
+
| `await session.send_direct_speech(text, include_in_history=True)` | Speak text directly via TTS, bypassing the LLM |
|
|
264
|
+
| `await session.export_chat_history(await_pending=False)` | Request a history export; result delivered via `on_chat_history` |
|
|
265
|
+
| `await session.send_conversation_query(query_id, prompt, instructions)` | Side-channel inference; at least one of `prompt`/`instructions` required; result via `on_conversation_query_result` |
|
|
266
|
+
| `await session.report_playback_position(bytes_played)` | Report audio playback position for server-side truncation |
|
|
267
|
+
|
|
268
|
+
### `DeepslateSessionListener`
|
|
269
|
+
|
|
270
|
+
Subclass this and override only the methods you need. All methods are `async` and default to no-ops.
|
|
271
|
+
|
|
272
|
+
| Method | Signature | Called when |
|
|
273
|
+
|---|---|---|
|
|
274
|
+
| `on_text_fragment` | `(text: str)` | Model streams a text token |
|
|
275
|
+
| `on_audio_chunk` | `(pcm_bytes: bytes, sample_rate: int, channels: int, transcript: str \| None)` | Model streams a TTS audio chunk |
|
|
276
|
+
| `on_tool_call` | `(call_id: str, name: str, params: dict)` | Model requests a tool invocation |
|
|
277
|
+
| `on_response_begin` | `()` | Model response starts |
|
|
278
|
+
| `on_response_end` | `()` | Model response ends |
|
|
279
|
+
| `on_user_transcription` | `(text: str, language: str \| None, turn_id: int)` | User speech transcription result arrives |
|
|
280
|
+
| `on_playback_buffer_clear` | `()` | Server cleared its audio playback buffer |
|
|
281
|
+
| `on_chat_history` | `(messages: list[ChatMessageDict])` | Chat history export received |
|
|
282
|
+
| `on_conversation_query_result` | `(query_id: str, text: str)` | Side-channel query result received |
|
|
283
|
+
| `on_error` | `(category: str, message: str, trace_id: str \| None)` | Server sent an error notification |
|
|
284
|
+
| `on_fatal_error` | `(e: Exception)` | All reconnect retries exhausted |
|
|
285
|
+
|
|
286
|
+
### `DeepslateOptions`
|
|
287
|
+
|
|
288
|
+
| Parameter | Type | Default | Description |
|
|
289
|
+
|---|---|---|---|
|
|
290
|
+
| `vendor_id` | `str` | env: `DEEPSLATE_VENDOR_ID` | Deepslate vendor ID |
|
|
291
|
+
| `organization_id` | `str` | env: `DEEPSLATE_ORGANIZATION_ID` | Deepslate organization ID |
|
|
292
|
+
| `api_key` | `str` | env: `DEEPSLATE_API_KEY` | Deepslate API key |
|
|
293
|
+
| `base_url` | `str` | `"https://app.deepslate.eu"` | Base URL for Deepslate API |
|
|
294
|
+
| `system_prompt` | `str` | `"You are a helpful assistant."` | Default system prompt |
|
|
295
|
+
| `ws_url` | `str \| None` | `None` | Direct WebSocket URL (overrides `base_url`; for local dev) |
|
|
296
|
+
| `max_retries` | `int` | `3` | Maximum reconnection attempts before giving up |
|
|
297
|
+
|
|
298
|
+
### `VadConfig`
|
|
299
|
+
|
|
300
|
+
| Parameter | Type | Default | Description |
|
|
301
|
+
|---|---|---|---|
|
|
302
|
+
| `confidence_threshold` | `float` | `0.5` | Minimum confidence to classify audio as speech (0–1) |
|
|
303
|
+
| `min_volume` | `float` | `0.01` | Minimum volume to classify audio as speech (0–1) |
|
|
304
|
+
| `start_duration_ms` | `int` | `200` | Duration of speech required to trigger start event |
|
|
305
|
+
| `stop_duration_ms` | `int` | `500` | Duration of silence required to trigger stop event |
|
|
306
|
+
| `backbuffer_duration_ms` | `int` | `1000` | Audio buffer captured before speech detection triggers |
|
|
307
|
+
|
|
308
|
+
---
|
|
309
|
+
|
|
310
|
+
## Documentation
|
|
311
|
+
|
|
312
|
+
- [Deepslate Documentation](https://docs.deepslate.eu/)
|
|
313
|
+
- [API Reference](https://docs.deepslate.eu/api-reference/)
|
|
314
|
+
|
|
315
|
+
---
|
|
316
|
+
|
|
317
|
+
## License
|
|
318
|
+
|
|
319
|
+
Apache License 2.0 — see [LICENSE](../../LICENSE) for details.
|
|
@@ -0,0 +1,299 @@
|
|
|
1
|
+
# deepslate-core
|
|
2
|
+
|
|
3
|
+
[License: Apache 2.0](https://opensource.org/licenses/Apache-2.0)
|
|
4
|
+
[Documentation](https://docs.deepslate.eu/)
|
|
5
|
+
[Python 3.11+](https://www.python.org/downloads/)
|
|
6
|
+
|
|
7
|
+
Shared core library for [Deepslate's](https://deepslate.eu/) realtime voice AI SDKs.
|
|
8
|
+
|
|
9
|
+
> **You probably don't need to install this directly.**
|
|
10
|
+
>
|
|
11
|
+
> Install [`deepslate-livekit`](../livekit/README.md) or [`deepslate-pipecat`](../pipecat/README.md) instead — they pull in `deepslate-core` automatically. This package is for developers building **custom Deepslate integrations** outside of LiveKit or Pipecat.
|
|
12
|
+
|
|
13
|
+
---
|
|
14
|
+
|
|
15
|
+
## What's Inside
|
|
16
|
+
|
|
17
|
+
`deepslate-core` provides everything needed to connect to the Deepslate Realtime API from any async Python application:
|
|
18
|
+
|
|
19
|
+
- **`DeepslateSession`** — High-level session that manages the full WebSocket lifecycle, protobuf framing, session initialization, reconnection, and callback dispatch. The primary building block for custom integrations.
|
|
20
|
+
- **`DeepslateOptions`** — API credentials and connection configuration
|
|
21
|
+
- **`VadConfig`** — Server-side Voice Activity Detection parameters
|
|
22
|
+
- **`ElevenLabsTtsConfig` / `ElevenLabsLocation`** — ElevenLabs TTS configuration
|
|
23
|
+
- **`BaseDeepslateClient`** — Low-level async WebSocket client with exponential-backoff reconnection (used internally by `DeepslateSession`)
|
|
24
|
+
- **Protobuf definitions** — Compiled `.proto` bindings for the Deepslate realtime wire protocol
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
## Installation
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
pip install deepslate-core
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
### Requirements
|
|
35
|
+
|
|
36
|
+
- Python 3.11 or higher
|
|
37
|
+
|
|
38
|
+
### Dependencies
|
|
39
|
+
|
|
40
|
+
- `aiohttp>=3.10.0` — Async HTTP and WebSocket client
|
|
41
|
+
- `protobuf>=5.26.0` — Protocol buffer serialization
|
|
42
|
+
|
|
43
|
+
---
|
|
44
|
+
|
|
45
|
+
## Usage
|
|
46
|
+
|
|
47
|
+
> This section is for custom integration authors. For standard usage see the [LiveKit](../livekit/README.md) or [Pipecat](../pipecat/README.md) packages.
|
|
48
|
+
|
|
49
|
+
### Credentials
|
|
50
|
+
|
|
51
|
+
```python
|
|
52
|
+
from deepslate.core import DeepslateOptions
|
|
53
|
+
|
|
54
|
+
# Load from environment variables (recommended)
|
|
55
|
+
opts = DeepslateOptions.from_env()
|
|
56
|
+
|
|
57
|
+
# Or configure manually
|
|
58
|
+
opts = DeepslateOptions(
|
|
59
|
+
vendor_id="your_vendor_id",
|
|
60
|
+
organization_id="your_org_id",
|
|
61
|
+
api_key="your_api_key",
|
|
62
|
+
system_prompt="You are a helpful assistant.",
|
|
63
|
+
)
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
Required environment variables:
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
DEEPSLATE_VENDOR_ID=your_vendor_id
|
|
70
|
+
DEEPSLATE_ORGANIZATION_ID=your_organization_id
|
|
71
|
+
DEEPSLATE_API_KEY=your_api_key
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
### VAD Configuration
|
|
75
|
+
|
|
76
|
+
```python
|
|
77
|
+
from deepslate.core import VadConfig
|
|
78
|
+
|
|
79
|
+
vad = VadConfig(
|
|
80
|
+
confidence_threshold=0.5, # 0.0–1.0
|
|
81
|
+
min_volume=0.01, # 0.0–1.0
|
|
82
|
+
start_duration_ms=200,
|
|
83
|
+
stop_duration_ms=500,
|
|
84
|
+
backbuffer_duration_ms=1000,
|
|
85
|
+
)
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
### ElevenLabs TTS Configuration
|
|
89
|
+
|
|
90
|
+
```python
|
|
91
|
+
from deepslate.core import ElevenLabsTtsConfig, ElevenLabsLocation
|
|
92
|
+
|
|
93
|
+
# Load from environment variables
|
|
94
|
+
tts = ElevenLabsTtsConfig.from_env()
|
|
95
|
+
|
|
96
|
+
# Or configure manually
|
|
97
|
+
tts = ElevenLabsTtsConfig(
|
|
98
|
+
api_key="your_elevenlabs_key",
|
|
99
|
+
voice_id="21m00Tcm4TlvDq8ikWAM",
|
|
100
|
+
model_id="eleven_turbo_v2",
|
|
101
|
+
location=ElevenLabsLocation.US, # US (default), EU, or INDIA
|
|
102
|
+
)
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
### `DeepslateSession`
|
|
106
|
+
|
|
107
|
+
`DeepslateSession` is the recommended entry point for custom integrations. It handles the full protocol lifecycle — session initialization, protobuf serialization, reconnection, and server-event routing — delivering events to a `DeepslateSessionListener` so your code only deals with application logic.
|
|
108
|
+
|
|
109
|
+
Subclass `DeepslateSessionListener` and override only the methods you care about, then pass the instance to `DeepslateSession.create()`. Use `create()` when you want the session to manage its own connection — the factory creates a dedicated `BaseDeepslateClient`, and the session closes it automatically when `session.close()` is called:
|
|
110
|
+
|
|
111
|
+
```python
|
|
112
|
+
import asyncio
|
|
113
|
+
from deepslate.core import (
|
|
114
|
+
DeepslateOptions,
|
|
115
|
+
DeepslateSession,
|
|
116
|
+
DeepslateSessionListener,
|
|
117
|
+
ElevenLabsTtsConfig,
|
|
118
|
+
)
|
|
119
|
+
|
|
120
|
+
class MyListener(DeepslateSessionListener):
|
|
121
|
+
def __init__(self) -> None:
|
|
122
|
+
self.session: DeepslateSession | None = None
|
|
123
|
+
|
|
124
|
+
async def on_text_fragment(self, text: str) -> None:
|
|
125
|
+
print(text, end="", flush=True)
|
|
126
|
+
|
|
127
|
+
async def on_audio_chunk(
|
|
128
|
+
self, pcm_bytes: bytes, sample_rate: int, channels: int, transcript: str | None
|
|
129
|
+
) -> None:
|
|
130
|
+
# Forward PCM audio to your output device / transport
|
|
131
|
+
...
|
|
132
|
+
|
|
133
|
+
async def on_tool_call(self, call_id: str, name: str, params: dict) -> None:
|
|
134
|
+
result = await dispatch_tool(name, params)
|
|
135
|
+
if self.session:
|
|
136
|
+
await self.session.send_tool_response(call_id, result)
|
|
137
|
+
|
|
138
|
+
async def on_error(self, category: str, message: str, trace_id: str | None) -> None:
|
|
139
|
+
print(f"[{category}] {message}")
|
|
140
|
+
|
|
141
|
+
async def on_response_begin(self) -> None:
|
|
142
|
+
print("\n--- response start ---")
|
|
143
|
+
|
|
144
|
+
async def on_response_end(self) -> None:
|
|
145
|
+
print("--- response end ---\n")
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
async def main():
|
|
149
|
+
opts = DeepslateOptions.from_env(
|
|
150
|
+
system_prompt="You are a helpful assistant."
|
|
151
|
+
)
|
|
152
|
+
tts = ElevenLabsTtsConfig.from_env()
|
|
153
|
+
listener = MyListener()
|
|
154
|
+
|
|
155
|
+
session = DeepslateSession.create(
|
|
156
|
+
opts,
|
|
157
|
+
tts_config=tts,
|
|
158
|
+
user_agent="MyApp/1.0",
|
|
159
|
+
listener=listener,
|
|
160
|
+
)
|
|
161
|
+
listener.session = session
|
|
162
|
+
session.start()
|
|
163
|
+
|
|
164
|
+
# Initialize for text-only interaction (audio sessions initialize automatically)
|
|
165
|
+
await session.initialize(sample_rate=24000, channels=1)
|
|
166
|
+
|
|
167
|
+
# Send a text prompt and trigger a reply
|
|
168
|
+
await session.send_text("What is the capital of France?")
|
|
169
|
+
|
|
170
|
+
await asyncio.sleep(5) # Wait for response
|
|
171
|
+
await session.close()
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
asyncio.run(main())
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
#### Sending audio
|
|
178
|
+
|
|
179
|
+
Audio sessions initialize automatically on the first `send_audio()` call. The session also sends a `ReconfigureSessionRequest` transparently if the audio format changes mid-session:
|
|
180
|
+
|
|
181
|
+
```python
|
|
182
|
+
# pcm_bytes: raw signed 16-bit PCM
|
|
183
|
+
await session.send_audio(pcm_bytes, sample_rate=16000, channels=1)
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
#### Updating tools at runtime
|
|
187
|
+
|
|
188
|
+
```python
|
|
189
|
+
tools = [
|
|
190
|
+
{
|
|
191
|
+
"type": "function",
|
|
192
|
+
"function": {
|
|
193
|
+
"name": "get_weather",
|
|
194
|
+
"description": "Get the weather for a city.",
|
|
195
|
+
"parameters": {
|
|
196
|
+
"type": "object",
|
|
197
|
+
"properties": {
|
|
198
|
+
"city": {"type": "string"}
|
|
199
|
+
},
|
|
200
|
+
"required": ["city"],
|
|
201
|
+
},
|
|
202
|
+
},
|
|
203
|
+
}
|
|
204
|
+
]
|
|
205
|
+
|
|
206
|
+
await session.update_tools(tools)
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
Tool definitions are re-synced automatically after every reconnect.
|
|
210
|
+
|
|
211
|
+
#### Reconnection
|
|
212
|
+
|
|
213
|
+
`start()` drives `BaseDeepslateClient.run_with_retry()` internally. On a dropped connection the session resets its state, re-sends `InitializeSessionRequest`, re-syncs tool definitions, and resumes — all without any action required from your code. Call `close()` to stop the retry loop permanently.
|
|
214
|
+
|
|
215
|
+
---
|
|
216
|
+
|
|
217
|
+
## API Reference
|
|
218
|
+
|
|
219
|
+
### `DeepslateSession.create()`
|
|
220
|
+
|
|
221
|
+
Factory that creates a session together with its own `BaseDeepslateClient`. The session owns the client and closes it when `close()` is called.
|
|
222
|
+
|
|
223
|
+
| Parameter | Type | Default | Description |
|
|
224
|
+
|---|---|---|---|
|
|
225
|
+
| `options` | `DeepslateOptions` | required | API credentials and settings |
|
|
226
|
+
| `vad_config` | `VadConfig \| None` | `None` | VAD settings (uses defaults if omitted) |
|
|
227
|
+
| `tts_config` | `ElevenLabsTtsConfig \| None` | `None` | Enables server-side TTS audio output |
|
|
228
|
+
| `user_agent` | `str` | `"DeepslateCore"` | HTTP `User-Agent` header sent on connect |
|
|
229
|
+
| `http_session` | `aiohttp.ClientSession \| None` | `None` | Shared aiohttp session supplied by the caller (not closed by `DeepslateSession`) |
|
|
230
|
+
| `listener` | `DeepslateSessionListener \| None` | `None` | Receives all session events; defaults to a no-op base instance |
|
|
231
|
+
|
|
232
|
+
### `DeepslateSession` — send methods
|
|
233
|
+
|
|
234
|
+
| Method | Description |
|
|
235
|
+
|---|---|
|
|
236
|
+
| `await session.send_audio(pcm, sr, ch)` | Send PCM audio; auto-initializes and auto-reconfigures on format change |
|
|
237
|
+
| `await session.send_text(text, trigger=IMMEDIATE)` | Send a text `UserInput` |
|
|
238
|
+
| `await session.initialize(sample_rate=24000, channels=1)` | Explicitly initialize (for text-only sessions) |
|
|
239
|
+
| `await session.trigger_inference(instructions=None)` | Manually trigger a model reply |
|
|
240
|
+
| `await session.send_tool_response(call_id, result)` | Return a tool result to the server |
|
|
241
|
+
| `await session.update_tools(tools)` | Sync tool definitions (persisted across reconnects) |
|
|
242
|
+
| `await session.reconfigure(system_prompt=None, temperature=None)` | Live-update inference settings |
|
|
243
|
+
| `await session.send_direct_speech(text, include_in_history=True)` | Speak text directly via TTS, bypassing the LLM |
|
|
244
|
+
| `await session.export_chat_history(await_pending=False)` | Request a history export; result delivered via `on_chat_history` |
|
|
245
|
+
| `await session.send_conversation_query(query_id, prompt, instructions)` | Side-channel inference; at least one of `prompt`/`instructions` required; result via `on_conversation_query_result` |
|
|
246
|
+
| `await session.report_playback_position(bytes_played)` | Report audio playback position for server-side truncation |
|
|
247
|
+
|
|
248
|
+
### `DeepslateSessionListener`
|
|
249
|
+
|
|
250
|
+
Subclass this and override only the methods you need. All methods are `async` and default to no-ops.
|
|
251
|
+
|
|
252
|
+
| Method | Signature | Called when |
|
|
253
|
+
|---|---|---|
|
|
254
|
+
| `on_text_fragment` | `(text: str)` | Model streams a text token |
|
|
255
|
+
| `on_audio_chunk` | `(pcm_bytes: bytes, sample_rate: int, channels: int, transcript: str \| None)` | Model streams a TTS audio chunk |
|
|
256
|
+
| `on_tool_call` | `(call_id: str, name: str, params: dict)` | Model requests a tool invocation |
|
|
257
|
+
| `on_response_begin` | `()` | Model response starts |
|
|
258
|
+
| `on_response_end` | `()` | Model response ends |
|
|
259
|
+
| `on_user_transcription` | `(text: str, language: str \| None, turn_id: int)` | User speech transcription result arrives |
|
|
260
|
+
| `on_playback_buffer_clear` | `()` | Server cleared its audio playback buffer |
|
|
261
|
+
| `on_chat_history` | `(messages: list[ChatMessageDict])` | Chat history export received |
|
|
262
|
+
| `on_conversation_query_result` | `(query_id: str, text: str)` | Side-channel query result received |
|
|
263
|
+
| `on_error` | `(category: str, message: str, trace_id: str \| None)` | Server sent an error notification |
|
|
264
|
+
| `on_fatal_error` | `(e: Exception)` | All reconnect retries exhausted |
|
|
265
|
+
|
|
266
|
+
### `DeepslateOptions`
|
|
267
|
+
|
|
268
|
+
| Parameter | Type | Default | Description |
|
|
269
|
+
|---|---|---|---|
|
|
270
|
+
| `vendor_id` | `str` | env: `DEEPSLATE_VENDOR_ID` | Deepslate vendor ID |
|
|
271
|
+
| `organization_id` | `str` | env: `DEEPSLATE_ORGANIZATION_ID` | Deepslate organization ID |
|
|
272
|
+
| `api_key` | `str` | env: `DEEPSLATE_API_KEY` | Deepslate API key |
|
|
273
|
+
| `base_url` | `str` | `"https://app.deepslate.eu"` | Base URL for Deepslate API |
|
|
274
|
+
| `system_prompt` | `str` | `"You are a helpful assistant."` | Default system prompt |
|
|
275
|
+
| `ws_url` | `str \| None` | `None` | Direct WebSocket URL (overrides `base_url`; for local dev) |
|
|
276
|
+
| `max_retries` | `int` | `3` | Maximum reconnection attempts before giving up |
|
|
277
|
+
|
|
278
|
+
### `VadConfig`
|
|
279
|
+
|
|
280
|
+
| Parameter | Type | Default | Description |
|
|
281
|
+
|---|---|---|---|
|
|
282
|
+
| `confidence_threshold` | `float` | `0.5` | Minimum confidence to classify audio as speech (0–1) |
|
|
283
|
+
| `min_volume` | `float` | `0.01` | Minimum volume to classify audio as speech (0–1) |
|
|
284
|
+
| `start_duration_ms` | `int` | `200` | Duration of speech required to trigger start event |
|
|
285
|
+
| `stop_duration_ms` | `int` | `500` | Duration of silence required to trigger stop event |
|
|
286
|
+
| `backbuffer_duration_ms` | `int` | `1000` | Audio buffer captured before speech detection triggers |
|
|
287
|
+
|
|
288
|
+
---
|
|
289
|
+
|
|
290
|
+
## Documentation
|
|
291
|
+
|
|
292
|
+
- [Deepslate Documentation](https://docs.deepslate.eu/)
|
|
293
|
+
- [API Reference](https://docs.deepslate.eu/api-reference/)
|
|
294
|
+
|
|
295
|
+
---
|
|
296
|
+
|
|
297
|
+
## License
|
|
298
|
+
|
|
299
|
+
Apache License 2.0 — see [LICENSE](../../LICENSE) for details.
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "deepslate-core"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Shared core library for Deepslate SDK integrations"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.11"
|
|
11
|
+
dependencies = [
|
|
12
|
+
"aiohttp>=3.10.0",
|
|
13
|
+
"protobuf>=5.26.0",
|
|
14
|
+
]
|
|
15
|
+
keywords = ["voice", "ai", "realtime", "audio", "deepslate"]
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Intended Audience :: Developers",
|
|
18
|
+
"License :: OSI Approved :: Apache Software License",
|
|
19
|
+
"Topic :: Multimedia :: Sound/Audio",
|
|
20
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
21
|
+
"Programming Language :: Python :: 3",
|
|
22
|
+
"Programming Language :: Python :: 3.11",
|
|
23
|
+
"Programming Language :: Python :: 3 :: Only",
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
[project.urls]
|
|
27
|
+
Documentation = "https://docs.deepslate.eu/"
|
|
28
|
+
Website = "https://deepslate.eu/"
|
|
29
|
+
Source = "https://github.com/deepslate-labs/deepslate-sdks"
|
|
30
|
+
|
|
31
|
+
[tool.hatch.build.targets.wheel]
|
|
32
|
+
packages = ["src/deepslate/core"]
|