chub-dev 0.1.0 → 0.1.2-beta.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +55 -0
- package/bin/chub-mcp +2 -0
- package/dist/airtable/docs/database/javascript/DOC.md +1437 -0
- package/dist/airtable/docs/database/python/DOC.md +1735 -0
- package/dist/amplitude/docs/analytics/javascript/DOC.md +1282 -0
- package/dist/amplitude/docs/analytics/python/DOC.md +1199 -0
- package/dist/anthropic/docs/claude-api/javascript/DOC.md +503 -0
- package/dist/anthropic/docs/claude-api/python/DOC.md +389 -0
- package/dist/asana/docs/tasks/DOC.md +1396 -0
- package/dist/assemblyai/docs/transcription/DOC.md +1043 -0
- package/dist/atlassian/docs/confluence/javascript/DOC.md +1347 -0
- package/dist/atlassian/docs/confluence/python/DOC.md +1604 -0
- package/dist/auth0/docs/identity/javascript/DOC.md +968 -0
- package/dist/auth0/docs/identity/python/DOC.md +1199 -0
- package/dist/aws/docs/s3/javascript/DOC.md +1773 -0
- package/dist/aws/docs/s3/python/DOC.md +1807 -0
- package/dist/binance/docs/trading/javascript/DOC.md +1315 -0
- package/dist/binance/docs/trading/python/DOC.md +1454 -0
- package/dist/braintree/docs/gateway/javascript/DOC.md +1278 -0
- package/dist/braintree/docs/gateway/python/DOC.md +1179 -0
- package/dist/chromadb/docs/embeddings-db/javascript/DOC.md +1263 -0
- package/dist/chromadb/docs/embeddings-db/python/DOC.md +1707 -0
- package/dist/clerk/docs/auth/javascript/DOC.md +1220 -0
- package/dist/clerk/docs/auth/python/DOC.md +274 -0
- package/dist/cloudflare/docs/workers/javascript/DOC.md +918 -0
- package/dist/cloudflare/docs/workers/python/DOC.md +994 -0
- package/dist/cockroachdb/docs/distributed-db/DOC.md +1500 -0
- package/dist/cohere/docs/llm/DOC.md +1335 -0
- package/dist/datadog/docs/monitoring/javascript/DOC.md +1740 -0
- package/dist/datadog/docs/monitoring/python/DOC.md +1815 -0
- package/dist/deepgram/docs/speech/javascript/DOC.md +885 -0
- package/dist/deepgram/docs/speech/python/DOC.md +685 -0
- package/dist/deepl/docs/translation/javascript/DOC.md +887 -0
- package/dist/deepl/docs/translation/python/DOC.md +944 -0
- package/dist/deepseek/docs/llm/DOC.md +1220 -0
- package/dist/directus/docs/headless-cms/javascript/DOC.md +1128 -0
- package/dist/directus/docs/headless-cms/python/DOC.md +1276 -0
- package/dist/discord/docs/bot/javascript/DOC.md +1090 -0
- package/dist/discord/docs/bot/python/DOC.md +1130 -0
- package/dist/elasticsearch/docs/search/DOC.md +1634 -0
- package/dist/elevenlabs/docs/text-to-speech/javascript/DOC.md +336 -0
- package/dist/elevenlabs/docs/text-to-speech/python/DOC.md +552 -0
- package/dist/firebase/docs/auth/DOC.md +1015 -0
- package/dist/gemini/docs/genai/javascript/DOC.md +691 -0
- package/dist/gemini/docs/genai/python/DOC.md +555 -0
- package/dist/github/docs/octokit/DOC.md +1560 -0
- package/dist/google/docs/bigquery/javascript/DOC.md +1688 -0
- package/dist/google/docs/bigquery/python/DOC.md +1503 -0
- package/dist/hubspot/docs/crm/javascript/DOC.md +1805 -0
- package/dist/hubspot/docs/crm/python/DOC.md +2033 -0
- package/dist/huggingface/docs/transformers/DOC.md +948 -0
- package/dist/intercom/docs/messaging/javascript/DOC.md +1844 -0
- package/dist/intercom/docs/messaging/python/DOC.md +1797 -0
- package/dist/jira/docs/issues/javascript/DOC.md +1420 -0
- package/dist/jira/docs/issues/python/DOC.md +1492 -0
- package/dist/kafka/docs/streaming/javascript/DOC.md +1671 -0
- package/dist/kafka/docs/streaming/python/DOC.md +1464 -0
- package/dist/landingai-ade/docs/api/DOC.md +620 -0
- package/dist/landingai-ade/docs/sdk/python/DOC.md +489 -0
- package/dist/landingai-ade/docs/sdk/typescript/DOC.md +542 -0
- package/dist/landingai-ade/skills/SKILL.md +489 -0
- package/dist/launchdarkly/docs/feature-flags/javascript/DOC.md +1191 -0
- package/dist/launchdarkly/docs/feature-flags/python/DOC.md +1671 -0
- package/dist/linear/docs/tracker/DOC.md +1554 -0
- package/dist/livekit/docs/realtime/javascript/DOC.md +303 -0
- package/dist/livekit/docs/realtime/python/DOC.md +163 -0
- package/dist/mailchimp/docs/marketing/DOC.md +1420 -0
- package/dist/meilisearch/docs/search/DOC.md +1241 -0
- package/dist/microsoft/docs/onedrive/javascript/DOC.md +1421 -0
- package/dist/microsoft/docs/onedrive/python/DOC.md +1549 -0
- package/dist/mongodb/docs/atlas/DOC.md +2041 -0
- package/dist/notion/docs/workspace-api/javascript/DOC.md +1435 -0
- package/dist/notion/docs/workspace-api/python/DOC.md +1400 -0
- package/dist/okta/docs/identity/javascript/DOC.md +1171 -0
- package/dist/okta/docs/identity/python/DOC.md +1401 -0
- package/dist/openai/docs/chat/javascript/DOC.md +407 -0
- package/dist/openai/docs/chat/python/DOC.md +568 -0
- package/dist/paypal/docs/checkout/DOC.md +278 -0
- package/dist/pinecone/docs/sdk/javascript/DOC.md +984 -0
- package/dist/pinecone/docs/sdk/python/DOC.md +1395 -0
- package/dist/plaid/docs/banking/javascript/DOC.md +1163 -0
- package/dist/plaid/docs/banking/python/DOC.md +1203 -0
- package/dist/playwright-community/skills/login-flows/SKILL.md +108 -0
- package/dist/postmark/docs/transactional-email/DOC.md +1168 -0
- package/dist/prisma/docs/orm/javascript/DOC.md +1419 -0
- package/dist/prisma/docs/orm/python/DOC.md +1317 -0
- package/dist/qdrant/docs/vector-search/javascript/DOC.md +1221 -0
- package/dist/qdrant/docs/vector-search/python/DOC.md +1653 -0
- package/dist/rabbitmq/docs/message-queue/javascript/DOC.md +1193 -0
- package/dist/rabbitmq/docs/message-queue/python/DOC.md +1243 -0
- package/dist/razorpay/docs/payments/javascript/DOC.md +1219 -0
- package/dist/razorpay/docs/payments/python/DOC.md +1330 -0
- package/dist/redis/docs/key-value/javascript/DOC.md +1851 -0
- package/dist/redis/docs/key-value/python/DOC.md +2054 -0
- package/dist/registry.json +2817 -0
- package/dist/replicate/docs/model-hosting/DOC.md +1318 -0
- package/dist/resend/docs/email/DOC.md +1271 -0
- package/dist/salesforce/docs/crm/javascript/DOC.md +1241 -0
- package/dist/salesforce/docs/crm/python/DOC.md +1183 -0
- package/dist/search-index.json +1 -0
- package/dist/sendgrid/docs/email-api/javascript/DOC.md +371 -0
- package/dist/sendgrid/docs/email-api/python/DOC.md +656 -0
- package/dist/sentry/docs/error-tracking/javascript/DOC.md +1073 -0
- package/dist/sentry/docs/error-tracking/python/DOC.md +1309 -0
- package/dist/shopify/docs/storefront/DOC.md +457 -0
- package/dist/slack/docs/workspace/javascript/DOC.md +933 -0
- package/dist/slack/docs/workspace/python/DOC.md +271 -0
- package/dist/square/docs/payments/javascript/DOC.md +1855 -0
- package/dist/square/docs/payments/python/DOC.md +1728 -0
- package/dist/stripe/docs/api/DOC.md +1727 -0
- package/dist/stripe/docs/payments/DOC.md +1726 -0
- package/dist/stytch/docs/auth/javascript/DOC.md +1813 -0
- package/dist/stytch/docs/auth/python/DOC.md +1962 -0
- package/dist/supabase/docs/client/DOC.md +1606 -0
- package/dist/twilio/docs/messaging/python/DOC.md +469 -0
- package/dist/twilio/docs/messaging/typescript/DOC.md +946 -0
- package/dist/vercel/docs/platform/DOC.md +1940 -0
- package/dist/weaviate/docs/vector-db/javascript/DOC.md +1268 -0
- package/dist/weaviate/docs/vector-db/python/DOC.md +1388 -0
- package/dist/zendesk/docs/support/javascript/DOC.md +2150 -0
- package/dist/zendesk/docs/support/python/DOC.md +2297 -0
- package/package.json +22 -6
- package/skills/get-api-docs/SKILL.md +84 -0
- package/src/commands/annotate.js +83 -0
- package/src/commands/build.js +12 -1
- package/src/commands/feedback.js +150 -0
- package/src/commands/get.js +83 -42
- package/src/commands/search.js +7 -0
- package/src/index.js +43 -17
- package/src/lib/analytics.js +90 -0
- package/src/lib/annotations.js +57 -0
- package/src/lib/bm25.js +170 -0
- package/src/lib/cache.js +69 -6
- package/src/lib/config.js +8 -3
- package/src/lib/identity.js +99 -0
- package/src/lib/registry.js +103 -20
- package/src/lib/telemetry.js +86 -0
- package/src/mcp/server.js +177 -0
- package/src/mcp/tools.js +251 -0
|
@@ -0,0 +1,685 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: speech
|
|
3
|
+
description: "Deepgram Python SDK coding guidelines for speech recognition, text-to-speech, and audio intelligence"
|
|
4
|
+
metadata:
|
|
5
|
+
languages: "python"
|
|
6
|
+
versions: "5.1.0"
|
|
7
|
+
updated-on: "2026-03-02"
|
|
8
|
+
source: maintainer
|
|
9
|
+
tags: "deepgram,speech,transcription,tts,audio"
|
|
10
|
+
---
|
|
11
|
+
|
|
12
|
+
# Deepgram Python SDK Coding Guidelines
|
|
13
|
+
|
|
14
|
+
You are a Deepgram Python SDK coding expert. Help me with writing code using the Deepgram API calling the official Python SDK.
|
|
15
|
+
|
|
16
|
+
Please follow the following guidelines when generating code.
|
|
17
|
+
|
|
18
|
+
You can find the official SDK documentation and code samples here:
|
|
19
|
+
https://developers.deepgram.com/docs
|
|
20
|
+
|
|
21
|
+
## Golden Rule: Use the Correct and Current SDK
|
|
22
|
+
|
|
23
|
+
Always use the official Deepgram Python SDK for all Deepgram API interactions. <cite/>
|
|
24
|
+
|
|
25
|
+
- **Library Name:** Deepgram Python SDK
|
|
26
|
+
- **Python Package:** `deepgram-sdk`
|
|
27
|
+
- **Repository:** https://github.com/deepgram/deepgram-python-sdk
|
|
28
|
+
|
|
29
|
+
**Installation:**
|
|
30
|
+
|
|
31
|
+
- **Correct:** `pip install deepgram-sdk`
|
|
32
|
+
|
|
33
|
+
**APIs and Usage:**
|
|
34
|
+
|
|
35
|
+
- **Correct:** `from deepgram import DeepgramClient`
|
|
36
|
+
- **Correct:** `from deepgram import SpeakOptions, PrerecordedOptions, LiveOptions`
|
|
37
|
+
- **Correct:** `from deepgram import ReadOptions, SettingsOptions`
|
|
38
|
+
|
|
39
|
+
## Initialization and API Key
|
|
40
|
+
|
|
41
|
+
The `deepgram-sdk` library requires creating a client object for all API calls. <cite/>
|
|
42
|
+
|
|
43
|
+
- Always use `client = DeepgramClient()` to create a client object
|
|
44
|
+
- Set `DEEPGRAM_API_KEY` environment variable, which will be picked up automatically
|
|
45
|
+
- Alternatively, pass the API key directly: `client = DeepgramClient(api_key="YOUR_API_KEY")`
|
|
46
|
+
|
|
47
|
+
```python
|
|
48
|
+
from deepgram import DeepgramClient
|
|
49
|
+
|
|
50
|
+
# Using environment variable DEEPGRAM_API_KEY
|
|
51
|
+
client = DeepgramClient()
|
|
52
|
+
|
|
53
|
+
# Or direct API key
|
|
54
|
+
client = DeepgramClient(api_key="YOUR_API_KEY")
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
## Authentication Methods
|
|
58
|
+
|
|
59
|
+
The Deepgram Python SDK supports multiple authentication methods:
|
|
60
|
+
|
|
61
|
+
### API Key Authentication (Traditional)
|
|
62
|
+
```python
|
|
63
|
+
from deepgram import DeepgramClient
|
|
64
|
+
|
|
65
|
+
# Direct API key
|
|
66
|
+
client = DeepgramClient(api_key="YOUR_API_KEY")
|
|
67
|
+
|
|
68
|
+
# Or using environment variable DEEPGRAM_API_KEY
|
|
69
|
+
client = DeepgramClient() # Auto-detects from environment
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
### Bearer Token Authentication (OAuth 2.0)
|
|
73
|
+
```python
|
|
74
|
+
from deepgram import DeepgramClient
|
|
75
|
+
|
|
76
|
+
# Direct access token
|
|
77
|
+
client = DeepgramClient(access_token="YOUR_ACCESS_TOKEN")
|
|
78
|
+
|
|
79
|
+
# Or using environment variable DEEPGRAM_ACCESS_TOKEN
|
|
80
|
+
client = DeepgramClient() # Auto-detects from environment
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
## Models
|
|
84
|
+
|
|
85
|
+
By default, use the following models when using the Deepgram SDK: <cite/>
|
|
86
|
+
|
|
87
|
+
- **Speech-to-Text Tasks:** `nova-3` (latest general model)
|
|
88
|
+
- **Text-to-Speech Tasks:** `aura-2-thalia-en` (default TTS model)
|
|
89
|
+
- **Text Intelligence Tasks:** `nova-3`
|
|
90
|
+
|
|
91
|
+
## Speech-to-Text (Transcription)
|
|
92
|
+
|
|
93
|
+
### Pre-Recorded Audio (Synchronous)
|
|
94
|
+
|
|
95
|
+
For transcribing pre-recorded audio files:
|
|
96
|
+
|
|
97
|
+
```python
|
|
98
|
+
from deepgram import DeepgramClient, PrerecordedOptions, FileSource
|
|
99
|
+
|
|
100
|
+
# Create client
|
|
101
|
+
client = DeepgramClient()
|
|
102
|
+
|
|
103
|
+
# Configure options
|
|
104
|
+
options = PrerecordedOptions(
|
|
105
|
+
model="nova-3",
|
|
106
|
+
smart_format=True,
|
|
107
|
+
language="en"
|
|
108
|
+
)
|
|
109
|
+
|
|
110
|
+
# From URL
|
|
111
|
+
url_source = {"url": "https://example.com/audio.wav"}
|
|
112
|
+
response = client.listen.rest.v("1").transcribe_url(url_source, options)
|
|
113
|
+
|
|
114
|
+
# From local file
|
|
115
|
+
with open("audio.wav", "rb") as file:
|
|
116
|
+
buffer_data = file.read()
|
|
117
|
+
|
|
118
|
+
payload = {"buffer": buffer_data}
|
|
119
|
+
response = client.listen.rest.v("1").transcribe_file(payload, options)
|
|
120
|
+
|
|
121
|
+
print(response.results.channels[0].alternatives[0].transcript)
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
### Pre-Recorded Audio (Asynchronous)
|
|
125
|
+
|
|
126
|
+
For asynchronous transcription with callbacks:
|
|
127
|
+
|
|
128
|
+
```python
|
|
129
|
+
import asyncio
|
|
130
|
+
from deepgram import DeepgramClient, PrerecordedOptions
|
|
131
|
+
|
|
132
|
+
async def main():
|
|
133
|
+
client = DeepgramClient()
|
|
134
|
+
|
|
135
|
+
options = PrerecordedOptions(
|
|
136
|
+
model="nova-3",
|
|
137
|
+
smart_format=True
|
|
138
|
+
)
|
|
139
|
+
|
|
140
|
+
url_source = {"url": "https://example.com/audio.wav"}
|
|
141
|
+
response = await client.listen.asyncrest.v("1").transcribe_url(url_source, options)
|
|
142
|
+
|
|
143
|
+
print(response.results.channels[0].alternatives[0].transcript)
|
|
144
|
+
|
|
145
|
+
asyncio.run(main())
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
### Streaming Audio (Real-time)
|
|
149
|
+
|
|
150
|
+
For real-time audio transcription:
|
|
151
|
+
|
|
152
|
+
```python
|
|
153
|
+
from deepgram import DeepgramClient, LiveTranscriptionEvents, LiveOptions
|
|
154
|
+
|
|
155
|
+
# Create client
|
|
156
|
+
client = DeepgramClient()
|
|
157
|
+
|
|
158
|
+
# Create connection
|
|
159
|
+
dg_connection = client.listen.websocket.v("1")
|
|
160
|
+
|
|
161
|
+
def on_message(self, result, **kwargs):
|
|
162
|
+
sentence = result.channel.alternatives[0].transcript
|
|
163
|
+
if len(sentence) == 0:
|
|
164
|
+
return
|
|
165
|
+
print(f"Transcript: {sentence}")
|
|
166
|
+
|
|
167
|
+
def on_error(self, error, **kwargs):
|
|
168
|
+
print(f"Error: {error}")
|
|
169
|
+
|
|
170
|
+
# Register event handlers
|
|
171
|
+
dg_connection.on(LiveTranscriptionEvents.Transcript, on_message)
|
|
172
|
+
dg_connection.on(LiveTranscriptionEvents.Error, on_error)
|
|
173
|
+
|
|
174
|
+
# Configure options
|
|
175
|
+
options = LiveOptions(
|
|
176
|
+
model="nova-3",
|
|
177
|
+
language="en",
|
|
178
|
+
smart_format=True
|
|
179
|
+
)
|
|
180
|
+
|
|
181
|
+
# Start connection
|
|
182
|
+
if dg_connection.start(options) is False:
|
|
183
|
+
print("Failed to start connection")
|
|
184
|
+
exit()
|
|
185
|
+
|
|
186
|
+
# Send audio data (example with microphone)
|
|
187
|
+
# dg_connection.send(audio_data)
|
|
188
|
+
|
|
189
|
+
# Close connection when done
|
|
190
|
+
dg_connection.finish()
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
## Text-to-Speech
|
|
194
|
+
|
|
195
|
+
### REST API (Batch Conversion)
|
|
196
|
+
|
|
197
|
+
For converting text to speech in batch mode:
|
|
198
|
+
|
|
199
|
+
```python
|
|
200
|
+
from deepgram import DeepgramClient, SpeakOptions
|
|
201
|
+
|
|
202
|
+
# Create client
|
|
203
|
+
client = DeepgramClient()
|
|
204
|
+
|
|
205
|
+
# Configure TTS options
|
|
206
|
+
options = SpeakOptions(
|
|
207
|
+
model="aura-2-thalia-en",
|
|
208
|
+
encoding="linear16",
|
|
209
|
+
sample_rate=24000
|
|
210
|
+
)
|
|
211
|
+
|
|
212
|
+
# Input text
|
|
213
|
+
input_text = {"text": "Hello, world."}
|
|
214
|
+
|
|
215
|
+
# Convert text to speech and store in memory
|
|
216
|
+
response = client.speak.rest.v("1").stream_memory(input_text, options)
|
|
217
|
+
|
|
218
|
+
# Access the audio data
|
|
219
|
+
audio_data = response.stream_memory.getbuffer()
|
|
220
|
+
|
|
221
|
+
# Save to a file
|
|
222
|
+
with open("output.wav", "wb") as file:
|
|
223
|
+
file.write(audio_data)
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
### REST API (Save to File)
|
|
227
|
+
|
|
228
|
+
```python
|
|
229
|
+
from deepgram import DeepgramClient, SpeakOptions
|
|
230
|
+
|
|
231
|
+
client = DeepgramClient()
|
|
232
|
+
|
|
233
|
+
options = SpeakOptions(
|
|
234
|
+
model="aura-2-thalia-en",
|
|
235
|
+
encoding="linear16",
|
|
236
|
+
sample_rate=24000
|
|
237
|
+
)
|
|
238
|
+
|
|
239
|
+
input_text = {"text": "Hello, world."}
|
|
240
|
+
|
|
241
|
+
# Convert text to speech and save directly to file
|
|
242
|
+
response = client.speak.rest.v("1").save("output.wav", input_text, options)
|
|
243
|
+
```
|
|
244
|
+
|
|
245
|
+
### Asynchronous Text-to-Speech
|
|
246
|
+
|
|
247
|
+
```python
|
|
248
|
+
import asyncio
|
|
249
|
+
from deepgram import DeepgramClient, SpeakOptions
|
|
250
|
+
|
|
251
|
+
async def main():
|
|
252
|
+
client = DeepgramClient()
|
|
253
|
+
|
|
254
|
+
options = SpeakOptions(
|
|
255
|
+
model="aura-2-thalia-en",
|
|
256
|
+
encoding="linear16",
|
|
257
|
+
sample_rate=24000
|
|
258
|
+
)
|
|
259
|
+
|
|
260
|
+
input_text = {"text": "Hello, world."}
|
|
261
|
+
|
|
262
|
+
response = await client.speak.asyncrest.v("1").stream_memory(input_text, options)
|
|
263
|
+
|
|
264
|
+
audio_data = response.stream_memory.getbuffer()
|
|
265
|
+
|
|
266
|
+
with open("output.wav", "wb") as file:
|
|
267
|
+
file.write(audio_data)
|
|
268
|
+
|
|
269
|
+
asyncio.run(main())
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+
### WebSocket API (Streaming TTS)
|
|
273
|
+
|
|
274
|
+
For real-time streaming text-to-speech:
|
|
275
|
+
|
|
276
|
+
```python
|
|
277
|
+
from deepgram import DeepgramClient, SpeakWebSocketEvents, SpeakWSOptions
|
|
278
|
+
|
|
279
|
+
# Create client
|
|
280
|
+
client = DeepgramClient()
|
|
281
|
+
|
|
282
|
+
# Create websocket connection
|
|
283
|
+
dg_connection = client.speak.websocket.v("1")
|
|
284
|
+
|
|
285
|
+
def on_open(self, open, **kwargs):
|
|
286
|
+
print(f"Connection opened: {open}")
|
|
287
|
+
|
|
288
|
+
def on_binary_data(self, data, **kwargs):
|
|
289
|
+
print("Received audio data")
|
|
290
|
+
# Process audio data here
|
|
291
|
+
with open("output.wav", "ab") as f:
|
|
292
|
+
f.write(data)
|
|
293
|
+
|
|
294
|
+
def on_close(self, close, **kwargs):
|
|
295
|
+
print(f"Connection closed: {close}")
|
|
296
|
+
|
|
297
|
+
# Register event handlers
|
|
298
|
+
dg_connection.on(SpeakWebSocketEvents.Open, on_open)
|
|
299
|
+
dg_connection.on(SpeakWebSocketEvents.AudioData, on_binary_data)
|
|
300
|
+
dg_connection.on(SpeakWebSocketEvents.Close, on_close)
|
|
301
|
+
|
|
302
|
+
# Configure TTS options
|
|
303
|
+
options = SpeakWSOptions(
|
|
304
|
+
model="aura-2-thalia-en",
|
|
305
|
+
encoding="linear16",
|
|
306
|
+
sample_rate=16000
|
|
307
|
+
)
|
|
308
|
+
|
|
309
|
+
# Start the connection
|
|
310
|
+
if dg_connection.start(options) is False:
|
|
311
|
+
print("Failed to start connection")
|
|
312
|
+
exit()
|
|
313
|
+
|
|
314
|
+
# Send text to be converted to speech
|
|
315
|
+
dg_connection.send_text("Hello, this is a text to speech example using Deepgram.")
|
|
316
|
+
|
|
317
|
+
# Flush the connection
|
|
318
|
+
dg_connection.flush()
|
|
319
|
+
|
|
320
|
+
# Wait for processing to complete
|
|
321
|
+
dg_connection.wait_for_complete()
|
|
322
|
+
|
|
323
|
+
# Close the connection
|
|
324
|
+
dg_connection.finish()
|
|
325
|
+
```
|
|
326
|
+
|
|
327
|
+
## Text Intelligence
|
|
328
|
+
|
|
329
|
+
Analyze text for insights and intelligence:
|
|
330
|
+
|
|
331
|
+
```python
|
|
332
|
+
from deepgram import DeepgramClient, ReadOptions
|
|
333
|
+
|
|
334
|
+
client = DeepgramClient()
|
|
335
|
+
|
|
336
|
+
# Configure read options
|
|
337
|
+
options = ReadOptions(
|
|
338
|
+
model="nova-3",
|
|
339
|
+
language="en"
|
|
340
|
+
)
|
|
341
|
+
|
|
342
|
+
# Process text for intelligence
|
|
343
|
+
response = client.read.rest.v("1").process(
|
|
344
|
+
text="The quick brown fox jumps over the lazy dog.",
|
|
345
|
+
options=options
|
|
346
|
+
)
|
|
347
|
+
|
|
348
|
+
print(response.results)
|
|
349
|
+
```
|
|
350
|
+
|
|
351
|
+
## Voice Agent
|
|
352
|
+
|
|
353
|
+
Configure a Voice Agent for conversational AI:
|
|
354
|
+
|
|
355
|
+
```python
|
|
356
|
+
from deepgram import DeepgramClient, SettingsOptions
|
|
357
|
+
|
|
358
|
+
client = DeepgramClient()
|
|
359
|
+
|
|
360
|
+
# Create websocket connection
|
|
361
|
+
connection = client.agent.websocket.v("1")
|
|
362
|
+
|
|
363
|
+
# Configure agent settings
|
|
364
|
+
options = SettingsOptions()
|
|
365
|
+
options.language = "en"
|
|
366
|
+
options.agent.think.provider.type = "open_ai"
|
|
367
|
+
options.agent.think.provider.model = "gpt-4o-mini"
|
|
368
|
+
options.agent.think.prompt = "You are a helpful AI assistant."
|
|
369
|
+
options.agent.listen.provider.type = "deepgram"
|
|
370
|
+
options.agent.listen.provider.model = "nova-3"
|
|
371
|
+
options.agent.speak.provider.type = "deepgram"
|
|
372
|
+
options.agent.speak.provider.model = "aura-2-thalia-en"
|
|
373
|
+
|
|
374
|
+
options.greeting = "Hello, I'm your AI assistant."
|
|
375
|
+
|
|
376
|
+
# Start the connection
|
|
377
|
+
connection.start(options)
|
|
378
|
+
|
|
379
|
+
# Close the connection
|
|
380
|
+
connection.finish()
|
|
381
|
+
```
|
|
382
|
+
|
|
383
|
+
## Captions Generation
|
|
384
|
+
|
|
385
|
+
Convert transcription results to captions:
|
|
386
|
+
|
|
387
|
+
### WebVTT
|
|
388
|
+
```python
|
|
389
|
+
from deepgram_captions import DeepgramConverter, webvtt
|
|
390
|
+
|
|
391
|
+
transcription = DeepgramConverter(dg_response)
|
|
392
|
+
captions = webvtt(transcription)
|
|
393
|
+
```
|
|
394
|
+
|
|
395
|
+
### SRT
|
|
396
|
+
```python
|
|
397
|
+
from deepgram_captions import DeepgramConverter, srt
|
|
398
|
+
|
|
399
|
+
transcription = DeepgramConverter(dg_response)
|
|
400
|
+
captions = srt(transcription)
|
|
401
|
+
```
|
|
402
|
+
|
|
403
|
+
## Error Handling
|
|
404
|
+
|
|
405
|
+
Always implement proper error handling:
|
|
406
|
+
|
|
407
|
+
```python
|
|
408
|
+
from deepgram import DeepgramClient
|
|
409
|
+
from deepgram.errors import DeepgramError
|
|
410
|
+
|
|
411
|
+
try:
|
|
412
|
+
client = DeepgramClient()
|
|
413
|
+
response = client.listen.rest.v("1").transcribe_url(url_source, options)
|
|
414
|
+
print(response.results.channels[0].alternatives[0].transcript)
|
|
415
|
+
except DeepgramError as e:
|
|
416
|
+
print(f"Deepgram Error: {e}")
|
|
417
|
+
except Exception as e:
|
|
418
|
+
print(f"Unexpected error: {e}")
|
|
419
|
+
```
|
|
420
|
+
|
|
421
|
+
## Advanced Configuration
|
|
422
|
+
|
|
423
|
+
### Client Options
|
|
424
|
+
|
|
425
|
+
Configure advanced client settings:
|
|
426
|
+
|
|
427
|
+
```python
|
|
428
|
+
from deepgram import DeepgramClient, DeepgramClientOptions
|
|
429
|
+
from deepgram.utils import verboselogs
|
|
430
|
+
|
|
431
|
+
config = DeepgramClientOptions(
|
|
432
|
+
options={
|
|
433
|
+
"auto_flush_speak_delta": "500", # Auto-flush after 500ms
|
|
434
|
+
"speaker_playback": "true" # Auto-play audio
|
|
435
|
+
},
|
|
436
|
+
verbose=verboselogs.DEBUG
|
|
437
|
+
)
|
|
438
|
+
|
|
439
|
+
client = DeepgramClient("", config)
|
|
440
|
+
```
|
|
441
|
+
|
|
442
|
+
### Integration Example
|
|
443
|
+
|
|
444
|
+
Combine Speech-to-Text and Text-to-Speech:
|
|
445
|
+
|
|
446
|
+
```python
|
|
447
|
+
from deepgram import DeepgramClient, SpeakOptions, PrerecordedOptions
|
|
448
|
+
|
|
449
|
+
client = DeepgramClient()
|
|
450
|
+
|
|
451
|
+
# Text to convert to speech
|
|
452
|
+
text = "Hello, world."
|
|
453
|
+
|
|
454
|
+
# Configure TTS options
|
|
455
|
+
tts_options = SpeakOptions(
|
|
456
|
+
model="aura-2-thalia-en",
|
|
457
|
+
encoding="linear16",
|
|
458
|
+
sample_rate=24000
|
|
459
|
+
)
|
|
460
|
+
|
|
461
|
+
# Convert text to speech
|
|
462
|
+
tts_response = client.speak.rest.v("1").stream_memory({"text": text}, tts_options)
|
|
463
|
+
|
|
464
|
+
# Save the audio to a buffer
|
|
465
|
+
audio_buffer = tts_response.stream_memory.getbuffer()
|
|
466
|
+
|
|
467
|
+
# Configure STT options
|
|
468
|
+
stt_options = PrerecordedOptions(
|
|
469
|
+
model="nova-3",
|
|
470
|
+
smart_format=True
|
|
471
|
+
)
|
|
472
|
+
|
|
473
|
+
# Transcribe the generated audio
|
|
474
|
+
stt_response = client.listen.rest.v("1").transcribe_file(
|
|
475
|
+
{"buffer": audio_buffer},
|
|
476
|
+
stt_options
|
|
477
|
+
)
|
|
478
|
+
|
|
479
|
+
# Verify the transcription matches the original text
|
|
480
|
+
transcript = stt_response.results.channels[0].alternatives[0].transcript
|
|
481
|
+
print(f"Original: {text}")
|
|
482
|
+
print(f"Transcribed: {transcript}")
|
|
483
|
+
```
|
|
484
|
+
|
|
485
|
+
## Requirements
|
|
486
|
+
|
|
487
|
+
- Python 3.10 or higher
|
|
488
|
+
- `deepgram-sdk` package
|
|
489
|
+
|
|
490
|
+
## Useful Links
|
|
491
|
+
|
|
492
|
+
- Documentation: https://developers.deepgram.com/docs
|
|
493
|
+
- Python SDK Repository: https://github.com/deepgram/deepgram-python-sdk
|
|
494
|
+
- API Reference: https://developers.deepgram.com/reference
|
|
495
|
+
- Discord Community: https://discord.gg/xWRaCDBtW4
|
|
496
|
+
|
|
497
|
+
## Notes
|
|
498
|
+
|
|
499
|
+
This SDK provides comprehensive support for all Deepgram APIs including Speech-to-Text (both pre-recorded and streaming), Text-to-Speech (both REST and WebSocket), Text Intelligence, and Voice Agent functionality. <cite/> The SDK follows semantic versioning and maintains backward compatibility within major versions. For development and testing, the repository includes both daily tests (against real API endpoints) and unit tests (against mock endpoints).
|
|
500
|
+
|
|
501
|
+
Wiki pages you might want to explore:
|
|
502
|
+
- [Text-to-Speech API (deepgram/deepgram-python-sdk)](/wiki/deepgram/deepgram-python-sdk#3)
|
|
503
|
+
|
|
504
|
+
### Citations
|
|
505
|
+
|
|
506
|
+
## Requirements
|
|
507
|
+
|
|
508
|
+
[Python](https://www.python.org/downloads/) (version ^3.10)
|
|
509
|
+
|
|
510
|
+
```python
|
|
511
|
+
from deepgram_captions import DeepgramConverter, webvtt
|
|
512
|
+
|
|
513
|
+
transcription = DeepgramConverter(dg_response)
|
|
514
|
+
captions = webvtt(transcription)
|
|
515
|
+
```
|
|
516
|
+
|
|
517
|
+
### SRT
|
|
518
|
+
|
|
519
|
+
```python
|
|
520
|
+
from deepgram_captions import DeepgramConverter, srt
|
|
521
|
+
|
|
522
|
+
transcription = DeepgramConverter(dg_response)
|
|
523
|
+
captions = srt(transcription)
|
|
524
|
+
```
|
|
525
|
+
|
|
526
|
+
[See our standalone captions library for more information.](https://github.com/deepgram/deepgram-python-captions)
|
|
527
|
+
```
|
|
528
|
+
|
|
529
|
+
```python
|
|
530
|
+
from deepgram import (
|
|
531
|
+
SettingsOptions
|
|
532
|
+
)
|
|
533
|
+
|
|
534
|
+
# Create websocket connection
|
|
535
|
+
connection = deepgram.agent.websocket.v("1")
|
|
536
|
+
|
|
537
|
+
# Configure agent settings
|
|
538
|
+
options = SettingsOptions()
|
|
539
|
+
options.language = "en"
|
|
540
|
+
options.agent.think.provider.type = "open_ai"
|
|
541
|
+
options.agent.think.provider.model = "gpt-4o-mini"
|
|
542
|
+
options.agent.think.prompt = "You are a helpful AI assistant."
|
|
543
|
+
options.agent.listen.provider.type = "deepgram"
|
|
544
|
+
options.agent.listen.provider.model = "nova-3"
|
|
545
|
+
options.agent.speak.provider.type = "deepgram"
|
|
546
|
+
options.agent.speak.provider.model = "aura-2-thalia-en"
|
|
547
|
+
|
|
548
|
+
options.greeting = "Hello, I'm your AI assistant."
|
|
549
|
+
|
|
550
|
+
# Start the connection
|
|
551
|
+
connection.start(options)
|
|
552
|
+
|
|
553
|
+
# Close the connection
|
|
554
|
+
connection.finish()
|
|
555
|
+
```
|
|
556
|
+
```
|
|
557
|
+
|
|
558
|
+
```python
|
|
559
|
+
from deepgram import ReadOptions
|
|
560
|
+
|
|
561
|
+
# Configure read options
|
|
562
|
+
options = ReadOptions(
|
|
563
|
+
model="nova-3",
|
|
564
|
+
language="en"
|
|
565
|
+
)
|
|
566
|
+
|
|
567
|
+
# Process text for intelligence
|
|
568
|
+
response = deepgram.read.rest.v("1").process(
|
|
569
|
+
text="The quick brown fox jumps over the lazy dog.",
|
|
570
|
+
options=options
|
|
571
|
+
)
|
|
572
|
+
```
|
|
573
|
+
```
|
|
574
|
+
|
|
575
|
+
The Deepgram Python SDK supports multiple authentication methods to provide flexibility and enhanced security for your applications.
|
|
576
|
+
|
|
577
|
+
### Authentication Methods
|
|
578
|
+
|
|
579
|
+
#### API Key Authentication (Traditional)
|
|
580
|
+
|
|
581
|
+
The traditional method using your Deepgram API key:
|
|
582
|
+
|
|
583
|
+
```python
|
|
584
|
+
from deepgram import DeepgramClient
|
|
585
|
+
|
|
586
|
+
# Direct API key
|
|
587
|
+
client = DeepgramClient(api_key="YOUR_API_KEY")
|
|
588
|
+
|
|
589
|
+
# Or using environment variable DEEPGRAM_API_KEY
|
|
590
|
+
client = DeepgramClient() # Auto-detects from environment
|
|
591
|
+
```
|
|
592
|
+
|
|
593
|
+
#### Bearer Token Authentication (OAuth 2.0)
|
|
594
|
+
|
|
595
|
+
Use short-lived access tokens for enhanced security:
|
|
596
|
+
|
|
597
|
+
```python
|
|
598
|
+
from deepgram import DeepgramClient
|
|
599
|
+
|
|
600
|
+
# Direct access token
|
|
601
|
+
client = DeepgramClient(access_token="YOUR_ACCESS_TOKEN")
|
|
602
|
+
|
|
603
|
+
# Or using environment variable DEEPGRAM_ACCESS_TOKEN
|
|
604
|
+
client = DeepgramClient() # Auto-detects from environment
|
|
605
|
+
```
|
|
606
|
+
```
|
|
607
|
+
|
|
608
|
+
If you are looking to use, run, contribute or modify to the daily/unit tests, then you need to install the following dependencies:
|
|
609
|
+
|
|
610
|
+
```bash
|
|
611
|
+
pip install -r requirements-dev.txt
|
|
612
|
+
```
|
|
613
|
+
|
|
614
|
+
### Daily Tests
|
|
615
|
+
|
|
616
|
+
The daily tests invoke a series of checks against the actual/real API endpoint and save the results in the `tests/response_data` folder. This response data is updated nightly to reflect the latest response from the server. Running the daily tests does require a `DEEPGRAM_API_KEY` set in your environment variables.
|
|
617
|
+
|
|
618
|
+
To run the Daily Tests:
|
|
619
|
+
|
|
620
|
+
```bash
|
|
621
|
+
make daily-test
|
|
622
|
+
```
|
|
623
|
+
|
|
624
|
+
#### Unit Tests
|
|
625
|
+
|
|
626
|
+
The unit tests invoke a series of checks against mock endpoints using the responses saved in `tests/response_data` from the daily tests. These tests are meant to simulate running against the endpoint without actually reaching out to the endpoint; running the unit tests does require a `DEEPGRAM_API_KEY` set in your environment variables, but you will not actually reach out to the server.
|
|
627
|
+
|
|
628
|
+
```bash
|
|
629
|
+
make unit-test
|
|
630
|
+
```
|
|
631
|
+
```
|
|
632
|
+
|
|
633
|
+
## Backwards Compatibility
|
|
634
|
+
|
|
635
|
+
We follow semantic versioning (semver) to ensure a smooth upgrade experience. Within a major version (like `4.*`), we will maintain backward compatibility so your code will continue to work without breaking changes. When we release a new major version (like moving from `3.*` to `4.*`), we may introduce breaking changes to improve the SDK. We'll always document these changes clearly in our release notes to help you upgrade smoothly.
|
|
636
|
+
|
|
637
|
+
Older SDK versions will receive Priority 1 (P1) bug support only. Security issues, both in our code and dependencies, are promptly addressed. Significant bugs without clear workarounds are also given priority attention.
|
|
638
|
+
```
|
|
639
|
+
|
|
640
|
+
```python
|
|
641
|
+
from deepgram import DeepgramClient, SpeakOptions, PrerecordedOptions, FileSource
|
|
642
|
+
|
|
643
|
+
from tests.utils import save_metadata_string
|
|
644
|
+
|
|
645
|
+
TTS_MODEL = "aura-2-thalia-en"
|
|
646
|
+
STT_MODEL = "general-nova-3"
|
|
647
|
+
|
|
648
|
+
# response constants
|
|
649
|
+
TEXT1 = "Hello, world."
|
|
650
|
+
|
|
651
|
+
# Create a list of tuples to store the key-value pairs
|
|
652
|
+
input_output = [
|
|
653
|
+
(
|
|
654
|
+
TEXT1,
|
|
655
|
+
SpeakOptions(model=TTS_MODEL, encoding="linear16", sample_rate=24000),
|
|
656
|
+
PrerecordedOptions(model="nova-3", smart_format=True),
|
|
657
|
+
{"results.channels.0.alternatives.0.transcript": [TEXT1]},
|
|
658
|
+
),
|
|
659
|
+
]
|
|
660
|
+
```
|
|
661
|
+
|
|
662
|
+
```python
|
|
663
|
+
from deepgram import DeepgramClient, SpeakOptions, PrerecordedOptions, FileSource
|
|
664
|
+
|
|
665
|
+
from tests.utils import read_metadata_string, save_metadata_string
|
|
666
|
+
|
|
667
|
+
MODEL = "aura-2-thalia-en"
|
|
668
|
+
|
|
669
|
+
# response constants
|
|
670
|
+
TEXT1 = "Hello, world."
|
|
671
|
+
|
|
672
|
+
# Create a list of tuples to store the key-value pairs
|
|
673
|
+
input_output = [
|
|
674
|
+
(
|
|
675
|
+
TEXT1,
|
|
676
|
+
SpeakOptions(model=MODEL, encoding="linear16", sample_rate=24000),
|
|
677
|
+
{
|
|
678
|
+
"content_type": ["audio/wav"],
|
|
679
|
+
"model_name": ["aura-2-thalia-en"],
|
|
680
|
+
"characters": ["13"],
|
|
681
|
+
},
|
|
682
|
+
),
|
|
683
|
+
]
|
|
684
|
+
```
|
|
685
|
+
cture
|