deepctl-cmd-listen 0.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,68 @@
1
+ Metadata-Version: 2.4
2
+ Name: deepctl-cmd-listen
3
+ Version: 0.0.2
4
+ Summary: Listen (live speech-to-text) command for deepctl
5
+ Author-email: Deepgram <devrel@deepgram.com>
6
+ Maintainer-email: Deepgram <devrel@deepgram.com>
7
+ License-Expression: MIT
8
+ Keywords: deepgram,cli,stt,live,streaming,listen
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Requires-Python: >=3.10
16
+ Description-Content-Type: text/markdown
17
+ Requires-Dist: deepctl-core>=0.1.10
18
+ Requires-Dist: click>=8.0.0
19
+ Requires-Dist: rich>=13.0.0
20
+ Requires-Dist: pydantic>=2.0.0
21
+ Requires-Dist: websockets>=11.0
22
+ Provides-Extra: mic
23
+ Requires-Dist: sounddevice>=0.4.6; extra == "mic"
24
+ Requires-Dist: numpy>=1.24.0; extra == "mic"
25
+
26
+ # deepctl-cmd-listen
27
+
28
+ > Part of [deepctl](https://github.com/deepgram/cli) — Official Deepgram CLI
29
+
30
+ Listen (live speech-to-text) command for deepctl
31
+
32
+ ## Installation
33
+
34
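+ This package is included with deepctl and does not need to be installed separately. Microphone input (`--mic`) additionally requires the optional `mic` extra: `pip install 'deepctl-cmd-listen[mic]'`.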
35
+
36
+ ### Install deepctl
37
+
38
+ ```bash
39
+ # Install with pip
40
+ pip install deepctl
41
+
42
+ # Or install with uv
43
+ uv tool install deepctl
44
+
45
+ # Or install with pipx
46
+ pipx install deepctl
47
+
48
+ # Or run without installing
49
+ uvx deepctl --help
50
+ pipx run deepctl --help
51
+ ```
52
+
53
+ ## Commands
54
+
55
+ | Command | Entry Point |
56
+ |---------|-------------|
57
+ | `deepctl listen` | `deepctl_cmd_listen.command:ListenCommand` |
58
+
59
+ ## Dependencies
60
+
61
+ - `click>=8.0.0`
62
+ - `rich>=13.0.0`
63
+ - `pydantic>=2.0.0`
64
+ - `websockets>=11.0`
65
+
66
+ ## License
67
+
68
+ MIT — see [LICENSE](../../LICENSE)
@@ -0,0 +1,43 @@
1
+ # deepctl-cmd-listen
2
+
3
+ > Part of [deepctl](https://github.com/deepgram/cli) — Official Deepgram CLI
4
+
5
+ Listen (live speech-to-text) command for deepctl
6
+
7
+ ## Installation
8
+
9
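+ This package is included with deepctl and does not need to be installed separately. Microphone input (`--mic`) additionally requires the optional `mic` extra: `pip install 'deepctl-cmd-listen[mic]'`.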
10
+
11
+ ### Install deepctl
12
+
13
+ ```bash
14
+ # Install with pip
15
+ pip install deepctl
16
+
17
+ # Or install with uv
18
+ uv tool install deepctl
19
+
20
+ # Or install with pipx
21
+ pipx install deepctl
22
+
23
+ # Or run without installing
24
+ uvx deepctl --help
25
+ pipx run deepctl --help
26
+ ```
27
+
28
+ ## Commands
29
+
30
+ | Command | Entry Point |
31
+ |---------|-------------|
32
+ | `deepctl listen` | `deepctl_cmd_listen.command:ListenCommand` |
33
+
34
+ ## Dependencies
35
+
36
+ - `click>=8.0.0`
37
+ - `rich>=13.0.0`
38
+ - `pydantic>=2.0.0`
39
+ - `websockets>=11.0`
40
+
41
+ ## License
42
+
43
+ MIT — see [LICENSE](../../LICENSE)
@@ -0,0 +1,44 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "deepctl-cmd-listen"
7
+ version = "0.0.2" # x-release-please-version
8
+ description = "Listen (live speech-to-text) command for deepctl"
9
+ readme = "README.md"
10
+ license = "MIT"
11
+ authors = [{ name = "Deepgram", email = "devrel@deepgram.com" }]
12
+ maintainers = [{ name = "Deepgram", email = "devrel@deepgram.com" }]
13
+ classifiers = [
14
+ "Development Status :: 3 - Alpha",
15
+ "Intended Audience :: Developers",
16
+ "Programming Language :: Python :: 3",
17
+ "Programming Language :: Python :: 3.10",
18
+ "Programming Language :: Python :: 3.11",
19
+ "Programming Language :: Python :: 3.12",
20
+ ]
21
+ keywords = ["deepgram", "cli", "stt", "live", "streaming", "listen"]
22
+ requires-python = ">=3.10"
23
+ dependencies = [
24
+ "deepctl-core>=0.1.10",
25
+ "click>=8.0.0",
26
+ "rich>=13.0.0",
27
+ "pydantic>=2.0.0",
28
+ "websockets>=11.0",
29
+ ]
30
+
31
+ [project.optional-dependencies]
32
+ mic = ["sounddevice>=0.4.6", "numpy>=1.24.0"]
33
+
34
+ [project.scripts]
35
+
36
+ [project.entry-points."deepctl.commands"]
37
+ listen = "deepctl_cmd_listen.command:ListenCommand"
38
+
39
+ [tool.setuptools]
40
+ package-dir = { "" = "src" }
41
+
42
+ [tool.setuptools.packages.find]
43
+ where = ["src"]
44
+ include = ["deepctl_cmd_listen*"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1 @@
1
+ """Listen (live speech-to-text) command for deepctl."""
@@ -0,0 +1,401 @@
1
+ """Listen (live speech-to-text) command for deepctl."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import json
7
+ import sys
8
+ import threading
9
+ from typing import Any
10
+
11
+ from deepctl_core import (
12
+ AuthManager,
13
+ BaseCommand,
14
+ BaseResult,
15
+ Config,
16
+ DeepgramClient,
17
+ )
18
+ from rich.console import Console
19
+
20
+ from .models import ListenResult
21
+
22
# Status messages and warnings are written to stderr so that stdout carries
# only transcript text and stays safe to pipe into other tools.
console = Console(stderr=True)
23
+
24
+
25
class ListenCommand(BaseCommand):
    """Command for live speech-to-text transcription.

    Audio is taken either from the microphone (``--mic``) or from raw bytes
    piped on stdin and streamed to Deepgram's live ``/v1/listen`` WebSocket
    endpoint. Final transcripts are printed to stdout; status messages and
    warnings go to the module-level ``console`` (stderr).
    """

    name = "listen"
    help = "Live speech-to-text transcription"
    short_help = "Live transcription"

    requires_auth = True
    requires_project = False
    ci_friendly = True

    examples = [
        "dg listen --mic",
        "dg listen --mic --model nova-3 --language en-US",
        "dg listen --mic --interim",
        "cat audio.raw | dg listen --encoding linear16 --sample-rate 16000",
        "ffmpeg -i audio.mp3 -f s16le -ar 16000 -ac 1 - | dg listen --encoding linear16 --sample-rate 16000",
    ]
    agent_help = (
        "Live speech-to-text transcription using Deepgram's streaming API. "
        "Use --mic for microphone input (requires sounddevice package) "
        "or pipe raw audio via stdin. Transcripts are printed to stdout."
    )

    def get_arguments(self) -> list[dict[str, Any]]:
        """Return the option specifications for the ``listen`` command."""
        # NOTE(review): --punctuate and --smart-format are plain flags whose
        # default is already True, so passing them cannot change the value and
        # there is no visible way to switch them off. Confirm how deepctl_core
        # turns these specs into click options before changing the interface.
        return [
            {
                "names": ["--mic"],
                "help": "Use microphone input (requires 'pip install deepctl-cmd-listen[mic]')",
                "is_flag": True,
                "is_option": True,
            },
            {
                "names": ["--model", "-m"],
                "help": "STT model (default: nova-3)",
                "type": str,
                "is_option": True,
                "default": "nova-3",
            },
            {
                "names": ["--language", "-l"],
                "help": "Language code (default: en-US)",
                "type": str,
                "is_option": True,
                "default": "en-US",
            },
            {
                "names": ["--encoding"],
                "help": "Audio encoding for stdin input (e.g., linear16, mulaw)",
                "type": str,
                "is_option": True,
            },
            {
                "names": ["--sample-rate"],
                "help": "Audio sample rate in Hz for stdin input (default: 16000)",
                "type": int,
                "is_option": True,
                "default": 16000,
            },
            {
                "names": ["--channels"],
                "help": "Number of audio channels (default: 1)",
                "type": int,
                "is_option": True,
                "default": 1,
            },
            {
                "names": ["--interim"],
                "help": "Show interim (partial) results",
                "is_flag": True,
                "is_option": True,
            },
            {
                "names": ["--punctuate"],
                "help": "Enable punctuation (default: true)",
                "is_flag": True,
                "is_option": True,
                "default": True,
            },
            {
                "names": ["--smart-format"],
                "help": "Enable smart formatting (default: true)",
                "is_flag": True,
                "is_option": True,
                "default": True,
            },
        ]

    def handle(
        self,
        config: Config,
        auth_manager: AuthManager,
        client: DeepgramClient,
        **kwargs: Any,
    ) -> BaseResult:
        """Pick the audio source and run the transcription session.

        ``--mic`` takes precedence. Otherwise stdin is used when it is not a
        TTY. With neither source available an error result is returned.
        """
        use_mic = kwargs.get("mic", False)
        model = kwargs.get("model") or "nova-3"
        language = kwargs.get("language") or "en-US"
        encoding = kwargs.get("encoding")
        sample_rate = kwargs.get("sample_rate") or 16000
        channels = kwargs.get("channels") or 1
        interim = kwargs.get("interim", False)
        punctuate = kwargs.get("punctuate", True)
        smart_format = kwargs.get("smart_format", True)

        if use_mic:
            return self._listen_mic(
                client,
                model,
                language,
                sample_rate,
                channels,
                interim,
                punctuate,
                smart_format,
            )
        elif not sys.stdin.isatty():
            return self._listen_stdin(
                client,
                model,
                language,
                encoding,
                sample_rate,
                channels,
                interim,
                punctuate,
                smart_format,
            )
        else:
            return BaseResult(
                status="error",
                message=(
                    "No audio source. Use --mic for microphone or pipe audio via stdin.\n"
                    "  Example: dg listen --mic\n"
                    "  Example: cat audio.raw | dg listen --encoding linear16 --sample-rate 16000"
                ),
            )

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _build_url(
        *,
        model: str,
        language: str,
        encoding: str,
        sample_rate: int,
        channels: int,
        interim: bool,
        punctuate: bool,
        smart_format: bool,
    ) -> str:
        """Build the live-transcription WebSocket URL with an encoded query."""
        from urllib.parse import urlencode

        # Values such as --model/--language/--encoding come straight from the
        # command line, so they are percent-encoded instead of interpolated.
        params: dict[str, Any] = {
            "model": model,
            "language": language,
            "punctuate": str(punctuate).lower(),
            "smart_format": str(smart_format).lower(),
            "encoding": encoding,
            "sample_rate": sample_rate,
            "channels": channels,
        }
        if interim:
            params["interim_results"] = "true"
        return f"wss://api.deepgram.com/v1/listen?{urlencode(params)}"

    @staticmethod
    def _connect(url: str, api_key: str) -> Any:
        """Return a ``websockets`` connection context manager for ``url``.

        The header keyword differs between client implementations: the
        asyncio client takes ``additional_headers`` while the legacy client
        (what ``websockets.connect`` resolves to on older releases covered by
        the declared ``websockets>=11.0`` requirement) takes ``extra_headers``.
        """
        import inspect

        import websockets

        headers = {"Authorization": f"Token {api_key}"}
        try:
            accepted = inspect.signature(websockets.connect).parameters
        except (TypeError, ValueError):
            # Signature not introspectable: assume the current client API.
            accepted = {"additional_headers": None}
        header_kwarg = (
            "additional_headers" if "additional_headers" in accepted else "extra_headers"
        )
        return websockets.connect(url, **{header_kwarg: headers})

    @staticmethod
    async def _send_close_stream(ws: Any) -> None:
        """Tell the server no more audio is coming; tolerate a closed socket."""
        from websockets.exceptions import ConnectionClosed

        try:
            await ws.send(json.dumps({"type": "CloseStream"}))
        except ConnectionClosed:
            # The server already hung up, so there is nothing left to close.
            pass

    @staticmethod
    def _parse_result(raw: str | bytes) -> tuple[str, bool] | None:
        """Extract ``(transcript, is_final)`` from one server message.

        Returns ``None`` for anything that is not a ``Results`` message with
        a non-empty first-alternative transcript, including malformed JSON.
        """
        try:
            data = json.loads(raw)
        except ValueError:
            return None
        if not isinstance(data, dict) or data.get("type") != "Results":
            return None
        channel = data.get("channel")
        if not isinstance(channel, dict):
            return None
        alternatives = channel.get("alternatives")
        if not alternatives or not isinstance(alternatives[0], dict):
            return None
        transcript = alternatives[0].get("transcript", "")
        if not transcript:
            return None
        return transcript, bool(data.get("is_final", False))

    async def _receive_transcripts(
        self, ws: Any, interim: bool, full_transcript: list[str]
    ) -> None:
        """Print transcripts from ``ws`` until the connection ends.

        Final transcripts are appended to ``full_transcript`` and printed on
        their own line. When ``interim`` is set, partial results are drawn in
        place with a leading carriage return and erased before the final text
        is printed, so the final line is never glued onto a stale partial.
        """
        from rich.markup import escape
        from websockets.exceptions import ConnectionClosed

        # Width of the partial currently shown on the terminal line.
        # len() is used as the width, so wide or wrapped text may leave residue.
        interim_width = 0
        try:
            async for raw in ws:
                parsed = self._parse_result(raw)
                if parsed is None:
                    continue
                transcript, is_final = parsed
                if is_final:
                    full_transcript.append(transcript)
                    if interim_width:
                        print("\r" + " " * interim_width + "\r", end="")
                        interim_width = 0
                    print(transcript, flush=True)
                elif interim:
                    # Pad to the previous width so a shorter partial fully
                    # overwrites a longer one.
                    print("\r" + transcript.ljust(interim_width), end="", flush=True)
                    interim_width = len(transcript)
        except ConnectionClosed as exc:
            console.print(f"[yellow]Connection closed: {escape(str(exc))}[/yellow]")
        except Exception as exc:
            # Receiving stays best-effort, but failures are no longer silent.
            console.print(
                f"[red]Error while receiving transcripts: {escape(str(exc))}[/red]"
            )

    # ------------------------------------------------------------------
    # Microphone input
    # ------------------------------------------------------------------

    def _listen_mic(
        self,
        client: DeepgramClient,
        model: str,
        language: str,
        sample_rate: int,
        channels: int,
        interim: bool,
        punctuate: bool,
        smart_format: bool,
    ) -> BaseResult:
        """Run a microphone session, returning an error result if unsupported."""
        try:
            import numpy  # noqa: F401  (needed by _stream_mic)
            import sounddevice  # noqa: F401
        except ImportError:
            return BaseResult(
                status="error",
                message=(
                    "Microphone support requires sounddevice. "
                    "Install with: pip install 'deepctl-cmd-listen[mic]'"
                ),
            )
        except OSError as exc:
            # sounddevice raises OSError on import when PortAudio is missing.
            return BaseResult(
                status="error",
                message=f"Microphone support is unavailable: {exc}",
            )

        console.print("[blue]Listening from microphone... Press Ctrl+C to stop.[/blue]")

        try:
            return asyncio.run(
                self._stream_mic(
                    client,
                    model,
                    language,
                    sample_rate,
                    channels,
                    interim,
                    punctuate,
                    smart_format,
                )
            )
        except KeyboardInterrupt:
            console.print("\n[yellow]Stopped listening.[/yellow]")
            return ListenResult(status="success", source="mic")

    async def _stream_mic(
        self,
        client: DeepgramClient,
        model: str,
        language: str,
        sample_rate: int,
        channels: int,
        interim: bool,
        punctuate: bool,
        smart_format: bool,
    ) -> ListenResult:
        """Stream 16-bit microphone audio and collect the final transcripts."""
        import numpy as np
        import sounddevice as sd
        from websockets.exceptions import ConnectionClosed

        api_key = client.auth_manager.get_api_key()
        url = self._build_url(
            model=model,
            language=language,
            encoding="linear16",
            sample_rate=sample_rate,
            channels=channels,
            interim=interim,
            punctuate=punctuate,
            smart_format=smart_format,
        )

        full_transcript: list[str] = []

        async with self._connect(url, api_key) as ws:
            stop_event = threading.Event()

            async def send_audio() -> None:
                loop = asyncio.get_running_loop()
                chunks: asyncio.Queue[bytes] = asyncio.Queue()

                def audio_callback(
                    indata: Any, frames: int, time_info: Any, status: Any
                ) -> None:
                    if status:
                        console.print(f"[yellow]Audio: {status}[/yellow]")
                    # The callback is handed to sounddevice; hand the bytes
                    # over to the event loop in a thread-safe way.
                    loop.call_soon_threadsafe(chunks.put_nowait, bytes(indata))

                stream = sd.RawInputStream(
                    samplerate=sample_rate,
                    channels=channels,
                    dtype=np.int16,
                    callback=audio_callback,
                    blocksize=int(sample_rate * 0.1),
                )
                stream.start()

                try:
                    while not stop_event.is_set():
                        try:
                            data = await asyncio.wait_for(chunks.get(), timeout=0.5)
                        except asyncio.TimeoutError:
                            # Wake up regularly to re-check the stop flag.
                            continue
                        try:
                            await ws.send(data)
                        except ConnectionClosed:
                            break
                finally:
                    stream.stop()
                    stream.close()
                    await self._send_close_stream(ws)

            async def receive() -> None:
                try:
                    await self._receive_transcripts(ws, interim, full_transcript)
                finally:
                    # Once the server stops talking there is no point in
                    # capturing more audio.
                    stop_event.set()

            send_task = asyncio.create_task(send_audio())
            recv_task = asyncio.create_task(receive())

            try:
                await asyncio.gather(send_task, recv_task)
            except (KeyboardInterrupt, asyncio.CancelledError):
                # Ctrl+C is the normal way to end a microphone session.
                pass
            finally:
                stop_event.set()
                send_task.cancel()
                recv_task.cancel()
                # Let both tasks finish their cleanup before the socket closes.
                await asyncio.gather(send_task, recv_task, return_exceptions=True)

        return ListenResult(
            status="success",
            transcript=" ".join(full_transcript),
            source="mic",
        )

    # ------------------------------------------------------------------
    # Stdin input
    # ------------------------------------------------------------------

    def _listen_stdin(
        self,
        client: DeepgramClient,
        model: str,
        language: str,
        encoding: str | None,
        sample_rate: int,
        channels: int,
        interim: bool,
        punctuate: bool,
        smart_format: bool,
    ) -> BaseResult:
        """Run a stdin session, defaulting the encoding to ``linear16``."""
        if not encoding:
            console.print(
                "[yellow]Warning: No --encoding specified for stdin. "
                "Assuming linear16. Specify with --encoding.[/yellow]"
            )
            encoding = "linear16"

        console.print(
            "[blue]Reading audio from stdin... Send EOF (Ctrl+D) to finish.[/blue]"
        )

        try:
            return asyncio.run(
                self._stream_stdin(
                    client,
                    model,
                    language,
                    encoding,
                    sample_rate,
                    channels,
                    interim,
                    punctuate,
                    smart_format,
                )
            )
        except KeyboardInterrupt:
            console.print("\n[yellow]Stopped listening.[/yellow]")
            return ListenResult(status="success", source="stdin")

    async def _stream_stdin(
        self,
        client: DeepgramClient,
        model: str,
        language: str,
        encoding: str,
        sample_rate: int,
        channels: int,
        interim: bool,
        punctuate: bool,
        smart_format: bool,
    ) -> ListenResult:
        """Stream stdin bytes until EOF and collect the final transcripts."""
        from websockets.exceptions import ConnectionClosed

        api_key = client.auth_manager.get_api_key()
        url = self._build_url(
            model=model,
            language=language,
            encoding=encoding,
            sample_rate=sample_rate,
            channels=channels,
            interim=interim,
            punctuate=punctuate,
            smart_format=smart_format,
        )

        full_transcript: list[str] = []

        async with self._connect(url, api_key) as ws:

            async def send_audio() -> None:
                loop = asyncio.get_running_loop()
                try:
                    while True:
                        # Blocking read runs in the default executor so the
                        # receiver keeps running while stdin is idle.
                        data = await loop.run_in_executor(
                            None, sys.stdin.buffer.read, 4096
                        )
                        if not data:
                            break
                        await ws.send(data)
                except ConnectionClosed:
                    # Server hung up mid-stream; the receiver reports why.
                    return
                await self._send_close_stream(ws)

            await asyncio.gather(
                send_audio(),
                self._receive_transcripts(ws, interim, full_transcript),
            )

        return ListenResult(
            status="success",
            transcript=" ".join(full_transcript),
            source="stdin",
        )
@@ -0,0 +1,11 @@
1
+ """Models for listen command."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from deepctl_core import BaseResult
6
+
7
+
8
class ListenResult(BaseResult):
    # Result model for a listen session; extends BaseResult with three
    # defaulted fields.

    # Text carried by the result; empty when none was supplied.
    transcript: str = ""

    # Session length in seconds; defaults to 0.0.
    duration_seconds: float = 0.0

    # Audio source of the session: "mic" or "stdin".
    source: str = ""
@@ -0,0 +1,68 @@
1
+ Metadata-Version: 2.4
2
+ Name: deepctl-cmd-listen
3
+ Version: 0.0.2
4
+ Summary: Listen (live speech-to-text) command for deepctl
5
+ Author-email: Deepgram <devrel@deepgram.com>
6
+ Maintainer-email: Deepgram <devrel@deepgram.com>
7
+ License-Expression: MIT
8
+ Keywords: deepgram,cli,stt,live,streaming,listen
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Requires-Python: >=3.10
16
+ Description-Content-Type: text/markdown
17
+ Requires-Dist: deepctl-core>=0.1.10
18
+ Requires-Dist: click>=8.0.0
19
+ Requires-Dist: rich>=13.0.0
20
+ Requires-Dist: pydantic>=2.0.0
21
+ Requires-Dist: websockets>=11.0
22
+ Provides-Extra: mic
23
+ Requires-Dist: sounddevice>=0.4.6; extra == "mic"
24
+ Requires-Dist: numpy>=1.24.0; extra == "mic"
25
+
26
+ # deepctl-cmd-listen
27
+
28
+ > Part of [deepctl](https://github.com/deepgram/cli) — Official Deepgram CLI
29
+
30
+ Listen (live speech-to-text) command for deepctl
31
+
32
+ ## Installation
33
+
34
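+ This package is included with deepctl and does not need to be installed separately. Microphone input (`--mic`) additionally requires the optional `mic` extra: `pip install 'deepctl-cmd-listen[mic]'`.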
35
+
36
+ ### Install deepctl
37
+
38
+ ```bash
39
+ # Install with pip
40
+ pip install deepctl
41
+
42
+ # Or install with uv
43
+ uv tool install deepctl
44
+
45
+ # Or install with pipx
46
+ pipx install deepctl
47
+
48
+ # Or run without installing
49
+ uvx deepctl --help
50
+ pipx run deepctl --help
51
+ ```
52
+
53
+ ## Commands
54
+
55
+ | Command | Entry Point |
56
+ |---------|-------------|
57
+ | `deepctl listen` | `deepctl_cmd_listen.command:ListenCommand` |
58
+
59
+ ## Dependencies
60
+
61
+ - `click>=8.0.0`
62
+ - `rich>=13.0.0`
63
+ - `pydantic>=2.0.0`
64
+ - `websockets>=11.0`
65
+
66
+ ## License
67
+
68
+ MIT — see [LICENSE](../../LICENSE)
@@ -0,0 +1,11 @@
1
+ README.md
2
+ pyproject.toml
3
+ src/deepctl_cmd_listen/__init__.py
4
+ src/deepctl_cmd_listen/command.py
5
+ src/deepctl_cmd_listen/models.py
6
+ src/deepctl_cmd_listen.egg-info/PKG-INFO
7
+ src/deepctl_cmd_listen.egg-info/SOURCES.txt
8
+ src/deepctl_cmd_listen.egg-info/dependency_links.txt
9
+ src/deepctl_cmd_listen.egg-info/entry_points.txt
10
+ src/deepctl_cmd_listen.egg-info/requires.txt
11
+ src/deepctl_cmd_listen.egg-info/top_level.txt
@@ -0,0 +1,2 @@
1
+ [deepctl.commands]
2
+ listen = deepctl_cmd_listen.command:ListenCommand
@@ -0,0 +1,9 @@
1
+ deepctl-core>=0.1.10
2
+ click>=8.0.0
3
+ rich>=13.0.0
4
+ pydantic>=2.0.0
5
+ websockets>=11.0
6
+
7
+ [mic]
8
+ sounddevice>=0.4.6
9
+ numpy>=1.24.0
@@ -0,0 +1 @@
1
+ deepctl_cmd_listen