deepctl-cmd-listen 0.0.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepctl_cmd_listen-0.0.2/PKG-INFO +68 -0
- deepctl_cmd_listen-0.0.2/README.md +43 -0
- deepctl_cmd_listen-0.0.2/pyproject.toml +44 -0
- deepctl_cmd_listen-0.0.2/setup.cfg +4 -0
- deepctl_cmd_listen-0.0.2/src/deepctl_cmd_listen/__init__.py +1 -0
- deepctl_cmd_listen-0.0.2/src/deepctl_cmd_listen/command.py +401 -0
- deepctl_cmd_listen-0.0.2/src/deepctl_cmd_listen/models.py +11 -0
- deepctl_cmd_listen-0.0.2/src/deepctl_cmd_listen.egg-info/PKG-INFO +68 -0
- deepctl_cmd_listen-0.0.2/src/deepctl_cmd_listen.egg-info/SOURCES.txt +11 -0
- deepctl_cmd_listen-0.0.2/src/deepctl_cmd_listen.egg-info/dependency_links.txt +1 -0
- deepctl_cmd_listen-0.0.2/src/deepctl_cmd_listen.egg-info/entry_points.txt +2 -0
- deepctl_cmd_listen-0.0.2/src/deepctl_cmd_listen.egg-info/requires.txt +9 -0
- deepctl_cmd_listen-0.0.2/src/deepctl_cmd_listen.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: deepctl-cmd-listen
|
|
3
|
+
Version: 0.0.2
|
|
4
|
+
Summary: Listen (live speech-to-text) command for deepctl
|
|
5
|
+
Author-email: Deepgram <devrel@deepgram.com>
|
|
6
|
+
Maintainer-email: Deepgram <devrel@deepgram.com>
|
|
7
|
+
License-Expression: MIT
|
|
8
|
+
Keywords: deepgram,cli,stt,live,streaming,listen
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Requires-Python: >=3.10
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
Requires-Dist: deepctl-core>=0.1.10
|
|
18
|
+
Requires-Dist: click>=8.0.0
|
|
19
|
+
Requires-Dist: rich>=13.0.0
|
|
20
|
+
Requires-Dist: pydantic>=2.0.0
|
|
21
|
+
Requires-Dist: websockets>=11.0
|
|
22
|
+
Provides-Extra: mic
|
|
23
|
+
Requires-Dist: sounddevice>=0.4.6; extra == "mic"
|
|
24
|
+
Requires-Dist: numpy>=1.24.0; extra == "mic"
|
|
25
|
+
|
|
26
|
+
# deepctl-cmd-listen
|
|
27
|
+
|
|
28
|
+
> Part of [deepctl](https://github.com/deepgram/cli) — Official Deepgram CLI
|
|
29
|
+
|
|
30
|
+
Listen (live speech-to-text) command for deepctl
|
|
31
|
+
|
|
32
|
+
## Installation
|
|
33
|
+
|
|
34
|
+
This package is included with deepctl and does not need to be installed separately.
|
|
35
|
+
|
|
36
|
+
### Install deepctl
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
# Install with pip
|
|
40
|
+
pip install deepctl
|
|
41
|
+
|
|
42
|
+
# Or install with uv
|
|
43
|
+
uv tool install deepctl
|
|
44
|
+
|
|
45
|
+
# Or install with pipx
|
|
46
|
+
pipx install deepctl
|
|
47
|
+
|
|
48
|
+
# Or run without installing
|
|
49
|
+
uvx deepctl --help
|
|
50
|
+
pipx run deepctl --help
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## Commands
|
|
54
|
+
|
|
55
|
+
| Command | Entry Point |
|
|
56
|
+
|---------|-------------|
|
|
57
|
+
| `deepctl listen` | `deepctl_cmd_listen.command:ListenCommand` |
|
|
58
|
+
|
|
59
|
+
## Dependencies
|
|
60
|
+
|
|
61
|
+
- `click>=8.0.0`
|
|
62
|
+
- `rich>=13.0.0`
|
|
63
|
+
- `pydantic>=2.0.0`
|
|
64
|
+
- `websockets>=11.0`
|
|
65
|
+
|
|
66
|
+
## License
|
|
67
|
+
|
|
68
|
+
MIT — see [LICENSE](../../LICENSE)
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# deepctl-cmd-listen
|
|
2
|
+
|
|
3
|
+
> Part of [deepctl](https://github.com/deepgram/cli) — Official Deepgram CLI
|
|
4
|
+
|
|
5
|
+
Listen (live speech-to-text) command for deepctl
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
This package is included with deepctl and does not need to be installed separately.
|
|
10
|
+
|
|
11
|
+
### Install deepctl
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
# Install with pip
|
|
15
|
+
pip install deepctl
|
|
16
|
+
|
|
17
|
+
# Or install with uv
|
|
18
|
+
uv tool install deepctl
|
|
19
|
+
|
|
20
|
+
# Or install with pipx
|
|
21
|
+
pipx install deepctl
|
|
22
|
+
|
|
23
|
+
# Or run without installing
|
|
24
|
+
uvx deepctl --help
|
|
25
|
+
pipx run deepctl --help
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
## Commands
|
|
29
|
+
|
|
30
|
+
| Command | Entry Point |
|
|
31
|
+
|---------|-------------|
|
|
32
|
+
| `deepctl listen` | `deepctl_cmd_listen.command:ListenCommand` |
|
|
33
|
+
|
|
34
|
+
## Dependencies
|
|
35
|
+
|
|
36
|
+
- `deepctl-core>=0.1.10`
- `click>=8.0.0`
|
|
37
|
+
- `rich>=13.0.0`
|
|
38
|
+
- `pydantic>=2.0.0`
|
|
39
|
+
- `websockets>=11.0`
|
|
40
|
+
|
|
41
|
+
## License
|
|
42
|
+
|
|
43
|
+
MIT — see [LICENSE](../../LICENSE)
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "deepctl-cmd-listen"
|
|
7
|
+
version = "0.0.2" # x-release-please-version
|
|
8
|
+
description = "Listen (live speech-to-text) command for deepctl"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = "MIT"
|
|
11
|
+
authors = [{ name = "Deepgram", email = "devrel@deepgram.com" }]
|
|
12
|
+
maintainers = [{ name = "Deepgram", email = "devrel@deepgram.com" }]
|
|
13
|
+
classifiers = [
|
|
14
|
+
"Development Status :: 3 - Alpha",
|
|
15
|
+
"Intended Audience :: Developers",
|
|
16
|
+
"Programming Language :: Python :: 3",
|
|
17
|
+
"Programming Language :: Python :: 3.10",
|
|
18
|
+
"Programming Language :: Python :: 3.11",
|
|
19
|
+
"Programming Language :: Python :: 3.12",
|
|
20
|
+
]
|
|
21
|
+
keywords = ["deepgram", "cli", "stt", "live", "streaming", "listen"]
|
|
22
|
+
requires-python = ">=3.10"
|
|
23
|
+
dependencies = [
|
|
24
|
+
"deepctl-core>=0.1.10",
|
|
25
|
+
"click>=8.0.0",
|
|
26
|
+
"rich>=13.0.0",
|
|
27
|
+
"pydantic>=2.0.0",
|
|
28
|
+
"websockets>=11.0",
|
|
29
|
+
]
|
|
30
|
+
|
|
31
|
+
[project.optional-dependencies]
|
|
32
|
+
mic = ["sounddevice>=0.4.6", "numpy>=1.24.0"]
|
|
33
|
+
|
|
34
|
+
[project.scripts]
|
|
35
|
+
|
|
36
|
+
[project.entry-points."deepctl.commands"]
|
|
37
|
+
listen = "deepctl_cmd_listen.command:ListenCommand"
|
|
38
|
+
|
|
39
|
+
[tool.setuptools]
|
|
40
|
+
package-dir = { "" = "src" }
|
|
41
|
+
|
|
42
|
+
[tool.setuptools.packages.find]
|
|
43
|
+
where = ["src"]
|
|
44
|
+
include = ["deepctl_cmd_listen*"]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Listen (live speech-to-text) command for deepctl."""
|
|
@@ -0,0 +1,401 @@
|
|
|
1
|
+
"""Listen (live speech-to-text) command for deepctl."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import json
|
|
7
|
+
import sys
|
|
8
|
+
import threading
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
from deepctl_core import (
|
|
12
|
+
AuthManager,
|
|
13
|
+
BaseCommand,
|
|
14
|
+
BaseResult,
|
|
15
|
+
Config,
|
|
16
|
+
DeepgramClient,
|
|
17
|
+
)
|
|
18
|
+
from rich.console import Console
|
|
19
|
+
|
|
20
|
+
from .models import ListenResult
|
|
21
|
+
|
|
22
|
+
console = Console(stderr=True)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class ListenCommand(BaseCommand):
    """Command for live speech-to-text transcription.

    Audio comes either from the microphone (``--mic``) or from raw bytes piped
    on stdin. It is streamed over a WebSocket to the listen endpoint, final
    transcripts are printed to stdout, and status messages go to the
    module-level stderr ``console``.
    """

    name = "listen"
    help = "Live speech-to-text transcription"
    short_help = "Live transcription"

    requires_auth = True
    requires_project = False
    ci_friendly = True

    examples = [
        "dg listen --mic",
        "dg listen --mic --model nova-3 --language en-US",
        "dg listen --mic --interim",
        "cat audio.raw | dg listen --encoding linear16 --sample-rate 16000",
        "ffmpeg -i audio.mp3 -f s16le -ar 16000 -ac 1 - | dg listen --encoding linear16 --sample-rate 16000",
    ]
    agent_help = (
        "Live speech-to-text transcription using Deepgram's streaming API. "
        "Use --mic for microphone input (requires sounddevice package) "
        "or pipe raw audio via stdin. Transcripts are printed to stdout."
    )

    # Streaming endpoint; query parameters are appended by _build_listen_url.
    _LISTEN_URL = "wss://api.deepgram.com/v1/listen"

    def get_arguments(self) -> list[dict[str, Any]]:
        """Return the option specifications for the ``listen`` command."""
        # NOTE(review): --punctuate and --smart-format are plain flags whose
        # default is already True, so passing them cannot change the value.
        # A "--punctuate/--no-punctuate" style pair may be what is intended;
        # confirm how BaseCommand maps "names" onto click options first.
        return [
            {
                "names": ["--mic"],
                "help": "Use microphone input (requires 'pip install deepctl-cmd-listen[mic]')",
                "is_flag": True,
                "is_option": True,
            },
            {
                "names": ["--model", "-m"],
                "help": "STT model (default: nova-3)",
                "type": str,
                "is_option": True,
                "default": "nova-3",
            },
            {
                "names": ["--language", "-l"],
                "help": "Language code (default: en-US)",
                "type": str,
                "is_option": True,
                "default": "en-US",
            },
            {
                "names": ["--encoding"],
                "help": "Audio encoding for stdin input (e.g., linear16, mulaw)",
                "type": str,
                "is_option": True,
            },
            {
                "names": ["--sample-rate"],
                "help": "Audio sample rate in Hz for stdin input (default: 16000)",
                "type": int,
                "is_option": True,
                "default": 16000,
            },
            {
                "names": ["--channels"],
                "help": "Number of audio channels (default: 1)",
                "type": int,
                "is_option": True,
                "default": 1,
            },
            {
                "names": ["--interim"],
                "help": "Show interim (partial) results",
                "is_flag": True,
                "is_option": True,
            },
            {
                "names": ["--punctuate"],
                "help": "Enable punctuation (default: true)",
                "is_flag": True,
                "is_option": True,
                "default": True,
            },
            {
                "names": ["--smart-format"],
                "help": "Enable smart formatting (default: true)",
                "is_flag": True,
                "is_option": True,
                "default": True,
            },
        ]

    def handle(
        self,
        config: Config,
        auth_manager: AuthManager,
        client: DeepgramClient,
        **kwargs: Any,
    ) -> BaseResult:
        """Pick the audio source and run the transcription session.

        The microphone is used when ``mic`` is truthy; otherwise stdin is used
        when it is not a TTY (i.e. something is piped in). With neither source
        available an error result explaining both options is returned.

        Args:
            config: Unused here; part of the command interface.
            auth_manager: Unused here; the API key is read via ``client``.
            client: Client whose ``auth_manager`` supplies the API key.
            **kwargs: Parsed options (``mic``, ``model``, ``language``,
                ``encoding``, ``sample_rate``, ``channels``, ``interim``,
                ``punctuate``, ``smart_format``).

        Returns:
            The result of the selected listening mode, or an error result.
        """
        use_mic = kwargs.get("mic", False)
        # "or" also replaces None/empty values, not only missing keys.
        model = kwargs.get("model") or "nova-3"
        language = kwargs.get("language") or "en-US"
        encoding = kwargs.get("encoding")
        sample_rate = kwargs.get("sample_rate") or 16000
        channels = kwargs.get("channels") or 1
        interim = kwargs.get("interim", False)
        punctuate = kwargs.get("punctuate", True)
        smart_format = kwargs.get("smart_format", True)

        if use_mic:
            return self._listen_mic(
                client,
                model,
                language,
                sample_rate,
                channels,
                interim,
                punctuate,
                smart_format,
            )
        elif not sys.stdin.isatty():
            return self._listen_stdin(
                client,
                model,
                language,
                encoding,
                sample_rate,
                channels,
                interim,
                punctuate,
                smart_format,
            )
        else:
            return BaseResult(
                status="error",
                message=(
                    "No audio source. Use --mic for microphone or pipe audio via stdin.\n"
                    "  Example: dg listen --mic\n"
                    "  Example: cat audio.raw | dg listen --encoding linear16 --sample-rate 16000"
                ),
            )

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------

    @classmethod
    def _build_listen_url(
        cls,
        model: str,
        language: str,
        encoding: str,
        sample_rate: int,
        channels: int,
        interim: bool,
        punctuate: bool,
        smart_format: bool,
    ) -> str:
        """Build the streaming URL with properly percent-encoded parameters.

        Values such as ``model`` and ``language`` come straight from the
        command line, so they are encoded rather than interpolated; otherwise
        a value containing ``&`` or ``#`` would alter the query string.
        """
        from urllib.parse import urlencode

        params: dict[str, Any] = {
            "model": model,
            "language": language,
            "punctuate": str(punctuate).lower(),
            "smart_format": str(smart_format).lower(),
            "encoding": encoding,
            "sample_rate": sample_rate,
            "channels": channels,
        }
        if interim:
            params["interim_results"] = "true"
        return f"{cls._LISTEN_URL}?{urlencode(params)}"

    @staticmethod
    def _connect(url: str, api_key: str) -> Any:
        """Return a websockets connection context manager for ``url``.

        The header keyword differs between websockets client implementations:
        the ``websockets.asyncio`` client takes ``additional_headers`` while
        the legacy client takes ``extra_headers``. Prefer the new client and
        fall back when it is not importable.
        """
        headers = {"Authorization": f"Token {api_key}"}
        try:
            from websockets.asyncio.client import connect
        except ImportError:
            import websockets

            return websockets.connect(url, extra_headers=headers)
        return connect(url, additional_headers=headers)

    @staticmethod
    async def _send_close_stream(ws: Any) -> None:
        """Send the ``CloseStream`` control message, best effort.

        This runs during shutdown, when the socket may already be closed; a
        failure here is reported but must not mask the original outcome.
        """
        try:
            await ws.send(json.dumps({"type": "CloseStream"}))
        except Exception as exc:  # cleanup path: report, never raise
            console.print(
                f"Could not send CloseStream: {exc}",
                style="yellow",
                markup=False,
            )

    @staticmethod
    async def _receive_transcripts(
        ws: Any, full_transcript: list[str], interim: bool
    ) -> None:
        """Print transcripts from ``ws`` until the connection ends.

        Final transcripts are appended to ``full_transcript`` and printed on
        their own line. When ``interim`` is true, partial transcripts are
        drawn in place using a carriage return.

        Args:
            ws: Open websocket connection to iterate over.
            full_transcript: List that collects final transcript segments.
            interim: Whether partial results should be displayed.
        """
        # Width of the partial currently shown on the line; used to blank out
        # leftovers when a shorter text replaces it.
        pending_width = 0
        try:
            async for msg in ws:
                try:
                    data = json.loads(msg)
                except ValueError:
                    # Not JSON; nothing to display for this frame.
                    continue
                if not isinstance(data, dict) or data.get("type") != "Results":
                    continue

                channel = data.get("channel") or {}
                alternatives = channel.get("alternatives") or []
                if not alternatives:
                    continue
                transcript = alternatives[0].get("transcript", "")
                if not transcript:
                    continue

                if data.get("is_final", False):
                    full_transcript.append(transcript)
                    if pending_width:
                        # Overwrite the partial instead of appending to it.
                        print(
                            "\r" + transcript.ljust(pending_width), flush=True
                        )
                        pending_width = 0
                    else:
                        print(transcript, flush=True)
                elif interim:
                    print(
                        "\r" + transcript.ljust(pending_width),
                        end="",
                        flush=True,
                    )
                    pending_width = len(transcript)
        except Exception as exc:
            # Keep what was transcribed so far, but tell the user why the
            # stream ended instead of failing silently.
            console.print(
                f"Transcript stream ended with an error: {exc}",
                style="yellow",
                markup=False,
            )
        finally:
            if pending_width:
                # Terminate a dangling partial line.
                print(flush=True)

    # ------------------------------------------------------------------
    # Microphone input
    # ------------------------------------------------------------------

    def _listen_mic(
        self,
        client: DeepgramClient,
        model: str,
        language: str,
        sample_rate: int,
        channels: int,
        interim: bool,
        punctuate: bool,
        smart_format: bool,
    ) -> BaseResult:
        """Run a microphone session synchronously.

        Returns an error result when ``sounddevice`` is not installed. A
        ``KeyboardInterrupt`` that escapes the event loop is treated as a
        normal stop and yields a success result without a transcript.
        """
        try:
            import sounddevice  # noqa: F401
        except ImportError:
            return BaseResult(
                status="error",
                message=(
                    "Microphone support requires sounddevice. "
                    "Install with: pip install 'deepctl-cmd-listen[mic]'"
                ),
            )

        console.print("[blue]Listening from microphone... Press Ctrl+C to stop.[/blue]")

        try:
            return asyncio.run(
                self._stream_mic(
                    client,
                    model,
                    language,
                    sample_rate,
                    channels,
                    interim,
                    punctuate,
                    smart_format,
                )
            )
        except KeyboardInterrupt:
            console.print("\n[yellow]Stopped listening.[/yellow]")
            return ListenResult(status="success", source="mic")

    async def _stream_mic(
        self,
        client: DeepgramClient,
        model: str,
        language: str,
        sample_rate: int,
        channels: int,
        interim: bool,
        punctuate: bool,
        smart_format: bool,
    ) -> ListenResult:
        """Stream microphone audio and collect the final transcripts.

        Audio is captured as 16-bit samples, so the encoding sent to the
        server is always ``linear16``.

        Returns:
            A success result whose transcript is the final segments joined by
            single spaces.
        """
        import numpy as np
        import sounddevice as sd

        api_key = client.auth_manager.get_api_key()
        url = self._build_listen_url(
            model,
            language,
            "linear16",
            sample_rate,
            channels,
            interim,
            punctuate,
            smart_format,
        )

        full_transcript: list[str] = []

        async with self._connect(url, api_key) as ws:
            stop_event = threading.Event()

            async def send_audio() -> None:
                loop = asyncio.get_running_loop()
                q: asyncio.Queue[bytes] = asyncio.Queue()

                def audio_callback(
                    indata: Any, frames: int, time_info: Any, status: Any
                ) -> None:
                    # Called by sounddevice outside the event loop thread, so
                    # the chunk is handed over thread-safely.
                    if status:
                        console.print(f"[yellow]Audio: {status}[/yellow]")
                    loop.call_soon_threadsafe(q.put_nowait, bytes(indata))

                stream = sd.RawInputStream(
                    samplerate=sample_rate,
                    channels=channels,
                    dtype=np.int16,
                    callback=audio_callback,
                    blocksize=int(sample_rate * 0.1),  # ~100 ms per chunk
                )
                try:
                    # The context manager starts the stream and guarantees it
                    # is stopped and closed, even if starting/sending fails.
                    with stream:
                        while not stop_event.is_set():
                            try:
                                # Short timeout so stop_event is re-checked
                                # even when no audio arrives.
                                data = await asyncio.wait_for(
                                    q.get(), timeout=0.5
                                )
                            except asyncio.TimeoutError:
                                continue
                            await ws.send(data)
                finally:
                    await self._send_close_stream(ws)

            async def receive_until_closed() -> None:
                try:
                    await self._receive_transcripts(
                        ws, full_transcript, interim
                    )
                finally:
                    # Nothing will read further results; let the sender stop.
                    stop_event.set()

            send_task = asyncio.create_task(send_audio())
            recv_task = asyncio.create_task(receive_until_closed())

            try:
                await asyncio.gather(send_task, recv_task)
            except (KeyboardInterrupt, asyncio.CancelledError):
                stop_event.set()
                send_task.cancel()
                recv_task.cancel()
                # Wait for both tasks to finish their cleanup while the
                # connection is still open.
                await asyncio.gather(
                    send_task, recv_task, return_exceptions=True
                )

        return ListenResult(
            status="success",
            transcript=" ".join(full_transcript),
            source="mic",
        )

    # ------------------------------------------------------------------
    # Stdin input
    # ------------------------------------------------------------------

    def _listen_stdin(
        self,
        client: DeepgramClient,
        model: str,
        language: str,
        encoding: str | None,
        sample_rate: int,
        channels: int,
        interim: bool,
        punctuate: bool,
        smart_format: bool,
    ) -> BaseResult:
        """Run a stdin session synchronously.

        When no encoding is given, a warning is printed and ``linear16`` is
        assumed. A ``KeyboardInterrupt`` that escapes the event loop yields a
        success result without a transcript.
        """
        if not encoding:
            console.print(
                "[yellow]Warning: No --encoding specified for stdin. "
                "Assuming linear16. Specify with --encoding.[/yellow]"
            )
            encoding = "linear16"

        console.print(
            "[blue]Reading audio from stdin... Send EOF (Ctrl+D) to finish.[/blue]"
        )

        try:
            return asyncio.run(
                self._stream_stdin(
                    client,
                    model,
                    language,
                    encoding,
                    sample_rate,
                    channels,
                    interim,
                    punctuate,
                    smart_format,
                )
            )
        except KeyboardInterrupt:
            console.print("\n[yellow]Stopped listening.[/yellow]")
            return ListenResult(status="success", source="stdin")

    async def _stream_stdin(
        self,
        client: DeepgramClient,
        model: str,
        language: str,
        encoding: str,
        sample_rate: int,
        channels: int,
        interim: bool,
        punctuate: bool,
        smart_format: bool,
    ) -> ListenResult:
        """Stream audio bytes from stdin and collect the final transcripts.

        Returns:
            A success result whose transcript is the final segments joined by
            single spaces.
        """
        api_key = client.auth_manager.get_api_key()
        url = self._build_listen_url(
            model,
            language,
            encoding,
            sample_rate,
            channels,
            interim,
            punctuate,
            smart_format,
        )

        full_transcript: list[str] = []

        async with self._connect(url, api_key) as ws:

            async def send_audio() -> None:
                loop = asyncio.get_running_loop()
                # stdin reads block, so they run in the default executor to
                # keep the receiver responsive. An empty read means EOF.
                while chunk := await loop.run_in_executor(
                    None, sys.stdin.buffer.read, 4096
                ):
                    await ws.send(chunk)
                await self._send_close_stream(ws)

            await asyncio.gather(
                send_audio(),
                self._receive_transcripts(ws, full_transcript, interim),
            )

        return ListenResult(
            status="success",
            transcript=" ".join(full_transcript),
            source="stdin",
        )
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: deepctl-cmd-listen
|
|
3
|
+
Version: 0.0.2
|
|
4
|
+
Summary: Listen (live speech-to-text) command for deepctl
|
|
5
|
+
Author-email: Deepgram <devrel@deepgram.com>
|
|
6
|
+
Maintainer-email: Deepgram <devrel@deepgram.com>
|
|
7
|
+
License-Expression: MIT
|
|
8
|
+
Keywords: deepgram,cli,stt,live,streaming,listen
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Requires-Python: >=3.10
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
Requires-Dist: deepctl-core>=0.1.10
|
|
18
|
+
Requires-Dist: click>=8.0.0
|
|
19
|
+
Requires-Dist: rich>=13.0.0
|
|
20
|
+
Requires-Dist: pydantic>=2.0.0
|
|
21
|
+
Requires-Dist: websockets>=11.0
|
|
22
|
+
Provides-Extra: mic
|
|
23
|
+
Requires-Dist: sounddevice>=0.4.6; extra == "mic"
|
|
24
|
+
Requires-Dist: numpy>=1.24.0; extra == "mic"
|
|
25
|
+
|
|
26
|
+
# deepctl-cmd-listen
|
|
27
|
+
|
|
28
|
+
> Part of [deepctl](https://github.com/deepgram/cli) — Official Deepgram CLI
|
|
29
|
+
|
|
30
|
+
Listen (live speech-to-text) command for deepctl
|
|
31
|
+
|
|
32
|
+
## Installation
|
|
33
|
+
|
|
34
|
+
This package is included with deepctl and does not need to be installed separately.
|
|
35
|
+
|
|
36
|
+
### Install deepctl
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
# Install with pip
|
|
40
|
+
pip install deepctl
|
|
41
|
+
|
|
42
|
+
# Or install with uv
|
|
43
|
+
uv tool install deepctl
|
|
44
|
+
|
|
45
|
+
# Or install with pipx
|
|
46
|
+
pipx install deepctl
|
|
47
|
+
|
|
48
|
+
# Or run without installing
|
|
49
|
+
uvx deepctl --help
|
|
50
|
+
pipx run deepctl --help
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## Commands
|
|
54
|
+
|
|
55
|
+
| Command | Entry Point |
|
|
56
|
+
|---------|-------------|
|
|
57
|
+
| `deepctl listen` | `deepctl_cmd_listen.command:ListenCommand` |
|
|
58
|
+
|
|
59
|
+
## Dependencies
|
|
60
|
+
|
|
61
|
+
- `click>=8.0.0`
|
|
62
|
+
- `rich>=13.0.0`
|
|
63
|
+
- `pydantic>=2.0.0`
|
|
64
|
+
- `websockets>=11.0`
|
|
65
|
+
|
|
66
|
+
## License
|
|
67
|
+
|
|
68
|
+
MIT — see [LICENSE](../../LICENSE)
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
src/deepctl_cmd_listen/__init__.py
|
|
4
|
+
src/deepctl_cmd_listen/command.py
|
|
5
|
+
src/deepctl_cmd_listen/models.py
|
|
6
|
+
src/deepctl_cmd_listen.egg-info/PKG-INFO
|
|
7
|
+
src/deepctl_cmd_listen.egg-info/SOURCES.txt
|
|
8
|
+
src/deepctl_cmd_listen.egg-info/dependency_links.txt
|
|
9
|
+
src/deepctl_cmd_listen.egg-info/entry_points.txt
|
|
10
|
+
src/deepctl_cmd_listen.egg-info/requires.txt
|
|
11
|
+
src/deepctl_cmd_listen.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
deepctl_cmd_listen
|