agent-cli 0.2.0__tar.gz → 0.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {agent_cli-0.2.0 → agent_cli-0.3.1}/PKG-INFO +56 -33
- {agent_cli-0.2.0 → agent_cli-0.3.1}/README.md +55 -32
- {agent_cli-0.2.0 → agent_cli-0.3.1}/agent_cli/agents/_cli_options.py +10 -5
- {agent_cli-0.2.0 → agent_cli-0.3.1}/agent_cli/agents/autocorrect.py +22 -26
- agent_cli-0.3.1/agent_cli/agents/transcribe.py +220 -0
- {agent_cli-0.2.0 → agent_cli-0.3.1}/agent_cli/agents/voice_assistant.py +31 -113
- {agent_cli-0.2.0 → agent_cli-0.3.1}/agent_cli/asr.py +8 -7
- agent_cli-0.3.1/agent_cli/llm.py +135 -0
- {agent_cli-0.2.0 → agent_cli-0.3.1}/agent_cli/utils.py +62 -0
- {agent_cli-0.2.0 → agent_cli-0.3.1}/agent_cli.egg-info/PKG-INFO +56 -33
- {agent_cli-0.2.0 → agent_cli-0.3.1}/agent_cli.egg-info/SOURCES.txt +2 -2
- agent_cli-0.2.0/tests/test_ollama_client.py → agent_cli-0.3.1/tests/test_llm.py +1 -1
- agent_cli-0.2.0/agent_cli/agents/transcribe.py +0 -131
- agent_cli-0.2.0/agent_cli/ollama_client.py +0 -24
- {agent_cli-0.2.0 → agent_cli-0.3.1}/LICENSE +0 -0
- {agent_cli-0.2.0 → agent_cli-0.3.1}/agent_cli/__init__.py +0 -0
- {agent_cli-0.2.0 → agent_cli-0.3.1}/agent_cli/agents/__init__.py +0 -0
- {agent_cli-0.2.0 → agent_cli-0.3.1}/agent_cli/cli.py +0 -0
- {agent_cli-0.2.0 → agent_cli-0.3.1}/agent_cli/config.py +0 -0
- {agent_cli-0.2.0 → agent_cli-0.3.1}/agent_cli/process_manager.py +0 -0
- {agent_cli-0.2.0 → agent_cli-0.3.1}/agent_cli/py.typed +0 -0
- {agent_cli-0.2.0 → agent_cli-0.3.1}/agent_cli.egg-info/dependency_links.txt +0 -0
- {agent_cli-0.2.0 → agent_cli-0.3.1}/agent_cli.egg-info/entry_points.txt +0 -0
- {agent_cli-0.2.0 → agent_cli-0.3.1}/agent_cli.egg-info/requires.txt +0 -0
- {agent_cli-0.2.0 → agent_cli-0.3.1}/agent_cli.egg-info/top_level.txt +0 -0
- {agent_cli-0.2.0 → agent_cli-0.3.1}/pyproject.toml +0 -0
- {agent_cli-0.2.0 → agent_cli-0.3.1}/setup.cfg +0 -0
- {agent_cli-0.2.0 → agent_cli-0.3.1}/tests/test_process_manager.py +0 -0
- {agent_cli-0.2.0 → agent_cli-0.3.1}/tests/test_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: agent-cli
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.3.1
|
|
4
4
|
Summary: A suite of AI-powered command-line tools for text correction, audio transcription, and voice assistance.
|
|
5
5
|
Author-email: Bas Nijholt <bas@nijho.lt>
|
|
6
6
|
Project-URL: Homepage, https://github.com/basnijholt/agent-cli
|
|
@@ -196,13 +196,23 @@ Transcribes whatever you say into text using Wyoming ASR (Automatic Speech Recog
|
|
|
196
196
|
│ [default: 192.168.1.143] │
|
|
197
197
|
│ --asr-server-port INTEGER Wyoming ASR server port. │
|
|
198
198
|
│ [default: 10300] │
|
|
199
|
-
│ --clipboard --no-clipboard Copy transcript to │
|
|
200
|
-
│ clipboard. │
|
|
201
|
-
│ [default: clipboard] │
|
|
199
|
+
│ --model -m TEXT The Ollama model to use. │
|
|
200
|
+
│ Default is devstral:24b. │
|
|
201
|
+
│ [default: devstral:24b] │
|
|
202
|
+
│ --ollama-host TEXT The Ollama server host. │
|
|
203
|
+
│ Default is │
|
|
204
|
+
│ http://localhost:11434. │
|
|
205
|
+
│ [default: │
|
|
206
|
+
│ http://localhost:11434] │
|
|
207
|
+
│ --llm --no-llm Use an LLM to process the │
|
|
208
|
+
│ transcript. │
|
|
209
|
+
│ [default: no-llm] │
|
|
202
210
|
│ --stop Stop any running │
|
|
203
211
|
│ background process. │
|
|
204
212
|
│ --status Check if a background │
|
|
205
213
|
│ process is running. │
|
|
214
|
+
│ --clipboard --no-clipboard Copy result to clipboard. │
|
|
215
|
+
│ [default: clipboard] │
|
|
206
216
|
│ --log-level TEXT Set logging level. │
|
|
207
217
|
│ [default: WARNING] │
|
|
208
218
|
│ --log-file TEXT Path to a file to write │
|
|
@@ -268,35 +278,48 @@ The process management features make it perfect for hotkey toggles. Use `--statu
|
|
|
268
278
|
voice-assistant --stop
|
|
269
279
|
|
|
270
280
|
╭─ Options ────────────────────────────────────────────────────────────────────╮
|
|
271
|
-
│ --device-index
|
|
272
|
-
│
|
|
273
|
-
│
|
|
274
|
-
│ --device-name
|
|
275
|
-
│
|
|
276
|
-
│
|
|
277
|
-
│
|
|
278
|
-
│
|
|
279
|
-
│
|
|
280
|
-
│
|
|
281
|
-
│
|
|
282
|
-
│
|
|
283
|
-
│
|
|
284
|
-
│
|
|
285
|
-
│
|
|
286
|
-
│
|
|
287
|
-
│
|
|
288
|
-
│
|
|
289
|
-
│
|
|
290
|
-
│
|
|
291
|
-
│
|
|
292
|
-
│
|
|
293
|
-
│ --
|
|
294
|
-
│
|
|
295
|
-
│
|
|
296
|
-
│
|
|
297
|
-
│
|
|
298
|
-
│ --
|
|
299
|
-
│
|
|
281
|
+
│ --device-index INTEGER Index of the PyAudio input │
|
|
282
|
+
│ device to use. │
|
|
283
|
+
│ [default: None] │
|
|
284
|
+
│ --device-name TEXT Device name keywords for │
|
|
285
|
+
│ partial matching. Supports │
|
|
286
|
+
│ comma-separated list where │
|
|
287
|
+
│ each term can partially │
|
|
288
|
+
│ match device names │
|
|
289
|
+
│ (case-insensitive). First │
|
|
290
|
+
│ matching device is │
|
|
291
|
+
│ selected. │
|
|
292
|
+
│ [default: None] │
|
|
293
|
+
│ --list-devices List available audio input │
|
|
294
|
+
│ devices and exit. │
|
|
295
|
+
│ --asr-server-ip TEXT Wyoming ASR server IP │
|
|
296
|
+
│ address. │
|
|
297
|
+
│ [default: 192.168.1.143] │
|
|
298
|
+
│ --asr-server-port INTEGER Wyoming ASR server port. │
|
|
299
|
+
│ [default: 10300] │
|
|
300
|
+
│ --model -m TEXT The Ollama model to use. │
|
|
301
|
+
│ Default is devstral:24b. │
|
|
302
|
+
│ [default: devstral:24b] │
|
|
303
|
+
│ --ollama-host TEXT The Ollama server host. │
|
|
304
|
+
│ Default is │
|
|
305
|
+
│ http://localhost:11434. │
|
|
306
|
+
│ [default: │
|
|
307
|
+
│ http://localhost:11434] │
|
|
308
|
+
│ --stop Stop any running │
|
|
309
|
+
│ background process. │
|
|
310
|
+
│ --status Check if a background │
|
|
311
|
+
│ process is running. │
|
|
312
|
+
│ --clipboard --no-clipboard Copy result to clipboard. │
|
|
313
|
+
│ [default: clipboard] │
|
|
314
|
+
│ --log-level TEXT Set logging level. │
|
|
315
|
+
│ [default: WARNING] │
|
|
316
|
+
│ --log-file TEXT Path to a file to write │
|
|
317
|
+
│ logs to. │
|
|
318
|
+
│ [default: None] │
|
|
319
|
+
│ --quiet -q Suppress console output │
|
|
320
|
+
│ from rich. │
|
|
321
|
+
│ --help Show this message and │
|
|
322
|
+
│ exit. │
|
|
300
323
|
╰──────────────────────────────────────────────────────────────────────────────╯
|
|
301
324
|
|
|
302
325
|
```
|
|
@@ -166,13 +166,23 @@ Transcribes whatever you say into text using Wyoming ASR (Automatic Speech Recog
|
|
|
166
166
|
│ [default: 192.168.1.143] │
|
|
167
167
|
│ --asr-server-port INTEGER Wyoming ASR server port. │
|
|
168
168
|
│ [default: 10300] │
|
|
169
|
-
│ --clipboard --no-clipboard Copy transcript to │
|
|
170
|
-
│ clipboard. │
|
|
171
|
-
│ [default: clipboard] │
|
|
169
|
+
│ --model -m TEXT The Ollama model to use. │
|
|
170
|
+
│ Default is devstral:24b. │
|
|
171
|
+
│ [default: devstral:24b] │
|
|
172
|
+
│ --ollama-host TEXT The Ollama server host. │
|
|
173
|
+
│ Default is │
|
|
174
|
+
│ http://localhost:11434. │
|
|
175
|
+
│ [default: │
|
|
176
|
+
│ http://localhost:11434] │
|
|
177
|
+
│ --llm --no-llm Use an LLM to process the │
|
|
178
|
+
│ transcript. │
|
|
179
|
+
│ [default: no-llm] │
|
|
172
180
|
│ --stop Stop any running │
|
|
173
181
|
│ background process. │
|
|
174
182
|
│ --status Check if a background │
|
|
175
183
|
│ process is running. │
|
|
184
|
+
│ --clipboard --no-clipboard Copy result to clipboard. │
|
|
185
|
+
│ [default: clipboard] │
|
|
176
186
|
│ --log-level TEXT Set logging level. │
|
|
177
187
|
│ [default: WARNING] │
|
|
178
188
|
│ --log-file TEXT Path to a file to write │
|
|
@@ -238,35 +248,48 @@ The process management features make it perfect for hotkey toggles. Use `--statu
|
|
|
238
248
|
voice-assistant --stop
|
|
239
249
|
|
|
240
250
|
╭─ Options ────────────────────────────────────────────────────────────────────╮
|
|
241
|
-
│ --device-index
|
|
242
|
-
│
|
|
243
|
-
│
|
|
244
|
-
│ --device-name
|
|
245
|
-
│
|
|
246
|
-
│
|
|
247
|
-
│
|
|
248
|
-
│
|
|
249
|
-
│
|
|
250
|
-
│
|
|
251
|
-
│
|
|
252
|
-
│
|
|
253
|
-
│
|
|
254
|
-
│
|
|
255
|
-
│
|
|
256
|
-
│
|
|
257
|
-
│
|
|
258
|
-
│
|
|
259
|
-
│
|
|
260
|
-
│
|
|
261
|
-
│
|
|
262
|
-
│
|
|
263
|
-
│ --
|
|
264
|
-
│
|
|
265
|
-
│
|
|
266
|
-
│
|
|
267
|
-
│
|
|
268
|
-
│ --
|
|
269
|
-
│
|
|
251
|
+
│ --device-index INTEGER Index of the PyAudio input │
|
|
252
|
+
│ device to use. │
|
|
253
|
+
│ [default: None] │
|
|
254
|
+
│ --device-name TEXT Device name keywords for │
|
|
255
|
+
│ partial matching. Supports │
|
|
256
|
+
│ comma-separated list where │
|
|
257
|
+
│ each term can partially │
|
|
258
|
+
│ match device names │
|
|
259
|
+
│ (case-insensitive). First │
|
|
260
|
+
│ matching device is │
|
|
261
|
+
│ selected. │
|
|
262
|
+
│ [default: None] │
|
|
263
|
+
│ --list-devices List available audio input │
|
|
264
|
+
│ devices and exit. │
|
|
265
|
+
│ --asr-server-ip TEXT Wyoming ASR server IP │
|
|
266
|
+
│ address. │
|
|
267
|
+
│ [default: 192.168.1.143] │
|
|
268
|
+
│ --asr-server-port INTEGER Wyoming ASR server port. │
|
|
269
|
+
│ [default: 10300] │
|
|
270
|
+
│ --model -m TEXT The Ollama model to use. │
|
|
271
|
+
│ Default is devstral:24b. │
|
|
272
|
+
│ [default: devstral:24b] │
|
|
273
|
+
│ --ollama-host TEXT The Ollama server host. │
|
|
274
|
+
│ Default is │
|
|
275
|
+
│ http://localhost:11434. │
|
|
276
|
+
│ [default: │
|
|
277
|
+
│ http://localhost:11434] │
|
|
278
|
+
│ --stop Stop any running │
|
|
279
|
+
│ background process. │
|
|
280
|
+
│ --status Check if a background │
|
|
281
|
+
│ process is running. │
|
|
282
|
+
│ --clipboard --no-clipboard Copy result to clipboard. │
|
|
283
|
+
│ [default: clipboard] │
|
|
284
|
+
│ --log-level TEXT Set logging level. │
|
|
285
|
+
│ [default: WARNING] │
|
|
286
|
+
│ --log-file TEXT Path to a file to write │
|
|
287
|
+
│ logs to. │
|
|
288
|
+
│ [default: None] │
|
|
289
|
+
│ --quiet -q Suppress console output │
|
|
290
|
+
│ from rich. │
|
|
291
|
+
│ --help Show this message and │
|
|
292
|
+
│ exit. │
|
|
270
293
|
╰──────────────────────────────────────────────────────────────────────────────╯
|
|
271
294
|
|
|
272
295
|
```
|
|
@@ -18,6 +18,11 @@ OLLAMA_HOST: str = typer.Option(
|
|
|
18
18
|
"--ollama-host",
|
|
19
19
|
help=f"The Ollama server host. Default is {config.OLLAMA_HOST}.",
|
|
20
20
|
)
|
|
21
|
+
LLM: bool = typer.Option(
|
|
22
|
+
False, # noqa: FBT003
|
|
23
|
+
"--llm/--no-llm",
|
|
24
|
+
help="Use an LLM to process the transcript.",
|
|
25
|
+
)
|
|
21
26
|
|
|
22
27
|
|
|
23
28
|
# --- ASR (Audio) Options ---
|
|
@@ -47,11 +52,6 @@ ASR_SERVER_PORT: int = typer.Option(
|
|
|
47
52
|
"--asr-server-port",
|
|
48
53
|
help="Wyoming ASR server port.",
|
|
49
54
|
)
|
|
50
|
-
CLIPBOARD: bool = typer.Option(
|
|
51
|
-
True, # noqa: FBT003
|
|
52
|
-
"--clipboard/--no-clipboard",
|
|
53
|
-
help="Copy transcript to clipboard.",
|
|
54
|
-
)
|
|
55
55
|
|
|
56
56
|
|
|
57
57
|
# --- Process Management Options ---
|
|
@@ -68,6 +68,11 @@ STATUS: bool = typer.Option(
|
|
|
68
68
|
|
|
69
69
|
|
|
70
70
|
# --- General Options ---
|
|
71
|
+
CLIPBOARD: bool = typer.Option(
|
|
72
|
+
True, # noqa: FBT003
|
|
73
|
+
"--clipboard/--no-clipboard",
|
|
74
|
+
help="Copy result to clipboard.",
|
|
75
|
+
)
|
|
71
76
|
LOG_LEVEL: str = typer.Option(
|
|
72
77
|
"WARNING",
|
|
73
78
|
"--log-level",
|
|
@@ -27,13 +27,18 @@ import typer
|
|
|
27
27
|
from openai import APIConnectionError
|
|
28
28
|
from pydantic_ai.exceptions import ModelHTTPError
|
|
29
29
|
from rich.console import Console
|
|
30
|
-
from rich.panel import Panel
|
|
31
30
|
from rich.status import Status
|
|
32
31
|
|
|
33
32
|
import agent_cli.agents._cli_options as opts
|
|
34
33
|
from agent_cli.cli import app, setup_logging
|
|
35
|
-
from agent_cli.ollama_client import build_agent
|
|
36
|
-
from agent_cli.utils import get_clipboard_text
|
|
34
|
+
from agent_cli.llm import build_agent
|
|
35
|
+
from agent_cli.utils import (
|
|
36
|
+
get_clipboard_text,
|
|
37
|
+
print_error_message,
|
|
38
|
+
print_input_panel,
|
|
39
|
+
print_output_panel,
|
|
40
|
+
print_status_message,
|
|
41
|
+
)
|
|
37
42
|
|
|
38
43
|
# --- Configuration ---
|
|
39
44
|
|
|
@@ -71,16 +76,7 @@ async def process_text(text: str, model: str, ollama_host: str) -> tuple[str, fl
|
|
|
71
76
|
|
|
72
77
|
def display_original_text(original_text: str, console: Console | None) -> None:
|
|
73
78
|
"""Render the original text panel in verbose mode."""
|
|
74
|
-
|
|
75
|
-
return
|
|
76
|
-
console.print(
|
|
77
|
-
Panel(
|
|
78
|
-
original_text,
|
|
79
|
-
title="[bold cyan]📋 Original Text[/bold cyan]",
|
|
80
|
-
border_style="cyan",
|
|
81
|
-
padding=(1, 2),
|
|
82
|
-
),
|
|
83
|
-
)
|
|
79
|
+
print_input_panel(console, original_text, title="📋 Original Text")
|
|
84
80
|
|
|
85
81
|
|
|
86
82
|
def _display_result(
|
|
@@ -101,16 +97,15 @@ def _display_result(
|
|
|
101
97
|
print(corrected_text)
|
|
102
98
|
else:
|
|
103
99
|
assert console is not None
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
padding=(1, 2),
|
|
110
|
-
),
|
|
100
|
+
print_output_panel(
|
|
101
|
+
console,
|
|
102
|
+
corrected_text,
|
|
103
|
+
title="✨ Corrected Text",
|
|
104
|
+
subtitle=f"[dim]took {elapsed:.2f}s[/dim]",
|
|
111
105
|
)
|
|
112
|
-
|
|
113
|
-
|
|
106
|
+
print_status_message(
|
|
107
|
+
console,
|
|
108
|
+
"✅ Success! Corrected text has been copied to your clipboard.",
|
|
114
109
|
)
|
|
115
110
|
|
|
116
111
|
|
|
@@ -166,9 +161,10 @@ def autocorrect(
|
|
|
166
161
|
except (httpx.ConnectError, ModelHTTPError, APIConnectionError) as e:
|
|
167
162
|
if quiet:
|
|
168
163
|
print(f"❌ {e}")
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
164
|
+
else:
|
|
165
|
+
print_error_message(
|
|
166
|
+
console,
|
|
167
|
+
str(e),
|
|
168
|
+
f"Please check that your Ollama server is running at [bold cyan]{ollama_host}[/bold cyan]",
|
|
173
169
|
)
|
|
174
170
|
sys.exit(1)
|
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
"""Wyoming ASR Client for streaming microphone audio to a transcription server."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import logging
|
|
7
|
+
from contextlib import AbstractContextManager, nullcontext, suppress
|
|
8
|
+
from typing import TYPE_CHECKING
|
|
9
|
+
|
|
10
|
+
import pyperclip
|
|
11
|
+
from rich.console import Console
|
|
12
|
+
from rich.live import Live
|
|
13
|
+
from rich.text import Text
|
|
14
|
+
|
|
15
|
+
import agent_cli.agents._cli_options as opts
|
|
16
|
+
from agent_cli import asr, process_manager
|
|
17
|
+
from agent_cli.cli import app, setup_logging
|
|
18
|
+
from agent_cli.llm import process_and_update_clipboard
|
|
19
|
+
from agent_cli.utils import (
|
|
20
|
+
print_device_index,
|
|
21
|
+
print_input_panel,
|
|
22
|
+
print_output_panel,
|
|
23
|
+
print_status_message,
|
|
24
|
+
signal_handling_context,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
if TYPE_CHECKING:
|
|
28
|
+
import pyaudio
|
|
29
|
+
|
|
30
|
+
LOGGER = logging.getLogger()
|
|
31
|
+
|
|
32
|
+
SYSTEM_PROMPT = """
|
|
33
|
+
You are an AI transcription cleanup assistant. Your purpose is to improve and refine raw speech-to-text transcriptions by correcting errors, adding proper punctuation, and enhancing readability while preserving the original meaning and intent.
|
|
34
|
+
|
|
35
|
+
Your tasks include:
|
|
36
|
+
- Correcting obvious speech recognition errors and mishearing
|
|
37
|
+
- Adding appropriate punctuation (periods, commas, question marks, etc.)
|
|
38
|
+
- Fixing capitalization where needed
|
|
39
|
+
- Removing filler words, false starts, and repeated words when they clearly weren't intentional
|
|
40
|
+
- Improving sentence structure and flow while maintaining the speaker's voice and meaning
|
|
41
|
+
- Formatting the text for better readability
|
|
42
|
+
|
|
43
|
+
Important rules:
|
|
44
|
+
- Do not change the core meaning or content of the transcription
|
|
45
|
+
- Do not add information that wasn't spoken
|
|
46
|
+
- Do not remove content unless it's clearly an error or filler
|
|
47
|
+
- Return ONLY the cleaned-up text without any explanations or commentary
|
|
48
|
+
- Do not wrap your output in markdown or code blocks
|
|
49
|
+
"""
|
|
50
|
+
|
|
51
|
+
AGENT_INSTRUCTIONS = """
|
|
52
|
+
You will be given a block of raw transcribed text enclosed in <original-text> tags, and a cleanup instruction enclosed in <instruction> tags.
|
|
53
|
+
|
|
54
|
+
Your job is to process the transcribed text according to the instruction, which will typically involve:
|
|
55
|
+
- Correcting speech recognition errors
|
|
56
|
+
- Adding proper punctuation and capitalization
|
|
57
|
+
- Removing obvious filler words and false starts
|
|
58
|
+
- Improving readability while preserving meaning
|
|
59
|
+
|
|
60
|
+
Return ONLY the cleaned-up text with no additional formatting or commentary.
|
|
61
|
+
"""
|
|
62
|
+
|
|
63
|
+
INSTRUCTION = """
|
|
64
|
+
Please clean up this transcribed text by correcting any speech recognition errors, adding appropriate punctuation and capitalization, removing obvious filler words or false starts, and improving overall readability while preserving the original meaning and intent of the speaker.
|
|
65
|
+
"""
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
async def async_main(
|
|
69
|
+
*,
|
|
70
|
+
device_index: int | None,
|
|
71
|
+
asr_server_ip: str,
|
|
72
|
+
asr_server_port: int,
|
|
73
|
+
clipboard: bool,
|
|
74
|
+
quiet: bool,
|
|
75
|
+
model: str,
|
|
76
|
+
ollama_host: str,
|
|
77
|
+
llm: bool,
|
|
78
|
+
console: Console | None,
|
|
79
|
+
p: pyaudio.PyAudio,
|
|
80
|
+
) -> None:
|
|
81
|
+
"""Async entry point, consuming parsed args."""
|
|
82
|
+
with (
|
|
83
|
+
signal_handling_context(console, LOGGER) as stop_event,
|
|
84
|
+
_maybe_live(console) as live,
|
|
85
|
+
):
|
|
86
|
+
transcript = await asr.transcribe_audio(
|
|
87
|
+
asr_server_ip=asr_server_ip,
|
|
88
|
+
asr_server_port=asr_server_port,
|
|
89
|
+
device_index=device_index,
|
|
90
|
+
logger=LOGGER,
|
|
91
|
+
p=p,
|
|
92
|
+
stop_event=stop_event,
|
|
93
|
+
console=console,
|
|
94
|
+
live=live,
|
|
95
|
+
listening_message="Listening...",
|
|
96
|
+
)
|
|
97
|
+
|
|
98
|
+
if llm and model and ollama_host and transcript:
|
|
99
|
+
print_input_panel(console, transcript, title="📝 Raw Transcript")
|
|
100
|
+
await process_and_update_clipboard(
|
|
101
|
+
system_prompt=SYSTEM_PROMPT,
|
|
102
|
+
agent_instructions=AGENT_INSTRUCTIONS,
|
|
103
|
+
model=model,
|
|
104
|
+
ollama_host=ollama_host,
|
|
105
|
+
logger=LOGGER,
|
|
106
|
+
console=console,
|
|
107
|
+
original_text=transcript,
|
|
108
|
+
instruction=INSTRUCTION,
|
|
109
|
+
clipboard=clipboard,
|
|
110
|
+
)
|
|
111
|
+
return
|
|
112
|
+
|
|
113
|
+
# When not using LLM, show transcript in output panel for consistency
|
|
114
|
+
if transcript:
|
|
115
|
+
if quiet:
|
|
116
|
+
# Quiet mode: print result to stdout for Keyboard Maestro to capture
|
|
117
|
+
print(transcript)
|
|
118
|
+
else:
|
|
119
|
+
print_output_panel(
|
|
120
|
+
console,
|
|
121
|
+
transcript,
|
|
122
|
+
title="📝 Transcript",
|
|
123
|
+
subtitle="[dim]Copied to clipboard[/dim]" if clipboard else None,
|
|
124
|
+
)
|
|
125
|
+
|
|
126
|
+
if clipboard:
|
|
127
|
+
pyperclip.copy(transcript)
|
|
128
|
+
LOGGER.info("Copied transcript to clipboard.")
|
|
129
|
+
else:
|
|
130
|
+
LOGGER.info("Clipboard copy disabled.")
|
|
131
|
+
else:
|
|
132
|
+
LOGGER.info("Transcript empty.")
|
|
133
|
+
if not quiet:
|
|
134
|
+
print_status_message(console, "⚠️ No transcript captured.", style="yellow")
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def _maybe_live(console: Console | None) -> AbstractContextManager[Live | None]:
|
|
138
|
+
if console:
|
|
139
|
+
return Live(
|
|
140
|
+
Text("Transcribing...", style="blue"),
|
|
141
|
+
console=console,
|
|
142
|
+
transient=True,
|
|
143
|
+
)
|
|
144
|
+
return nullcontext()
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
@app.command("transcribe")
|
|
148
|
+
def transcribe(
|
|
149
|
+
*,
|
|
150
|
+
device_index: int | None = opts.DEVICE_INDEX,
|
|
151
|
+
device_name: str | None = opts.DEVICE_NAME,
|
|
152
|
+
# ASR
|
|
153
|
+
list_devices: bool = opts.LIST_DEVICES,
|
|
154
|
+
asr_server_ip: str = opts.ASR_SERVER_IP,
|
|
155
|
+
asr_server_port: int = opts.ASR_SERVER_PORT,
|
|
156
|
+
# LLM
|
|
157
|
+
model: str = opts.MODEL,
|
|
158
|
+
ollama_host: str = opts.OLLAMA_HOST,
|
|
159
|
+
llm: bool = opts.LLM,
|
|
160
|
+
# Process control
|
|
161
|
+
stop: bool = opts.STOP,
|
|
162
|
+
status: bool = opts.STATUS,
|
|
163
|
+
# General
|
|
164
|
+
clipboard: bool = opts.CLIPBOARD,
|
|
165
|
+
log_level: str = opts.LOG_LEVEL,
|
|
166
|
+
log_file: str | None = opts.LOG_FILE,
|
|
167
|
+
quiet: bool = opts.QUIET,
|
|
168
|
+
) -> None:
|
|
169
|
+
"""Wyoming ASR Client for streaming microphone audio to a transcription server.
|
|
170
|
+
|
|
171
|
+
Usage:
|
|
172
|
+
- Run in foreground: agent-cli transcribe --device-index 1
|
|
173
|
+
- Run in background: agent-cli transcribe --device-index 1 &
|
|
174
|
+
- Check status: agent-cli transcribe --status
|
|
175
|
+
- Stop background process: agent-cli transcribe --stop
|
|
176
|
+
"""
|
|
177
|
+
setup_logging(log_level, log_file, quiet=quiet)
|
|
178
|
+
console = Console() if not quiet else None
|
|
179
|
+
process_name = "transcribe"
|
|
180
|
+
|
|
181
|
+
if stop:
|
|
182
|
+
if process_manager.kill_process(process_name):
|
|
183
|
+
print_status_message(console, "✅ Transcribe stopped.")
|
|
184
|
+
else:
|
|
185
|
+
print_status_message(console, "⚠️ No transcribe is running.", style="yellow")
|
|
186
|
+
return
|
|
187
|
+
|
|
188
|
+
if status:
|
|
189
|
+
if process_manager.is_process_running(process_name):
|
|
190
|
+
pid = process_manager.read_pid_file(process_name)
|
|
191
|
+
print_status_message(console, f"✅ Transcribe is running (PID: {pid}).")
|
|
192
|
+
else:
|
|
193
|
+
print_status_message(console, "⚠️ Transcribe is not running.", style="yellow")
|
|
194
|
+
return
|
|
195
|
+
|
|
196
|
+
console = Console() if not quiet else None
|
|
197
|
+
|
|
198
|
+
with asr.pyaudio_context() as p:
|
|
199
|
+
if list_devices:
|
|
200
|
+
asr.list_input_devices(p, console)
|
|
201
|
+
return
|
|
202
|
+
device_index, device_name = asr.input_device(p, device_name, device_index)
|
|
203
|
+
print_device_index(console, device_index, device_name)
|
|
204
|
+
|
|
205
|
+
# Use context manager for PID file management
|
|
206
|
+
with process_manager.pid_file_context(process_name), suppress(KeyboardInterrupt):
|
|
207
|
+
asyncio.run(
|
|
208
|
+
async_main(
|
|
209
|
+
device_index=device_index,
|
|
210
|
+
asr_server_ip=asr_server_ip,
|
|
211
|
+
asr_server_port=asr_server_port,
|
|
212
|
+
clipboard=clipboard,
|
|
213
|
+
quiet=quiet,
|
|
214
|
+
model=model,
|
|
215
|
+
ollama_host=ollama_host,
|
|
216
|
+
llm=llm,
|
|
217
|
+
console=console,
|
|
218
|
+
p=p,
|
|
219
|
+
),
|
|
220
|
+
)
|