agent-cli 0.2.0__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {agent_cli-0.2.0 → agent_cli-0.3.0}/PKG-INFO +56 -33
- {agent_cli-0.2.0 → agent_cli-0.3.0}/README.md +55 -32
- {agent_cli-0.2.0 → agent_cli-0.3.0}/agent_cli/agents/_cli_options.py +10 -5
- {agent_cli-0.2.0 → agent_cli-0.3.0}/agent_cli/agents/autocorrect.py +1 -1
- {agent_cli-0.2.0 → agent_cli-0.3.0}/agent_cli/agents/transcribe.py +77 -10
- {agent_cli-0.2.0 → agent_cli-0.3.0}/agent_cli/agents/voice_assistant.py +16 -99
- agent_cli-0.3.0/agent_cli/llm.py +141 -0
- {agent_cli-0.2.0 → agent_cli-0.3.0}/agent_cli.egg-info/PKG-INFO +56 -33
- {agent_cli-0.2.0 → agent_cli-0.3.0}/agent_cli.egg-info/SOURCES.txt +2 -2
- agent_cli-0.2.0/tests/test_ollama_client.py → agent_cli-0.3.0/tests/test_llm.py +1 -1
- agent_cli-0.2.0/agent_cli/ollama_client.py +0 -24
- {agent_cli-0.2.0 → agent_cli-0.3.0}/LICENSE +0 -0
- {agent_cli-0.2.0 → agent_cli-0.3.0}/agent_cli/__init__.py +0 -0
- {agent_cli-0.2.0 → agent_cli-0.3.0}/agent_cli/agents/__init__.py +0 -0
- {agent_cli-0.2.0 → agent_cli-0.3.0}/agent_cli/asr.py +0 -0
- {agent_cli-0.2.0 → agent_cli-0.3.0}/agent_cli/cli.py +0 -0
- {agent_cli-0.2.0 → agent_cli-0.3.0}/agent_cli/config.py +0 -0
- {agent_cli-0.2.0 → agent_cli-0.3.0}/agent_cli/process_manager.py +0 -0
- {agent_cli-0.2.0 → agent_cli-0.3.0}/agent_cli/py.typed +0 -0
- {agent_cli-0.2.0 → agent_cli-0.3.0}/agent_cli/utils.py +0 -0
- {agent_cli-0.2.0 → agent_cli-0.3.0}/agent_cli.egg-info/dependency_links.txt +0 -0
- {agent_cli-0.2.0 → agent_cli-0.3.0}/agent_cli.egg-info/entry_points.txt +0 -0
- {agent_cli-0.2.0 → agent_cli-0.3.0}/agent_cli.egg-info/requires.txt +0 -0
- {agent_cli-0.2.0 → agent_cli-0.3.0}/agent_cli.egg-info/top_level.txt +0 -0
- {agent_cli-0.2.0 → agent_cli-0.3.0}/pyproject.toml +0 -0
- {agent_cli-0.2.0 → agent_cli-0.3.0}/setup.cfg +0 -0
- {agent_cli-0.2.0 → agent_cli-0.3.0}/tests/test_process_manager.py +0 -0
- {agent_cli-0.2.0 → agent_cli-0.3.0}/tests/test_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: agent-cli
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: A suite of AI-powered command-line tools for text correction, audio transcription, and voice assistance.
|
|
5
5
|
Author-email: Bas Nijholt <bas@nijho.lt>
|
|
6
6
|
Project-URL: Homepage, https://github.com/basnijholt/agent-cli
|
|
@@ -196,13 +196,23 @@ Transcribes whatever you say into text using Wyoming ASR (Automatic Speech Recog
|
|
|
196
196
|
│ [default: 192.168.1.143] │
|
|
197
197
|
│ --asr-server-port INTEGER Wyoming ASR server port. │
|
|
198
198
|
│ [default: 10300] │
|
|
199
|
-
│ --
|
|
200
|
-
│
|
|
201
|
-
│ [default:
|
|
199
|
+
│ --model -m TEXT The Ollama model to use. │
|
|
200
|
+
│ Default is devstral:24b. │
|
|
201
|
+
│ [default: devstral:24b] │
|
|
202
|
+
│ --ollama-host TEXT The Ollama server host. │
|
|
203
|
+
│ Default is │
|
|
204
|
+
│ http://localhost:11434. │
|
|
205
|
+
│ [default: │
|
|
206
|
+
│ http://localhost:11434] │
|
|
207
|
+
│ --llm --no-llm Use an LLM to process the │
|
|
208
|
+
│ transcript. │
|
|
209
|
+
│ [default: no-llm] │
|
|
202
210
|
│ --stop Stop any running │
|
|
203
211
|
│ background process. │
|
|
204
212
|
│ --status Check if a background │
|
|
205
213
|
│ process is running. │
|
|
214
|
+
│ --clipboard --no-clipboard Copy result to clipboard. │
|
|
215
|
+
│ [default: clipboard] │
|
|
206
216
|
│ --log-level TEXT Set logging level. │
|
|
207
217
|
│ [default: WARNING] │
|
|
208
218
|
│ --log-file TEXT Path to a file to write │
|
|
@@ -268,35 +278,48 @@ The process management features make it perfect for hotkey toggles. Use `--statu
|
|
|
268
278
|
voice-assistant --stop
|
|
269
279
|
|
|
270
280
|
╭─ Options ────────────────────────────────────────────────────────────────────╮
|
|
271
|
-
│ --device-index
|
|
272
|
-
│
|
|
273
|
-
│
|
|
274
|
-
│ --device-name
|
|
275
|
-
│
|
|
276
|
-
│
|
|
277
|
-
│
|
|
278
|
-
│
|
|
279
|
-
│
|
|
280
|
-
│
|
|
281
|
-
│
|
|
282
|
-
│
|
|
283
|
-
│
|
|
284
|
-
│
|
|
285
|
-
│
|
|
286
|
-
│
|
|
287
|
-
│
|
|
288
|
-
│
|
|
289
|
-
│
|
|
290
|
-
│
|
|
291
|
-
│
|
|
292
|
-
│
|
|
293
|
-
│ --
|
|
294
|
-
│
|
|
295
|
-
│
|
|
296
|
-
│
|
|
297
|
-
│
|
|
298
|
-
│ --
|
|
299
|
-
│
|
|
281
|
+
│ --device-index INTEGER Index of the PyAudio input │
|
|
282
|
+
│ device to use. │
|
|
283
|
+
│ [default: None] │
|
|
284
|
+
│ --device-name TEXT Device name keywords for │
|
|
285
|
+
│ partial matching. Supports │
|
|
286
|
+
│ comma-separated list where │
|
|
287
|
+
│ each term can partially │
|
|
288
|
+
│ match device names │
|
|
289
|
+
│ (case-insensitive). First │
|
|
290
|
+
│ matching device is │
|
|
291
|
+
│ selected. │
|
|
292
|
+
│ [default: None] │
|
|
293
|
+
│ --list-devices List available audio input │
|
|
294
|
+
│ devices and exit. │
|
|
295
|
+
│ --asr-server-ip TEXT Wyoming ASR server IP │
|
|
296
|
+
│ address. │
|
|
297
|
+
│ [default: 192.168.1.143] │
|
|
298
|
+
│ --asr-server-port INTEGER Wyoming ASR server port. │
|
|
299
|
+
│ [default: 10300] │
|
|
300
|
+
│ --model -m TEXT The Ollama model to use. │
|
|
301
|
+
│ Default is devstral:24b. │
|
|
302
|
+
│ [default: devstral:24b] │
|
|
303
|
+
│ --ollama-host TEXT The Ollama server host. │
|
|
304
|
+
│ Default is │
|
|
305
|
+
│ http://localhost:11434. │
|
|
306
|
+
│ [default: │
|
|
307
|
+
│ http://localhost:11434] │
|
|
308
|
+
│ --stop Stop any running │
|
|
309
|
+
│ background process. │
|
|
310
|
+
│ --status Check if a background │
|
|
311
|
+
│ process is running. │
|
|
312
|
+
│ --clipboard --no-clipboard Copy result to clipboard. │
|
|
313
|
+
│ [default: clipboard] │
|
|
314
|
+
│ --log-level TEXT Set logging level. │
|
|
315
|
+
│ [default: WARNING] │
|
|
316
|
+
│ --log-file TEXT Path to a file to write │
|
|
317
|
+
│ logs to. │
|
|
318
|
+
│ [default: None] │
|
|
319
|
+
│ --quiet -q Suppress console output │
|
|
320
|
+
│ from rich. │
|
|
321
|
+
│ --help Show this message and │
|
|
322
|
+
│ exit. │
|
|
300
323
|
╰──────────────────────────────────────────────────────────────────────────────╯
|
|
301
324
|
|
|
302
325
|
```
|
|
@@ -166,13 +166,23 @@ Transcribes whatever you say into text using Wyoming ASR (Automatic Speech Recog
|
|
|
166
166
|
│ [default: 192.168.1.143] │
|
|
167
167
|
│ --asr-server-port INTEGER Wyoming ASR server port. │
|
|
168
168
|
│ [default: 10300] │
|
|
169
|
-
│ --
|
|
170
|
-
│
|
|
171
|
-
│ [default:
|
|
169
|
+
│ --model -m TEXT The Ollama model to use. │
|
|
170
|
+
│ Default is devstral:24b. │
|
|
171
|
+
│ [default: devstral:24b] │
|
|
172
|
+
│ --ollama-host TEXT The Ollama server host. │
|
|
173
|
+
│ Default is │
|
|
174
|
+
│ http://localhost:11434. │
|
|
175
|
+
│ [default: │
|
|
176
|
+
│ http://localhost:11434] │
|
|
177
|
+
│ --llm --no-llm Use an LLM to process the │
|
|
178
|
+
│ transcript. │
|
|
179
|
+
│ [default: no-llm] │
|
|
172
180
|
│ --stop Stop any running │
|
|
173
181
|
│ background process. │
|
|
174
182
|
│ --status Check if a background │
|
|
175
183
|
│ process is running. │
|
|
184
|
+
│ --clipboard --no-clipboard Copy result to clipboard. │
|
|
185
|
+
│ [default: clipboard] │
|
|
176
186
|
│ --log-level TEXT Set logging level. │
|
|
177
187
|
│ [default: WARNING] │
|
|
178
188
|
│ --log-file TEXT Path to a file to write │
|
|
@@ -238,35 +248,48 @@ The process management features make it perfect for hotkey toggles. Use `--statu
|
|
|
238
248
|
voice-assistant --stop
|
|
239
249
|
|
|
240
250
|
╭─ Options ────────────────────────────────────────────────────────────────────╮
|
|
241
|
-
│ --device-index
|
|
242
|
-
│
|
|
243
|
-
│
|
|
244
|
-
│ --device-name
|
|
245
|
-
│
|
|
246
|
-
│
|
|
247
|
-
│
|
|
248
|
-
│
|
|
249
|
-
│
|
|
250
|
-
│
|
|
251
|
-
│
|
|
252
|
-
│
|
|
253
|
-
│
|
|
254
|
-
│
|
|
255
|
-
│
|
|
256
|
-
│
|
|
257
|
-
│
|
|
258
|
-
│
|
|
259
|
-
│
|
|
260
|
-
│
|
|
261
|
-
│
|
|
262
|
-
│
|
|
263
|
-
│ --
|
|
264
|
-
│
|
|
265
|
-
│
|
|
266
|
-
│
|
|
267
|
-
│
|
|
268
|
-
│ --
|
|
269
|
-
│
|
|
251
|
+
│ --device-index INTEGER Index of the PyAudio input │
|
|
252
|
+
│ device to use. │
|
|
253
|
+
│ [default: None] │
|
|
254
|
+
│ --device-name TEXT Device name keywords for │
|
|
255
|
+
│ partial matching. Supports │
|
|
256
|
+
│ comma-separated list where │
|
|
257
|
+
│ each term can partially │
|
|
258
|
+
│ match device names │
|
|
259
|
+
│ (case-insensitive). First │
|
|
260
|
+
│ matching device is │
|
|
261
|
+
│ selected. │
|
|
262
|
+
│ [default: None] │
|
|
263
|
+
│ --list-devices List available audio input │
|
|
264
|
+
│ devices and exit. │
|
|
265
|
+
│ --asr-server-ip TEXT Wyoming ASR server IP │
|
|
266
|
+
│ address. │
|
|
267
|
+
│ [default: 192.168.1.143] │
|
|
268
|
+
│ --asr-server-port INTEGER Wyoming ASR server port. │
|
|
269
|
+
│ [default: 10300] │
|
|
270
|
+
│ --model -m TEXT The Ollama model to use. │
|
|
271
|
+
│ Default is devstral:24b. │
|
|
272
|
+
│ [default: devstral:24b] │
|
|
273
|
+
│ --ollama-host TEXT The Ollama server host. │
|
|
274
|
+
│ Default is │
|
|
275
|
+
│ http://localhost:11434. │
|
|
276
|
+
│ [default: │
|
|
277
|
+
│ http://localhost:11434] │
|
|
278
|
+
│ --stop Stop any running │
|
|
279
|
+
│ background process. │
|
|
280
|
+
│ --status Check if a background │
|
|
281
|
+
│ process is running. │
|
|
282
|
+
│ --clipboard --no-clipboard Copy result to clipboard. │
|
|
283
|
+
│ [default: clipboard] │
|
|
284
|
+
│ --log-level TEXT Set logging level. │
|
|
285
|
+
│ [default: WARNING] │
|
|
286
|
+
│ --log-file TEXT Path to a file to write │
|
|
287
|
+
│ logs to. │
|
|
288
|
+
│ [default: None] │
|
|
289
|
+
│ --quiet -q Suppress console output │
|
|
290
|
+
│ from rich. │
|
|
291
|
+
│ --help Show this message and │
|
|
292
|
+
│ exit. │
|
|
270
293
|
╰──────────────────────────────────────────────────────────────────────────────╯
|
|
271
294
|
|
|
272
295
|
```
|
|
@@ -18,6 +18,11 @@ OLLAMA_HOST: str = typer.Option(
|
|
|
18
18
|
"--ollama-host",
|
|
19
19
|
help=f"The Ollama server host. Default is {config.OLLAMA_HOST}.",
|
|
20
20
|
)
|
|
21
|
+
LLM: bool = typer.Option(
|
|
22
|
+
False, # noqa: FBT003
|
|
23
|
+
"--llm/--no-llm",
|
|
24
|
+
help="Use an LLM to process the transcript.",
|
|
25
|
+
)
|
|
21
26
|
|
|
22
27
|
|
|
23
28
|
# --- ASR (Audio) Options ---
|
|
@@ -47,11 +52,6 @@ ASR_SERVER_PORT: int = typer.Option(
|
|
|
47
52
|
"--asr-server-port",
|
|
48
53
|
help="Wyoming ASR server port.",
|
|
49
54
|
)
|
|
50
|
-
CLIPBOARD: bool = typer.Option(
|
|
51
|
-
True, # noqa: FBT003
|
|
52
|
-
"--clipboard/--no-clipboard",
|
|
53
|
-
help="Copy transcript to clipboard.",
|
|
54
|
-
)
|
|
55
55
|
|
|
56
56
|
|
|
57
57
|
# --- Process Management Options ---
|
|
@@ -68,6 +68,11 @@ STATUS: bool = typer.Option(
|
|
|
68
68
|
|
|
69
69
|
|
|
70
70
|
# --- General Options ---
|
|
71
|
+
CLIPBOARD: bool = typer.Option(
|
|
72
|
+
True, # noqa: FBT003
|
|
73
|
+
"--clipboard/--no-clipboard",
|
|
74
|
+
help="Copy result to clipboard.",
|
|
75
|
+
)
|
|
71
76
|
LOG_LEVEL: str = typer.Option(
|
|
72
77
|
"WARNING",
|
|
73
78
|
"--log-level",
|
|
@@ -32,7 +32,7 @@ from rich.status import Status
|
|
|
32
32
|
|
|
33
33
|
import agent_cli.agents._cli_options as opts
|
|
34
34
|
from agent_cli.cli import app, setup_logging
|
|
35
|
-
from agent_cli.ollama_client import build_agent
|
|
35
|
+
from agent_cli.llm import build_agent
|
|
36
36
|
from agent_cli.utils import get_clipboard_text
|
|
37
37
|
|
|
38
38
|
# --- Configuration ---
|
|
@@ -9,23 +9,63 @@ from contextlib import AbstractContextManager, nullcontext, suppress
|
|
|
9
9
|
import pyperclip
|
|
10
10
|
from rich.console import Console
|
|
11
11
|
from rich.live import Live
|
|
12
|
+
from rich.panel import Panel
|
|
12
13
|
from rich.text import Text
|
|
13
14
|
|
|
14
15
|
import agent_cli.agents._cli_options as opts
|
|
15
16
|
from agent_cli import asr, process_manager
|
|
16
17
|
from agent_cli.cli import app, setup_logging
|
|
18
|
+
from agent_cli.llm import process_and_update_clipboard
|
|
17
19
|
from agent_cli.utils import _print, print_device_index, signal_handling_context
|
|
18
20
|
|
|
21
|
+
SYSTEM_PROMPT = """
|
|
22
|
+
You are an AI transcription cleanup assistant. Your purpose is to improve and refine raw speech-to-text transcriptions by correcting errors, adding proper punctuation, and enhancing readability while preserving the original meaning and intent.
|
|
23
|
+
|
|
24
|
+
Your tasks include:
|
|
25
|
+
- Correcting obvious speech recognition errors and mishearing
|
|
26
|
+
- Adding appropriate punctuation (periods, commas, question marks, etc.)
|
|
27
|
+
- Fixing capitalization where needed
|
|
28
|
+
- Removing filler words, false starts, and repeated words when they clearly weren't intentional
|
|
29
|
+
- Improving sentence structure and flow while maintaining the speaker's voice and meaning
|
|
30
|
+
- Formatting the text for better readability
|
|
31
|
+
|
|
32
|
+
Important rules:
|
|
33
|
+
- Do not change the core meaning or content of the transcription
|
|
34
|
+
- Do not add information that wasn't spoken
|
|
35
|
+
- Do not remove content unless it's clearly an error or filler
|
|
36
|
+
- Return ONLY the cleaned-up text without any explanations or commentary
|
|
37
|
+
- Do not wrap your output in markdown or code blocks
|
|
38
|
+
"""
|
|
39
|
+
|
|
40
|
+
AGENT_INSTRUCTIONS = """
|
|
41
|
+
You will be given a block of raw transcribed text enclosed in <original-text> tags, and a cleanup instruction enclosed in <instruction> tags.
|
|
42
|
+
|
|
43
|
+
Your job is to process the transcribed text according to the instruction, which will typically involve:
|
|
44
|
+
- Correcting speech recognition errors
|
|
45
|
+
- Adding proper punctuation and capitalization
|
|
46
|
+
- Removing obvious filler words and false starts
|
|
47
|
+
- Improving readability while preserving meaning
|
|
48
|
+
|
|
49
|
+
Return ONLY the cleaned-up text with no additional formatting or commentary.
|
|
50
|
+
"""
|
|
51
|
+
|
|
52
|
+
INSTRUCTION = """
|
|
53
|
+
Please clean up this transcribed text by correcting any speech recognition errors, adding appropriate punctuation and capitalization, removing obvious filler words or false starts, and improving overall readability while preserving the original meaning and intent of the speaker.
|
|
54
|
+
"""
|
|
55
|
+
|
|
19
56
|
|
|
20
57
|
async def async_main(
|
|
58
|
+
*,
|
|
21
59
|
device_index: int | None,
|
|
22
60
|
device_name: str | None,
|
|
23
61
|
asr_server_ip: str,
|
|
24
62
|
asr_server_port: int,
|
|
25
|
-
*,
|
|
26
63
|
clipboard: bool,
|
|
27
64
|
quiet: bool,
|
|
28
65
|
list_devices: bool,
|
|
66
|
+
model: str,
|
|
67
|
+
ollama_host: str,
|
|
68
|
+
llm: bool,
|
|
29
69
|
) -> None:
|
|
30
70
|
"""Async entry point, consuming parsed args."""
|
|
31
71
|
logger = logging.getLogger()
|
|
@@ -54,14 +94,31 @@ async def async_main(
|
|
|
54
94
|
listening_message="Listening...",
|
|
55
95
|
)
|
|
56
96
|
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
logger
|
|
97
|
+
if llm and model and ollama_host and transcript:
|
|
98
|
+
_print(console, Panel(transcript, title="[cyan]Raw Transcript 📝[/cyan]"))
|
|
99
|
+
await process_and_update_clipboard(
|
|
100
|
+
system_prompt=SYSTEM_PROMPT,
|
|
101
|
+
agent_instructions=AGENT_INSTRUCTIONS,
|
|
102
|
+
model=model,
|
|
103
|
+
ollama_host=ollama_host,
|
|
104
|
+
logger=logger,
|
|
105
|
+
console=console,
|
|
106
|
+
original_text=transcript,
|
|
107
|
+
instruction=INSTRUCTION,
|
|
108
|
+
clipboard=clipboard,
|
|
109
|
+
)
|
|
110
|
+
return
|
|
111
|
+
|
|
112
|
+
if transcript and clipboard:
|
|
113
|
+
pyperclip.copy(transcript)
|
|
114
|
+
logger.info("Copied transcript to clipboard.")
|
|
115
|
+
_print(console, "[italic blue]Copied to clipboard.[/italic blue]")
|
|
116
|
+
elif not transcript:
|
|
117
|
+
logger.info("Transcript empty.")
|
|
118
|
+
else:
|
|
119
|
+
logger.info("Clipboard copy disabled.")
|
|
120
|
+
if not quiet:
|
|
121
|
+
_print(console, f"[italic green]Transcript: {transcript}[/italic green]")
|
|
65
122
|
|
|
66
123
|
|
|
67
124
|
def _maybe_live(console: Console | None) -> AbstractContextManager[Live | None]:
|
|
@@ -79,12 +136,19 @@ def transcribe(
|
|
|
79
136
|
*,
|
|
80
137
|
device_index: int | None = opts.DEVICE_INDEX,
|
|
81
138
|
device_name: str | None = opts.DEVICE_NAME,
|
|
139
|
+
# ASR
|
|
82
140
|
list_devices: bool = opts.LIST_DEVICES,
|
|
83
141
|
asr_server_ip: str = opts.ASR_SERVER_IP,
|
|
84
142
|
asr_server_port: int = opts.ASR_SERVER_PORT,
|
|
85
|
-
|
|
143
|
+
# LLM
|
|
144
|
+
model: str = opts.MODEL,
|
|
145
|
+
ollama_host: str = opts.OLLAMA_HOST,
|
|
146
|
+
llm: bool = opts.LLM,
|
|
147
|
+
# Process control
|
|
86
148
|
stop: bool = opts.STOP,
|
|
87
149
|
status: bool = opts.STATUS,
|
|
150
|
+
# General
|
|
151
|
+
clipboard: bool = opts.CLIPBOARD,
|
|
88
152
|
log_level: str = opts.LOG_LEVEL,
|
|
89
153
|
log_file: str | None = opts.LOG_FILE,
|
|
90
154
|
quiet: bool = opts.QUIET,
|
|
@@ -127,5 +191,8 @@ def transcribe(
|
|
|
127
191
|
clipboard=clipboard,
|
|
128
192
|
quiet=quiet,
|
|
129
193
|
list_devices=list_devices,
|
|
194
|
+
model=model,
|
|
195
|
+
ollama_host=ollama_host,
|
|
196
|
+
llm=llm,
|
|
130
197
|
),
|
|
131
198
|
)
|
|
@@ -37,20 +37,15 @@ from __future__ import annotations
|
|
|
37
37
|
|
|
38
38
|
import asyncio
|
|
39
39
|
import logging
|
|
40
|
-
import sys
|
|
41
|
-
import time
|
|
42
|
-
from contextlib import AbstractContextManager, nullcontext, suppress
|
|
43
|
-
from typing import TYPE_CHECKING
|
|
40
|
+
from contextlib import suppress
|
|
44
41
|
|
|
45
|
-
import pyperclip
|
|
46
42
|
from rich.console import Console
|
|
47
43
|
from rich.panel import Panel
|
|
48
|
-
from rich.status import Status
|
|
49
44
|
|
|
50
45
|
import agent_cli.agents._cli_options as opts
|
|
51
46
|
from agent_cli import asr, process_manager
|
|
52
47
|
from agent_cli.cli import app, setup_logging
|
|
53
|
-
from agent_cli.ollama_client import build_agent
|
|
48
|
+
from agent_cli.llm import process_and_update_clipboard
|
|
54
49
|
from agent_cli.utils import (
|
|
55
50
|
_print,
|
|
56
51
|
get_clipboard_text,
|
|
@@ -58,10 +53,6 @@ from agent_cli.utils import (
|
|
|
58
53
|
signal_handling_context,
|
|
59
54
|
)
|
|
60
55
|
|
|
61
|
-
if TYPE_CHECKING:
|
|
62
|
-
from pydantic_ai import Agent
|
|
63
|
-
|
|
64
|
-
|
|
65
56
|
# LLM Prompts
|
|
66
57
|
SYSTEM_PROMPT = """\
|
|
67
58
|
You are a versatile AI text assistant. Your purpose is to either **modify** a given text or **answer questions** about it, based on a specific instruction.
|
|
@@ -86,93 +77,6 @@ Return ONLY the resulting text (either the edit or the answer), with no extra fo
|
|
|
86
77
|
"""
|
|
87
78
|
|
|
88
79
|
|
|
89
|
-
# --- LLM (Editing) Logic ---
|
|
90
|
-
|
|
91
|
-
INPUT_TEMPLATE = """
|
|
92
|
-
<original-text>
|
|
93
|
-
{original_text}
|
|
94
|
-
</original-text>
|
|
95
|
-
|
|
96
|
-
<instruction>
|
|
97
|
-
{instruction}
|
|
98
|
-
</instruction>
|
|
99
|
-
"""
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
async def process_with_llm(
|
|
103
|
-
agent: Agent,
|
|
104
|
-
original_text: str,
|
|
105
|
-
instruction: str,
|
|
106
|
-
) -> tuple[str, float]:
|
|
107
|
-
"""Run the agent asynchronously and return corrected text and elapsed time."""
|
|
108
|
-
user_input = INPUT_TEMPLATE.format(original_text=original_text, instruction=instruction)
|
|
109
|
-
t_start = time.monotonic()
|
|
110
|
-
result = await agent.run(user_input)
|
|
111
|
-
t_end = time.monotonic()
|
|
112
|
-
return result.output, t_end - t_start
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
def _maybe_status(console: Console | None, model: str) -> AbstractContextManager[Status | None]:
|
|
116
|
-
"""Context manager for status display."""
|
|
117
|
-
if console:
|
|
118
|
-
return Status(
|
|
119
|
-
f"[bold yellow]🤖 Applying instruction with {model}...[/bold yellow]",
|
|
120
|
-
console=console,
|
|
121
|
-
)
|
|
122
|
-
return nullcontext()
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
async def process_and_update_clipboard(
|
|
126
|
-
model: str,
|
|
127
|
-
ollama_host: str,
|
|
128
|
-
logger: logging.Logger,
|
|
129
|
-
console: Console | None,
|
|
130
|
-
original_text: str,
|
|
131
|
-
instruction: str,
|
|
132
|
-
) -> None:
|
|
133
|
-
"""Processes the text with the LLM, updates the clipboard, and displays the result.
|
|
134
|
-
|
|
135
|
-
In quiet mode, only the result is printed to stdout.
|
|
136
|
-
"""
|
|
137
|
-
agent = build_agent(
|
|
138
|
-
model=model,
|
|
139
|
-
ollama_host=ollama_host,
|
|
140
|
-
system_prompt=SYSTEM_PROMPT,
|
|
141
|
-
instructions=AGENT_INSTRUCTIONS,
|
|
142
|
-
)
|
|
143
|
-
try:
|
|
144
|
-
with _maybe_status(console, model):
|
|
145
|
-
result_text, elapsed = await process_with_llm(agent, original_text, instruction)
|
|
146
|
-
|
|
147
|
-
pyperclip.copy(result_text)
|
|
148
|
-
logger.info("Copied result to clipboard.")
|
|
149
|
-
|
|
150
|
-
if console:
|
|
151
|
-
console.print(
|
|
152
|
-
Panel(
|
|
153
|
-
result_text,
|
|
154
|
-
title="[bold green]✨ Result (Copied to Clipboard)[/bold green]",
|
|
155
|
-
border_style="green",
|
|
156
|
-
subtitle=f"[dim]took {elapsed:.2f}s[/dim]",
|
|
157
|
-
),
|
|
158
|
-
)
|
|
159
|
-
else:
|
|
160
|
-
# Quiet mode: print result to stdout for Keyboard Maestro to capture
|
|
161
|
-
print(result_text)
|
|
162
|
-
|
|
163
|
-
except Exception as e:
|
|
164
|
-
logger.exception("An error occurred during LLM processing.")
|
|
165
|
-
_print(
|
|
166
|
-
console,
|
|
167
|
-
f"❌ [bold red]An unexpected LLM error occurred: {e}[/bold red]",
|
|
168
|
-
)
|
|
169
|
-
_print(
|
|
170
|
-
console,
|
|
171
|
-
f" Please check your Ollama server at [cyan]{ollama_host}[/cyan]",
|
|
172
|
-
)
|
|
173
|
-
sys.exit(1)
|
|
174
|
-
|
|
175
|
-
|
|
176
80
|
# --- Main Application Logic ---
|
|
177
81
|
|
|
178
82
|
|
|
@@ -186,6 +90,7 @@ async def async_main(
|
|
|
186
90
|
asr_server_port: int,
|
|
187
91
|
model: str,
|
|
188
92
|
ollama_host: str,
|
|
93
|
+
clipboard: bool,
|
|
189
94
|
) -> None:
|
|
190
95
|
"""Main async function, consumes parsed arguments."""
|
|
191
96
|
logger = logging.getLogger()
|
|
@@ -232,16 +137,22 @@ async def async_main(
|
|
|
232
137
|
)
|
|
233
138
|
|
|
234
139
|
if not instruction or not instruction.strip():
|
|
235
|
-
_print(console, "[yellow]No instruction was transcribed. Exiting.[/yellow]")
|
|
140
|
+
_print(
|
|
141
|
+
console,
|
|
142
|
+
"[yellow]No instruction was transcribed. Exiting.[/yellow]",
|
|
143
|
+
)
|
|
236
144
|
return
|
|
237
145
|
|
|
238
146
|
await process_and_update_clipboard(
|
|
147
|
+
system_prompt=SYSTEM_PROMPT,
|
|
148
|
+
agent_instructions=AGENT_INSTRUCTIONS,
|
|
239
149
|
model=model,
|
|
240
150
|
ollama_host=ollama_host,
|
|
241
151
|
logger=logger,
|
|
242
152
|
console=console,
|
|
243
153
|
original_text=original_text,
|
|
244
154
|
instruction=instruction,
|
|
155
|
+
clipboard=clipboard,
|
|
245
156
|
)
|
|
246
157
|
|
|
247
158
|
|
|
@@ -250,13 +161,18 @@ def voice_assistant(
|
|
|
250
161
|
device_index: int | None = opts.DEVICE_INDEX,
|
|
251
162
|
device_name: str | None = opts.DEVICE_NAME,
|
|
252
163
|
*,
|
|
164
|
+
# ASR
|
|
253
165
|
list_devices: bool = opts.LIST_DEVICES,
|
|
254
166
|
asr_server_ip: str = opts.ASR_SERVER_IP,
|
|
255
167
|
asr_server_port: int = opts.ASR_SERVER_PORT,
|
|
168
|
+
# LLM
|
|
256
169
|
model: str = opts.MODEL,
|
|
257
170
|
ollama_host: str = opts.OLLAMA_HOST,
|
|
171
|
+
# Process control
|
|
258
172
|
stop: bool = opts.STOP,
|
|
259
173
|
status: bool = opts.STATUS,
|
|
174
|
+
# General
|
|
175
|
+
clipboard: bool = opts.CLIPBOARD,
|
|
260
176
|
log_level: str = opts.LOG_LEVEL,
|
|
261
177
|
log_file: str | None = opts.LOG_FILE,
|
|
262
178
|
quiet: bool = opts.QUIET,
|
|
@@ -303,5 +219,6 @@ def voice_assistant(
|
|
|
303
219
|
asr_server_port=asr_server_port,
|
|
304
220
|
model=model,
|
|
305
221
|
ollama_host=ollama_host,
|
|
222
|
+
clipboard=clipboard,
|
|
306
223
|
),
|
|
307
224
|
)
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
"""Client for interacting with Ollama."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import sys
|
|
6
|
+
import time
|
|
7
|
+
from contextlib import AbstractContextManager, nullcontext
|
|
8
|
+
from typing import TYPE_CHECKING
|
|
9
|
+
|
|
10
|
+
import pyperclip
|
|
11
|
+
from pydantic_ai import Agent
|
|
12
|
+
from pydantic_ai.models.openai import OpenAIModel
|
|
13
|
+
from pydantic_ai.providers.openai import OpenAIProvider
|
|
14
|
+
from rich.panel import Panel
|
|
15
|
+
from rich.status import Status
|
|
16
|
+
|
|
17
|
+
from agent_cli.utils import _print
|
|
18
|
+
|
|
19
|
+
if TYPE_CHECKING:
|
|
20
|
+
import logging
|
|
21
|
+
|
|
22
|
+
from rich.console import Console
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def build_agent(
|
|
26
|
+
model: str,
|
|
27
|
+
ollama_host: str,
|
|
28
|
+
*,
|
|
29
|
+
system_prompt: str | None = None,
|
|
30
|
+
instructions: str | None = None,
|
|
31
|
+
) -> Agent:
|
|
32
|
+
"""Construct and return a PydanticAI agent configured for local Ollama."""
|
|
33
|
+
ollama_provider = OpenAIProvider(base_url=f"{ollama_host}/v1")
|
|
34
|
+
ollama_model = OpenAIModel(model_name=model, provider=ollama_provider)
|
|
35
|
+
return Agent(
|
|
36
|
+
model=ollama_model,
|
|
37
|
+
system_prompt=system_prompt or (),
|
|
38
|
+
instructions=instructions,
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# --- LLM (Editing) Logic ---
|
|
43
|
+
|
|
44
|
+
INPUT_TEMPLATE = """
|
|
45
|
+
<original-text>
|
|
46
|
+
{original_text}
|
|
47
|
+
</original-text>
|
|
48
|
+
|
|
49
|
+
<instruction>
|
|
50
|
+
{instruction}
|
|
51
|
+
</instruction>
|
|
52
|
+
"""
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
async def process_with_llm(
|
|
56
|
+
agent: Agent,
|
|
57
|
+
original_text: str,
|
|
58
|
+
instruction: str,
|
|
59
|
+
) -> tuple[str, float]:
|
|
60
|
+
"""Run the agent asynchronously and return corrected text and elapsed time."""
|
|
61
|
+
user_input = INPUT_TEMPLATE.format(
|
|
62
|
+
original_text=original_text,
|
|
63
|
+
instruction=instruction,
|
|
64
|
+
)
|
|
65
|
+
t_start = time.monotonic()
|
|
66
|
+
result = await agent.run(user_input)
|
|
67
|
+
t_end = time.monotonic()
|
|
68
|
+
return result.output, t_end - t_start
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _maybe_status(
|
|
72
|
+
console: Console | None,
|
|
73
|
+
model: str,
|
|
74
|
+
) -> AbstractContextManager[Status | None]:
|
|
75
|
+
"""Context manager for status display."""
|
|
76
|
+
if console:
|
|
77
|
+
return Status(
|
|
78
|
+
f"[bold yellow]🤖 Applying instruction with {model}...[/bold yellow]",
|
|
79
|
+
console=console,
|
|
80
|
+
)
|
|
81
|
+
return nullcontext()
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
async def process_and_update_clipboard(
|
|
85
|
+
system_prompt: str,
|
|
86
|
+
agent_instructions: str,
|
|
87
|
+
*,
|
|
88
|
+
model: str,
|
|
89
|
+
ollama_host: str,
|
|
90
|
+
logger: logging.Logger,
|
|
91
|
+
console: Console | None,
|
|
92
|
+
original_text: str,
|
|
93
|
+
instruction: str,
|
|
94
|
+
clipboard: bool,
|
|
95
|
+
) -> None:
|
|
96
|
+
"""Processes the text with the LLM, updates the clipboard, and displays the result.
|
|
97
|
+
|
|
98
|
+
In quiet mode, only the result is printed to stdout.
|
|
99
|
+
"""
|
|
100
|
+
agent = build_agent(
|
|
101
|
+
model=model,
|
|
102
|
+
ollama_host=ollama_host,
|
|
103
|
+
system_prompt=system_prompt,
|
|
104
|
+
instructions=agent_instructions,
|
|
105
|
+
)
|
|
106
|
+
try:
|
|
107
|
+
with _maybe_status(console, model):
|
|
108
|
+
result_text, elapsed = await process_with_llm(
|
|
109
|
+
agent,
|
|
110
|
+
original_text,
|
|
111
|
+
instruction,
|
|
112
|
+
)
|
|
113
|
+
|
|
114
|
+
if clipboard:
|
|
115
|
+
pyperclip.copy(result_text)
|
|
116
|
+
logger.info("Copied result to clipboard.")
|
|
117
|
+
|
|
118
|
+
if console:
|
|
119
|
+
console.print(
|
|
120
|
+
Panel(
|
|
121
|
+
result_text,
|
|
122
|
+
title="[bold green]✨ Result (Copied to Clipboard)[/bold green]",
|
|
123
|
+
border_style="green",
|
|
124
|
+
subtitle=f"[dim]took {elapsed:.2f}s[/dim]",
|
|
125
|
+
),
|
|
126
|
+
)
|
|
127
|
+
else:
|
|
128
|
+
# Quiet mode: print result to stdout for Keyboard Maestro to capture
|
|
129
|
+
print(result_text)
|
|
130
|
+
|
|
131
|
+
except Exception as e:
|
|
132
|
+
logger.exception("An error occurred during LLM processing.")
|
|
133
|
+
_print(
|
|
134
|
+
console,
|
|
135
|
+
f"❌ [bold red]An unexpected LLM error occurred: {e}[/bold red]",
|
|
136
|
+
)
|
|
137
|
+
_print(
|
|
138
|
+
console,
|
|
139
|
+
f" Please check your Ollama server at [cyan]{ollama_host}[/cyan]",
|
|
140
|
+
)
|
|
141
|
+
sys.exit(1)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: agent-cli
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: A suite of AI-powered command-line tools for text correction, audio transcription, and voice assistance.
|
|
5
5
|
Author-email: Bas Nijholt <bas@nijho.lt>
|
|
6
6
|
Project-URL: Homepage, https://github.com/basnijholt/agent-cli
|
|
@@ -196,13 +196,23 @@ Transcribes whatever you say into text using Wyoming ASR (Automatic Speech Recog
|
|
|
196
196
|
│ [default: 192.168.1.143] │
|
|
197
197
|
│ --asr-server-port INTEGER Wyoming ASR server port. │
|
|
198
198
|
│ [default: 10300] │
|
|
199
|
-
│ --
|
|
200
|
-
│
|
|
201
|
-
│ [default:
|
|
199
|
+
│ --model -m TEXT The Ollama model to use. │
|
|
200
|
+
│ Default is devstral:24b. │
|
|
201
|
+
│ [default: devstral:24b] │
|
|
202
|
+
│ --ollama-host TEXT The Ollama server host. │
|
|
203
|
+
│ Default is │
|
|
204
|
+
│ http://localhost:11434. │
|
|
205
|
+
│ [default: │
|
|
206
|
+
│ http://localhost:11434] │
|
|
207
|
+
│ --llm --no-llm Use an LLM to process the │
|
|
208
|
+
│ transcript. │
|
|
209
|
+
│ [default: no-llm] │
|
|
202
210
|
│ --stop Stop any running │
|
|
203
211
|
│ background process. │
|
|
204
212
|
│ --status Check if a background │
|
|
205
213
|
│ process is running. │
|
|
214
|
+
│ --clipboard --no-clipboard Copy result to clipboard. │
|
|
215
|
+
│ [default: clipboard] │
|
|
206
216
|
│ --log-level TEXT Set logging level. │
|
|
207
217
|
│ [default: WARNING] │
|
|
208
218
|
│ --log-file TEXT Path to a file to write │
|
|
@@ -268,35 +278,48 @@ The process management features make it perfect for hotkey toggles. Use `--statu
|
|
|
268
278
|
voice-assistant --stop
|
|
269
279
|
|
|
270
280
|
╭─ Options ────────────────────────────────────────────────────────────────────╮
|
|
271
|
-
│ --device-index
|
|
272
|
-
│
|
|
273
|
-
│
|
|
274
|
-
│ --device-name
|
|
275
|
-
│
|
|
276
|
-
│
|
|
277
|
-
│
|
|
278
|
-
│
|
|
279
|
-
│
|
|
280
|
-
│
|
|
281
|
-
│
|
|
282
|
-
│
|
|
283
|
-
│
|
|
284
|
-
│
|
|
285
|
-
│
|
|
286
|
-
│
|
|
287
|
-
│
|
|
288
|
-
│
|
|
289
|
-
│
|
|
290
|
-
│
|
|
291
|
-
│
|
|
292
|
-
│
|
|
293
|
-
│ --
|
|
294
|
-
│
|
|
295
|
-
│
|
|
296
|
-
│
|
|
297
|
-
│
|
|
298
|
-
│ --
|
|
299
|
-
│
|
|
281
|
+
│ --device-index INTEGER Index of the PyAudio input │
|
|
282
|
+
│ device to use. │
|
|
283
|
+
│ [default: None] │
|
|
284
|
+
│ --device-name TEXT Device name keywords for │
|
|
285
|
+
│ partial matching. Supports │
|
|
286
|
+
│ comma-separated list where │
|
|
287
|
+
│ each term can partially │
|
|
288
|
+
│ match device names │
|
|
289
|
+
│ (case-insensitive). First │
|
|
290
|
+
│ matching device is │
|
|
291
|
+
│ selected. │
|
|
292
|
+
│ [default: None] │
|
|
293
|
+
│ --list-devices List available audio input │
|
|
294
|
+
│ devices and exit. │
|
|
295
|
+
│ --asr-server-ip TEXT Wyoming ASR server IP │
|
|
296
|
+
│ address. │
|
|
297
|
+
│ [default: 192.168.1.143] │
|
|
298
|
+
│ --asr-server-port INTEGER Wyoming ASR server port. │
|
|
299
|
+
│ [default: 10300] │
|
|
300
|
+
│ --model -m TEXT The Ollama model to use. │
|
|
301
|
+
│ Default is devstral:24b. │
|
|
302
|
+
│ [default: devstral:24b] │
|
|
303
|
+
│ --ollama-host TEXT The Ollama server host. │
|
|
304
|
+
│ Default is │
|
|
305
|
+
│ http://localhost:11434. │
|
|
306
|
+
│ [default: │
|
|
307
|
+
│ http://localhost:11434] │
|
|
308
|
+
│ --stop Stop any running │
|
|
309
|
+
│ background process. │
|
|
310
|
+
│ --status Check if a background │
|
|
311
|
+
│ process is running. │
|
|
312
|
+
│ --clipboard --no-clipboard Copy result to clipboard. │
|
|
313
|
+
│ [default: clipboard] │
|
|
314
|
+
│ --log-level TEXT Set logging level. │
|
|
315
|
+
│ [default: WARNING] │
|
|
316
|
+
│ --log-file TEXT Path to a file to write │
|
|
317
|
+
│ logs to. │
|
|
318
|
+
│ [default: None] │
|
|
319
|
+
│ --quiet -q Suppress console output │
|
|
320
|
+
│ from rich. │
|
|
321
|
+
│ --help Show this message and │
|
|
322
|
+
│ exit. │
|
|
300
323
|
╰──────────────────────────────────────────────────────────────────────────────╯
|
|
301
324
|
|
|
302
325
|
```
|
|
@@ -5,7 +5,7 @@ agent_cli/__init__.py
|
|
|
5
5
|
agent_cli/asr.py
|
|
6
6
|
agent_cli/cli.py
|
|
7
7
|
agent_cli/config.py
|
|
8
|
-
agent_cli/ollama_client.py
|
|
8
|
+
agent_cli/llm.py
|
|
9
9
|
agent_cli/process_manager.py
|
|
10
10
|
agent_cli/py.typed
|
|
11
11
|
agent_cli/utils.py
|
|
@@ -20,6 +20,6 @@ agent_cli/agents/_cli_options.py
|
|
|
20
20
|
agent_cli/agents/autocorrect.py
|
|
21
21
|
agent_cli/agents/transcribe.py
|
|
22
22
|
agent_cli/agents/voice_assistant.py
|
|
23
|
-
tests/test_ollama_client.py
|
|
23
|
+
tests/test_llm.py
|
|
24
24
|
tests/test_process_manager.py
|
|
25
25
|
tests/test_utils.py
|
|
@@ -1,24 +0,0 @@
|
|
|
1
|
-
"""Client for interacting with Ollama."""
|
|
2
|
-
|
|
3
|
-
from __future__ import annotations
|
|
4
|
-
|
|
5
|
-
from pydantic_ai import Agent
|
|
6
|
-
from pydantic_ai.models.openai import OpenAIModel
|
|
7
|
-
from pydantic_ai.providers.openai import OpenAIProvider
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
def build_agent(
|
|
11
|
-
model: str,
|
|
12
|
-
ollama_host: str,
|
|
13
|
-
*,
|
|
14
|
-
system_prompt: str | None = None,
|
|
15
|
-
instructions: str | None = None,
|
|
16
|
-
) -> Agent:
|
|
17
|
-
"""Construct and return a PydanticAI agent configured for local Ollama."""
|
|
18
|
-
ollama_provider = OpenAIProvider(base_url=f"{ollama_host}/v1")
|
|
19
|
-
ollama_model = OpenAIModel(model_name=model, provider=ollama_provider)
|
|
20
|
-
return Agent(
|
|
21
|
-
model=ollama_model,
|
|
22
|
-
system_prompt=system_prompt or (),
|
|
23
|
-
instructions=instructions,
|
|
24
|
-
)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|