agent-cli 0.2.0__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. {agent_cli-0.2.0 → agent_cli-0.3.0}/PKG-INFO +56 -33
  2. {agent_cli-0.2.0 → agent_cli-0.3.0}/README.md +55 -32
  3. {agent_cli-0.2.0 → agent_cli-0.3.0}/agent_cli/agents/_cli_options.py +10 -5
  4. {agent_cli-0.2.0 → agent_cli-0.3.0}/agent_cli/agents/autocorrect.py +1 -1
  5. {agent_cli-0.2.0 → agent_cli-0.3.0}/agent_cli/agents/transcribe.py +77 -10
  6. {agent_cli-0.2.0 → agent_cli-0.3.0}/agent_cli/agents/voice_assistant.py +16 -99
  7. agent_cli-0.3.0/agent_cli/llm.py +141 -0
  8. {agent_cli-0.2.0 → agent_cli-0.3.0}/agent_cli.egg-info/PKG-INFO +56 -33
  9. {agent_cli-0.2.0 → agent_cli-0.3.0}/agent_cli.egg-info/SOURCES.txt +2 -2
  10. agent_cli-0.2.0/tests/test_ollama_client.py → agent_cli-0.3.0/tests/test_llm.py +1 -1
  11. agent_cli-0.2.0/agent_cli/ollama_client.py +0 -24
  12. {agent_cli-0.2.0 → agent_cli-0.3.0}/LICENSE +0 -0
  13. {agent_cli-0.2.0 → agent_cli-0.3.0}/agent_cli/__init__.py +0 -0
  14. {agent_cli-0.2.0 → agent_cli-0.3.0}/agent_cli/agents/__init__.py +0 -0
  15. {agent_cli-0.2.0 → agent_cli-0.3.0}/agent_cli/asr.py +0 -0
  16. {agent_cli-0.2.0 → agent_cli-0.3.0}/agent_cli/cli.py +0 -0
  17. {agent_cli-0.2.0 → agent_cli-0.3.0}/agent_cli/config.py +0 -0
  18. {agent_cli-0.2.0 → agent_cli-0.3.0}/agent_cli/process_manager.py +0 -0
  19. {agent_cli-0.2.0 → agent_cli-0.3.0}/agent_cli/py.typed +0 -0
  20. {agent_cli-0.2.0 → agent_cli-0.3.0}/agent_cli/utils.py +0 -0
  21. {agent_cli-0.2.0 → agent_cli-0.3.0}/agent_cli.egg-info/dependency_links.txt +0 -0
  22. {agent_cli-0.2.0 → agent_cli-0.3.0}/agent_cli.egg-info/entry_points.txt +0 -0
  23. {agent_cli-0.2.0 → agent_cli-0.3.0}/agent_cli.egg-info/requires.txt +0 -0
  24. {agent_cli-0.2.0 → agent_cli-0.3.0}/agent_cli.egg-info/top_level.txt +0 -0
  25. {agent_cli-0.2.0 → agent_cli-0.3.0}/pyproject.toml +0 -0
  26. {agent_cli-0.2.0 → agent_cli-0.3.0}/setup.cfg +0 -0
  27. {agent_cli-0.2.0 → agent_cli-0.3.0}/tests/test_process_manager.py +0 -0
  28. {agent_cli-0.2.0 → agent_cli-0.3.0}/tests/test_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: agent-cli
3
- Version: 0.2.0
3
+ Version: 0.3.0
4
4
  Summary: A suite of AI-powered command-line tools for text correction, audio transcription, and voice assistance.
5
5
  Author-email: Bas Nijholt <bas@nijho.lt>
6
6
  Project-URL: Homepage, https://github.com/basnijholt/agent-cli
@@ -196,13 +196,23 @@ Transcribes whatever you say into text using Wyoming ASR (Automatic Speech Recog
196
196
  │ [default: 192.168.1.143] │
197
197
  │ --asr-server-port INTEGER Wyoming ASR server port. │
198
198
  │ [default: 10300] │
199
- │ --clipboard --no-clipboard Copy transcript to
200
- clipboard.
201
- │ [default: clipboard]
199
+ │ --model -m TEXT The Ollama model to use.
200
+ Default is devstral:24b.
201
+ │ [default: devstral:24b]
202
+ │ --ollama-host TEXT The Ollama server host. │
203
+ │ Default is │
204
+ │ http://localhost:11434. │
205
+ │ [default: │
206
+ │ http://localhost:11434] │
207
+ │ --llm --no-llm Use an LLM to process the │
208
+ │ transcript. │
209
+ │ [default: no-llm] │
202
210
  │ --stop Stop any running │
203
211
  │ background process. │
204
212
  │ --status Check if a background │
205
213
  │ process is running. │
214
+ │ --clipboard --no-clipboard Copy result to clipboard. │
215
+ │ [default: clipboard] │
206
216
  │ --log-level TEXT Set logging level. │
207
217
  │ [default: WARNING] │
208
218
  │ --log-file TEXT Path to a file to write │
@@ -268,35 +278,48 @@ The process management features make it perfect for hotkey toggles. Use `--statu
268
278
  voice-assistant --stop
269
279
 
270
280
  ╭─ Options ────────────────────────────────────────────────────────────────────╮
271
- │ --device-index INTEGER Index of the PyAudio input device to
272
- use.
273
- [default: None]
274
- │ --device-name TEXT Device name keywords for partial
275
- matching. Supports comma-separated list
276
- where each term can partially match
277
- device names (case-insensitive). First
278
- matching device is selected.
279
- [default: None]
280
- --list-devices List available audio input devices and
281
- exit.
282
- --asr-server-ip TEXT Wyoming ASR server IP address.
283
- [default: 192.168.1.143]
284
- --asr-server-port INTEGER Wyoming ASR server port.
285
- [default: 10300]
286
- --model -m TEXT The Ollama model to use. Default is
287
- devstral:24b.
288
- [default: devstral:24b]
289
- --ollama-host TEXT The Ollama server host. Default is
290
- http://localhost:11434.
291
- [default: http://localhost:11434]
292
- --stop Stop any running background process.
293
- │ --status Check if a background process is
294
- running.
295
- --log-level TEXT Set logging level. [default: WARNING]
296
- --log-file TEXT Path to a file to write logs to.
297
- [default: None]
298
- │ --quiet -q Suppress console output from rich.
299
- --help Show this message and exit.
281
+ │ --device-index INTEGER Index of the PyAudio input │
282
+ device to use.
283
+ [default: None]
284
+ │ --device-name TEXT Device name keywords for
285
+ partial matching. Supports │
286
+ comma-separated list where
287
+ each term can partially
288
+ match device names
289
+ (case-insensitive). First
290
+ matching device is
291
+ selected.
292
+ [default: None]
293
+ --list-devices List available audio input
294
+ devices and exit.
295
+ --asr-server-ip TEXT Wyoming ASR server IP
296
+ address.
297
+ [default: 192.168.1.143]
298
+ --asr-server-port INTEGER Wyoming ASR server port.
299
+ [default: 10300]
300
+ --model -m TEXT The Ollama model to use.
301
+ Default is devstral:24b.
302
+ [default: devstral:24b]
303
+ │ --ollama-host TEXT The Ollama server host.
304
+ Default is
305
+ http://localhost:11434. │
306
+ [default:
307
+ http://localhost:11434]
308
+ │ --stop Stop any running
309
+ background process.
310
+ │ --status Check if a background │
311
+ │ process is running. │
312
+ │ --clipboard --no-clipboard Copy result to clipboard. │
313
+ │ [default: clipboard] │
314
+ │ --log-level TEXT Set logging level. │
315
+ │ [default: WARNING] │
316
+ │ --log-file TEXT Path to a file to write │
317
+ │ logs to. │
318
+ │ [default: None] │
319
+ │ --quiet -q Suppress console output │
320
+ │ from rich. │
321
+ │ --help Show this message and │
322
+ │ exit. │
300
323
  ╰──────────────────────────────────────────────────────────────────────────────╯
301
324
 
302
325
  ```
@@ -166,13 +166,23 @@ Transcribes whatever you say into text using Wyoming ASR (Automatic Speech Recog
166
166
  │ [default: 192.168.1.143] │
167
167
  │ --asr-server-port INTEGER Wyoming ASR server port. │
168
168
  │ [default: 10300] │
169
- │ --clipboard --no-clipboard Copy transcript to
170
- clipboard.
171
- │ [default: clipboard]
169
+ │ --model -m TEXT The Ollama model to use.
170
+ Default is devstral:24b.
171
+ │ [default: devstral:24b]
172
+ │ --ollama-host TEXT The Ollama server host. │
173
+ │ Default is │
174
+ │ http://localhost:11434. │
175
+ │ [default: │
176
+ │ http://localhost:11434] │
177
+ │ --llm --no-llm Use an LLM to process the │
178
+ │ transcript. │
179
+ │ [default: no-llm] │
172
180
  │ --stop Stop any running │
173
181
  │ background process. │
174
182
  │ --status Check if a background │
175
183
  │ process is running. │
184
+ │ --clipboard --no-clipboard Copy result to clipboard. │
185
+ │ [default: clipboard] │
176
186
  │ --log-level TEXT Set logging level. │
177
187
  │ [default: WARNING] │
178
188
  │ --log-file TEXT Path to a file to write │
@@ -238,35 +248,48 @@ The process management features make it perfect for hotkey toggles. Use `--statu
238
248
  voice-assistant --stop
239
249
 
240
250
  ╭─ Options ────────────────────────────────────────────────────────────────────╮
241
- │ --device-index INTEGER Index of the PyAudio input device to
242
- use.
243
- [default: None]
244
- │ --device-name TEXT Device name keywords for partial
245
- matching. Supports comma-separated list
246
- where each term can partially match
247
- device names (case-insensitive). First
248
- matching device is selected.
249
- [default: None]
250
- --list-devices List available audio input devices and
251
- exit.
252
- --asr-server-ip TEXT Wyoming ASR server IP address.
253
- [default: 192.168.1.143]
254
- --asr-server-port INTEGER Wyoming ASR server port.
255
- [default: 10300]
256
- --model -m TEXT The Ollama model to use. Default is
257
- devstral:24b.
258
- [default: devstral:24b]
259
- --ollama-host TEXT The Ollama server host. Default is
260
- http://localhost:11434.
261
- [default: http://localhost:11434]
262
- --stop Stop any running background process.
263
- │ --status Check if a background process is
264
- running.
265
- --log-level TEXT Set logging level. [default: WARNING]
266
- --log-file TEXT Path to a file to write logs to.
267
- [default: None]
268
- │ --quiet -q Suppress console output from rich.
269
- --help Show this message and exit.
251
+ │ --device-index INTEGER Index of the PyAudio input │
252
+ device to use.
253
+ [default: None]
254
+ │ --device-name TEXT Device name keywords for
255
+ partial matching. Supports │
256
+ comma-separated list where
257
+ each term can partially
258
+ match device names
259
+ (case-insensitive). First
260
+ matching device is
261
+ selected.
262
+ [default: None]
263
+ --list-devices List available audio input
264
+ devices and exit.
265
+ --asr-server-ip TEXT Wyoming ASR server IP
266
+ address.
267
+ [default: 192.168.1.143]
268
+ --asr-server-port INTEGER Wyoming ASR server port.
269
+ [default: 10300]
270
+ --model -m TEXT The Ollama model to use.
271
+ Default is devstral:24b.
272
+ [default: devstral:24b]
273
+ │ --ollama-host TEXT The Ollama server host.
274
+ Default is
275
+ http://localhost:11434. │
276
+ [default:
277
+ http://localhost:11434]
278
+ │ --stop Stop any running
279
+ background process.
280
+ │ --status Check if a background │
281
+ │ process is running. │
282
+ │ --clipboard --no-clipboard Copy result to clipboard. │
283
+ │ [default: clipboard] │
284
+ │ --log-level TEXT Set logging level. │
285
+ │ [default: WARNING] │
286
+ │ --log-file TEXT Path to a file to write │
287
+ │ logs to. │
288
+ │ [default: None] │
289
+ │ --quiet -q Suppress console output │
290
+ │ from rich. │
291
+ │ --help Show this message and │
292
+ │ exit. │
270
293
  ╰──────────────────────────────────────────────────────────────────────────────╯
271
294
 
272
295
  ```
@@ -18,6 +18,11 @@ OLLAMA_HOST: str = typer.Option(
18
18
  "--ollama-host",
19
19
  help=f"The Ollama server host. Default is {config.OLLAMA_HOST}.",
20
20
  )
21
+ LLM: bool = typer.Option(
22
+ False, # noqa: FBT003
23
+ "--llm/--no-llm",
24
+ help="Use an LLM to process the transcript.",
25
+ )
21
26
 
22
27
 
23
28
  # --- ASR (Audio) Options ---
@@ -47,11 +52,6 @@ ASR_SERVER_PORT: int = typer.Option(
47
52
  "--asr-server-port",
48
53
  help="Wyoming ASR server port.",
49
54
  )
50
- CLIPBOARD: bool = typer.Option(
51
- True, # noqa: FBT003
52
- "--clipboard/--no-clipboard",
53
- help="Copy transcript to clipboard.",
54
- )
55
55
 
56
56
 
57
57
  # --- Process Management Options ---
@@ -68,6 +68,11 @@ STATUS: bool = typer.Option(
68
68
 
69
69
 
70
70
  # --- General Options ---
71
+ CLIPBOARD: bool = typer.Option(
72
+ True, # noqa: FBT003
73
+ "--clipboard/--no-clipboard",
74
+ help="Copy result to clipboard.",
75
+ )
71
76
  LOG_LEVEL: str = typer.Option(
72
77
  "WARNING",
73
78
  "--log-level",
@@ -32,7 +32,7 @@ from rich.status import Status
32
32
 
33
33
  import agent_cli.agents._cli_options as opts
34
34
  from agent_cli.cli import app, setup_logging
35
- from agent_cli.ollama_client import build_agent
35
+ from agent_cli.llm import build_agent
36
36
  from agent_cli.utils import get_clipboard_text
37
37
 
38
38
  # --- Configuration ---
@@ -9,23 +9,63 @@ from contextlib import AbstractContextManager, nullcontext, suppress
9
9
  import pyperclip
10
10
  from rich.console import Console
11
11
  from rich.live import Live
12
+ from rich.panel import Panel
12
13
  from rich.text import Text
13
14
 
14
15
  import agent_cli.agents._cli_options as opts
15
16
  from agent_cli import asr, process_manager
16
17
  from agent_cli.cli import app, setup_logging
18
+ from agent_cli.llm import process_and_update_clipboard
17
19
  from agent_cli.utils import _print, print_device_index, signal_handling_context
18
20
 
21
+ SYSTEM_PROMPT = """
22
+ You are an AI transcription cleanup assistant. Your purpose is to improve and refine raw speech-to-text transcriptions by correcting errors, adding proper punctuation, and enhancing readability while preserving the original meaning and intent.
23
+
24
+ Your tasks include:
25
+ - Correcting obvious speech recognition errors and mishearing
26
+ - Adding appropriate punctuation (periods, commas, question marks, etc.)
27
+ - Fixing capitalization where needed
28
+ - Removing filler words, false starts, and repeated words when they clearly weren't intentional
29
+ - Improving sentence structure and flow while maintaining the speaker's voice and meaning
30
+ - Formatting the text for better readability
31
+
32
+ Important rules:
33
+ - Do not change the core meaning or content of the transcription
34
+ - Do not add information that wasn't spoken
35
+ - Do not remove content unless it's clearly an error or filler
36
+ - Return ONLY the cleaned-up text without any explanations or commentary
37
+ - Do not wrap your output in markdown or code blocks
38
+ """
39
+
40
+ AGENT_INSTRUCTIONS = """
41
+ You will be given a block of raw transcribed text enclosed in <original-text> tags, and a cleanup instruction enclosed in <instruction> tags.
42
+
43
+ Your job is to process the transcribed text according to the instruction, which will typically involve:
44
+ - Correcting speech recognition errors
45
+ - Adding proper punctuation and capitalization
46
+ - Removing obvious filler words and false starts
47
+ - Improving readability while preserving meaning
48
+
49
+ Return ONLY the cleaned-up text with no additional formatting or commentary.
50
+ """
51
+
52
+ INSTRUCTION = """
53
+ Please clean up this transcribed text by correcting any speech recognition errors, adding appropriate punctuation and capitalization, removing obvious filler words or false starts, and improving overall readability while preserving the original meaning and intent of the speaker.
54
+ """
55
+
19
56
 
20
57
  async def async_main(
58
+ *,
21
59
  device_index: int | None,
22
60
  device_name: str | None,
23
61
  asr_server_ip: str,
24
62
  asr_server_port: int,
25
- *,
26
63
  clipboard: bool,
27
64
  quiet: bool,
28
65
  list_devices: bool,
66
+ model: str,
67
+ ollama_host: str,
68
+ llm: bool,
29
69
  ) -> None:
30
70
  """Async entry point, consuming parsed args."""
31
71
  logger = logging.getLogger()
@@ -54,14 +94,31 @@ async def async_main(
54
94
  listening_message="Listening...",
55
95
  )
56
96
 
57
- if transcript and clipboard:
58
- pyperclip.copy(transcript)
59
- logger.info("Copied transcript to clipboard.")
60
- _print(console, "[italic green]Copied to clipboard.[/italic green]")
61
- elif not transcript:
62
- logger.info("Transcript empty.")
63
- else:
64
- logger.info("Clipboard copy disabled.")
97
+ if llm and model and ollama_host and transcript:
98
+ _print(console, Panel(transcript, title="[cyan]Raw Transcript 📝[/cyan]"))
99
+ await process_and_update_clipboard(
100
+ system_prompt=SYSTEM_PROMPT,
101
+ agent_instructions=AGENT_INSTRUCTIONS,
102
+ model=model,
103
+ ollama_host=ollama_host,
104
+ logger=logger,
105
+ console=console,
106
+ original_text=transcript,
107
+ instruction=INSTRUCTION,
108
+ clipboard=clipboard,
109
+ )
110
+ return
111
+
112
+ if transcript and clipboard:
113
+ pyperclip.copy(transcript)
114
+ logger.info("Copied transcript to clipboard.")
115
+ _print(console, "[italic blue]Copied to clipboard.[/italic blue]")
116
+ elif not transcript:
117
+ logger.info("Transcript empty.")
118
+ else:
119
+ logger.info("Clipboard copy disabled.")
120
+ if not quiet:
121
+ _print(console, f"[italic green]Transcript: {transcript}[/italic green]")
65
122
 
66
123
 
67
124
  def _maybe_live(console: Console | None) -> AbstractContextManager[Live | None]:
@@ -79,12 +136,19 @@ def transcribe(
79
136
  *,
80
137
  device_index: int | None = opts.DEVICE_INDEX,
81
138
  device_name: str | None = opts.DEVICE_NAME,
139
+ # ASR
82
140
  list_devices: bool = opts.LIST_DEVICES,
83
141
  asr_server_ip: str = opts.ASR_SERVER_IP,
84
142
  asr_server_port: int = opts.ASR_SERVER_PORT,
85
- clipboard: bool = opts.CLIPBOARD,
143
+ # LLM
144
+ model: str = opts.MODEL,
145
+ ollama_host: str = opts.OLLAMA_HOST,
146
+ llm: bool = opts.LLM,
147
+ # Process control
86
148
  stop: bool = opts.STOP,
87
149
  status: bool = opts.STATUS,
150
+ # General
151
+ clipboard: bool = opts.CLIPBOARD,
88
152
  log_level: str = opts.LOG_LEVEL,
89
153
  log_file: str | None = opts.LOG_FILE,
90
154
  quiet: bool = opts.QUIET,
@@ -127,5 +191,8 @@ def transcribe(
127
191
  clipboard=clipboard,
128
192
  quiet=quiet,
129
193
  list_devices=list_devices,
194
+ model=model,
195
+ ollama_host=ollama_host,
196
+ llm=llm,
130
197
  ),
131
198
  )
@@ -37,20 +37,15 @@ from __future__ import annotations
37
37
 
38
38
  import asyncio
39
39
  import logging
40
- import sys
41
- import time
42
- from contextlib import AbstractContextManager, nullcontext, suppress
43
- from typing import TYPE_CHECKING
40
+ from contextlib import suppress
44
41
 
45
- import pyperclip
46
42
  from rich.console import Console
47
43
  from rich.panel import Panel
48
- from rich.status import Status
49
44
 
50
45
  import agent_cli.agents._cli_options as opts
51
46
  from agent_cli import asr, process_manager
52
47
  from agent_cli.cli import app, setup_logging
53
- from agent_cli.ollama_client import build_agent
48
+ from agent_cli.llm import process_and_update_clipboard
54
49
  from agent_cli.utils import (
55
50
  _print,
56
51
  get_clipboard_text,
@@ -58,10 +53,6 @@ from agent_cli.utils import (
58
53
  signal_handling_context,
59
54
  )
60
55
 
61
- if TYPE_CHECKING:
62
- from pydantic_ai import Agent
63
-
64
-
65
56
  # LLM Prompts
66
57
  SYSTEM_PROMPT = """\
67
58
  You are a versatile AI text assistant. Your purpose is to either **modify** a given text or **answer questions** about it, based on a specific instruction.
@@ -86,93 +77,6 @@ Return ONLY the resulting text (either the edit or the answer), with no extra fo
86
77
  """
87
78
 
88
79
 
89
- # --- LLM (Editing) Logic ---
90
-
91
- INPUT_TEMPLATE = """
92
- <original-text>
93
- {original_text}
94
- </original-text>
95
-
96
- <instruction>
97
- {instruction}
98
- </instruction>
99
- """
100
-
101
-
102
- async def process_with_llm(
103
- agent: Agent,
104
- original_text: str,
105
- instruction: str,
106
- ) -> tuple[str, float]:
107
- """Run the agent asynchronously and return corrected text and elapsed time."""
108
- user_input = INPUT_TEMPLATE.format(original_text=original_text, instruction=instruction)
109
- t_start = time.monotonic()
110
- result = await agent.run(user_input)
111
- t_end = time.monotonic()
112
- return result.output, t_end - t_start
113
-
114
-
115
- def _maybe_status(console: Console | None, model: str) -> AbstractContextManager[Status | None]:
116
- """Context manager for status display."""
117
- if console:
118
- return Status(
119
- f"[bold yellow]🤖 Applying instruction with {model}...[/bold yellow]",
120
- console=console,
121
- )
122
- return nullcontext()
123
-
124
-
125
- async def process_and_update_clipboard(
126
- model: str,
127
- ollama_host: str,
128
- logger: logging.Logger,
129
- console: Console | None,
130
- original_text: str,
131
- instruction: str,
132
- ) -> None:
133
- """Processes the text with the LLM, updates the clipboard, and displays the result.
134
-
135
- In quiet mode, only the result is printed to stdout.
136
- """
137
- agent = build_agent(
138
- model=model,
139
- ollama_host=ollama_host,
140
- system_prompt=SYSTEM_PROMPT,
141
- instructions=AGENT_INSTRUCTIONS,
142
- )
143
- try:
144
- with _maybe_status(console, model):
145
- result_text, elapsed = await process_with_llm(agent, original_text, instruction)
146
-
147
- pyperclip.copy(result_text)
148
- logger.info("Copied result to clipboard.")
149
-
150
- if console:
151
- console.print(
152
- Panel(
153
- result_text,
154
- title="[bold green]✨ Result (Copied to Clipboard)[/bold green]",
155
- border_style="green",
156
- subtitle=f"[dim]took {elapsed:.2f}s[/dim]",
157
- ),
158
- )
159
- else:
160
- # Quiet mode: print result to stdout for Keyboard Maestro to capture
161
- print(result_text)
162
-
163
- except Exception as e:
164
- logger.exception("An error occurred during LLM processing.")
165
- _print(
166
- console,
167
- f"❌ [bold red]An unexpected LLM error occurred: {e}[/bold red]",
168
- )
169
- _print(
170
- console,
171
- f" Please check your Ollama server at [cyan]{ollama_host}[/cyan]",
172
- )
173
- sys.exit(1)
174
-
175
-
176
80
  # --- Main Application Logic ---
177
81
 
178
82
 
@@ -186,6 +90,7 @@ async def async_main(
186
90
  asr_server_port: int,
187
91
  model: str,
188
92
  ollama_host: str,
93
+ clipboard: bool,
189
94
  ) -> None:
190
95
  """Main async function, consumes parsed arguments."""
191
96
  logger = logging.getLogger()
@@ -232,16 +137,22 @@ async def async_main(
232
137
  )
233
138
 
234
139
  if not instruction or not instruction.strip():
235
- _print(console, "[yellow]No instruction was transcribed. Exiting.[/yellow]")
140
+ _print(
141
+ console,
142
+ "[yellow]No instruction was transcribed. Exiting.[/yellow]",
143
+ )
236
144
  return
237
145
 
238
146
  await process_and_update_clipboard(
147
+ system_prompt=SYSTEM_PROMPT,
148
+ agent_instructions=AGENT_INSTRUCTIONS,
239
149
  model=model,
240
150
  ollama_host=ollama_host,
241
151
  logger=logger,
242
152
  console=console,
243
153
  original_text=original_text,
244
154
  instruction=instruction,
155
+ clipboard=clipboard,
245
156
  )
246
157
 
247
158
 
@@ -250,13 +161,18 @@ def voice_assistant(
250
161
  device_index: int | None = opts.DEVICE_INDEX,
251
162
  device_name: str | None = opts.DEVICE_NAME,
252
163
  *,
164
+ # ASR
253
165
  list_devices: bool = opts.LIST_DEVICES,
254
166
  asr_server_ip: str = opts.ASR_SERVER_IP,
255
167
  asr_server_port: int = opts.ASR_SERVER_PORT,
168
+ # LLM
256
169
  model: str = opts.MODEL,
257
170
  ollama_host: str = opts.OLLAMA_HOST,
171
+ # Process control
258
172
  stop: bool = opts.STOP,
259
173
  status: bool = opts.STATUS,
174
+ # General
175
+ clipboard: bool = opts.CLIPBOARD,
260
176
  log_level: str = opts.LOG_LEVEL,
261
177
  log_file: str | None = opts.LOG_FILE,
262
178
  quiet: bool = opts.QUIET,
@@ -303,5 +219,6 @@ def voice_assistant(
303
219
  asr_server_port=asr_server_port,
304
220
  model=model,
305
221
  ollama_host=ollama_host,
222
+ clipboard=clipboard,
306
223
  ),
307
224
  )
@@ -0,0 +1,141 @@
1
+ """Client for interacting with Ollama."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import sys
6
+ import time
7
+ from contextlib import AbstractContextManager, nullcontext
8
+ from typing import TYPE_CHECKING
9
+
10
+ import pyperclip
11
+ from pydantic_ai import Agent
12
+ from pydantic_ai.models.openai import OpenAIModel
13
+ from pydantic_ai.providers.openai import OpenAIProvider
14
+ from rich.panel import Panel
15
+ from rich.status import Status
16
+
17
+ from agent_cli.utils import _print
18
+
19
+ if TYPE_CHECKING:
20
+ import logging
21
+
22
+ from rich.console import Console
23
+
24
+
25
+ def build_agent(
26
+ model: str,
27
+ ollama_host: str,
28
+ *,
29
+ system_prompt: str | None = None,
30
+ instructions: str | None = None,
31
+ ) -> Agent:
32
+ """Construct and return a PydanticAI agent configured for local Ollama."""
33
+ ollama_provider = OpenAIProvider(base_url=f"{ollama_host}/v1")
34
+ ollama_model = OpenAIModel(model_name=model, provider=ollama_provider)
35
+ return Agent(
36
+ model=ollama_model,
37
+ system_prompt=system_prompt or (),
38
+ instructions=instructions,
39
+ )
40
+
41
+
42
+ # --- LLM (Editing) Logic ---
43
+
44
+ INPUT_TEMPLATE = """
45
+ <original-text>
46
+ {original_text}
47
+ </original-text>
48
+
49
+ <instruction>
50
+ {instruction}
51
+ </instruction>
52
+ """
53
+
54
+
55
+ async def process_with_llm(
56
+ agent: Agent,
57
+ original_text: str,
58
+ instruction: str,
59
+ ) -> tuple[str, float]:
60
+ """Run the agent asynchronously and return corrected text and elapsed time."""
61
+ user_input = INPUT_TEMPLATE.format(
62
+ original_text=original_text,
63
+ instruction=instruction,
64
+ )
65
+ t_start = time.monotonic()
66
+ result = await agent.run(user_input)
67
+ t_end = time.monotonic()
68
+ return result.output, t_end - t_start
69
+
70
+
71
+ def _maybe_status(
72
+ console: Console | None,
73
+ model: str,
74
+ ) -> AbstractContextManager[Status | None]:
75
+ """Context manager for status display."""
76
+ if console:
77
+ return Status(
78
+ f"[bold yellow]🤖 Applying instruction with {model}...[/bold yellow]",
79
+ console=console,
80
+ )
81
+ return nullcontext()
82
+
83
+
84
+ async def process_and_update_clipboard(
85
+ system_prompt: str,
86
+ agent_instructions: str,
87
+ *,
88
+ model: str,
89
+ ollama_host: str,
90
+ logger: logging.Logger,
91
+ console: Console | None,
92
+ original_text: str,
93
+ instruction: str,
94
+ clipboard: bool,
95
+ ) -> None:
96
+ """Processes the text with the LLM, updates the clipboard, and displays the result.
97
+
98
+ In quiet mode, only the result is printed to stdout.
99
+ """
100
+ agent = build_agent(
101
+ model=model,
102
+ ollama_host=ollama_host,
103
+ system_prompt=system_prompt,
104
+ instructions=agent_instructions,
105
+ )
106
+ try:
107
+ with _maybe_status(console, model):
108
+ result_text, elapsed = await process_with_llm(
109
+ agent,
110
+ original_text,
111
+ instruction,
112
+ )
113
+
114
+ if clipboard:
115
+ pyperclip.copy(result_text)
116
+ logger.info("Copied result to clipboard.")
117
+
118
+ if console:
119
+ console.print(
120
+ Panel(
121
+ result_text,
122
+ title="[bold green]✨ Result (Copied to Clipboard)[/bold green]",
123
+ border_style="green",
124
+ subtitle=f"[dim]took {elapsed:.2f}s[/dim]",
125
+ ),
126
+ )
127
+ else:
128
+ # Quiet mode: print result to stdout for Keyboard Maestro to capture
129
+ print(result_text)
130
+
131
+ except Exception as e:
132
+ logger.exception("An error occurred during LLM processing.")
133
+ _print(
134
+ console,
135
+ f"❌ [bold red]An unexpected LLM error occurred: {e}[/bold red]",
136
+ )
137
+ _print(
138
+ console,
139
+ f" Please check your Ollama server at [cyan]{ollama_host}[/cyan]",
140
+ )
141
+ sys.exit(1)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: agent-cli
3
- Version: 0.2.0
3
+ Version: 0.3.0
4
4
  Summary: A suite of AI-powered command-line tools for text correction, audio transcription, and voice assistance.
5
5
  Author-email: Bas Nijholt <bas@nijho.lt>
6
6
  Project-URL: Homepage, https://github.com/basnijholt/agent-cli
@@ -196,13 +196,23 @@ Transcribes whatever you say into text using Wyoming ASR (Automatic Speech Recog
196
196
  │ [default: 192.168.1.143] │
197
197
  │ --asr-server-port INTEGER Wyoming ASR server port. │
198
198
  │ [default: 10300] │
199
- │ --clipboard --no-clipboard Copy transcript to
200
- clipboard.
201
- │ [default: clipboard]
199
+ │ --model -m TEXT The Ollama model to use.
200
+ Default is devstral:24b.
201
+ │ [default: devstral:24b]
202
+ │ --ollama-host TEXT The Ollama server host. │
203
+ │ Default is │
204
+ │ http://localhost:11434. │
205
+ │ [default: │
206
+ │ http://localhost:11434] │
207
+ │ --llm --no-llm Use an LLM to process the │
208
+ │ transcript. │
209
+ │ [default: no-llm] │
202
210
  │ --stop Stop any running │
203
211
  │ background process. │
204
212
  │ --status Check if a background │
205
213
  │ process is running. │
214
+ │ --clipboard --no-clipboard Copy result to clipboard. │
215
+ │ [default: clipboard] │
206
216
  │ --log-level TEXT Set logging level. │
207
217
  │ [default: WARNING] │
208
218
  │ --log-file TEXT Path to a file to write │
@@ -268,35 +278,48 @@ The process management features make it perfect for hotkey toggles. Use `--statu
268
278
  voice-assistant --stop
269
279
 
270
280
  ╭─ Options ────────────────────────────────────────────────────────────────────╮
271
- │ --device-index INTEGER Index of the PyAudio input device to
272
- use.
273
- [default: None]
274
- │ --device-name TEXT Device name keywords for partial
275
- matching. Supports comma-separated list
276
- where each term can partially match
277
- device names (case-insensitive). First
278
- matching device is selected.
279
- [default: None]
280
- --list-devices List available audio input devices and
281
- exit.
282
- --asr-server-ip TEXT Wyoming ASR server IP address.
283
- [default: 192.168.1.143]
284
- --asr-server-port INTEGER Wyoming ASR server port.
285
- [default: 10300]
286
- --model -m TEXT The Ollama model to use. Default is
287
- devstral:24b.
288
- [default: devstral:24b]
289
- --ollama-host TEXT The Ollama server host. Default is
290
- http://localhost:11434.
291
- [default: http://localhost:11434]
292
- --stop Stop any running background process.
293
- │ --status Check if a background process is
294
- running.
295
- --log-level TEXT Set logging level. [default: WARNING]
296
- --log-file TEXT Path to a file to write logs to.
297
- [default: None]
298
- │ --quiet -q Suppress console output from rich.
299
- --help Show this message and exit.
281
+ │ --device-index INTEGER Index of the PyAudio input │
282
+ device to use.
283
+ [default: None]
284
+ │ --device-name TEXT Device name keywords for
285
+ partial matching. Supports │
286
+ comma-separated list where
287
+ each term can partially
288
+ match device names
289
+ (case-insensitive). First
290
+ matching device is
291
+ selected.
292
+ [default: None]
293
+ --list-devices List available audio input
294
+ devices and exit.
295
+ --asr-server-ip TEXT Wyoming ASR server IP
296
+ address.
297
+ [default: 192.168.1.143]
298
+ --asr-server-port INTEGER Wyoming ASR server port.
299
+ [default: 10300]
300
+ --model -m TEXT The Ollama model to use.
301
+ Default is devstral:24b.
302
+ [default: devstral:24b]
303
+ │ --ollama-host TEXT The Ollama server host.
304
+ Default is
305
+ http://localhost:11434. │
306
+ [default:
307
+ http://localhost:11434]
308
+ │ --stop Stop any running
309
+ background process.
310
+ │ --status Check if a background │
311
+ │ process is running. │
312
+ │ --clipboard --no-clipboard Copy result to clipboard. │
313
+ │ [default: clipboard] │
314
+ │ --log-level TEXT Set logging level. │
315
+ │ [default: WARNING] │
316
+ │ --log-file TEXT Path to a file to write │
317
+ │ logs to. │
318
+ │ [default: None] │
319
+ │ --quiet -q Suppress console output │
320
+ │ from rich. │
321
+ │ --help Show this message and │
322
+ │ exit. │
300
323
  ╰──────────────────────────────────────────────────────────────────────────────╯
301
324
 
302
325
  ```
@@ -5,7 +5,7 @@ agent_cli/__init__.py
5
5
  agent_cli/asr.py
6
6
  agent_cli/cli.py
7
7
  agent_cli/config.py
8
- agent_cli/ollama_client.py
8
+ agent_cli/llm.py
9
9
  agent_cli/process_manager.py
10
10
  agent_cli/py.typed
11
11
  agent_cli/utils.py
@@ -20,6 +20,6 @@ agent_cli/agents/_cli_options.py
20
20
  agent_cli/agents/autocorrect.py
21
21
  agent_cli/agents/transcribe.py
22
22
  agent_cli/agents/voice_assistant.py
23
- tests/test_ollama_client.py
23
+ tests/test_llm.py
24
24
  tests/test_process_manager.py
25
25
  tests/test_utils.py
@@ -4,7 +4,7 @@ from __future__ import annotations
4
4
 
5
5
  from typing import TYPE_CHECKING
6
6
 
7
- from agent_cli.ollama_client import build_agent
7
+ from agent_cli.llm import build_agent
8
8
 
9
9
  if TYPE_CHECKING:
10
10
  import pytest
@@ -1,24 +0,0 @@
1
- """Client for interacting with Ollama."""
2
-
3
- from __future__ import annotations
4
-
5
- from pydantic_ai import Agent
6
- from pydantic_ai.models.openai import OpenAIModel
7
- from pydantic_ai.providers.openai import OpenAIProvider
8
-
9
-
10
- def build_agent(
11
- model: str,
12
- ollama_host: str,
13
- *,
14
- system_prompt: str | None = None,
15
- instructions: str | None = None,
16
- ) -> Agent:
17
- """Construct and return a PydanticAI agent configured for local Ollama."""
18
- ollama_provider = OpenAIProvider(base_url=f"{ollama_host}/v1")
19
- ollama_model = OpenAIModel(model_name=model, provider=ollama_provider)
20
- return Agent(
21
- model=ollama_model,
22
- system_prompt=system_prompt or (),
23
- instructions=instructions,
24
- )
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes