agent-cli 0.70.5 (agent_cli-0.70.5-py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_cli/__init__.py +5 -0
- agent_cli/__main__.py +6 -0
- agent_cli/_extras.json +14 -0
- agent_cli/_requirements/.gitkeep +0 -0
- agent_cli/_requirements/audio.txt +79 -0
- agent_cli/_requirements/faster-whisper.txt +215 -0
- agent_cli/_requirements/kokoro.txt +425 -0
- agent_cli/_requirements/llm.txt +183 -0
- agent_cli/_requirements/memory.txt +355 -0
- agent_cli/_requirements/mlx-whisper.txt +222 -0
- agent_cli/_requirements/piper.txt +176 -0
- agent_cli/_requirements/rag.txt +402 -0
- agent_cli/_requirements/server.txt +154 -0
- agent_cli/_requirements/speed.txt +77 -0
- agent_cli/_requirements/vad.txt +155 -0
- agent_cli/_requirements/wyoming.txt +71 -0
- agent_cli/_tools.py +368 -0
- agent_cli/agents/__init__.py +23 -0
- agent_cli/agents/_voice_agent_common.py +136 -0
- agent_cli/agents/assistant.py +383 -0
- agent_cli/agents/autocorrect.py +284 -0
- agent_cli/agents/chat.py +496 -0
- agent_cli/agents/memory/__init__.py +31 -0
- agent_cli/agents/memory/add.py +190 -0
- agent_cli/agents/memory/proxy.py +160 -0
- agent_cli/agents/rag_proxy.py +128 -0
- agent_cli/agents/speak.py +209 -0
- agent_cli/agents/transcribe.py +671 -0
- agent_cli/agents/transcribe_daemon.py +499 -0
- agent_cli/agents/voice_edit.py +291 -0
- agent_cli/api.py +22 -0
- agent_cli/cli.py +106 -0
- agent_cli/config.py +503 -0
- agent_cli/config_cmd.py +307 -0
- agent_cli/constants.py +27 -0
- agent_cli/core/__init__.py +1 -0
- agent_cli/core/audio.py +461 -0
- agent_cli/core/audio_format.py +299 -0
- agent_cli/core/chroma.py +88 -0
- agent_cli/core/deps.py +191 -0
- agent_cli/core/openai_proxy.py +139 -0
- agent_cli/core/process.py +195 -0
- agent_cli/core/reranker.py +120 -0
- agent_cli/core/sse.py +87 -0
- agent_cli/core/transcription_logger.py +70 -0
- agent_cli/core/utils.py +526 -0
- agent_cli/core/vad.py +175 -0
- agent_cli/core/watch.py +65 -0
- agent_cli/dev/__init__.py +14 -0
- agent_cli/dev/cli.py +1588 -0
- agent_cli/dev/coding_agents/__init__.py +19 -0
- agent_cli/dev/coding_agents/aider.py +24 -0
- agent_cli/dev/coding_agents/base.py +167 -0
- agent_cli/dev/coding_agents/claude.py +39 -0
- agent_cli/dev/coding_agents/codex.py +24 -0
- agent_cli/dev/coding_agents/continue_dev.py +15 -0
- agent_cli/dev/coding_agents/copilot.py +24 -0
- agent_cli/dev/coding_agents/cursor_agent.py +48 -0
- agent_cli/dev/coding_agents/gemini.py +28 -0
- agent_cli/dev/coding_agents/opencode.py +15 -0
- agent_cli/dev/coding_agents/registry.py +49 -0
- agent_cli/dev/editors/__init__.py +19 -0
- agent_cli/dev/editors/base.py +89 -0
- agent_cli/dev/editors/cursor.py +15 -0
- agent_cli/dev/editors/emacs.py +46 -0
- agent_cli/dev/editors/jetbrains.py +56 -0
- agent_cli/dev/editors/nano.py +31 -0
- agent_cli/dev/editors/neovim.py +33 -0
- agent_cli/dev/editors/registry.py +59 -0
- agent_cli/dev/editors/sublime.py +20 -0
- agent_cli/dev/editors/vim.py +42 -0
- agent_cli/dev/editors/vscode.py +15 -0
- agent_cli/dev/editors/zed.py +20 -0
- agent_cli/dev/project.py +568 -0
- agent_cli/dev/registry.py +52 -0
- agent_cli/dev/skill/SKILL.md +141 -0
- agent_cli/dev/skill/examples.md +571 -0
- agent_cli/dev/terminals/__init__.py +19 -0
- agent_cli/dev/terminals/apple_terminal.py +82 -0
- agent_cli/dev/terminals/base.py +56 -0
- agent_cli/dev/terminals/gnome.py +51 -0
- agent_cli/dev/terminals/iterm2.py +84 -0
- agent_cli/dev/terminals/kitty.py +77 -0
- agent_cli/dev/terminals/registry.py +48 -0
- agent_cli/dev/terminals/tmux.py +58 -0
- agent_cli/dev/terminals/warp.py +132 -0
- agent_cli/dev/terminals/zellij.py +78 -0
- agent_cli/dev/worktree.py +856 -0
- agent_cli/docs_gen.py +417 -0
- agent_cli/example-config.toml +185 -0
- agent_cli/install/__init__.py +5 -0
- agent_cli/install/common.py +89 -0
- agent_cli/install/extras.py +174 -0
- agent_cli/install/hotkeys.py +48 -0
- agent_cli/install/services.py +87 -0
- agent_cli/memory/__init__.py +7 -0
- agent_cli/memory/_files.py +250 -0
- agent_cli/memory/_filters.py +63 -0
- agent_cli/memory/_git.py +157 -0
- agent_cli/memory/_indexer.py +142 -0
- agent_cli/memory/_ingest.py +408 -0
- agent_cli/memory/_persistence.py +182 -0
- agent_cli/memory/_prompt.py +91 -0
- agent_cli/memory/_retrieval.py +294 -0
- agent_cli/memory/_store.py +169 -0
- agent_cli/memory/_streaming.py +44 -0
- agent_cli/memory/_tasks.py +48 -0
- agent_cli/memory/api.py +113 -0
- agent_cli/memory/client.py +272 -0
- agent_cli/memory/engine.py +361 -0
- agent_cli/memory/entities.py +43 -0
- agent_cli/memory/models.py +112 -0
- agent_cli/opts.py +433 -0
- agent_cli/py.typed +0 -0
- agent_cli/rag/__init__.py +3 -0
- agent_cli/rag/_indexer.py +67 -0
- agent_cli/rag/_indexing.py +226 -0
- agent_cli/rag/_prompt.py +30 -0
- agent_cli/rag/_retriever.py +156 -0
- agent_cli/rag/_store.py +48 -0
- agent_cli/rag/_utils.py +218 -0
- agent_cli/rag/api.py +175 -0
- agent_cli/rag/client.py +299 -0
- agent_cli/rag/engine.py +302 -0
- agent_cli/rag/models.py +55 -0
- agent_cli/scripts/.runtime/.gitkeep +0 -0
- agent_cli/scripts/__init__.py +1 -0
- agent_cli/scripts/check_plugin_skill_sync.py +50 -0
- agent_cli/scripts/linux-hotkeys/README.md +63 -0
- agent_cli/scripts/linux-hotkeys/toggle-autocorrect.sh +45 -0
- agent_cli/scripts/linux-hotkeys/toggle-transcription.sh +58 -0
- agent_cli/scripts/linux-hotkeys/toggle-voice-edit.sh +58 -0
- agent_cli/scripts/macos-hotkeys/README.md +45 -0
- agent_cli/scripts/macos-hotkeys/skhd-config-example +5 -0
- agent_cli/scripts/macos-hotkeys/toggle-autocorrect.sh +12 -0
- agent_cli/scripts/macos-hotkeys/toggle-transcription.sh +37 -0
- agent_cli/scripts/macos-hotkeys/toggle-voice-edit.sh +37 -0
- agent_cli/scripts/nvidia-asr-server/README.md +99 -0
- agent_cli/scripts/nvidia-asr-server/pyproject.toml +27 -0
- agent_cli/scripts/nvidia-asr-server/server.py +255 -0
- agent_cli/scripts/nvidia-asr-server/shell.nix +32 -0
- agent_cli/scripts/nvidia-asr-server/uv.lock +4654 -0
- agent_cli/scripts/run-openwakeword.sh +11 -0
- agent_cli/scripts/run-piper-windows.ps1 +30 -0
- agent_cli/scripts/run-piper.sh +24 -0
- agent_cli/scripts/run-whisper-linux.sh +40 -0
- agent_cli/scripts/run-whisper-macos.sh +6 -0
- agent_cli/scripts/run-whisper-windows.ps1 +51 -0
- agent_cli/scripts/run-whisper.sh +9 -0
- agent_cli/scripts/run_faster_whisper_server.py +136 -0
- agent_cli/scripts/setup-linux-hotkeys.sh +72 -0
- agent_cli/scripts/setup-linux.sh +108 -0
- agent_cli/scripts/setup-macos-hotkeys.sh +61 -0
- agent_cli/scripts/setup-macos.sh +76 -0
- agent_cli/scripts/setup-windows.ps1 +63 -0
- agent_cli/scripts/start-all-services-windows.ps1 +53 -0
- agent_cli/scripts/start-all-services.sh +178 -0
- agent_cli/scripts/sync_extras.py +138 -0
- agent_cli/server/__init__.py +3 -0
- agent_cli/server/cli.py +721 -0
- agent_cli/server/common.py +222 -0
- agent_cli/server/model_manager.py +288 -0
- agent_cli/server/model_registry.py +225 -0
- agent_cli/server/proxy/__init__.py +3 -0
- agent_cli/server/proxy/api.py +444 -0
- agent_cli/server/streaming.py +67 -0
- agent_cli/server/tts/__init__.py +3 -0
- agent_cli/server/tts/api.py +335 -0
- agent_cli/server/tts/backends/__init__.py +82 -0
- agent_cli/server/tts/backends/base.py +139 -0
- agent_cli/server/tts/backends/kokoro.py +403 -0
- agent_cli/server/tts/backends/piper.py +253 -0
- agent_cli/server/tts/model_manager.py +201 -0
- agent_cli/server/tts/model_registry.py +28 -0
- agent_cli/server/tts/wyoming_handler.py +249 -0
- agent_cli/server/whisper/__init__.py +3 -0
- agent_cli/server/whisper/api.py +413 -0
- agent_cli/server/whisper/backends/__init__.py +89 -0
- agent_cli/server/whisper/backends/base.py +97 -0
- agent_cli/server/whisper/backends/faster_whisper.py +225 -0
- agent_cli/server/whisper/backends/mlx.py +270 -0
- agent_cli/server/whisper/languages.py +116 -0
- agent_cli/server/whisper/model_manager.py +157 -0
- agent_cli/server/whisper/model_registry.py +28 -0
- agent_cli/server/whisper/wyoming_handler.py +203 -0
- agent_cli/services/__init__.py +343 -0
- agent_cli/services/_wyoming_utils.py +64 -0
- agent_cli/services/asr.py +506 -0
- agent_cli/services/llm.py +228 -0
- agent_cli/services/tts.py +450 -0
- agent_cli/services/wake_word.py +142 -0
- agent_cli-0.70.5.dist-info/METADATA +2118 -0
- agent_cli-0.70.5.dist-info/RECORD +196 -0
- agent_cli-0.70.5.dist-info/WHEEL +4 -0
- agent_cli-0.70.5.dist-info/entry_points.txt +4 -0
- agent_cli-0.70.5.dist-info/licenses/LICENSE +21 -0
agent_cli/agents/voice_edit.py
ADDED
@@ -0,0 +1,291 @@
"""Interact with clipboard text via a voice command using Wyoming and an Ollama LLM.

WORKFLOW:
1. The script starts and immediately copies the current content of the clipboard.
2. It then starts listening for a voice command via the microphone.
3. The user triggers a stop signal (e.g., via a Keyboard Maestro hotkey sending SIGINT).
4. The script stops recording and finalizes the transcription of the voice command.
5. It sends the original clipboard text and the transcribed command to a local LLM.
6. The LLM processes the text based on the instruction (either editing it or answering a question).
7. The resulting text is then copied back to the clipboard.

KEYBOARD MAESTRO INTEGRATION:
To create a hotkey toggle for this script, set up a Keyboard Maestro macro with:

1. Trigger: Hot Key (e.g., Cmd+Shift+A for "Assistant")

2. If/Then/Else Action:
   - Condition: Shell script returns success
   - Script: voice-edit --status >/dev/null 2>&1

3. Then Actions (if process is running):
   - Display Text Briefly: "🗣️ Processing command..."
   - Execute Shell Script: voice-edit --stop --quiet
   - (The script will show its own "Done" notification)

4. Else Actions (if process is not running):
   - Display Text Briefly: "📋 Listening for command..."
   - Execute Shell Script: voice-edit --input-device-index 1 --quiet &
   - Select "Display results in a notification"

This approach uses standard Unix background processes (&) instead of Python daemons!
"""

from __future__ import annotations

import asyncio
import json
import logging
from contextlib import suppress
from pathlib import Path  # noqa: TC003

from agent_cli import config, opts
from agent_cli.agents._voice_agent_common import (
    get_instruction_from_audio,
    process_instruction_and_respond,
)
from agent_cli.cli import app
from agent_cli.core import process
from agent_cli.core.audio import setup_devices
from agent_cli.core.deps import requires_extras
from agent_cli.core.utils import (
    enable_json_mode,
    get_clipboard_text,
    maybe_live,
    print_command_line_args,
    print_input_panel,
    print_with_style,
    setup_logging,
    signal_handling_context,
    stop_or_status_or_toggle,
)
from agent_cli.services import asr

LOGGER = logging.getLogger()

# LLM Prompts
SYSTEM_PROMPT = """\
You are a versatile AI text assistant. Your purpose is to either **modify** a given text or **answer questions** about it, based on a specific instruction.

- If the instruction is a **command to edit** the text (e.g., "make this more formal," "add emojis," "correct spelling"), you must return ONLY the full, modified text.
- If the instruction is a **question about** the text (e.g., "summarize this," "what are the key points?," "translate to French"), you must return ONLY the answer.

In all cases, you must follow these strict rules:
- Do not provide any explanations, apologies, or introductory phrases like "Here is the result:".
- Do not wrap your output in markdown or code blocks.
- Your output should be the direct result of the instruction: either the edited text or the answer to the question.
"""

AGENT_INSTRUCTIONS = """\
You will be given a block of text enclosed in <original-text> tags, and an instruction enclosed in <instruction> tags.
Analyze the instruction to determine if it's a command to edit the text or a question about it.

- If it is an editing command, apply the changes to the original text and return the complete, modified version.
- If it is a question, formulate an answer based on the original text.

Return ONLY the resulting text (either the edit or the answer), with no extra formatting or commentary.
"""


# --- Main Application Logic ---


async def _async_main(
    *,
    provider_cfg: config.ProviderSelection,
    general_cfg: config.General,
    audio_in_cfg: config.AudioInput,
    wyoming_asr_cfg: config.WyomingASR,
    openai_asr_cfg: config.OpenAIASR,
    gemini_asr_cfg: config.GeminiASR,
    ollama_cfg: config.Ollama,
    openai_llm_cfg: config.OpenAILLM,
    gemini_llm_cfg: config.GeminiLLM,
    audio_out_cfg: config.AudioOutput,
    wyoming_tts_cfg: config.WyomingTTS,
    openai_tts_cfg: config.OpenAITTS,
    kokoro_tts_cfg: config.KokoroTTS,
    gemini_tts_cfg: config.GeminiTTS,
) -> str | None:
    """Core asynchronous logic for the voice assistant."""
    device_info = setup_devices(general_cfg, audio_in_cfg, audio_out_cfg)
    if device_info is None:
        return None
    input_device_index, _, tts_output_device_index = device_info
    audio_in_cfg.input_device_index = input_device_index
    audio_out_cfg.output_device_index = tts_output_device_index

    original_text = get_clipboard_text()
    if original_text is None:
        return None

    if not general_cfg.quiet and original_text:
        print_input_panel(original_text, title="📝 Text to Process")

    with (
        signal_handling_context(LOGGER, general_cfg.quiet) as stop_event,
        maybe_live(not general_cfg.quiet) as live,
    ):
        audio_data = await asr.record_audio_with_manual_stop(
            input_device_index,
            stop_event,
            LOGGER,
            live=live,
            quiet=general_cfg.quiet,
        )

        if not audio_data:
            if not general_cfg.quiet:
                print_with_style("No audio recorded", style="yellow")
            return None

        instruction = await get_instruction_from_audio(
            audio_data=audio_data,
            provider_cfg=provider_cfg,
            audio_input_cfg=audio_in_cfg,
            wyoming_asr_cfg=wyoming_asr_cfg,
            openai_asr_cfg=openai_asr_cfg,
            gemini_asr_cfg=gemini_asr_cfg,
            ollama_cfg=ollama_cfg,
            logger=LOGGER,
            quiet=general_cfg.quiet,
        )
        if not instruction:
            return None

        return await process_instruction_and_respond(
            instruction=instruction,
            original_text=original_text,
            provider_cfg=provider_cfg,
            general_cfg=general_cfg,
            ollama_cfg=ollama_cfg,
            openai_llm_cfg=openai_llm_cfg,
            gemini_llm_cfg=gemini_llm_cfg,
            audio_output_cfg=audio_out_cfg,
            wyoming_tts_cfg=wyoming_tts_cfg,
            openai_tts_cfg=openai_tts_cfg,
            kokoro_tts_cfg=kokoro_tts_cfg,
            gemini_tts_cfg=gemini_tts_cfg,
            system_prompt=SYSTEM_PROMPT,
            agent_instructions=AGENT_INSTRUCTIONS,
            live=live,
            logger=LOGGER,
        )


@app.command("voice-edit", rich_help_panel="Voice Commands")
@requires_extras("audio", "llm")
def voice_edit(
    *,
    # --- Provider Selection ---
    asr_provider: str = opts.ASR_PROVIDER,
    llm_provider: str = opts.LLM_PROVIDER,
    tts_provider: str = opts.TTS_PROVIDER,
    # --- ASR (Audio) Configuration ---
    input_device_index: int | None = opts.INPUT_DEVICE_INDEX,
    input_device_name: str | None = opts.INPUT_DEVICE_NAME,
    asr_wyoming_ip: str = opts.ASR_WYOMING_IP,
    asr_wyoming_port: int = opts.ASR_WYOMING_PORT,
    asr_openai_model: str = opts.ASR_OPENAI_MODEL,
    asr_gemini_model: str = opts.ASR_GEMINI_MODEL,
    # --- LLM Configuration ---
    llm_ollama_model: str = opts.LLM_OLLAMA_MODEL,
    llm_ollama_host: str = opts.LLM_OLLAMA_HOST,
    llm_openai_model: str = opts.LLM_OPENAI_MODEL,
    openai_api_key: str | None = opts.OPENAI_API_KEY,
    openai_base_url: str | None = opts.OPENAI_BASE_URL,
    llm_gemini_model: str = opts.LLM_GEMINI_MODEL,
    gemini_api_key: str | None = opts.GEMINI_API_KEY,
    # --- TTS Configuration ---
    enable_tts: bool = opts.ENABLE_TTS,
    output_device_index: int | None = opts.OUTPUT_DEVICE_INDEX,
    output_device_name: str | None = opts.OUTPUT_DEVICE_NAME,
    tts_speed: float = opts.TTS_SPEED,
    tts_wyoming_ip: str = opts.TTS_WYOMING_IP,
    tts_wyoming_port: int = opts.TTS_WYOMING_PORT,
    tts_wyoming_voice: str | None = opts.TTS_WYOMING_VOICE,
    tts_wyoming_language: str | None = opts.TTS_WYOMING_LANGUAGE,
    tts_wyoming_speaker: str | None = opts.TTS_WYOMING_SPEAKER,
    tts_openai_model: str = opts.TTS_OPENAI_MODEL,
    tts_openai_voice: str = opts.TTS_OPENAI_VOICE,
    tts_openai_base_url: str | None = opts.TTS_OPENAI_BASE_URL,
    tts_kokoro_model: str = opts.TTS_KOKORO_MODEL,
    tts_kokoro_voice: str = opts.TTS_KOKORO_VOICE,
    tts_kokoro_host: str = opts.TTS_KOKORO_HOST,
    tts_gemini_model: str = opts.TTS_GEMINI_MODEL,
    tts_gemini_voice: str = opts.TTS_GEMINI_VOICE,
    # --- Process Management ---
    stop: bool = opts.STOP,
    status: bool = opts.STATUS,
    toggle: bool = opts.TOGGLE,
    # --- General Options ---
    save_file: Path | None = opts.SAVE_FILE,
    clipboard: bool = opts.CLIPBOARD,
    log_level: opts.LogLevel = opts.LOG_LEVEL,
    log_file: str | None = opts.LOG_FILE,
    list_devices: bool = opts.LIST_DEVICES,
    quiet: bool = opts.QUIET,
    json_output: bool = opts.JSON_OUTPUT,
    config_file: str | None = opts.CONFIG_FILE,
    print_args: bool = opts.PRINT_ARGS,
) -> None:
    """Interact with clipboard text via a voice command using local or remote services.

    Usage:
    - Run in foreground: agent-cli voice-edit --input-device-index 1
    - Run in background: agent-cli voice-edit --input-device-index 1 &
    - Check status: agent-cli voice-edit --status
    - Stop background process: agent-cli voice-edit --stop
    - List output devices: agent-cli voice-edit --list-output-devices
    - Save TTS to file: agent-cli voice-edit --tts --save-file response.wav
    """
    if print_args:
        print_command_line_args(locals())

    effective_quiet = quiet or json_output
    if json_output:
        enable_json_mode()

    setup_logging(log_level, log_file, quiet=effective_quiet)
    general_cfg = config.General(
        log_level=log_level,
        log_file=log_file,
        quiet=effective_quiet,
        list_devices=list_devices,
        clipboard=clipboard,
        save_file=save_file,
    )
    process_name = "voice-edit"
    if stop_or_status_or_toggle(
        process_name,
        "voice assistant",
        stop,
        status,
        toggle,
        quiet=general_cfg.quiet,
    ):
        return

    with process.pid_file_context(process_name), suppress(KeyboardInterrupt):
        cfgs = config.create_provider_configs_from_locals(locals())

        result = asyncio.run(
            _async_main(
                provider_cfg=cfgs.provider,
                general_cfg=general_cfg,
                audio_in_cfg=cfgs.audio_in,
                wyoming_asr_cfg=cfgs.wyoming_asr,
                openai_asr_cfg=cfgs.openai_asr,
                gemini_asr_cfg=cfgs.gemini_asr,
                ollama_cfg=cfgs.ollama,
                openai_llm_cfg=cfgs.openai_llm,
                gemini_llm_cfg=cfgs.gemini_llm,
                audio_out_cfg=cfgs.audio_out,
                wyoming_tts_cfg=cfgs.wyoming_tts,
                openai_tts_cfg=cfgs.openai_tts,
                kokoro_tts_cfg=cfgs.kokoro_tts,
                gemini_tts_cfg=cfgs.gemini_tts,
            ),
        )
        if json_output:
            print(json.dumps({"result": result}))
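The module docstring above describes driving voice-edit as a hotkey toggle: check --status via its exit code, then either --stop a running instance or start a new one in the background. The following is a minimal, illustrative Python sketch of that toggle flow; it is not shipped in this wheel, and it assumes the agent-cli console script from the package's entry points is on PATH.

    # Illustrative sketch of the status/stop/start toggle described in the docstring.
    import subprocess

    def toggle_voice_edit() -> None:
        # --status exits with code 0 when a voice-edit process is already running.
        status = subprocess.run(
            ["agent-cli", "voice-edit", "--status"],
            capture_output=True,
        )
        if status.returncode == 0:
            # Already listening: stop recording and let it process the command.
            subprocess.run(["agent-cli", "voice-edit", "--stop", "--quiet"], check=False)
        else:
            # Not running: start listening in the background.
            subprocess.Popen(["agent-cli", "voice-edit", "--quiet"])

    if __name__ == "__main__":
        toggle_voice_edit()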
agent_cli/api.py
ADDED
@@ -0,0 +1,22 @@
"""FastAPI web service for Agent CLI transcription.

This module re-exports from agent_cli.server.proxy.api for backwards compatibility.
"""

from agent_cli.server.proxy.api import (
    HealthResponse,
    TranscriptionRequest,
    TranscriptionResponse,
    app,
    health_check,
    transcribe_audio,
)

__all__ = [
    "HealthResponse",
    "TranscriptionRequest",
    "TranscriptionResponse",
    "app",
    "health_check",
    "transcribe_audio",
]
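Since the re-exported app object is a FastAPI application, it can presumably be served by any ASGI server. A hedged usage sketch with uvicorn follows; uvicorn and the host/port values are assumptions, not something this diff documents.

    # Sketch only: serve the backwards-compatible agent_cli.api app with uvicorn.
    import uvicorn
    from agent_cli.api import app

    if __name__ == "__main__":
        # Host and port are arbitrary example values.
        uvicorn.run(app, host="127.0.0.1", port=8000)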
agent_cli/cli.py
ADDED
@@ -0,0 +1,106 @@
"""Shared CLI functionality for the Agent CLI tools."""

from __future__ import annotations

import sys
from pathlib import Path
from typing import Annotated

import typer
from rich.table import Table

from . import __version__
from .config import load_config, normalize_provider_defaults
from .core.process import set_process_title
from .core.utils import console

app = typer.Typer(
    name="agent-cli",
    help="A suite of AI-powered command-line tools for text correction, audio transcription, and voice assistance.",
    context_settings={"help_option_names": ["-h", "--help"]},
    add_completion=True,
    rich_markup_mode="markdown",
    no_args_is_help=True,
)


def _version_callback(value: bool) -> None:
    if value:
        path = Path(__file__).parent
        data = [
            ("agent-cli version", __version__),
            ("agent-cli location", str(path)),
            ("Python version", sys.version),
            ("Python executable", sys.executable),
        ]
        table = Table(show_header=False)
        table.add_column("Property", style="cyan")
        table.add_column("Value", style="magenta")
        for prop, val in data:
            table.add_row(prop, val)
        console.print(table)
        raise typer.Exit


@app.callback(invoke_without_command=True)
def main(
    ctx: typer.Context,
    version: Annotated[  # noqa: ARG001
        bool,
        typer.Option(
            "-v",
            "--version",
            callback=_version_callback,
            is_eager=True,
            help="Show version and exit.",
        ),
    ] = False,
) -> None:
    """A suite of AI-powered tools."""
    if ctx.invoked_subcommand is None:
        console.print("[bold red]No command specified.[/bold red]")
        console.print("[bold yellow]Running --help for your convenience.[/bold yellow]")
        console.print(ctx.get_help())
        raise typer.Exit
    import dotenv  # noqa: PLC0415

    dotenv.load_dotenv()

    # Set process title for identification in ps output
    set_process_title(ctx.invoked_subcommand)


def set_config_defaults(ctx: typer.Context, config_file: str | None) -> None:
    """Set the default values for the CLI based on the config file."""
    config = load_config(config_file)
    wildcard_config = normalize_provider_defaults(config.get("defaults", {}))

    command_key = ctx.command.name or ""
    if not command_key:
        ctx.default_map = wildcard_config
        return

    # For nested subcommands (e.g., "memory proxy"), build "memory.proxy"
    if ctx.parent and ctx.parent.command.name and ctx.parent.command.name != "agent-cli":
        command_key = f"{ctx.parent.command.name}.{command_key}"

    command_config = normalize_provider_defaults(config.get(command_key, {}))
    ctx.default_map = {**wildcard_config, **command_config}


# Import commands from other modules to register them
from . import config_cmd  # noqa: E402, F401
from .agents import (  # noqa: E402, F401
    assistant,
    autocorrect,
    chat,
    memory,
    rag_proxy,
    speak,
    transcribe,
    transcribe_daemon,
    voice_edit,
)
from .dev import cli as dev_cli  # noqa: E402, F401
from .install import extras, hotkeys, services  # noqa: E402, F401
from .server import cli as server_cli  # noqa: E402, F401