agent-cli 0.70.5__py3-none-any.whl → 0.72.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
agent_cli/_extras.json CHANGED
@@ -2,8 +2,8 @@
  "wyoming": ["Wyoming protocol for ASR/TTS servers", ["wyoming"]],
  "audio": ["Local audio recording/playback", ["numpy", "sounddevice", "wyoming"]],
  "llm": ["LLM framework (pydantic-ai)", ["pydantic_ai"]],
- "memory": ["Long-term memory proxy", ["chromadb", "yaml"]],
- "rag": ["RAG proxy (ChromaDB, embeddings)", ["chromadb"]],
+ "memory": ["Long-term memory proxy", ["chromadb", "openai", "yaml"]],
+ "rag": ["RAG proxy (ChromaDB, embeddings)", ["chromadb", "openai"]],
  "server": ["FastAPI server components", ["fastapi"]],
  "speed": ["Audio speed adjustment (audiostretchy)", ["audiostretchy"]],
  "piper": ["Local Piper TTS", ["piper"]],
@@ -7,6 +7,7 @@ annotated-types==0.7.0
  anyio==4.12.1
  # via
  # httpx
+ # openai
  # starlette
  # watchfiles
  attrs==25.4.0
@@ -45,7 +46,9 @@ colorama==0.4.6 ; os_name == 'nt' or sys_platform == 'win32'
  coloredlogs==15.0.1
  # via onnxruntime
  distro==1.9.0
- # via posthog
+ # via
+ # openai
+ # posthog
  dnspython==2.8.0
  # via email-validator
  dotenv==0.9.9
@@ -96,6 +99,7 @@ httpx==0.28.1
  # chromadb
  # fastapi
  # fastapi-cloud-cli
+ # openai
  huggingface-hub==0.36.0
  # via
  # agent-cli
@@ -115,6 +119,8 @@ importlib-resources==6.5.2
  # via chromadb
  jinja2==3.1.6
  # via fastapi
+ jiter==0.12.0
+ # via openai
  jsonschema==4.26.0
  # via chromadb
  jsonschema-specifications==2025.9.1
@@ -142,6 +148,8 @@ onnxruntime==1.20.1
  # via
  # agent-cli
  # chromadb
+ openai==2.15.0
+ # via agent-cli
  opentelemetry-api==1.39.1
  # via
  # chromadb
@@ -195,6 +203,7 @@ pydantic==2.12.5
  # chromadb
  # fastapi
  # fastapi-cloud-cli
+ # openai
  # pydantic-extra-types
  # pydantic-settings
  pydantic-core==2.41.5
@@ -281,6 +290,8 @@ six==1.17.0
  # kubernetes
  # posthog
  # python-dateutil
+ sniffio==1.3.1
+ # via openai
  starlette==0.50.0
  # via fastapi
  sympy==1.14.0
@@ -295,6 +306,7 @@ tqdm==4.67.1
  # via
  # chromadb
  # huggingface-hub
+ # openai
  # transformers
  transformers==4.57.5
  # via agent-cli
@@ -313,6 +325,7 @@ typing-extensions==4.15.0
  # fastapi
  # grpcio
  # huggingface-hub
+ # openai
  # opentelemetry-api
  # opentelemetry-exporter-otlp-proto-grpc
  # opentelemetry-sdk
@@ -7,6 +7,7 @@ annotated-types==0.7.0
  anyio==4.12.1
  # via
  # httpx
+ # openai
  # starlette
  # watchfiles
  attrs==25.4.0
@@ -61,7 +62,9 @@ cryptography==46.0.3
  defusedxml==0.7.1
  # via markitdown
  distro==1.9.0
- # via posthog
+ # via
+ # openai
+ # posthog
  dnspython==2.8.0
  # via email-validator
  dotenv==0.9.9
@@ -112,6 +115,7 @@ httpx==0.28.1
  # chromadb
  # fastapi
  # fastapi-cloud-cli
+ # openai
  huggingface-hub==0.36.0
  # via
  # agent-cli
@@ -131,6 +135,8 @@ importlib-resources==6.5.2
  # via chromadb
  jinja2==3.1.6
  # via fastapi
+ jiter==0.12.0
+ # via openai
  jsonschema==4.26.0
  # via chromadb
  jsonschema-specifications==2025.9.1
@@ -173,6 +179,8 @@ onnxruntime==1.20.1
  # chromadb
  # magika
  # markitdown
+ openai==2.15.0
+ # via agent-cli
  opentelemetry-api==1.39.1
  # via
  # chromadb
@@ -232,6 +240,7 @@ pydantic==2.12.5
  # chromadb
  # fastapi
  # fastapi-cloud-cli
+ # openai
  # pydantic-extra-types
  # pydantic-settings
  pydantic-core==2.41.5
@@ -322,6 +331,8 @@ six==1.17.0
  # markdownify
  # posthog
  # python-dateutil
+ sniffio==1.3.1
+ # via openai
  soupsieve==2.8.1
  # via beautifulsoup4
  starlette==0.50.0
@@ -338,6 +349,7 @@ tqdm==4.67.1
  # via
  # chromadb
  # huggingface-hub
+ # openai
  # transformers
  transformers==4.57.5
  # via agent-cli
@@ -357,6 +369,7 @@ typing-extensions==4.15.0
  # fastapi
  # grpcio
  # huggingface-hub
+ # openai
  # opentelemetry-api
  # opentelemetry-exporter-otlp-proto-grpc
  # opentelemetry-sdk
@@ -14,89 +14,22 @@ click==8.3.1
  # typer-slim
  colorama==0.4.6 ; sys_platform == 'win32'
  # via click
- coloredlogs==15.0.1
- # via onnxruntime
  dotenv==0.9.9
  # via agent-cli
- filelock==3.20.3
- # via torch
- flatbuffers==25.12.19
- # via onnxruntime
- fsspec==2026.1.0
- # via torch
  h11==0.16.0
  # via httpcore
  httpcore==1.0.9
  # via httpx
  httpx==0.28.1
  # via agent-cli
- humanfriendly==10.0
- # via coloredlogs
  idna==3.11
  # via
  # anyio
  # httpx
- jinja2==3.1.6
- # via torch
  markdown-it-py==4.0.0
  # via rich
- markupsafe==3.0.3
- # via jinja2
  mdurl==0.1.2
  # via markdown-it-py
- mpmath==1.3.0
- # via sympy
- networkx==3.6.1
- # via torch
- numpy==2.3.5
- # via onnxruntime
- nvidia-cublas-cu12==12.8.4.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
- # via
- # nvidia-cudnn-cu12
- # nvidia-cusolver-cu12
- # torch
- nvidia-cuda-cupti-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
- # via torch
- nvidia-cuda-nvrtc-cu12==12.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'
- # via torch
- nvidia-cuda-runtime-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
- # via torch
- nvidia-cudnn-cu12==9.10.2.21 ; platform_machine == 'x86_64' and sys_platform == 'linux'
- # via torch
- nvidia-cufft-cu12==11.3.3.83 ; platform_machine == 'x86_64' and sys_platform == 'linux'
- # via torch
- nvidia-cufile-cu12==1.13.1.3 ; platform_machine == 'x86_64' and sys_platform == 'linux'
- # via torch
- nvidia-curand-cu12==10.3.9.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
- # via torch
- nvidia-cusolver-cu12==11.7.3.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
- # via torch
- nvidia-cusparse-cu12==12.5.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'
- # via
- # nvidia-cusolver-cu12
- # torch
- nvidia-cusparselt-cu12==0.7.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
- # via torch
- nvidia-nccl-cu12==2.27.5 ; platform_machine == 'x86_64' and sys_platform == 'linux'
- # via torch
- nvidia-nvjitlink-cu12==12.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'
- # via
- # nvidia-cufft-cu12
- # nvidia-cusolver-cu12
- # nvidia-cusparse-cu12
- # torch
- nvidia-nvshmem-cu12==3.3.20 ; platform_machine == 'x86_64' and sys_platform == 'linux'
- # via torch
- nvidia-nvtx-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
- # via torch
- onnxruntime==1.20.1
- # via silero-vad
- packaging==25.0
- # via
- # onnxruntime
- # silero-vad
- protobuf==6.33.4
- # via onnxruntime
  psutil==7.2.1 ; sys_platform == 'win32'
  # via agent-cli
  pydantic==2.12.5
@@ -107,8 +40,6 @@ pygments==2.19.2
  # via rich
  pyperclip==1.11.0
  # via agent-cli
- pyreadline3==3.5.4 ; sys_platform == 'win32'
- # via humanfriendly
  python-dotenv==1.2.1
  # via dotenv
  rich==14.2.0
@@ -118,26 +49,12 @@ rich==14.2.0
  # typer-slim
  setproctitle==1.3.7
  # via agent-cli
- setuptools==80.9.0 ; python_full_version >= '3.12'
- # via torch
  shellingham==1.5.4
  # via
  # typer
  # typer-slim
- silero-vad==6.2.0
+ silero-vad-lite==0.2.1
  # via agent-cli
- sympy==1.14.0
- # via
- # onnxruntime
- # torch
- torch==2.9.1
- # via
- # silero-vad
- # torchaudio
- torchaudio==2.9.1
- # via silero-vad
- triton==3.5.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
- # via torch
  typer==0.21.1
  # via agent-cli
  typer-slim==0.21.1
@@ -147,7 +64,6 @@ typing-extensions==4.15.0
  # anyio
  # pydantic
  # pydantic-core
- # torch
  # typer
  # typer-slim
  # typing-inspection
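
Taken together, the dependency updates above add the `openai` client to the `memory` and `rag` groups (per `_extras.json`) and replace the torch-based `silero-vad` with `silero-vad-lite` in the minimal lockfile. A minimal install sketch, assuming these groups are exposed as pip extras under the same names (the extras names come from `_extras.json`; the exact mapping to pip extras is an assumption):

```bash
# Hypothetical commands; extras names taken from _extras.json above.
pip install "agent-cli[memory]"       # long-term memory proxy: chromadb, openai, yaml
pip install "agent-cli[memory,rag]"   # memory plus the RAG proxy (both now pull in openai)
```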
@@ -1,29 +1,4 @@
- r"""Wake word-based voice assistant that records when wake word is detected.
-
- This agent uses Wyoming wake word detection to implement a hands-free voice assistant that:
- 1. Continuously listens for a wake word
- 2. When the wake word is detected, starts recording user speech
- 3. When the wake word is detected again, stops recording and processes the speech
- 4. Sends the recorded speech to ASR for transcription
- 5. Optionally processes the transcript with an LLM and speaks the response
-
- WORKFLOW:
- 1. Agent starts listening for the specified wake word
- 2. First wake word detection -> start recording user speech
- 3. Second wake word detection -> stop recording and process the speech
- 4. Transcribe the recorded speech using Wyoming ASR
- 5. Optionally process with LLM and respond with TTS
-
- USAGE:
- - Start the agent: assistant --wake-word "ok_nabu" --input-device-index 1
- - The agent runs continuously until stopped with Ctrl+C or --stop
- - Uses background process management for daemon-like operation
-
- REQUIREMENTS:
- - Wyoming wake word server (e.g., wyoming-openwakeword)
- - Wyoming ASR server (e.g., wyoming-whisper)
- - Optional: Wyoming TTS server for responses
- """
+ """Wake word-based voice assistant using Wyoming protocol services."""

  from __future__ import annotations

@@ -313,7 +288,28 @@ def assistant(
  config_file: str | None = opts.CONFIG_FILE,
  print_args: bool = opts.PRINT_ARGS,
  ) -> None:
- """Wake word-based voice assistant using local or remote services."""
+ """Hands-free voice assistant using wake word detection.
+
+ Continuously listens for a wake word, then records your speech until you say
+ the wake word again. The recording is transcribed and sent to an LLM for a
+ conversational response, optionally spoken back via TTS.
+
+ **Conversation flow:**
+ 1. Say wake word → starts recording
+ 2. Speak your question/command
+ 3. Say wake word again → stops recording and processes
+
+ The assistant runs in a loop, ready for the next command after each response.
+ Stop with Ctrl+C or `--stop`.
+
+ **Requirements:**
+ - Wyoming wake word server (e.g., wyoming-openwakeword on port 10400)
+ - Wyoming ASR server (e.g., wyoming-whisper on port 10300)
+ - Optional: TTS server for spoken responses (enable with `--tts`)
+
+ **Example:**
+ `assistant --wake-word ok_nabu --tts --input-device-name USB`
+ """
  if print_args:
  print_command_line_args(locals())

@@ -1,4 +1,4 @@
- """Read text from clipboard, correct it using a local or remote LLM, and write the result back to the clipboard."""
+ """Fix grammar, spelling, and punctuation in text using an LLM."""

  from __future__ import annotations

@@ -216,7 +216,7 @@ def autocorrect(
  *,
  text: str | None = typer.Argument(
  None,
- help="The text to correct. If not provided, reads from clipboard.",
+ help="Text to correct. If omitted, reads from system clipboard.",
  rich_help_panel="General Options",
  ),
  # --- Provider Selection ---
@@ -240,7 +240,33 @@ def autocorrect(
  config_file: str | None = opts.CONFIG_FILE,
  print_args: bool = opts.PRINT_ARGS,
  ) -> None:
- """Correct text from clipboard using a local or remote LLM."""
+ """Fix grammar, spelling, and punctuation using an LLM.
+
+ Reads text from clipboard (or argument), sends to LLM for correction,
+ and copies the result back to clipboard. Only makes technical corrections
+ without changing meaning or tone.
+
+ **Workflow:**
+ 1. Read text from clipboard (or `TEXT` argument)
+ 2. Send to LLM for grammar/spelling/punctuation fixes
+ 3. Copy corrected text to clipboard (unless `--json`)
+ 4. Display result
+
+ **Examples:**
+ ```bash
+ # Correct text from clipboard (default)
+ agent-cli autocorrect
+
+ # Correct specific text
+ agent-cli autocorrect "this is incorect"
+
+ # Use OpenAI instead of local Ollama
+ agent-cli autocorrect --llm-provider openai
+
+ # Get JSON output for scripting (disables clipboard)
+ agent-cli autocorrect --json
+ ```
+ """
  if print_args:
  print_command_line_args(locals())

agent_cli/agents/chat.py CHANGED
@@ -1,13 +1,15 @@
- """An chat agent that you can talk to.
-
- This agent will:
- - Listen for your voice command.
- - Transcribe the command.
- - Send the transcription to an LLM.
- - Speak the LLM's response.
- - Remember the conversation history.
- - Attach timestamps to the saved conversation.
- - Format timestamps as "ago" when sending to the LLM.
+ """Voice-based conversational chat agent with memory and tools.
+
+ Runs an interactive voice loop: listens for speech, transcribes it,
+ sends to the LLM (with conversation context), and optionally speaks the response.
+
+ **Available tools** (automatically used by the LLM when relevant):
+ - `add_memory`/`search_memory`/`update_memory` - persistent long-term memory
+ - `duckduckgo_search` - web search for current information
+ - `read_file`/`execute_code` - file access and shell commands
+
+ **Process management**: Use `--toggle` to start/stop via hotkey, `--stop` to terminate,
+ or `--status` to check if running. Useful for binding to a keyboard shortcut.
  """

  from __future__ import annotations
@@ -425,14 +427,15 @@ def chat(
  history_dir: Path = typer.Option( # noqa: B008
  "~/.config/agent-cli/history",
  "--history-dir",
- help="Directory to store conversation history.",
+ help="Directory for conversation history and long-term memory. "
+ "Both `conversation.json` and `long_term_memory.json` are stored here.",
  rich_help_panel="History Options",
  ),
  last_n_messages: int = typer.Option(
  50,
  "--last-n-messages",
- help="Number of messages to include in the conversation history."
- " Set to 0 to disable history.",
+ help="Number of past messages to include as context for the LLM. "
+ "Set to 0 to start fresh each session (memory tools still persist).",
  rich_help_panel="History Options",
  ),
  # --- General Options ---
@@ -444,7 +447,34 @@ def chat(
  config_file: str | None = opts.CONFIG_FILE,
  print_args: bool = opts.PRINT_ARGS,
  ) -> None:
- """An chat agent that you can talk to."""
+ """Voice-based conversational chat agent with memory and tools.
+
+ Runs an interactive loop: listen → transcribe → LLM → speak response.
+ Conversation history is persisted and included as context for continuity.
+
+ **Built-in tools** (LLM uses automatically when relevant):
+
+ - `add_memory`/`search_memory`/`update_memory` - persistent long-term memory
+ - `duckduckgo_search` - web search for current information
+ - `read_file`/`execute_code` - file access and shell commands
+
+ **Process management**: Use `--toggle` to start/stop via hotkey (bind to
+ a keyboard shortcut), `--stop` to terminate, or `--status` to check state.
+
+ **Examples**:
+
+ Use OpenAI-compatible providers for speech and LLM, with TTS enabled:
+
+ agent-cli chat --asr-provider openai --llm-provider openai --tts
+
+ Start in background mode (toggle on/off with hotkey):
+
+ agent-cli chat --toggle
+
+ Use local Ollama LLM with Wyoming ASR:
+
+ agent-cli chat --llm-provider ollama
+ """
  if print_args:
  print_command_line_args(locals())

@@ -9,7 +9,25 @@ from agent_cli.core.process import set_process_title

  memory_app = typer.Typer(
  name="memory",
- help="Memory system operations (add, proxy, etc.).",
+ help="""Long-term memory system for AI chat applications.
+
+ Provides persistent memory across conversations by storing facts and context
+ in Markdown files, with automatic vector indexing for semantic retrieval.
+
+ **Subcommands:**
+
+ - `proxy`: Start an OpenAI-compatible proxy that injects relevant memories
+ into chat requests and extracts new facts from responses
+ - `add`: Manually add facts/memories without going through LLM extraction
+
+ **Quick Start:**
+
+ # Start the memory proxy (point your chat client at localhost:8100)
+ agent-cli memory proxy --openai-base-url http://localhost:11434/v1
+
+ # Manually seed some memories
+ agent-cli memory add "User prefers dark mode" "User is a Python developer"
+ """,
  add_completion=True,
  rich_markup_mode="markdown",
  no_args_is_help=True,
@@ -127,17 +127,17 @@ def add(
  "default",
  "--conversation-id",
  "-c",
- help="Conversation ID to add memories to.",
+ help="Conversation namespace for these memories. Memories are retrieved per-conversation unless shared globally.",
  ),
  memory_path: Path = typer.Option( # noqa: B008
  "./memory_db",
  "--memory-path",
- help="Path to the memory store.",
+ help="Directory for memory storage (same as `memory proxy --memory-path`).",
  ),
  git_versioning: bool = typer.Option(
  True, # noqa: FBT003
  "--git-versioning/--no-git-versioning",
- help="Commit changes to git.",
+ help="Auto-commit changes to git for version history.",
  ),
  quiet: bool = opts.QUIET,
  config_file: str | None = opts.CONFIG_FILE,
@@ -19,7 +19,7 @@ from agent_cli.core.utils import console, print_command_line_args
  def proxy(
  memory_path: Path = typer.Option( # noqa: B008
  "./memory_db",
- help="Path to the memory store (files + derived vector index).",
+ help="Directory for memory storage. Contains `entries/` (Markdown files) and `chroma/` (vector index). Created automatically if it doesn't exist.",
  rich_help_panel="Memory Configuration",
  ),
  openai_base_url: str | None = opts.OPENAI_BASE_URL,
@@ -27,7 +27,7 @@ def proxy(
  openai_api_key: str | None = opts.OPENAI_API_KEY,
  default_top_k: int = typer.Option(
  5,
- help="Number of memory entries to retrieve per query.",
+ help="Number of relevant memories to inject into each request. Higher values provide more context but increase token usage.",
  rich_help_panel="Memory Configuration",
  ),
  host: str = opts.SERVER_HOST,
@@ -38,7 +38,7 @@ def proxy(
  ),
  max_entries: int = typer.Option(
  500,
- help="Maximum stored memory entries per conversation (excluding summary).",
+ help="Maximum entries per conversation before oldest are evicted. Summaries are preserved separately.",
  rich_help_panel="Memory Configuration",
  ),
  mmr_lambda: float = typer.Option(
@@ -48,7 +48,7 @@ def proxy(
  ),
  recency_weight: float = typer.Option(
  0.2,
- help="Recency score weight (0.0-1.0). Controls freshness vs. relevance. Default 0.2 (20% recency, 80% semantic relevance).",
+ help="Weight for recency vs semantic relevance (0.0-1.0). At 0.2: 20% recency, 80% semantic similarity.",
  rich_help_panel="Memory Configuration",
  ),
  score_threshold: float = typer.Option(
@@ -59,16 +59,16 @@ def proxy(
  summarization: bool = typer.Option(
  True, # noqa: FBT003
  "--summarization/--no-summarization",
- help="Enable automatic fact extraction and summaries.",
+ help="Extract facts and generate summaries after each turn using the LLM. Disable to only store raw conversation turns.",
  rich_help_panel="Memory Configuration",
  ),
  git_versioning: bool = typer.Option(
  True, # noqa: FBT003
  "--git-versioning/--no-git-versioning",
- help="Enable automatic git commit of memory changes.",
+ help="Auto-commit memory changes to git. Initializes a repo in `--memory-path` if needed. Provides full history of memory evolution.",
  rich_help_panel="Memory Configuration",
  ),
- log_level: opts.LogLevel = opts.LOG_LEVEL,
+ log_level: opts.LogLevel = opts.SERVER_LOG_LEVEL,
  config_file: str | None = opts.CONFIG_FILE,
  print_args: bool = opts.PRINT_ARGS,
  ) -> None:
@@ -78,7 +78,7 @@ def proxy(
  CLI, or IDE plugin) and an OpenAI-compatible LLM provider (e.g., OpenAI,
  Ollama, vLLM).

- Key Features:
+ **Key Features:**

  - **Simple Markdown Files:** Memories are stored as human-readable Markdown
  files, serving as the ultimate source of truth.
@@ -89,7 +89,7 @@ def proxy(
  - **Proxy Middleware:** Works transparently with any OpenAI-compatible
  `/chat/completions` endpoint.

- How it works:
+ **How it works:**

  1. Intercepts `POST /v1/chat/completions` requests.
  2. **Retrieves** relevant memories (facts, previous conversations) from a
@@ -99,8 +99,17 @@ def proxy(
  5. **Extracts** new facts from the conversation in the background and
  updates the long-term memory store (including handling contradictions).

- Use this to give "long-term memory" to any OpenAI-compatible application.
- Point your client's base URL to `http://localhost:8100/v1`.
+ **Example:**
+
+ # Start proxy pointing to local Ollama
+ agent-cli memory proxy --openai-base-url http://localhost:11434/v1
+
+ # Then configure your chat client to use http://localhost:8100/v1
+ # as its OpenAI base URL. All requests flow through the memory proxy.
+
+ **Per-request overrides:** Clients can include these fields in the request
+ body: `memory_id` (conversation ID), `memory_top_k`, `memory_recency_weight`,
+ `memory_score_threshold`.
  """
  if print_args:
  print_command_line_args(locals())
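
As a usage sketch of the proxy workflow described in the docstring above (the `localhost:8100/v1` base URL, the `--openai-base-url` command, and the `memory_id` override are all taken from the diff; the model name below is a placeholder):

```bash
# Start the memory proxy against a local Ollama server (command from the docstring above).
agent-cli memory proxy --openai-base-url http://localhost:11434/v1

# Send a chat completion through the proxy instead of the provider directly;
# the proxy injects relevant memories and extracts new facts in the background.
curl http://localhost:8100/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
        "model": "llama3",
        "messages": [{"role": "user", "content": "Which editor theme do I prefer?"}],
        "memory_id": "default"
      }'
```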