agent-cli 0.70.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_cli/__init__.py +5 -0
- agent_cli/__main__.py +6 -0
- agent_cli/_extras.json +14 -0
- agent_cli/_requirements/.gitkeep +0 -0
- agent_cli/_requirements/audio.txt +79 -0
- agent_cli/_requirements/faster-whisper.txt +215 -0
- agent_cli/_requirements/kokoro.txt +425 -0
- agent_cli/_requirements/llm.txt +183 -0
- agent_cli/_requirements/memory.txt +355 -0
- agent_cli/_requirements/mlx-whisper.txt +222 -0
- agent_cli/_requirements/piper.txt +176 -0
- agent_cli/_requirements/rag.txt +402 -0
- agent_cli/_requirements/server.txt +154 -0
- agent_cli/_requirements/speed.txt +77 -0
- agent_cli/_requirements/vad.txt +155 -0
- agent_cli/_requirements/wyoming.txt +71 -0
- agent_cli/_tools.py +368 -0
- agent_cli/agents/__init__.py +23 -0
- agent_cli/agents/_voice_agent_common.py +136 -0
- agent_cli/agents/assistant.py +383 -0
- agent_cli/agents/autocorrect.py +284 -0
- agent_cli/agents/chat.py +496 -0
- agent_cli/agents/memory/__init__.py +31 -0
- agent_cli/agents/memory/add.py +190 -0
- agent_cli/agents/memory/proxy.py +160 -0
- agent_cli/agents/rag_proxy.py +128 -0
- agent_cli/agents/speak.py +209 -0
- agent_cli/agents/transcribe.py +671 -0
- agent_cli/agents/transcribe_daemon.py +499 -0
- agent_cli/agents/voice_edit.py +291 -0
- agent_cli/api.py +22 -0
- agent_cli/cli.py +106 -0
- agent_cli/config.py +503 -0
- agent_cli/config_cmd.py +307 -0
- agent_cli/constants.py +27 -0
- agent_cli/core/__init__.py +1 -0
- agent_cli/core/audio.py +461 -0
- agent_cli/core/audio_format.py +299 -0
- agent_cli/core/chroma.py +88 -0
- agent_cli/core/deps.py +191 -0
- agent_cli/core/openai_proxy.py +139 -0
- agent_cli/core/process.py +195 -0
- agent_cli/core/reranker.py +120 -0
- agent_cli/core/sse.py +87 -0
- agent_cli/core/transcription_logger.py +70 -0
- agent_cli/core/utils.py +526 -0
- agent_cli/core/vad.py +175 -0
- agent_cli/core/watch.py +65 -0
- agent_cli/dev/__init__.py +14 -0
- agent_cli/dev/cli.py +1588 -0
- agent_cli/dev/coding_agents/__init__.py +19 -0
- agent_cli/dev/coding_agents/aider.py +24 -0
- agent_cli/dev/coding_agents/base.py +167 -0
- agent_cli/dev/coding_agents/claude.py +39 -0
- agent_cli/dev/coding_agents/codex.py +24 -0
- agent_cli/dev/coding_agents/continue_dev.py +15 -0
- agent_cli/dev/coding_agents/copilot.py +24 -0
- agent_cli/dev/coding_agents/cursor_agent.py +48 -0
- agent_cli/dev/coding_agents/gemini.py +28 -0
- agent_cli/dev/coding_agents/opencode.py +15 -0
- agent_cli/dev/coding_agents/registry.py +49 -0
- agent_cli/dev/editors/__init__.py +19 -0
- agent_cli/dev/editors/base.py +89 -0
- agent_cli/dev/editors/cursor.py +15 -0
- agent_cli/dev/editors/emacs.py +46 -0
- agent_cli/dev/editors/jetbrains.py +56 -0
- agent_cli/dev/editors/nano.py +31 -0
- agent_cli/dev/editors/neovim.py +33 -0
- agent_cli/dev/editors/registry.py +59 -0
- agent_cli/dev/editors/sublime.py +20 -0
- agent_cli/dev/editors/vim.py +42 -0
- agent_cli/dev/editors/vscode.py +15 -0
- agent_cli/dev/editors/zed.py +20 -0
- agent_cli/dev/project.py +568 -0
- agent_cli/dev/registry.py +52 -0
- agent_cli/dev/skill/SKILL.md +141 -0
- agent_cli/dev/skill/examples.md +571 -0
- agent_cli/dev/terminals/__init__.py +19 -0
- agent_cli/dev/terminals/apple_terminal.py +82 -0
- agent_cli/dev/terminals/base.py +56 -0
- agent_cli/dev/terminals/gnome.py +51 -0
- agent_cli/dev/terminals/iterm2.py +84 -0
- agent_cli/dev/terminals/kitty.py +77 -0
- agent_cli/dev/terminals/registry.py +48 -0
- agent_cli/dev/terminals/tmux.py +58 -0
- agent_cli/dev/terminals/warp.py +132 -0
- agent_cli/dev/terminals/zellij.py +78 -0
- agent_cli/dev/worktree.py +856 -0
- agent_cli/docs_gen.py +417 -0
- agent_cli/example-config.toml +185 -0
- agent_cli/install/__init__.py +5 -0
- agent_cli/install/common.py +89 -0
- agent_cli/install/extras.py +174 -0
- agent_cli/install/hotkeys.py +48 -0
- agent_cli/install/services.py +87 -0
- agent_cli/memory/__init__.py +7 -0
- agent_cli/memory/_files.py +250 -0
- agent_cli/memory/_filters.py +63 -0
- agent_cli/memory/_git.py +157 -0
- agent_cli/memory/_indexer.py +142 -0
- agent_cli/memory/_ingest.py +408 -0
- agent_cli/memory/_persistence.py +182 -0
- agent_cli/memory/_prompt.py +91 -0
- agent_cli/memory/_retrieval.py +294 -0
- agent_cli/memory/_store.py +169 -0
- agent_cli/memory/_streaming.py +44 -0
- agent_cli/memory/_tasks.py +48 -0
- agent_cli/memory/api.py +113 -0
- agent_cli/memory/client.py +272 -0
- agent_cli/memory/engine.py +361 -0
- agent_cli/memory/entities.py +43 -0
- agent_cli/memory/models.py +112 -0
- agent_cli/opts.py +433 -0
- agent_cli/py.typed +0 -0
- agent_cli/rag/__init__.py +3 -0
- agent_cli/rag/_indexer.py +67 -0
- agent_cli/rag/_indexing.py +226 -0
- agent_cli/rag/_prompt.py +30 -0
- agent_cli/rag/_retriever.py +156 -0
- agent_cli/rag/_store.py +48 -0
- agent_cli/rag/_utils.py +218 -0
- agent_cli/rag/api.py +175 -0
- agent_cli/rag/client.py +299 -0
- agent_cli/rag/engine.py +302 -0
- agent_cli/rag/models.py +55 -0
- agent_cli/scripts/.runtime/.gitkeep +0 -0
- agent_cli/scripts/__init__.py +1 -0
- agent_cli/scripts/check_plugin_skill_sync.py +50 -0
- agent_cli/scripts/linux-hotkeys/README.md +63 -0
- agent_cli/scripts/linux-hotkeys/toggle-autocorrect.sh +45 -0
- agent_cli/scripts/linux-hotkeys/toggle-transcription.sh +58 -0
- agent_cli/scripts/linux-hotkeys/toggle-voice-edit.sh +58 -0
- agent_cli/scripts/macos-hotkeys/README.md +45 -0
- agent_cli/scripts/macos-hotkeys/skhd-config-example +5 -0
- agent_cli/scripts/macos-hotkeys/toggle-autocorrect.sh +12 -0
- agent_cli/scripts/macos-hotkeys/toggle-transcription.sh +37 -0
- agent_cli/scripts/macos-hotkeys/toggle-voice-edit.sh +37 -0
- agent_cli/scripts/nvidia-asr-server/README.md +99 -0
- agent_cli/scripts/nvidia-asr-server/pyproject.toml +27 -0
- agent_cli/scripts/nvidia-asr-server/server.py +255 -0
- agent_cli/scripts/nvidia-asr-server/shell.nix +32 -0
- agent_cli/scripts/nvidia-asr-server/uv.lock +4654 -0
- agent_cli/scripts/run-openwakeword.sh +11 -0
- agent_cli/scripts/run-piper-windows.ps1 +30 -0
- agent_cli/scripts/run-piper.sh +24 -0
- agent_cli/scripts/run-whisper-linux.sh +40 -0
- agent_cli/scripts/run-whisper-macos.sh +6 -0
- agent_cli/scripts/run-whisper-windows.ps1 +51 -0
- agent_cli/scripts/run-whisper.sh +9 -0
- agent_cli/scripts/run_faster_whisper_server.py +136 -0
- agent_cli/scripts/setup-linux-hotkeys.sh +72 -0
- agent_cli/scripts/setup-linux.sh +108 -0
- agent_cli/scripts/setup-macos-hotkeys.sh +61 -0
- agent_cli/scripts/setup-macos.sh +76 -0
- agent_cli/scripts/setup-windows.ps1 +63 -0
- agent_cli/scripts/start-all-services-windows.ps1 +53 -0
- agent_cli/scripts/start-all-services.sh +178 -0
- agent_cli/scripts/sync_extras.py +138 -0
- agent_cli/server/__init__.py +3 -0
- agent_cli/server/cli.py +721 -0
- agent_cli/server/common.py +222 -0
- agent_cli/server/model_manager.py +288 -0
- agent_cli/server/model_registry.py +225 -0
- agent_cli/server/proxy/__init__.py +3 -0
- agent_cli/server/proxy/api.py +444 -0
- agent_cli/server/streaming.py +67 -0
- agent_cli/server/tts/__init__.py +3 -0
- agent_cli/server/tts/api.py +335 -0
- agent_cli/server/tts/backends/__init__.py +82 -0
- agent_cli/server/tts/backends/base.py +139 -0
- agent_cli/server/tts/backends/kokoro.py +403 -0
- agent_cli/server/tts/backends/piper.py +253 -0
- agent_cli/server/tts/model_manager.py +201 -0
- agent_cli/server/tts/model_registry.py +28 -0
- agent_cli/server/tts/wyoming_handler.py +249 -0
- agent_cli/server/whisper/__init__.py +3 -0
- agent_cli/server/whisper/api.py +413 -0
- agent_cli/server/whisper/backends/__init__.py +89 -0
- agent_cli/server/whisper/backends/base.py +97 -0
- agent_cli/server/whisper/backends/faster_whisper.py +225 -0
- agent_cli/server/whisper/backends/mlx.py +270 -0
- agent_cli/server/whisper/languages.py +116 -0
- agent_cli/server/whisper/model_manager.py +157 -0
- agent_cli/server/whisper/model_registry.py +28 -0
- agent_cli/server/whisper/wyoming_handler.py +203 -0
- agent_cli/services/__init__.py +343 -0
- agent_cli/services/_wyoming_utils.py +64 -0
- agent_cli/services/asr.py +506 -0
- agent_cli/services/llm.py +228 -0
- agent_cli/services/tts.py +450 -0
- agent_cli/services/wake_word.py +142 -0
- agent_cli-0.70.5.dist-info/METADATA +2118 -0
- agent_cli-0.70.5.dist-info/RECORD +196 -0
- agent_cli-0.70.5.dist-info/WHEEL +4 -0
- agent_cli-0.70.5.dist-info/entry_points.txt +4 -0
- agent_cli-0.70.5.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,361 @@
|
|
|
1
|
+
"""Core memory engine logic."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
from datetime import UTC, datetime
|
|
7
|
+
from time import perf_counter
|
|
8
|
+
from typing import TYPE_CHECKING, Any
|
|
9
|
+
from uuid import uuid4
|
|
10
|
+
|
|
11
|
+
from fastapi.responses import StreamingResponse
|
|
12
|
+
|
|
13
|
+
from agent_cli.core.openai_proxy import forward_chat_request
|
|
14
|
+
from agent_cli.memory import _streaming
|
|
15
|
+
from agent_cli.memory._git import commit_changes
|
|
16
|
+
from agent_cli.memory._ingest import extract_and_store_facts_and_summaries
|
|
17
|
+
from agent_cli.memory._persistence import evict_if_needed, persist_entries
|
|
18
|
+
from agent_cli.memory._retrieval import augment_chat_request
|
|
19
|
+
from agent_cli.memory._tasks import run_in_background
|
|
20
|
+
from agent_cli.memory.entities import Turn
|
|
21
|
+
|
|
22
|
+
if TYPE_CHECKING:
|
|
23
|
+
from collections.abc import AsyncGenerator, Mapping
|
|
24
|
+
from pathlib import Path
|
|
25
|
+
|
|
26
|
+
from chromadb import Collection
|
|
27
|
+
|
|
28
|
+
from agent_cli.core.reranker import OnnxCrossEncoder
|
|
29
|
+
from agent_cli.memory.models import ChatRequest
|
|
30
|
+
|
|
31
|
+
# Module-level logger; configured by the application, not here.
LOGGER = logging.getLogger(__name__)

# Default cap on stored entries per conversation before eviction kicks in
# (see evict_if_needed in _persistence).
_DEFAULT_MAX_ENTRIES = 500
# Default lambda forwarded to retrieval as mmr_lambda — presumably the
# maximal-marginal-relevance relevance/diversity trade-off; confirm in
# _retrieval.augment_chat_request.
_DEFAULT_MMR_LAMBDA = 0.7
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _elapsed_ms(start: float) -> float:
|
|
38
|
+
"""Return elapsed milliseconds since start."""
|
|
39
|
+
return (perf_counter() - start) * 1000
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _latest_user_message(request: ChatRequest) -> str | None:
|
|
43
|
+
"""Return the most recent user message, if any."""
|
|
44
|
+
return next((m.content for m in reversed(request.messages) if m.role == "user"), None)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _assistant_reply_content(response: Mapping[str, Any]) -> str | None:
|
|
48
|
+
"""Extract assistant content from a chat completion response."""
|
|
49
|
+
choices = response.get("choices", [])
|
|
50
|
+
if not choices:
|
|
51
|
+
return None
|
|
52
|
+
message = choices[0].get("message")
|
|
53
|
+
assert message is not None
|
|
54
|
+
return message.get("content")
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _persist_turns(
    collection: Collection,
    *,
    memory_root: Path,
    conversation_id: str,
    user_message: str | None,
    assistant_message: str | None,
    user_turn_id: str | None = None,
) -> None:
    """Persist the latest user/assistant exchanges.

    Args:
        collection: Chroma collection backing the vector store.
        memory_root: Root directory of the on-disk memory store.
        conversation_id: Conversation the turns belong to.
        user_message: Latest user message; skipped when empty or ``None``.
        assistant_message: Latest assistant reply; skipped when empty or ``None``.
        user_turn_id: Pre-allocated id for the user turn (so fact extraction
            can reference it later); a fresh UUID is generated when omitted.
    """
    now = datetime.now(UTC)
    # Only non-empty messages become Turn entries, so the list holds Turns
    # exclusively. The previous `list[Turn | None]` annotation was wrong and
    # forced a spurious `# type: ignore[arg-type]` on the persist_entries call.
    entries: list[Turn] = []

    if user_message:
        entries.append(
            Turn(
                id=user_turn_id or str(uuid4()),
                conversation_id=conversation_id,
                role="user",
                content=user_message,
                created_at=now,
            ),
        )

    if assistant_message:
        entries.append(
            Turn(
                id=str(uuid4()),
                conversation_id=conversation_id,
                role="assistant",
                content=assistant_message,
                created_at=now,
            ),
        )

    persist_entries(
        collection,
        memory_root=memory_root,
        conversation_id=conversation_id,
        entries=entries,
    )
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
async def _postprocess_after_turn(
    *,
    collection: Collection,
    memory_root: Path,
    conversation_id: str,
    user_message: str | None,
    assistant_message: str | None,
    openai_base_url: str,
    api_key: str | None,
    enable_summarization: bool,
    model: str,
    max_entries: int,
    enable_git_versioning: bool,
    user_turn_id: str | None = None,
) -> None:
    """Run summarization/fact extraction and eviction, then optionally commit.

    Each stage is timed independently so the logs show where post-processing
    time goes for a given conversation.
    """
    t_total = perf_counter()

    # Stage 1: LLM-backed fact extraction and (optional) summary update.
    t_facts = perf_counter()
    await extract_and_store_facts_and_summaries(
        collection=collection,
        memory_root=memory_root,
        conversation_id=conversation_id,
        user_message=user_message,
        assistant_message=assistant_message,
        openai_base_url=openai_base_url,
        api_key=api_key,
        model=model,
        enable_git_versioning=enable_git_versioning,
        source_id=user_turn_id,
        enable_summarization=enable_summarization,
    )
    LOGGER.info(
        "Updated facts and summaries in %.1f ms (conversation=%s)",
        _elapsed_ms(t_facts),
        conversation_id,
    )

    # Stage 2: trim the store once it exceeds max_entries.
    t_evict = perf_counter()
    evict_if_needed(collection, memory_root, conversation_id, max_entries)
    LOGGER.info(
        "Eviction check completed in %.1f ms (conversation=%s)",
        _elapsed_ms(t_evict),
        conversation_id,
    )

    LOGGER.info(
        "Post-processing finished in %.1f ms (conversation=%s, summarization=%s)",
        _elapsed_ms(t_total),
        conversation_id,
        "enabled" if enable_summarization else "disabled",
    )

    # Stage 3: snapshot the on-disk store when git versioning is on.
    if enable_git_versioning:
        await commit_changes(memory_root, f"Update memory for conversation {conversation_id}")
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
async def _stream_and_persist_response(
    *,
    forward_payload: dict[str, Any],
    collection: Collection,
    memory_root: Path,
    conversation_id: str,
    user_message: str | None,
    openai_base_url: str,
    api_key: str | None,
    enable_summarization: bool,
    model: str,
    max_entries: int,
    enable_git_versioning: bool,
    user_turn_id: str | None = None,
) -> StreamingResponse:
    """Forward streaming request, tee assistant text, and persist after completion.

    The upstream SSE stream is re-emitted to the caller unchanged while the
    assistant's text deltas are accumulated; once the stream finishes,
    persistence and post-processing run in a background task so the HTTP
    response is never delayed by storage or summarization work.
    """
    headers = {"Authorization": f"Bearer {api_key}"} if api_key else None
    stream_start = perf_counter()

    async def _persist_stream_result(assistant_message: str | None) -> None:
        # Runs after the stream completes: store the assistant turn, then do
        # fact extraction / summarization / eviction for the whole exchange.
        # The user turn was already persisted by the caller before streaming.
        post_start = perf_counter()
        _persist_turns(
            collection,
            memory_root=memory_root,
            conversation_id=conversation_id,
            user_message=None,
            assistant_message=assistant_message,
            user_turn_id=None,  # Assistant turn doesn't reuse user ID
        )
        await _postprocess_after_turn(
            collection=collection,
            memory_root=memory_root,
            conversation_id=conversation_id,
            user_message=user_message,
            assistant_message=assistant_message,
            openai_base_url=openai_base_url,
            api_key=api_key,
            enable_summarization=enable_summarization,
            model=model,
            max_entries=max_entries,
            enable_git_versioning=enable_git_versioning,
            user_turn_id=user_turn_id,
        )
        LOGGER.info(
            "Stream post-processing completed in %.1f ms (conversation=%s)",
            _elapsed_ms(post_start),
            conversation_id,
        )

    async def tee_and_accumulate() -> AsyncGenerator[str, None]:
        # Re-emit each upstream SSE line verbatim while collecting the
        # assistant's text deltas into assistant_chunks.
        assistant_chunks: list[str] = []
        async for line in _streaming.stream_chat_sse(
            openai_base_url=openai_base_url,
            payload=forward_payload,
            headers=headers,
        ):
            _streaming.accumulate_assistant_text(line, assistant_chunks)
            yield line + "\n\n"  # SSE framing: blank line terminates each event
        assistant_message = "".join(assistant_chunks).strip() or None
        if assistant_message:
            # Fire-and-forget: persistence must not block the stream consumer.
            run_in_background(
                _persist_stream_result(assistant_message),
                label=f"stream-postprocess-{conversation_id}",
            )
        LOGGER.info(
            "Streaming response finished in %.1f ms (conversation=%s)",
            _elapsed_ms(stream_start),
            conversation_id,
        )

    return StreamingResponse(tee_and_accumulate(), media_type="text/event-stream")
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
async def process_chat_request(
    request: ChatRequest,
    collection: Collection,
    memory_root: Path,
    openai_base_url: str,
    reranker_model: OnnxCrossEncoder,
    default_top_k: int = 5,
    api_key: str | None = None,
    enable_summarization: bool = True,
    max_entries: int = _DEFAULT_MAX_ENTRIES,
    mmr_lambda: float = _DEFAULT_MMR_LAMBDA,
    recency_weight: float = 0.2,
    score_threshold: float | None = None,
    postprocess_in_background: bool = True,
    enable_git_versioning: bool = False,
    filters: dict[str, Any] | None = None,
) -> Any:
    """Process a chat request with long-term memory support.

    Retrieval-augments the request with stored memories, forwards it to the
    OpenAI-compatible backend at ``openai_base_url``, persists the exchange,
    and runs fact extraction / summarization / eviction (in the background
    by default).

    Returns:
        A ``StreamingResponse`` when ``request.stream`` is set; otherwise the
        upstream completion payload, annotated with ``memory_hits`` when it
        is a dict.
    """
    overall_start = perf_counter()
    retrieval_start = perf_counter()
    aug_request, retrieval, conversation_id, _summaries = await augment_chat_request(
        request,
        collection,
        reranker_model=reranker_model,
        default_top_k=default_top_k,
        include_global=True,
        mmr_lambda=mmr_lambda,
        recency_weight=recency_weight,
        score_threshold=score_threshold,
        filters=filters,
    )
    retrieval_ms = _elapsed_ms(retrieval_start)
    hit_count = len(retrieval.entries) if retrieval else 0
    LOGGER.info(
        "Memory retrieval completed in %.1f ms (conversation=%s, hits=%d, top_k=%d)",
        retrieval_ms,
        conversation_id,
        hit_count,
        request.memory_top_k if request.memory_top_k is not None else default_top_k,
    )

    # Allocate the user-turn id up front so fact extraction can reference it.
    user_turn_id = str(uuid4())

    if request.stream:
        # Streaming path: persist the user turn now; the assistant turn and
        # post-processing happen after the stream completes (see
        # _stream_and_persist_response).
        LOGGER.info(
            "Forwarding streaming request (conversation=%s, model=%s)",
            conversation_id,
            request.model,
        )
        user_message = _latest_user_message(request)
        _persist_turns(
            collection,
            memory_root=memory_root,
            conversation_id=conversation_id,
            user_message=user_message,
            assistant_message=None,
            user_turn_id=user_turn_id,
        )
        # NOTE(review): only memory_id/memory_top_k are stripped before
        # forwarding; memory_recency_weight and memory_score_threshold (also
        # declared on ChatRequest) appear to be forwarded upstream — confirm
        # the backend tolerates them or widen the exclude set.
        forward_payload = aug_request.model_dump(exclude={"memory_id", "memory_top_k"})
        return await _stream_and_persist_response(
            forward_payload=forward_payload,
            collection=collection,
            memory_root=memory_root,
            conversation_id=conversation_id,
            user_message=user_message,
            openai_base_url=openai_base_url,
            api_key=api_key,
            enable_summarization=enable_summarization,
            model=request.model,
            max_entries=max_entries,
            enable_git_versioning=enable_git_versioning,
            user_turn_id=user_turn_id,
        )

    # Non-streaming path: forward, then persist both turns from the response.
    llm_start = perf_counter()
    # NOTE(review): same exclude-set concern as the streaming branch above.
    response = await forward_chat_request(
        aug_request,
        openai_base_url,
        api_key,
        exclude_fields={"memory_id", "memory_top_k"},
    )
    LOGGER.info(
        "LLM completion finished in %.1f ms (conversation=%s, model=%s)",
        _elapsed_ms(llm_start),
        conversation_id,
        request.model,
    )

    # Non-dict responses (e.g. error passthroughs) are returned untouched.
    if not isinstance(response, dict):
        return response

    user_message = _latest_user_message(request)
    assistant_message = _assistant_reply_content(response)

    _persist_turns(
        collection,
        memory_root=memory_root,
        conversation_id=conversation_id,
        user_message=user_message,
        assistant_message=assistant_message,
        user_turn_id=user_turn_id,
    )

    async def run_postprocess() -> None:
        # Fact extraction, summarization, eviction, optional git commit.
        await _postprocess_after_turn(
            collection=collection,
            memory_root=memory_root,
            conversation_id=conversation_id,
            user_message=user_message,
            assistant_message=assistant_message,
            openai_base_url=openai_base_url,
            api_key=api_key,
            enable_summarization=enable_summarization,
            model=request.model,
            max_entries=max_entries,
            enable_git_versioning=enable_git_versioning,
            user_turn_id=user_turn_id,
        )

    if postprocess_in_background:
        run_in_background(run_postprocess(), label=f"postprocess-{conversation_id}")
    else:
        await run_postprocess()

    # Surface the retrieved memories to the caller alongside the completion.
    response["memory_hits"] = (
        [entry.model_dump() for entry in retrieval.entries] if retrieval else []
    )
    LOGGER.info(
        "Request finished in %.1f ms (conversation=%s, hits=%d)",
        _elapsed_ms(overall_start),
        conversation_id,
        hit_count,
    )

    return response
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
"""Domain entities for the memory system.
|
|
2
|
+
|
|
3
|
+
These models represent the "Truth" of the system with strict validation.
|
|
4
|
+
Unlike the storage models (files/DB), these entities do not have optional fields
|
|
5
|
+
where they shouldn't.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from datetime import datetime # noqa: TC003
|
|
11
|
+
from typing import Literal
|
|
12
|
+
|
|
13
|
+
from pydantic import BaseModel, Field
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class Turn(BaseModel):
    """A single user or assistant message in the conversation."""

    id: str = Field(..., description="Unique UUID for this turn")
    # Conversation this turn belongs to.
    conversation_id: str
    # Only end-user and assistant turns exist; facts/summaries are separate entities.
    role: Literal["user", "assistant"]
    # Raw message text.
    content: str
    # Creation timestamp; the engine supplies timezone-aware UTC datetimes
    # (datetime.now(UTC)).
    created_at: datetime
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class Fact(BaseModel):
    """An atomic piece of information extracted from a user message."""

    id: str = Field(..., description="Unique UUID for this fact")
    # Conversation the source turn belongs to.
    conversation_id: str
    # The extracted statement itself.
    content: str
    source_id: str = Field(..., description="UUID of the Turn this fact was extracted from")
    # When the fact was extracted.
    created_at: datetime
    # Facts are always role="memory" implicitly in the storage layer
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class Summary(BaseModel):
    """The rolling summary of a conversation."""

    # Conversation being summarized.
    conversation_id: str
    # Latest summary text.
    content: str
    # When this summary version was produced.
    created_at: datetime
    # Summaries are role="summary" implicitly
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
"""Memory data models."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Literal
|
|
6
|
+
|
|
7
|
+
from pydantic import BaseModel, ConfigDict, field_validator
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class Message(BaseModel):
    """Chat message model."""

    # OpenAI-style role string; kept as a plain str so unknown roles pass
    # through the proxy unchanged.
    role: str
    # Message text.
    content: str
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class ChatRequest(BaseModel):
    """Chat completion request model with long-term memory support."""

    # Unknown OpenAI fields (tools, top_p, ...) are preserved and forwarded.
    model_config = ConfigDict(extra="allow")

    model: str
    messages: list[Message]
    temperature: float | None = 0.7
    max_tokens: int | None = 1000
    stream: bool | None = False
    # Proxy-level extensions (not part of the OpenAI API):
    memory_id: str | None = None
    memory_top_k: int | None = None
    # NOTE(review): the engine strips only {"memory_id", "memory_top_k"}
    # before forwarding upstream; the two fields below appear to be forwarded
    # as-is — confirm the backend tolerates them.
    memory_recency_weight: float | None = None
    memory_score_threshold: float | None = None
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class MemoryEntry(BaseModel):
    """Stored memory entry."""

    # Role tag of the stored document ("user", "assistant", ...).
    role: str
    # Entry text.
    content: str
    # Timestamp serialized as a string by the storage layer (presumably
    # ISO-8601 — confirm against _persistence).
    created_at: str
    # Relevance score from retrieval; None when the entry was not scored.
    score: float | None = None
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class MemoryMetadata(BaseModel):
    """Metadata for a stored memory document."""

    # Conversation the document belongs to.
    conversation_id: str
    # Role tag in storage; turns use "user"/"assistant", facts and summaries
    # use dedicated implicit roles.
    role: str
    # Timestamp serialized as a string by the storage layer.
    created_at: str
    # Summary variant marker; None for non-summary documents.
    summary_kind: str | None = None
    # Id of the document that superseded this one, when replaced.
    replaced_by: str | None = None
    # Id of the Turn a fact was extracted from (mirrors Fact.source_id);
    # None for non-fact documents.
    source_id: str | None = None
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class SummaryOutput(BaseModel):
    """Structured summary returned by the LLM."""

    summary: str

    @field_validator("summary")
    @classmethod
    def _not_empty(cls, v: str) -> str:
        """Reject empty/whitespace-only summaries; return the stripped text."""
        text = str(v).strip()
        if not text:
            msg = "field must be non-empty"
            raise ValueError(msg)
        return text
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
class StoredMemory(BaseModel):
    """Memory document as stored in the vector DB."""

    # Document id in the collection.
    id: str
    # Document text.
    content: str
    # Structured metadata stored alongside the document.
    metadata: MemoryMetadata
    # Vector distance to the query; None when the DB did not return one.
    distance: float | None = None
    # Raw embedding vector; None when not requested from the DB.
    embedding: list[float] | None = None
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class MemoryRetrieval(BaseModel):
    """Result of a memory retrieval operation."""

    # Retrieved entries, already scored/ranked by the retrieval pipeline.
    entries: list[MemoryEntry]
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
class MemoryAdd(BaseModel):
    """Add a new memory fact."""

    # Literal discriminator so MemoryDecision parses as a tagged union.
    event: Literal["ADD"] = "ADD"
    # Text of the new fact.
    text: str
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class MemoryUpdate(BaseModel):
    """Update an existing memory fact."""

    # Literal discriminator so MemoryDecision parses as a tagged union.
    event: Literal["UPDATE"] = "UPDATE"
    # Integer id of the fact to update — presumably a positional index into
    # the candidate list shown to the LLM, not a storage UUID; confirm in
    # the ingest pipeline.
    id: int
    # Replacement fact text.
    text: str
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
class MemoryDelete(BaseModel):
    """Delete an existing memory fact."""

    # Literal discriminator so MemoryDecision parses as a tagged union.
    event: Literal["DELETE"] = "DELETE"
    # Integer id of the fact to delete (same id space as MemoryUpdate.id).
    id: int
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
class MemoryIgnore(BaseModel):
    """Keep an existing memory as is."""

    # Literal discriminator so MemoryDecision parses as a tagged union.
    event: Literal["NONE"] = "NONE"
    # Integer id of the fact left untouched (same id space as MemoryUpdate.id).
    id: int
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
# Tagged union of per-fact decisions; the `event` Literal on each member acts
# as the discriminator (ADD / UPDATE / DELETE / NONE).
MemoryDecision = MemoryAdd | MemoryUpdate | MemoryDelete | MemoryIgnore
|