agent-cli 0.70.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196) hide show
  1. agent_cli/__init__.py +5 -0
  2. agent_cli/__main__.py +6 -0
  3. agent_cli/_extras.json +14 -0
  4. agent_cli/_requirements/.gitkeep +0 -0
  5. agent_cli/_requirements/audio.txt +79 -0
  6. agent_cli/_requirements/faster-whisper.txt +215 -0
  7. agent_cli/_requirements/kokoro.txt +425 -0
  8. agent_cli/_requirements/llm.txt +183 -0
  9. agent_cli/_requirements/memory.txt +355 -0
  10. agent_cli/_requirements/mlx-whisper.txt +222 -0
  11. agent_cli/_requirements/piper.txt +176 -0
  12. agent_cli/_requirements/rag.txt +402 -0
  13. agent_cli/_requirements/server.txt +154 -0
  14. agent_cli/_requirements/speed.txt +77 -0
  15. agent_cli/_requirements/vad.txt +155 -0
  16. agent_cli/_requirements/wyoming.txt +71 -0
  17. agent_cli/_tools.py +368 -0
  18. agent_cli/agents/__init__.py +23 -0
  19. agent_cli/agents/_voice_agent_common.py +136 -0
  20. agent_cli/agents/assistant.py +383 -0
  21. agent_cli/agents/autocorrect.py +284 -0
  22. agent_cli/agents/chat.py +496 -0
  23. agent_cli/agents/memory/__init__.py +31 -0
  24. agent_cli/agents/memory/add.py +190 -0
  25. agent_cli/agents/memory/proxy.py +160 -0
  26. agent_cli/agents/rag_proxy.py +128 -0
  27. agent_cli/agents/speak.py +209 -0
  28. agent_cli/agents/transcribe.py +671 -0
  29. agent_cli/agents/transcribe_daemon.py +499 -0
  30. agent_cli/agents/voice_edit.py +291 -0
  31. agent_cli/api.py +22 -0
  32. agent_cli/cli.py +106 -0
  33. agent_cli/config.py +503 -0
  34. agent_cli/config_cmd.py +307 -0
  35. agent_cli/constants.py +27 -0
  36. agent_cli/core/__init__.py +1 -0
  37. agent_cli/core/audio.py +461 -0
  38. agent_cli/core/audio_format.py +299 -0
  39. agent_cli/core/chroma.py +88 -0
  40. agent_cli/core/deps.py +191 -0
  41. agent_cli/core/openai_proxy.py +139 -0
  42. agent_cli/core/process.py +195 -0
  43. agent_cli/core/reranker.py +120 -0
  44. agent_cli/core/sse.py +87 -0
  45. agent_cli/core/transcription_logger.py +70 -0
  46. agent_cli/core/utils.py +526 -0
  47. agent_cli/core/vad.py +175 -0
  48. agent_cli/core/watch.py +65 -0
  49. agent_cli/dev/__init__.py +14 -0
  50. agent_cli/dev/cli.py +1588 -0
  51. agent_cli/dev/coding_agents/__init__.py +19 -0
  52. agent_cli/dev/coding_agents/aider.py +24 -0
  53. agent_cli/dev/coding_agents/base.py +167 -0
  54. agent_cli/dev/coding_agents/claude.py +39 -0
  55. agent_cli/dev/coding_agents/codex.py +24 -0
  56. agent_cli/dev/coding_agents/continue_dev.py +15 -0
  57. agent_cli/dev/coding_agents/copilot.py +24 -0
  58. agent_cli/dev/coding_agents/cursor_agent.py +48 -0
  59. agent_cli/dev/coding_agents/gemini.py +28 -0
  60. agent_cli/dev/coding_agents/opencode.py +15 -0
  61. agent_cli/dev/coding_agents/registry.py +49 -0
  62. agent_cli/dev/editors/__init__.py +19 -0
  63. agent_cli/dev/editors/base.py +89 -0
  64. agent_cli/dev/editors/cursor.py +15 -0
  65. agent_cli/dev/editors/emacs.py +46 -0
  66. agent_cli/dev/editors/jetbrains.py +56 -0
  67. agent_cli/dev/editors/nano.py +31 -0
  68. agent_cli/dev/editors/neovim.py +33 -0
  69. agent_cli/dev/editors/registry.py +59 -0
  70. agent_cli/dev/editors/sublime.py +20 -0
  71. agent_cli/dev/editors/vim.py +42 -0
  72. agent_cli/dev/editors/vscode.py +15 -0
  73. agent_cli/dev/editors/zed.py +20 -0
  74. agent_cli/dev/project.py +568 -0
  75. agent_cli/dev/registry.py +52 -0
  76. agent_cli/dev/skill/SKILL.md +141 -0
  77. agent_cli/dev/skill/examples.md +571 -0
  78. agent_cli/dev/terminals/__init__.py +19 -0
  79. agent_cli/dev/terminals/apple_terminal.py +82 -0
  80. agent_cli/dev/terminals/base.py +56 -0
  81. agent_cli/dev/terminals/gnome.py +51 -0
  82. agent_cli/dev/terminals/iterm2.py +84 -0
  83. agent_cli/dev/terminals/kitty.py +77 -0
  84. agent_cli/dev/terminals/registry.py +48 -0
  85. agent_cli/dev/terminals/tmux.py +58 -0
  86. agent_cli/dev/terminals/warp.py +132 -0
  87. agent_cli/dev/terminals/zellij.py +78 -0
  88. agent_cli/dev/worktree.py +856 -0
  89. agent_cli/docs_gen.py +417 -0
  90. agent_cli/example-config.toml +185 -0
  91. agent_cli/install/__init__.py +5 -0
  92. agent_cli/install/common.py +89 -0
  93. agent_cli/install/extras.py +174 -0
  94. agent_cli/install/hotkeys.py +48 -0
  95. agent_cli/install/services.py +87 -0
  96. agent_cli/memory/__init__.py +7 -0
  97. agent_cli/memory/_files.py +250 -0
  98. agent_cli/memory/_filters.py +63 -0
  99. agent_cli/memory/_git.py +157 -0
  100. agent_cli/memory/_indexer.py +142 -0
  101. agent_cli/memory/_ingest.py +408 -0
  102. agent_cli/memory/_persistence.py +182 -0
  103. agent_cli/memory/_prompt.py +91 -0
  104. agent_cli/memory/_retrieval.py +294 -0
  105. agent_cli/memory/_store.py +169 -0
  106. agent_cli/memory/_streaming.py +44 -0
  107. agent_cli/memory/_tasks.py +48 -0
  108. agent_cli/memory/api.py +113 -0
  109. agent_cli/memory/client.py +272 -0
  110. agent_cli/memory/engine.py +361 -0
  111. agent_cli/memory/entities.py +43 -0
  112. agent_cli/memory/models.py +112 -0
  113. agent_cli/opts.py +433 -0
  114. agent_cli/py.typed +0 -0
  115. agent_cli/rag/__init__.py +3 -0
  116. agent_cli/rag/_indexer.py +67 -0
  117. agent_cli/rag/_indexing.py +226 -0
  118. agent_cli/rag/_prompt.py +30 -0
  119. agent_cli/rag/_retriever.py +156 -0
  120. agent_cli/rag/_store.py +48 -0
  121. agent_cli/rag/_utils.py +218 -0
  122. agent_cli/rag/api.py +175 -0
  123. agent_cli/rag/client.py +299 -0
  124. agent_cli/rag/engine.py +302 -0
  125. agent_cli/rag/models.py +55 -0
  126. agent_cli/scripts/.runtime/.gitkeep +0 -0
  127. agent_cli/scripts/__init__.py +1 -0
  128. agent_cli/scripts/check_plugin_skill_sync.py +50 -0
  129. agent_cli/scripts/linux-hotkeys/README.md +63 -0
  130. agent_cli/scripts/linux-hotkeys/toggle-autocorrect.sh +45 -0
  131. agent_cli/scripts/linux-hotkeys/toggle-transcription.sh +58 -0
  132. agent_cli/scripts/linux-hotkeys/toggle-voice-edit.sh +58 -0
  133. agent_cli/scripts/macos-hotkeys/README.md +45 -0
  134. agent_cli/scripts/macos-hotkeys/skhd-config-example +5 -0
  135. agent_cli/scripts/macos-hotkeys/toggle-autocorrect.sh +12 -0
  136. agent_cli/scripts/macos-hotkeys/toggle-transcription.sh +37 -0
  137. agent_cli/scripts/macos-hotkeys/toggle-voice-edit.sh +37 -0
  138. agent_cli/scripts/nvidia-asr-server/README.md +99 -0
  139. agent_cli/scripts/nvidia-asr-server/pyproject.toml +27 -0
  140. agent_cli/scripts/nvidia-asr-server/server.py +255 -0
  141. agent_cli/scripts/nvidia-asr-server/shell.nix +32 -0
  142. agent_cli/scripts/nvidia-asr-server/uv.lock +4654 -0
  143. agent_cli/scripts/run-openwakeword.sh +11 -0
  144. agent_cli/scripts/run-piper-windows.ps1 +30 -0
  145. agent_cli/scripts/run-piper.sh +24 -0
  146. agent_cli/scripts/run-whisper-linux.sh +40 -0
  147. agent_cli/scripts/run-whisper-macos.sh +6 -0
  148. agent_cli/scripts/run-whisper-windows.ps1 +51 -0
  149. agent_cli/scripts/run-whisper.sh +9 -0
  150. agent_cli/scripts/run_faster_whisper_server.py +136 -0
  151. agent_cli/scripts/setup-linux-hotkeys.sh +72 -0
  152. agent_cli/scripts/setup-linux.sh +108 -0
  153. agent_cli/scripts/setup-macos-hotkeys.sh +61 -0
  154. agent_cli/scripts/setup-macos.sh +76 -0
  155. agent_cli/scripts/setup-windows.ps1 +63 -0
  156. agent_cli/scripts/start-all-services-windows.ps1 +53 -0
  157. agent_cli/scripts/start-all-services.sh +178 -0
  158. agent_cli/scripts/sync_extras.py +138 -0
  159. agent_cli/server/__init__.py +3 -0
  160. agent_cli/server/cli.py +721 -0
  161. agent_cli/server/common.py +222 -0
  162. agent_cli/server/model_manager.py +288 -0
  163. agent_cli/server/model_registry.py +225 -0
  164. agent_cli/server/proxy/__init__.py +3 -0
  165. agent_cli/server/proxy/api.py +444 -0
  166. agent_cli/server/streaming.py +67 -0
  167. agent_cli/server/tts/__init__.py +3 -0
  168. agent_cli/server/tts/api.py +335 -0
  169. agent_cli/server/tts/backends/__init__.py +82 -0
  170. agent_cli/server/tts/backends/base.py +139 -0
  171. agent_cli/server/tts/backends/kokoro.py +403 -0
  172. agent_cli/server/tts/backends/piper.py +253 -0
  173. agent_cli/server/tts/model_manager.py +201 -0
  174. agent_cli/server/tts/model_registry.py +28 -0
  175. agent_cli/server/tts/wyoming_handler.py +249 -0
  176. agent_cli/server/whisper/__init__.py +3 -0
  177. agent_cli/server/whisper/api.py +413 -0
  178. agent_cli/server/whisper/backends/__init__.py +89 -0
  179. agent_cli/server/whisper/backends/base.py +97 -0
  180. agent_cli/server/whisper/backends/faster_whisper.py +225 -0
  181. agent_cli/server/whisper/backends/mlx.py +270 -0
  182. agent_cli/server/whisper/languages.py +116 -0
  183. agent_cli/server/whisper/model_manager.py +157 -0
  184. agent_cli/server/whisper/model_registry.py +28 -0
  185. agent_cli/server/whisper/wyoming_handler.py +203 -0
  186. agent_cli/services/__init__.py +343 -0
  187. agent_cli/services/_wyoming_utils.py +64 -0
  188. agent_cli/services/asr.py +506 -0
  189. agent_cli/services/llm.py +228 -0
  190. agent_cli/services/tts.py +450 -0
  191. agent_cli/services/wake_word.py +142 -0
  192. agent_cli-0.70.5.dist-info/METADATA +2118 -0
  193. agent_cli-0.70.5.dist-info/RECORD +196 -0
  194. agent_cli-0.70.5.dist-info/WHEEL +4 -0
  195. agent_cli-0.70.5.dist-info/entry_points.txt +4 -0
  196. agent_cli-0.70.5.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,361 @@
1
+ """Core memory engine logic."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ from datetime import UTC, datetime
7
+ from time import perf_counter
8
+ from typing import TYPE_CHECKING, Any
9
+ from uuid import uuid4
10
+
11
+ from fastapi.responses import StreamingResponse
12
+
13
+ from agent_cli.core.openai_proxy import forward_chat_request
14
+ from agent_cli.memory import _streaming
15
+ from agent_cli.memory._git import commit_changes
16
+ from agent_cli.memory._ingest import extract_and_store_facts_and_summaries
17
+ from agent_cli.memory._persistence import evict_if_needed, persist_entries
18
+ from agent_cli.memory._retrieval import augment_chat_request
19
+ from agent_cli.memory._tasks import run_in_background
20
+ from agent_cli.memory.entities import Turn
21
+
22
+ if TYPE_CHECKING:
23
+ from collections.abc import AsyncGenerator, Mapping
24
+ from pathlib import Path
25
+
26
+ from chromadb import Collection
27
+
28
+ from agent_cli.core.reranker import OnnxCrossEncoder
29
+ from agent_cli.memory.models import ChatRequest
30
+
31
+ LOGGER = logging.getLogger(__name__)
32
+
33
+ _DEFAULT_MAX_ENTRIES = 500
34
+ _DEFAULT_MMR_LAMBDA = 0.7
35
+
36
+
37
+ def _elapsed_ms(start: float) -> float:
38
+ """Return elapsed milliseconds since start."""
39
+ return (perf_counter() - start) * 1000
40
+
41
+
42
+ def _latest_user_message(request: ChatRequest) -> str | None:
43
+ """Return the most recent user message, if any."""
44
+ return next((m.content for m in reversed(request.messages) if m.role == "user"), None)
45
+
46
+
47
+ def _assistant_reply_content(response: Mapping[str, Any]) -> str | None:
48
+ """Extract assistant content from a chat completion response."""
49
+ choices = response.get("choices", [])
50
+ if not choices:
51
+ return None
52
+ message = choices[0].get("message")
53
+ assert message is not None
54
+ return message.get("content")
55
+
56
+
57
def _persist_turns(
    collection: Collection,
    *,
    memory_root: Path,
    conversation_id: str,
    user_message: str | None,
    assistant_message: str | None,
    user_turn_id: str | None = None,
) -> None:
    """Persist the latest user/assistant exchanges.

    Builds ``Turn`` entities for whichever of the two messages are present
    (``None``/empty messages are skipped) and writes them in one call.

    Args:
        collection: Vector-store collection backing the memory store.
        memory_root: Root directory of the on-disk memory files.
        conversation_id: Conversation the turns belong to.
        user_message: Latest user message, if any.
        assistant_message: Latest assistant reply, if any.
        user_turn_id: Pre-allocated ID to reuse for the user turn (so facts
            extracted later can reference it); a fresh UUID when None.

    """
    now = datetime.now(UTC)
    # The list only ever holds Turn instances; the previous
    # ``list[Turn | None]`` annotation was wrong and forced a
    # ``type: ignore[arg-type]`` at the persist_entries call site.
    entries: list[Turn] = []

    if user_message:
        entries.append(
            Turn(
                id=user_turn_id or str(uuid4()),
                conversation_id=conversation_id,
                role="user",
                content=user_message,
                created_at=now,
            ),
        )

    if assistant_message:
        entries.append(
            Turn(
                id=str(uuid4()),
                conversation_id=conversation_id,
                role="assistant",
                content=assistant_message,
                created_at=now,
            ),
        )

    persist_entries(
        collection,
        memory_root=memory_root,
        conversation_id=conversation_id,
        entries=entries,
    )
98
+
99
+
100
async def _postprocess_after_turn(
    *,
    collection: Collection,
    memory_root: Path,
    conversation_id: str,
    user_message: str | None,
    assistant_message: str | None,
    openai_base_url: str,
    api_key: str | None,
    enable_summarization: bool,
    model: str,
    max_entries: int,
    enable_git_versioning: bool,
    user_turn_id: str | None = None,
) -> None:
    """Run summarization/fact extraction and eviction.

    Runs after a completed user/assistant exchange: extracts facts (and, when
    ``enable_summarization`` is set, updates summaries), evicts entries beyond
    ``max_entries``, and finally commits the memory directory to git when
    ``enable_git_versioning`` is enabled. ``user_turn_id`` links extracted
    facts back to the originating user turn (passed as ``source_id``).
    """
    post_start = perf_counter()
    # Fact extraction / summarization — delegated to the LLM via _ingest.
    summary_start = perf_counter()
    await extract_and_store_facts_and_summaries(
        collection=collection,
        memory_root=memory_root,
        conversation_id=conversation_id,
        user_message=user_message,
        assistant_message=assistant_message,
        openai_base_url=openai_base_url,
        api_key=api_key,
        model=model,
        enable_git_versioning=enable_git_versioning,
        source_id=user_turn_id,
        enable_summarization=enable_summarization,
    )
    LOGGER.info(
        "Updated facts and summaries in %.1f ms (conversation=%s)",
        _elapsed_ms(summary_start),
        conversation_id,
    )
    # Trim the store if the conversation exceeds its entry budget.
    eviction_start = perf_counter()
    evict_if_needed(collection, memory_root, conversation_id, max_entries)
    LOGGER.info(
        "Eviction check completed in %.1f ms (conversation=%s)",
        _elapsed_ms(eviction_start),
        conversation_id,
    )
    LOGGER.info(
        "Post-processing finished in %.1f ms (conversation=%s, summarization=%s)",
        _elapsed_ms(post_start),
        conversation_id,
        "enabled" if enable_summarization else "disabled",
    )

    # Version the memory files after all writes for this turn are done.
    if enable_git_versioning:
        await commit_changes(memory_root, f"Update memory for conversation {conversation_id}")
152
+
153
+
154
async def _stream_and_persist_response(
    *,
    forward_payload: dict[str, Any],
    collection: Collection,
    memory_root: Path,
    conversation_id: str,
    user_message: str | None,
    openai_base_url: str,
    api_key: str | None,
    enable_summarization: bool,
    model: str,
    max_entries: int,
    enable_git_versioning: bool,
    user_turn_id: str | None = None,
) -> StreamingResponse:
    """Forward streaming request, tee assistant text, and persist after completion.

    The upstream SSE stream is re-emitted to the client (with SSE event
    framing, ``"\\n\\n"``, appended per line) while assistant text deltas are
    accumulated on the side. When the stream ends, persistence and
    post-processing run in a background task so the client response is never
    delayed. The user turn is expected to have been persisted already by the
    caller (see ``process_chat_request``); only the assistant turn is
    persisted here.
    """
    headers = {"Authorization": f"Bearer {api_key}"} if api_key else None
    stream_start = perf_counter()

    async def _persist_stream_result(assistant_message: str | None) -> None:
        # Runs in the background after the stream completes: store the
        # assistant turn, then do fact extraction / eviction / git commit.
        post_start = perf_counter()
        _persist_turns(
            collection,
            memory_root=memory_root,
            conversation_id=conversation_id,
            user_message=None,
            assistant_message=assistant_message,
            user_turn_id=None,  # Assistant turn doesn't reuse user ID
        )
        await _postprocess_after_turn(
            collection=collection,
            memory_root=memory_root,
            conversation_id=conversation_id,
            user_message=user_message,
            assistant_message=assistant_message,
            openai_base_url=openai_base_url,
            api_key=api_key,
            enable_summarization=enable_summarization,
            model=model,
            max_entries=max_entries,
            enable_git_versioning=enable_git_versioning,
            user_turn_id=user_turn_id,
        )
        LOGGER.info(
            "Stream post-processing completed in %.1f ms (conversation=%s)",
            _elapsed_ms(post_start),
            conversation_id,
        )

    async def tee_and_accumulate() -> AsyncGenerator[str, None]:
        # Tee: yield each SSE line to the client while collecting the
        # assistant's content deltas into assistant_chunks.
        assistant_chunks: list[str] = []
        async for line in _streaming.stream_chat_sse(
            openai_base_url=openai_base_url,
            payload=forward_payload,
            headers=headers,
        ):
            _streaming.accumulate_assistant_text(line, assistant_chunks)
            yield line + "\n\n"
        # Empty accumulation (e.g. no content deltas) persists nothing.
        assistant_message = "".join(assistant_chunks).strip() or None
        if assistant_message:
            run_in_background(
                _persist_stream_result(assistant_message),
                label=f"stream-postprocess-{conversation_id}",
            )
        LOGGER.info(
            "Streaming response finished in %.1f ms (conversation=%s)",
            _elapsed_ms(stream_start),
            conversation_id,
        )

    return StreamingResponse(tee_and_accumulate(), media_type="text/event-stream")
225
+
226
+
227
async def process_chat_request(
    request: ChatRequest,
    collection: Collection,
    memory_root: Path,
    openai_base_url: str,
    reranker_model: OnnxCrossEncoder,
    default_top_k: int = 5,
    api_key: str | None = None,
    enable_summarization: bool = True,
    max_entries: int = _DEFAULT_MAX_ENTRIES,
    mmr_lambda: float = _DEFAULT_MMR_LAMBDA,
    recency_weight: float = 0.2,
    score_threshold: float | None = None,
    postprocess_in_background: bool = True,
    enable_git_versioning: bool = False,
    filters: dict[str, Any] | None = None,
) -> Any:
    """Process a chat request with long-term memory support.

    Flow:
      1. Augment the request with retrieved memories (reranked, with MMR
         diversification and recency weighting).
      2. Forward to the upstream OpenAI-compatible endpoint — streaming or
         blocking, depending on ``request.stream``.
      3. Persist the user/assistant turns and run post-processing (fact
         extraction, optional summarization, eviction, optional git commit),
         in the background when ``postprocess_in_background`` is set.

    Returns a ``StreamingResponse`` for streaming requests; otherwise the
    upstream response, with a ``memory_hits`` list attached when the
    response is a dict (non-dict responses are returned untouched).
    """
    overall_start = perf_counter()
    # Step 1: memory retrieval + request augmentation.
    retrieval_start = perf_counter()
    aug_request, retrieval, conversation_id, _summaries = await augment_chat_request(
        request,
        collection,
        reranker_model=reranker_model,
        default_top_k=default_top_k,
        include_global=True,
        mmr_lambda=mmr_lambda,
        recency_weight=recency_weight,
        score_threshold=score_threshold,
        filters=filters,
    )
    retrieval_ms = _elapsed_ms(retrieval_start)
    hit_count = len(retrieval.entries) if retrieval else 0
    LOGGER.info(
        "Memory retrieval completed in %.1f ms (conversation=%s, hits=%d, top_k=%d)",
        retrieval_ms,
        conversation_id,
        hit_count,
        request.memory_top_k if request.memory_top_k is not None else default_top_k,
    )

    # Allocate the user-turn ID up front so fact extraction can reference it.
    user_turn_id = str(uuid4())

    # Step 2a: streaming path — persist the user turn now; the assistant turn
    # is persisted by the stream tee once the response completes.
    if request.stream:
        LOGGER.info(
            "Forwarding streaming request (conversation=%s, model=%s)",
            conversation_id,
            request.model,
        )
        user_message = _latest_user_message(request)
        _persist_turns(
            collection,
            memory_root=memory_root,
            conversation_id=conversation_id,
            user_message=user_message,
            assistant_message=None,
            user_turn_id=user_turn_id,
        )
        # Strip memory-only fields before forwarding to the upstream API.
        forward_payload = aug_request.model_dump(exclude={"memory_id", "memory_top_k"})
        return await _stream_and_persist_response(
            forward_payload=forward_payload,
            collection=collection,
            memory_root=memory_root,
            conversation_id=conversation_id,
            user_message=user_message,
            openai_base_url=openai_base_url,
            api_key=api_key,
            enable_summarization=enable_summarization,
            model=request.model,
            max_entries=max_entries,
            enable_git_versioning=enable_git_versioning,
            user_turn_id=user_turn_id,
        )

    # Step 2b: blocking path.
    llm_start = perf_counter()
    response = await forward_chat_request(
        aug_request,
        openai_base_url,
        api_key,
        exclude_fields={"memory_id", "memory_top_k"},
    )
    LOGGER.info(
        "LLM completion finished in %.1f ms (conversation=%s, model=%s)",
        _elapsed_ms(llm_start),
        conversation_id,
        request.model,
    )

    # Error/passthrough payloads are returned as-is without persistence.
    if not isinstance(response, dict):
        return response

    # Step 3: persist both turns of the completed exchange.
    user_message = _latest_user_message(request)
    assistant_message = _assistant_reply_content(response)

    _persist_turns(
        collection,
        memory_root=memory_root,
        conversation_id=conversation_id,
        user_message=user_message,
        assistant_message=assistant_message,
        user_turn_id=user_turn_id,
    )

    async def run_postprocess() -> None:
        # Fact extraction / summarization / eviction / git commit.
        await _postprocess_after_turn(
            collection=collection,
            memory_root=memory_root,
            conversation_id=conversation_id,
            user_message=user_message,
            assistant_message=assistant_message,
            openai_base_url=openai_base_url,
            api_key=api_key,
            enable_summarization=enable_summarization,
            model=request.model,
            max_entries=max_entries,
            enable_git_versioning=enable_git_versioning,
            user_turn_id=user_turn_id,
        )

    if postprocess_in_background:
        run_in_background(run_postprocess(), label=f"postprocess-{conversation_id}")
    else:
        await run_postprocess()

    # Surface the retrieved memories to the caller alongside the completion.
    response["memory_hits"] = (
        [entry.model_dump() for entry in retrieval.entries] if retrieval else []
    )
    LOGGER.info(
        "Request finished in %.1f ms (conversation=%s, hits=%d)",
        _elapsed_ms(overall_start),
        conversation_id,
        hit_count,
    )

    return response
@@ -0,0 +1,43 @@
1
+ """Domain entities for the memory system.
2
+
3
+ These models represent the "Truth" of the system with strict validation.
4
+ Unlike the storage models (files/DB), these entities do not have optional fields
5
+ where they shouldn't.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from datetime import datetime # noqa: TC003
11
+ from typing import Literal
12
+
13
+ from pydantic import BaseModel, Field
14
+
15
+
16
class Turn(BaseModel):
    """A single user or assistant message in the conversation."""

    id: str = Field(..., description="Unique UUID for this turn")
    # Conversation this turn belongs to.
    conversation_id: str
    # Who produced the message.
    role: Literal["user", "assistant"]
    # Raw message text.
    content: str
    # Creation timestamp (the engine constructs these with datetime.now(UTC)).
    created_at: datetime
24
+
25
+
26
class Fact(BaseModel):
    """An atomic piece of information extracted from a user message."""

    id: str = Field(..., description="Unique UUID for this fact")
    # Conversation the source turn belonged to.
    conversation_id: str
    # The extracted fact text.
    content: str
    source_id: str = Field(..., description="UUID of the Turn this fact was extracted from")
    # When the fact was extracted.
    created_at: datetime
    # Facts are always role="memory" implicitly in the storage layer
35
+
36
+
37
class Summary(BaseModel):
    """The rolling summary of a conversation."""

    # Conversation being summarized.
    conversation_id: str
    # Current summary text.
    content: str
    # When this summary version was produced.
    created_at: datetime
    # Summaries are role="summary" implicitly
@@ -0,0 +1,112 @@
1
+ """Memory data models."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Literal
6
+
7
+ from pydantic import BaseModel, ConfigDict, field_validator
8
+
9
+
10
class Message(BaseModel):
    """Chat message model."""

    # OpenAI-style role (e.g. "user", "assistant", "system").
    role: str
    # Message text.
    content: str
15
+
16
+
17
class ChatRequest(BaseModel):
    """Chat completion request model with long-term memory support."""

    # Accept and preserve unknown OpenAI fields so they survive forwarding.
    model_config = ConfigDict(extra="allow")

    # Standard OpenAI chat-completion fields.
    model: str
    messages: list[Message]
    temperature: float | None = 0.7
    max_tokens: int | None = 1000
    stream: bool | None = False
    # Memory-specific extensions, not part of the upstream API; the engine
    # excludes memory_id/memory_top_k before forwarding.
    memory_id: str | None = None
    memory_top_k: int | None = None
    memory_recency_weight: float | None = None
    memory_score_threshold: float | None = None
31
+
32
+
33
class MemoryEntry(BaseModel):
    """Stored memory entry."""

    # Role under which the entry was stored.
    role: str
    # Entry text.
    content: str
    # Creation timestamp serialized as a string (presumably ISO 8601 —
    # verify against the storage layer).
    created_at: str
    # Retrieval relevance score; None when the entry was not scored.
    score: float | None = None
40
+
41
+
42
class MemoryMetadata(BaseModel):
    """Metadata for a stored memory document."""

    # Conversation the document belongs to.
    conversation_id: str
    # Storage role of the document (e.g. "user"/"assistant"; the entities
    # module notes facts use "memory" and summaries use "summary").
    role: str
    # Creation timestamp serialized as a string.
    created_at: str
    # Presumably distinguishes summary variants; None for non-summaries —
    # TODO confirm against the ingest layer.
    summary_kind: str | None = None
    # ID of the document that superseded this one, if any.
    replaced_by: str | None = None
    # ID of the originating turn, when known (cf. Fact.source_id).
    source_id: str | None = None
51
+
52
+
53
class SummaryOutput(BaseModel):
    """Structured summary returned by the LLM."""

    # Non-empty, whitespace-stripped summary text (enforced below).
    summary: str

    @field_validator("summary")
    @classmethod
    def _not_empty(cls, v: str) -> str:
        """Reject empty/whitespace-only summaries; return the stripped text.

        An "after" validator already receives a coerced ``str``, so the
        previous ``str(v)`` round-trips and the separate ``not v`` check
        were redundant — ``not v.strip()`` covers both cases.
        """
        if not v.strip():
            msg = "field must be non-empty"
            raise ValueError(msg)
        return v.strip()
65
+
66
+
67
class StoredMemory(BaseModel):
    """Memory document as stored in the vector DB."""

    # Document ID in the collection.
    id: str
    # Document text.
    content: str
    # Structured metadata stored alongside the document.
    metadata: MemoryMetadata
    # Vector distance from the query; None outside of query results.
    distance: float | None = None
    # Raw embedding vector; None when embeddings were not fetched.
    embedding: list[float] | None = None
75
+
76
+
77
class MemoryRetrieval(BaseModel):
    """Result of a memory retrieval operation."""

    # Retrieved entries, in the order produced by the retriever.
    entries: list[MemoryEntry]
81
+
82
+
83
class MemoryAdd(BaseModel):
    """Add a new memory fact."""

    # Discriminator tag for the MemoryDecision union.
    event: Literal["ADD"] = "ADD"
    # Text of the fact to add.
    text: str
88
+
89
+
90
class MemoryUpdate(BaseModel):
    """Update an existing memory fact."""

    # Discriminator tag for the MemoryDecision union.
    event: Literal["UPDATE"] = "UPDATE"
    # Identifier of the fact to rewrite.
    id: int
    # Replacement text.
    text: str
96
+
97
+
98
class MemoryDelete(BaseModel):
    """Delete an existing memory fact."""

    # Discriminator tag for the MemoryDecision union.
    event: Literal["DELETE"] = "DELETE"
    # Identifier of the fact to delete.
    id: int
103
+
104
+
105
class MemoryIgnore(BaseModel):
    """Keep an existing memory as is."""

    # Discriminator tag for the MemoryDecision union.
    event: Literal["NONE"] = "NONE"
    # Identifier of the fact left untouched.
    id: int
110
+
111
+
112
# Tagged union of per-fact decisions, discriminated by each variant's
# ``event`` literal.
MemoryDecision = MemoryAdd | MemoryUpdate | MemoryDelete | MemoryIgnore