agent-cli 0.70.5__py3-none-any.whl
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_cli/__init__.py +5 -0
- agent_cli/__main__.py +6 -0
- agent_cli/_extras.json +14 -0
- agent_cli/_requirements/.gitkeep +0 -0
- agent_cli/_requirements/audio.txt +79 -0
- agent_cli/_requirements/faster-whisper.txt +215 -0
- agent_cli/_requirements/kokoro.txt +425 -0
- agent_cli/_requirements/llm.txt +183 -0
- agent_cli/_requirements/memory.txt +355 -0
- agent_cli/_requirements/mlx-whisper.txt +222 -0
- agent_cli/_requirements/piper.txt +176 -0
- agent_cli/_requirements/rag.txt +402 -0
- agent_cli/_requirements/server.txt +154 -0
- agent_cli/_requirements/speed.txt +77 -0
- agent_cli/_requirements/vad.txt +155 -0
- agent_cli/_requirements/wyoming.txt +71 -0
- agent_cli/_tools.py +368 -0
- agent_cli/agents/__init__.py +23 -0
- agent_cli/agents/_voice_agent_common.py +136 -0
- agent_cli/agents/assistant.py +383 -0
- agent_cli/agents/autocorrect.py +284 -0
- agent_cli/agents/chat.py +496 -0
- agent_cli/agents/memory/__init__.py +31 -0
- agent_cli/agents/memory/add.py +190 -0
- agent_cli/agents/memory/proxy.py +160 -0
- agent_cli/agents/rag_proxy.py +128 -0
- agent_cli/agents/speak.py +209 -0
- agent_cli/agents/transcribe.py +671 -0
- agent_cli/agents/transcribe_daemon.py +499 -0
- agent_cli/agents/voice_edit.py +291 -0
- agent_cli/api.py +22 -0
- agent_cli/cli.py +106 -0
- agent_cli/config.py +503 -0
- agent_cli/config_cmd.py +307 -0
- agent_cli/constants.py +27 -0
- agent_cli/core/__init__.py +1 -0
- agent_cli/core/audio.py +461 -0
- agent_cli/core/audio_format.py +299 -0
- agent_cli/core/chroma.py +88 -0
- agent_cli/core/deps.py +191 -0
- agent_cli/core/openai_proxy.py +139 -0
- agent_cli/core/process.py +195 -0
- agent_cli/core/reranker.py +120 -0
- agent_cli/core/sse.py +87 -0
- agent_cli/core/transcription_logger.py +70 -0
- agent_cli/core/utils.py +526 -0
- agent_cli/core/vad.py +175 -0
- agent_cli/core/watch.py +65 -0
- agent_cli/dev/__init__.py +14 -0
- agent_cli/dev/cli.py +1588 -0
- agent_cli/dev/coding_agents/__init__.py +19 -0
- agent_cli/dev/coding_agents/aider.py +24 -0
- agent_cli/dev/coding_agents/base.py +167 -0
- agent_cli/dev/coding_agents/claude.py +39 -0
- agent_cli/dev/coding_agents/codex.py +24 -0
- agent_cli/dev/coding_agents/continue_dev.py +15 -0
- agent_cli/dev/coding_agents/copilot.py +24 -0
- agent_cli/dev/coding_agents/cursor_agent.py +48 -0
- agent_cli/dev/coding_agents/gemini.py +28 -0
- agent_cli/dev/coding_agents/opencode.py +15 -0
- agent_cli/dev/coding_agents/registry.py +49 -0
- agent_cli/dev/editors/__init__.py +19 -0
- agent_cli/dev/editors/base.py +89 -0
- agent_cli/dev/editors/cursor.py +15 -0
- agent_cli/dev/editors/emacs.py +46 -0
- agent_cli/dev/editors/jetbrains.py +56 -0
- agent_cli/dev/editors/nano.py +31 -0
- agent_cli/dev/editors/neovim.py +33 -0
- agent_cli/dev/editors/registry.py +59 -0
- agent_cli/dev/editors/sublime.py +20 -0
- agent_cli/dev/editors/vim.py +42 -0
- agent_cli/dev/editors/vscode.py +15 -0
- agent_cli/dev/editors/zed.py +20 -0
- agent_cli/dev/project.py +568 -0
- agent_cli/dev/registry.py +52 -0
- agent_cli/dev/skill/SKILL.md +141 -0
- agent_cli/dev/skill/examples.md +571 -0
- agent_cli/dev/terminals/__init__.py +19 -0
- agent_cli/dev/terminals/apple_terminal.py +82 -0
- agent_cli/dev/terminals/base.py +56 -0
- agent_cli/dev/terminals/gnome.py +51 -0
- agent_cli/dev/terminals/iterm2.py +84 -0
- agent_cli/dev/terminals/kitty.py +77 -0
- agent_cli/dev/terminals/registry.py +48 -0
- agent_cli/dev/terminals/tmux.py +58 -0
- agent_cli/dev/terminals/warp.py +132 -0
- agent_cli/dev/terminals/zellij.py +78 -0
- agent_cli/dev/worktree.py +856 -0
- agent_cli/docs_gen.py +417 -0
- agent_cli/example-config.toml +185 -0
- agent_cli/install/__init__.py +5 -0
- agent_cli/install/common.py +89 -0
- agent_cli/install/extras.py +174 -0
- agent_cli/install/hotkeys.py +48 -0
- agent_cli/install/services.py +87 -0
- agent_cli/memory/__init__.py +7 -0
- agent_cli/memory/_files.py +250 -0
- agent_cli/memory/_filters.py +63 -0
- agent_cli/memory/_git.py +157 -0
- agent_cli/memory/_indexer.py +142 -0
- agent_cli/memory/_ingest.py +408 -0
- agent_cli/memory/_persistence.py +182 -0
- agent_cli/memory/_prompt.py +91 -0
- agent_cli/memory/_retrieval.py +294 -0
- agent_cli/memory/_store.py +169 -0
- agent_cli/memory/_streaming.py +44 -0
- agent_cli/memory/_tasks.py +48 -0
- agent_cli/memory/api.py +113 -0
- agent_cli/memory/client.py +272 -0
- agent_cli/memory/engine.py +361 -0
- agent_cli/memory/entities.py +43 -0
- agent_cli/memory/models.py +112 -0
- agent_cli/opts.py +433 -0
- agent_cli/py.typed +0 -0
- agent_cli/rag/__init__.py +3 -0
- agent_cli/rag/_indexer.py +67 -0
- agent_cli/rag/_indexing.py +226 -0
- agent_cli/rag/_prompt.py +30 -0
- agent_cli/rag/_retriever.py +156 -0
- agent_cli/rag/_store.py +48 -0
- agent_cli/rag/_utils.py +218 -0
- agent_cli/rag/api.py +175 -0
- agent_cli/rag/client.py +299 -0
- agent_cli/rag/engine.py +302 -0
- agent_cli/rag/models.py +55 -0
- agent_cli/scripts/.runtime/.gitkeep +0 -0
- agent_cli/scripts/__init__.py +1 -0
- agent_cli/scripts/check_plugin_skill_sync.py +50 -0
- agent_cli/scripts/linux-hotkeys/README.md +63 -0
- agent_cli/scripts/linux-hotkeys/toggle-autocorrect.sh +45 -0
- agent_cli/scripts/linux-hotkeys/toggle-transcription.sh +58 -0
- agent_cli/scripts/linux-hotkeys/toggle-voice-edit.sh +58 -0
- agent_cli/scripts/macos-hotkeys/README.md +45 -0
- agent_cli/scripts/macos-hotkeys/skhd-config-example +5 -0
- agent_cli/scripts/macos-hotkeys/toggle-autocorrect.sh +12 -0
- agent_cli/scripts/macos-hotkeys/toggle-transcription.sh +37 -0
- agent_cli/scripts/macos-hotkeys/toggle-voice-edit.sh +37 -0
- agent_cli/scripts/nvidia-asr-server/README.md +99 -0
- agent_cli/scripts/nvidia-asr-server/pyproject.toml +27 -0
- agent_cli/scripts/nvidia-asr-server/server.py +255 -0
- agent_cli/scripts/nvidia-asr-server/shell.nix +32 -0
- agent_cli/scripts/nvidia-asr-server/uv.lock +4654 -0
- agent_cli/scripts/run-openwakeword.sh +11 -0
- agent_cli/scripts/run-piper-windows.ps1 +30 -0
- agent_cli/scripts/run-piper.sh +24 -0
- agent_cli/scripts/run-whisper-linux.sh +40 -0
- agent_cli/scripts/run-whisper-macos.sh +6 -0
- agent_cli/scripts/run-whisper-windows.ps1 +51 -0
- agent_cli/scripts/run-whisper.sh +9 -0
- agent_cli/scripts/run_faster_whisper_server.py +136 -0
- agent_cli/scripts/setup-linux-hotkeys.sh +72 -0
- agent_cli/scripts/setup-linux.sh +108 -0
- agent_cli/scripts/setup-macos-hotkeys.sh +61 -0
- agent_cli/scripts/setup-macos.sh +76 -0
- agent_cli/scripts/setup-windows.ps1 +63 -0
- agent_cli/scripts/start-all-services-windows.ps1 +53 -0
- agent_cli/scripts/start-all-services.sh +178 -0
- agent_cli/scripts/sync_extras.py +138 -0
- agent_cli/server/__init__.py +3 -0
- agent_cli/server/cli.py +721 -0
- agent_cli/server/common.py +222 -0
- agent_cli/server/model_manager.py +288 -0
- agent_cli/server/model_registry.py +225 -0
- agent_cli/server/proxy/__init__.py +3 -0
- agent_cli/server/proxy/api.py +444 -0
- agent_cli/server/streaming.py +67 -0
- agent_cli/server/tts/__init__.py +3 -0
- agent_cli/server/tts/api.py +335 -0
- agent_cli/server/tts/backends/__init__.py +82 -0
- agent_cli/server/tts/backends/base.py +139 -0
- agent_cli/server/tts/backends/kokoro.py +403 -0
- agent_cli/server/tts/backends/piper.py +253 -0
- agent_cli/server/tts/model_manager.py +201 -0
- agent_cli/server/tts/model_registry.py +28 -0
- agent_cli/server/tts/wyoming_handler.py +249 -0
- agent_cli/server/whisper/__init__.py +3 -0
- agent_cli/server/whisper/api.py +413 -0
- agent_cli/server/whisper/backends/__init__.py +89 -0
- agent_cli/server/whisper/backends/base.py +97 -0
- agent_cli/server/whisper/backends/faster_whisper.py +225 -0
- agent_cli/server/whisper/backends/mlx.py +270 -0
- agent_cli/server/whisper/languages.py +116 -0
- agent_cli/server/whisper/model_manager.py +157 -0
- agent_cli/server/whisper/model_registry.py +28 -0
- agent_cli/server/whisper/wyoming_handler.py +203 -0
- agent_cli/services/__init__.py +343 -0
- agent_cli/services/_wyoming_utils.py +64 -0
- agent_cli/services/asr.py +506 -0
- agent_cli/services/llm.py +228 -0
- agent_cli/services/tts.py +450 -0
- agent_cli/services/wake_word.py +142 -0
- agent_cli-0.70.5.dist-info/METADATA +2118 -0
- agent_cli-0.70.5.dist-info/RECORD +196 -0
- agent_cli-0.70.5.dist-info/WHEEL +4 -0
- agent_cli-0.70.5.dist-info/entry_points.txt +4 -0
- agent_cli-0.70.5.dist-info/licenses/LICENSE +21 -0
agent_cli/memory/_persistence.py
@@ -0,0 +1,182 @@
+"""Persistence logic for memory entries (File + Vector DB)."""
+
+from __future__ import annotations
+
+import logging
+from typing import TYPE_CHECKING
+
+from agent_cli.memory._files import (
+    _DELETED_DIRNAME,
+    ensure_store_dirs,
+    load_snapshot,
+    read_memory_file,
+    soft_delete_memory_file,
+    write_memory_file,
+    write_snapshot,
+)
+from agent_cli.memory._store import delete_entries, list_conversation_entries, upsert_memories
+from agent_cli.memory.entities import Fact, Summary, Turn
+
+if TYPE_CHECKING:
+    from pathlib import Path
+
+    from chromadb import Collection
+
+    from agent_cli.memory.models import MemoryMetadata
+
+LOGGER = logging.getLogger(__name__)
+
+_SUMMARY_DOC_ID_SUFFIX = "::summary"
+
+
+def _safe_identifier(value: str) -> str:
+    """File/ID safe token preserving readability."""
+    safe = "".join(ch if ch.isalnum() or ch in "-._" else "_" for ch in value)
+    return safe or "entry"
+
+
+def persist_entries(
+    collection: Collection,
+    *,
+    memory_root: Path,
+    conversation_id: str,
+    entries: list[Turn | Fact | None],
+) -> None:
+    """Persist a batch of entries to disk and Chroma."""
+    ids: list[str] = []
+    contents: list[str] = []
+    metadatas: list[MemoryMetadata] = []
+
+    for item in entries:
+        if item is None:
+            continue
+
+        if isinstance(item, Turn):
+            role: str = item.role
+            source_id = None
+        elif isinstance(item, Fact):
+            role = "memory"
+            source_id = item.source_id
+        else:
+            LOGGER.warning("Unknown entity type in persist_entries: %s", type(item))
+            continue
+
+        record = write_memory_file(
+            memory_root,
+            conversation_id=conversation_id,
+            role=role,
+            created_at=item.created_at.isoformat(),
+            content=item.content,
+            doc_id=item.id,
+            source_id=source_id,
+        )
+        LOGGER.info("Persisted memory file: %s", record.path)
+        ids.append(record.id)
+        contents.append(record.content)
+        metadatas.append(record.metadata)
+
+    if ids:
+        upsert_memories(collection, ids=ids, contents=contents, metadatas=metadatas)
+
+
+def persist_summary(
+    collection: Collection,
+    *,
+    memory_root: Path,
+    summary: Summary,
+) -> None:
+    """Persist a summary to disk and Chroma."""
+    doc_id = _safe_identifier(f"{summary.conversation_id}{_SUMMARY_DOC_ID_SUFFIX}-summary")
+    record = write_memory_file(
+        memory_root,
+        conversation_id=summary.conversation_id,
+        role="summary",
+        created_at=summary.created_at.isoformat(),
+        content=summary.content,
+        summary_kind="summary",
+        doc_id=doc_id,
+    )
+    upsert_memories(
+        collection,
+        ids=[record.id],
+        contents=[record.content],
+        metadatas=[record.metadata],
+    )
+
+
+def delete_memory_files(
+    memory_root: Path,
+    conversation_id: str,
+    ids: list[str],
+    replacement_map: dict[str, str] | None = None,
+) -> None:
+    """Delete markdown files (move to tombstone) and snapshot entries matching the given ids."""
+    if not ids:
+        return
+
+    entries_dir, snapshot_path = ensure_store_dirs(memory_root)
+    # Ensure we use the correct base for relative paths in soft_delete
+    base_entries_dir = entries_dir
+    conv_dir = entries_dir / _safe_identifier(conversation_id)
+    snapshot = load_snapshot(snapshot_path)
+    replacements = replacement_map or {}
+
+    removed_ids: set[str] = set()
+
+    # Prefer precise paths from the snapshot.
+    for doc_id in ids:
+        rec = snapshot.get(doc_id)
+        if rec:
+            soft_delete_memory_file(
+                rec.path,
+                base_entries_dir,
+                replaced_by=replacements.get(doc_id),
+            )
+            snapshot.pop(doc_id, None)
+            removed_ids.add(doc_id)
+
+    remaining = {doc_id for doc_id in ids if doc_id not in removed_ids}
+
+    # Fallback: scan the conversation folder for anything not in the snapshot.
+    if remaining and conv_dir.exists():
+        for path in conv_dir.rglob("*.md"):
+            if _DELETED_DIRNAME in path.parts:
+                continue
+            rec = read_memory_file(path)
+            if rec and rec.id in remaining:
+                soft_delete_memory_file(
+                    path,
+                    base_entries_dir,
+                    replaced_by=replacements.get(rec.id),
+                )
+                snapshot.pop(rec.id, None)
+                removed_ids.add(rec.id)
+                remaining.remove(rec.id)
+                if not remaining:
+                    break
+
+    if removed_ids:
+        write_snapshot(snapshot_path, snapshot.values())
+
+
+def evict_if_needed(
+    collection: Collection,
+    memory_root: Path,
+    conversation_id: str,
+    max_entries: int,
+) -> None:
+    """Evict oldest non-summary entries beyond the max budget."""
+    if max_entries <= 0:
+        return
+    entries = list_conversation_entries(collection, conversation_id, include_summary=False)
+    if len(entries) <= max_entries:
+        return
+    # Sort by created_at asc
+    sorted_entries = sorted(
+        entries,
+        key=lambda e: e.metadata.created_at,
+    )
+    overflow = sorted_entries[:-max_entries]
+    ids_to_remove = [e.id for e in overflow]
+    delete_entries(collection, ids_to_remove)
+    delete_memory_files(memory_root, conversation_id, ids_to_remove)
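For orientation, a minimal usage sketch of the persistence layer above. This is not from the package; the Turn and Fact constructor fields and the Chroma wiring are assumptions, since entities.py is not shown in this diff.

from datetime import UTC, datetime
from pathlib import Path

import chromadb

from agent_cli.memory._persistence import evict_if_needed, persist_entries
from agent_cli.memory.entities import Fact, Turn

# Assumed wiring: a persistent Chroma collection and an on-disk memory root.
collection = chromadb.PersistentClient(path="./chroma").get_or_create_collection("memory")
memory_root = Path("./memory-store")
now = datetime.now(UTC)

# Persist one user turn plus one fact extracted from it, then enforce an entry budget.
persist_entries(
    collection,
    memory_root=memory_root,
    conversation_id="demo",
    entries=[
        Turn(id="turn-1", role="user", content="My wife is Anne.", created_at=now),  # field names assumed
        Fact(id="fact-1", content="The user's wife is Anne.", created_at=now, source_id="turn-1"),  # field names assumed
    ],
)
evict_if_needed(collection, memory_root, "demo", max_entries=200)
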
agent_cli/memory/_prompt.py
@@ -0,0 +1,91 @@
+"""Centralized prompts for memory LLM calls."""
+
+FACT_SYSTEM_PROMPT = """
+You are a memory extractor. From the latest exchange, return 1-3 concise fact sentences based ONLY on user messages.
+
+Guidelines:
+- If there is no meaningful fact, return [].
+- Ignore assistant/system content completely.
+- Facts must be short, readable sentences (e.g., "The user's wife is Anne.", "Planning a trip to Japan next spring.").
+- Do not return acknowledgements, questions, or meta statements; only factual statements from the user.
+- NEVER output refusals like "I cannot..." or "I don't know..." or "I don't have that information". If you can't extract a fact, return [].
+- Return a JSON list of strings.
+
+Few-shots:
+- Input: User: "Hi." / Assistant: "Hello" -> []
+- Input: User: "My wife is Anne." / Assistant: "Got it." -> ["The user's wife is Anne."]
+- Input: User: "I like biking on weekends." / Assistant: "Cool!" -> ["User likes biking on weekends."]
+""".strip()
+
+FACT_INSTRUCTIONS = """
+Return only factual sentences grounded in the user text. No assistant acknowledgements or meta-text.
+""".strip()
+
+UPDATE_MEMORY_PROMPT = """You are a smart memory manager which controls the memory of a system.
+You can perform four operations: (1) ADD into the memory, (2) UPDATE the memory, (3) DELETE from the memory, and (4) NONE (no change).
+
+Compare new facts with existing memory. For each new fact, decide whether to:
+- ADD: Add it to the memory as a new element (new information not present in any existing memory)
+- UPDATE: Update an existing memory element (only if facts are about THE SAME TOPIC, e.g., both about pizza preferences)
+- DELETE: Delete an existing memory element (if new fact explicitly contradicts it)
+- NONE: Make no change (if fact is already present, a duplicate, or the existing memory is unrelated to new facts)
+
+**Guidelines:**
+
+1. **ADD**: If the new fact contains new information not present in any existing memory, add it with a new ID.
+   - Existing unrelated memories should have event "NONE".
+   - **Example**:
+     - Current memory: [{"id": 0, "text": "User is a software engineer"}]
+     - New facts: ["Name is John"]
+     - Output: [
+         {"id": 0, "text": "User is a software engineer", "event": "NONE"},
+         {"id": 1, "text": "Name is John", "event": "ADD"}
+       ]
+
+2. **UPDATE**: Only if the new fact refines/expands an existing memory about THE SAME TOPIC.
+   - Keep the same ID, update the text.
+   - Example: "User likes pizza" + "User loves pepperoni pizza" → UPDATE (same topic: pizza)
+   - Example: "Met Sarah today" + "Went running" → NOT same topic, do NOT update!
+   - **Example**:
+     - Current memory: [{"id": 0, "text": "User likes pizza"}]
+     - New facts: ["User loves pepperoni pizza"]
+     - Output: [{"id": 0, "text": "User loves pepperoni pizza", "event": "UPDATE"}]
+
+3. **DELETE**: If the new fact explicitly contradicts an existing memory.
+   - **Example**:
+     - Current memory: [{"id": 0, "text": "Loves pizza"}, {"id": 1, "text": "Name is John"}]
+     - New facts: ["Hates pizza"]
+     - Output: [
+         {"id": 0, "text": "Loves pizza", "event": "DELETE"},
+         {"id": 1, "text": "Name is John", "event": "NONE"},
+         {"id": 2, "text": "Hates pizza", "event": "ADD"}
+       ]
+
+4. **NONE**: If the new fact is already present or existing memory is unrelated to new facts.
+   - **Example**:
+     - Current memory: [{"id": 0, "text": "Name is John"}]
+     - New facts: ["Name is John"]
+     - Output: [{"id": 0, "text": "Name is John", "event": "NONE"}]
+
+5. **IMPORTANT - Unrelated topics example**:
+   - Current memory: [{"id": 0, "text": "Met Sarah to discuss quantum computing"}]
+   - New facts: ["Went for a 5km run"]
+   - These are COMPLETELY DIFFERENT topics (meeting vs running). Do NOT use UPDATE!
+   - Output: [
+       {"id": 0, "text": "Met Sarah to discuss quantum computing", "event": "NONE"},
+       {"id": 1, "text": "Went for a 5km run", "event": "ADD"}
+     ]
+
+**CRITICAL RULES:**
+- You MUST return ALL memories (existing + new) in your response.
+- Each existing memory MUST have an event (NONE, UPDATE, or DELETE).
+- Each genuinely NEW fact (not related to any existing memory) MUST be ADDed with a new ID.
+- Do NOT use UPDATE for unrelated topics! "Met Sarah" and "Went running" are DIFFERENT topics → use NONE for existing + ADD for new.
+
+Return ONLY a JSON list. No prose or code fences.""".strip()
+
+SUMMARY_PROMPT = """
+You are a concise conversation summarizer. Update the running summary with the new facts.
+Keep it brief, factual, and focused on durable information; do not restate transient chit-chat.
+Prefer aggregating related facts into compact statements; drop redundancies.
+""".strip()
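The UPDATE_MEMORY_PROMPT contract is easiest to see applied end to end: the model returns every memory with an event, and the caller folds that list back into its store. A minimal sketch of a caller honoring that contract (hypothetical; the package's own reconciliation code is not in this hunk):

import json

memory = {0: "User is a software engineer"}

# Example response, verbatim from the ADD guideline above.
llm_output = '''[
    {"id": 0, "text": "User is a software engineer", "event": "NONE"},
    {"id": 1, "text": "Name is John", "event": "ADD"}
]'''

for op in json.loads(llm_output):
    if op["event"] in ("ADD", "UPDATE"):
        memory[op["id"]] = op["text"]  # new ID for ADD, same ID for UPDATE
    elif op["event"] == "DELETE":
        memory.pop(op["id"], None)
    # "NONE" leaves the entry untouched

print(memory)  # {0: 'User is a software engineer', 1: 'Name is John'}
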
agent_cli/memory/_retrieval.py
@@ -0,0 +1,294 @@
+"""Retrieval logic for memory (Reading, Reranking, MMR)."""
+
+from __future__ import annotations
+
+import logging
+import math
+from datetime import UTC, datetime
+from typing import TYPE_CHECKING, Any
+
+from agent_cli.core.reranker import OnnxCrossEncoder, predict_relevance
+from agent_cli.memory._store import get_summary_entry, query_memories
+from agent_cli.memory.models import (
+    ChatRequest,
+    MemoryEntry,
+    MemoryMetadata,
+    MemoryRetrieval,
+    Message,
+    StoredMemory,
+)
+
+if TYPE_CHECKING:
+    from chromadb import Collection
+
+LOGGER = logging.getLogger(__name__)
+
+_DEFAULT_MMR_LAMBDA = 0.7
+_SUMMARY_ROLE = "summary"
+_MIN_MAX_EPSILON = 1e-8  # Avoid division by zero in min-max normalization
+
+
+def gather_relevant_existing_memories(
+    collection: Collection,
+    conversation_id: str,
+    new_facts: list[str],
+    *,
+    neighborhood: int = 5,
+) -> list[StoredMemory]:
+    """Retrieve a small neighborhood of existing memories per new fact, deduped by id."""
+    if not new_facts:
+        return []
+    filters = [
+        {"conversation_id": conversation_id},
+        {"role": "memory"},
+        {"role": {"$ne": "summary"}},
+    ]
+    seen: set[str] = set()
+    results: list[StoredMemory] = []
+    for fact in new_facts:
+        raw = collection.query(query_texts=[fact], n_results=neighborhood, where={"$and": filters})
+        docs = raw.get("documents", [[]])[0] or []
+        metas = raw.get("metadatas", [[]])[0] or []
+        ids = raw.get("ids", [[]])[0] or []
+        distances = raw.get("distances", [[]])[0] or []
+        for doc, meta, doc_id, dist in zip(docs, metas, ids, distances, strict=False):
+            assert doc_id is not None
+            if doc_id in seen:
+                continue
+            seen.add(doc_id)
+            norm_meta = MemoryMetadata(**dict(meta))
+            results.append(
+                StoredMemory(
+                    id=doc_id,
+                    content=doc,
+                    metadata=norm_meta,
+                    distance=float(dist) if dist is not None else None,
+                ),
+            )
+    return results
+
+
+def mmr_select(
+    candidates: list[StoredMemory],
+    scores: list[float],
+    *,
+    max_items: int,
+    lambda_mult: float,
+) -> list[tuple[StoredMemory, float]]:
+    """Apply Maximal Marginal Relevance to promote diversity."""
+    if not candidates or max_items <= 0:
+        return []
+
+    def _normalize(vec: list[float] | None) -> list[float] | None:
+        if not vec:
+            return None
+        norm = sum(x * x for x in vec) ** 0.5
+        if norm == 0:
+            return None
+        return [x / norm for x in vec]
+
+    def _cosine(a: list[float] | None, b: list[float] | None) -> float:
+        if not a or not b or len(a) != len(b):
+            return 0.0
+        return sum(x * y for x, y in zip(a, b, strict=False))
+
+    normalized_embeddings: list[list[float] | None] = [
+        _normalize(mem.embedding) for mem in candidates
+    ]
+
+    selected: list[int] = []
+    candidate_indices = list(range(len(candidates)))
+
+    # Start with top scorer
+    first_idx = max(candidate_indices, key=lambda i: scores[i])
+    selected.append(first_idx)
+    candidate_indices.remove(first_idx)
+
+    while candidate_indices and len(selected) < max_items:
+        best_idx = None
+        best_score = float("-inf")
+        for idx in candidate_indices:
+            relevance = scores[idx]
+            redundancy = max(
+                (_cosine(normalized_embeddings[idx], normalized_embeddings[s]) for s in selected),
+                default=0.0,
+            )
+            mmr_score = lambda_mult * relevance - (1 - lambda_mult) * redundancy
+            if mmr_score > best_score:
+                best_score = mmr_score
+                best_idx = idx
+        if best_idx is None:
+            break
+        selected.append(best_idx)
+        candidate_indices.remove(best_idx)
+
+    return [(candidates[i], scores[i]) for i in selected]
+
+
+def retrieve_memory(
+    collection: Collection,
+    *,
+    conversation_id: str,
+    query: str,
+    top_k: int,
+    reranker_model: OnnxCrossEncoder,
+    include_global: bool = True,
+    include_summary: bool = True,
+    mmr_lambda: float = _DEFAULT_MMR_LAMBDA,
+    recency_weight: float = 0.2,
+    score_threshold: float | None = None,
+    filters: dict[str, Any] | None = None,
+) -> tuple[MemoryRetrieval, list[str]]:
+    """Execute search + rerank + recency + MMR."""
+    candidate_conversations = [conversation_id]
+    if include_global and conversation_id != "global":
+        candidate_conversations.append("global")
+
+    raw_candidates: list[StoredMemory] = []
+    seen_ids: set[str] = set()
+
+    for cid in candidate_conversations:
+        records = query_memories(
+            collection,
+            conversation_id=cid,
+            text=query,
+            n_results=top_k * 3,
+            filters=filters,
+        )
+        for rec in records:
+            rec_id = rec.id
+            if rec_id in seen_ids:
+                continue
+            seen_ids.add(rec_id)
+            raw_candidates.append(rec)
+
+    def _min_max_normalize(scores: list[float]) -> list[float]:
+        """Normalize scores to 0-1 range using min-max scaling."""
+        if not scores:
+            return scores
+        min_score = min(scores)
+        max_score = max(scores)
+        if max_score - min_score < _MIN_MAX_EPSILON:
+            return [0.5] * len(scores)  # All scores equal
+        return [(s - min_score) / (max_score - min_score) for s in scores]
+
+    def recency_score(meta: MemoryMetadata) -> float:
+        dt = datetime.fromisoformat(meta.created_at)
+        age_days = max((datetime.now(UTC) - dt).total_seconds() / 86400.0, 0.0)
+        # Exponential decay: ~0.36 score at 30 days
+        return math.exp(-age_days / 30.0)
+
+    final_candidates: list[StoredMemory] = []
+    scores: list[float] = []
+
+    if raw_candidates:
+        pairs = [(query, mem.content) for mem in raw_candidates]
+        rr_scores = predict_relevance(reranker_model, pairs)
+        # Normalize raw reranker scores to 0-1 range
+        normalized_scores = _min_max_normalize(rr_scores)
+
+        for mem, relevance in zip(raw_candidates, normalized_scores, strict=False):
+            # Filter out low-relevance memories if threshold is set
+            if score_threshold is not None and relevance < score_threshold:
+                continue
+
+            recency = recency_score(mem.metadata)
+            # Weighted blend
+            total = (1.0 - recency_weight) * relevance + recency_weight * recency
+            scores.append(total)
+            final_candidates.append(mem)
+
+    selected = mmr_select(final_candidates, scores, max_items=top_k, lambda_mult=mmr_lambda)
+
+    entries: list[MemoryEntry] = [
+        MemoryEntry(
+            role=mem.metadata.role,
+            content=mem.content,
+            created_at=mem.metadata.created_at,
+            score=score,
+        )
+        for mem, score in selected
+    ]
+
+    summaries: list[str] = []
+    if include_summary:
+        summary_entry = get_summary_entry(collection, conversation_id, role=_SUMMARY_ROLE)
+        if summary_entry:
+            summaries.append(f"Conversation summary:\n{summary_entry.content}")
+
+    return MemoryRetrieval(entries=entries), summaries
+
+
+def format_augmented_content(
+    *,
+    user_message: str,
+    summaries: list[str],
+    memories: list[MemoryEntry],
+) -> str:
+    """Format the prompt content with injected memories."""
+    parts: list[str] = []
+    if summaries:
+        parts.append("Conversation summaries:\n" + "\n\n".join(summaries))
+    if memories:
+        memory_block = "\n\n---\n\n".join(f"[{m.role}] {m.content}" for m in memories)
+        parts.append(f"Long-term memory (most relevant first):\n{memory_block}")
+    parts.append(f"Current message: {user_message}")
+    return "\n\n---\n\n".join(parts)
+
+
+async def augment_chat_request(
+    request: ChatRequest,
+    collection: Collection,
+    reranker_model: OnnxCrossEncoder,
+    default_top_k: int = 5,
+    default_memory_id: str = "default",
+    include_global: bool = True,
+    mmr_lambda: float = _DEFAULT_MMR_LAMBDA,
+    recency_weight: float = 0.2,
+    score_threshold: float | None = None,
+    filters: dict[str, Any] | None = None,
+) -> tuple[ChatRequest, MemoryRetrieval | None, str, list[str]]:
+    """Retrieve memory context and augment the chat request."""
+    user_message = next(
+        (m.content for m in reversed(request.messages) if m.role == "user"),
+        None,
+    )
+    if not user_message:
+        return request, None, default_memory_id, []
+
+    conversation_id = request.memory_id or default_memory_id
+    top_k = request.memory_top_k if request.memory_top_k is not None else default_top_k
+
+    if top_k <= 0:
+        LOGGER.info("Memory retrieval disabled for this request (top_k=%s)", top_k)
+        return request, None, conversation_id, []
+
+    retrieval, summaries = retrieve_memory(
+        collection,
+        conversation_id=conversation_id,
+        query=user_message,
+        top_k=top_k,
+        reranker_model=reranker_model,
+        include_global=include_global,
+        mmr_lambda=mmr_lambda,
+        recency_weight=recency_weight,
+        score_threshold=score_threshold,
+        filters=filters,
+    )
+
+    if not retrieval.entries and not summaries:
+        return request, None, conversation_id, summaries
+
+    augmented_content = format_augmented_content(
+        user_message=user_message,
+        summaries=summaries,
+        memories=retrieval.entries,
+    )
+
+    augmented_messages = list(request.messages[:-1])
+    augmented_messages.append(Message(role="user", content=augmented_content))
+
+    aug_request = request.model_copy()
+    aug_request.messages = augmented_messages
+
+    return aug_request, retrieval, conversation_id, summaries
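To make the scoring in retrieve_memory concrete: reranker relevance is min-max normalized to 0-1, recency decays exponentially with a 30-day time constant, and the two are blended by recency_weight before MMR selection. A toy calculation with hypothetical numbers:

import math

relevance = 0.9                       # min-max-normalized reranker score
age_days = 30.0
recency = math.exp(-age_days / 30.0)  # ~0.368 after 30 days
recency_weight = 0.2

# Weighted blend, as in retrieve_memory.
total = (1.0 - recency_weight) * relevance + recency_weight * recency
print(round(total, 3))  # 0.794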