bareagent-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121) hide show
  1. bareagent/__init__.py +10 -0
  2. bareagent/concurrency/__init__.py +6 -0
  3. bareagent/concurrency/background.py +97 -0
  4. bareagent/concurrency/notification.py +61 -0
  5. bareagent/concurrency/scheduler.py +136 -0
  6. bareagent/config.toml +299 -0
  7. bareagent/core/__init__.py +1 -0
  8. bareagent/core/config_paths.py +49 -0
  9. bareagent/core/context.py +127 -0
  10. bareagent/core/fileutil.py +103 -0
  11. bareagent/core/goal.py +214 -0
  12. bareagent/core/handlers/__init__.py +1 -0
  13. bareagent/core/handlers/bash.py +79 -0
  14. bareagent/core/handlers/file_edit.py +47 -0
  15. bareagent/core/handlers/file_read.py +270 -0
  16. bareagent/core/handlers/file_write.py +34 -0
  17. bareagent/core/handlers/glob_search.py +30 -0
  18. bareagent/core/handlers/goal.py +60 -0
  19. bareagent/core/handlers/grep_search.py +52 -0
  20. bareagent/core/handlers/memory.py +71 -0
  21. bareagent/core/handlers/plan.py +106 -0
  22. bareagent/core/handlers/search_utils.py +77 -0
  23. bareagent/core/handlers/skill.py +87 -0
  24. bareagent/core/handlers/subagent_send.py +70 -0
  25. bareagent/core/handlers/web_fetch.py +126 -0
  26. bareagent/core/handlers/web_search.py +165 -0
  27. bareagent/core/handlers/workflow.py +190 -0
  28. bareagent/core/loop.py +535 -0
  29. bareagent/core/retry.py +131 -0
  30. bareagent/core/sandbox.py +27 -0
  31. bareagent/core/schema.py +21 -0
  32. bareagent/core/tools.py +779 -0
  33. bareagent/core/workflow.py +517 -0
  34. bareagent/core/workflow_registry.py +219 -0
  35. bareagent/debug/__init__.py +0 -0
  36. bareagent/debug/interaction_log.py +263 -0
  37. bareagent/debug/viewer.html +1750 -0
  38. bareagent/debug/web_viewer.py +157 -0
  39. bareagent/hooks/__init__.py +32 -0
  40. bareagent/hooks/config.py +118 -0
  41. bareagent/hooks/engine.py +197 -0
  42. bareagent/hooks/errors.py +14 -0
  43. bareagent/hooks/events.py +22 -0
  44. bareagent/lsp/__init__.py +63 -0
  45. bareagent/lsp/config.py +134 -0
  46. bareagent/lsp/coord.py +118 -0
  47. bareagent/lsp/diagnostics.py +240 -0
  48. bareagent/lsp/errors.py +24 -0
  49. bareagent/lsp/manager.py +866 -0
  50. bareagent/lsp/tools.py +629 -0
  51. bareagent/lsp/workspace_edit.py +305 -0
  52. bareagent/main.py +4205 -0
  53. bareagent/mcp/__init__.py +69 -0
  54. bareagent/mcp/_sse.py +69 -0
  55. bareagent/mcp/client.py +341 -0
  56. bareagent/mcp/config.py +169 -0
  57. bareagent/mcp/errors.py +32 -0
  58. bareagent/mcp/manager.py +318 -0
  59. bareagent/mcp/protocol.py +187 -0
  60. bareagent/mcp/registry.py +557 -0
  61. bareagent/mcp/transport/__init__.py +15 -0
  62. bareagent/mcp/transport/base.py +149 -0
  63. bareagent/mcp/transport/http_legacy.py +192 -0
  64. bareagent/mcp/transport/http_streamable.py +217 -0
  65. bareagent/mcp/transport/stdio.py +202 -0
  66. bareagent/memory/__init__.py +1 -0
  67. bareagent/memory/compact.py +203 -0
  68. bareagent/memory/conversation_io.py +226 -0
  69. bareagent/memory/embedding.py +194 -0
  70. bareagent/memory/persistent.py +515 -0
  71. bareagent/memory/token_counter.py +67 -0
  72. bareagent/memory/token_tracker.py +262 -0
  73. bareagent/memory/transcript.py +100 -0
  74. bareagent/permission/__init__.py +1 -0
  75. bareagent/permission/guard.py +329 -0
  76. bareagent/permission/rules.py +19 -0
  77. bareagent/planning/__init__.py +19 -0
  78. bareagent/planning/agent_types.py +169 -0
  79. bareagent/planning/skill_gen.py +141 -0
  80. bareagent/planning/skill_store.py +173 -0
  81. bareagent/planning/skills.py +146 -0
  82. bareagent/planning/subagent.py +355 -0
  83. bareagent/planning/subagent_registry.py +77 -0
  84. bareagent/planning/tasks.py +348 -0
  85. bareagent/planning/todo.py +153 -0
  86. bareagent/planning/worktree.py +122 -0
  87. bareagent/provider/__init__.py +1 -0
  88. bareagent/provider/anthropic.py +348 -0
  89. bareagent/provider/base.py +136 -0
  90. bareagent/provider/factory.py +130 -0
  91. bareagent/provider/openai.py +881 -0
  92. bareagent/provider/presets.py +72 -0
  93. bareagent/provider/setup.py +356 -0
  94. bareagent/skills/.gitkeep +1 -0
  95. bareagent/skills/code-review/SKILL.md +68 -0
  96. bareagent/skills/git/SKILL.md +68 -0
  97. bareagent/skills/test/SKILL.md +70 -0
  98. bareagent/team/__init__.py +17 -0
  99. bareagent/team/autonomous.py +193 -0
  100. bareagent/team/mailbox.py +239 -0
  101. bareagent/team/manager.py +155 -0
  102. bareagent/team/protocols.py +129 -0
  103. bareagent/tracing/__init__.py +12 -0
  104. bareagent/tracing/_api.py +92 -0
  105. bareagent/tracing/_proxy.py +60 -0
  106. bareagent/tracing/composite.py +115 -0
  107. bareagent/tracing/json_file.py +115 -0
  108. bareagent/tracing/langfuse.py +139 -0
  109. bareagent/tracing/otel.py +107 -0
  110. bareagent/tracing/setup.py +85 -0
  111. bareagent/ui/__init__.py +24 -0
  112. bareagent/ui/console.py +167 -0
  113. bareagent/ui/prompt.py +78 -0
  114. bareagent/ui/protocol.py +24 -0
  115. bareagent/ui/stream.py +66 -0
  116. bareagent/ui/theme.py +240 -0
  117. bareagent_cli-0.1.0.dist-info/METADATA +331 -0
  118. bareagent_cli-0.1.0.dist-info/RECORD +121 -0
  119. bareagent_cli-0.1.0.dist-info/WHEEL +4 -0
  120. bareagent_cli-0.1.0.dist-info/entry_points.txt +2 -0
  121. bareagent_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,515 @@
1
+ """Persistent, file-based agent memory.
2
+
3
+ A memory store is a private directory of Markdown files plus a ``MEMORY.md``
4
+ index. The :class:`MemoryManager` exposes six text-editor-style commands
5
+ (``view`` / ``create`` / ``str_replace`` / ``insert`` / ``delete`` /
6
+ ``rename``) whose contract mirrors the Anthropic memory tool, but it is wired
7
+ as an ordinary client tool so every provider (Anthropic / OpenAI / DeepSeek)
8
+ can use it. The tool itself is content-agnostic; the *meaning* of memory
9
+ (frontmatter classification, the index, the "view before acting" habit) is
10
+ carried to the model through :data:`MEMORY_PROTOCOL`, injected into the system
11
+ prompt.
12
+
13
+ All paths handed to the manager are resolved relative to the memory root and
14
+ validated through :func:`bareagent.core.sandbox.safe_path`, so the model can never
15
+ read or write outside its memory directory.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import logging
21
+ import re
22
+ import shutil
23
+ import threading
24
+ from dataclasses import dataclass
25
+ from enum import StrEnum
26
+ from pathlib import Path
27
+
28
+ from bareagent.core.fileutil import atomic_write_text
29
+ from bareagent.core.sandbox import safe_path
30
+ from bareagent.memory.embedding import Embedder, EmbeddingCache, cosine, text_hash
31
+
32
+ logger = logging.getLogger(__name__)
33
+
34
+ _INDEX_FILE = "MEMORY.md"
35
+ _EMBED_CACHE_FILE = ".embedding-cache.json"
36
+ # Prefixes the model may prepend out of habit (the native Anthropic tool uses
37
+ # absolute ``/memories/...`` paths). Strip them so paths resolve cleanly under
38
+ # the memory root via ``safe_path``.
39
+ _STRIP_PREFIXES = ("/memories/", "memories/", "/memory/", "memory/")
40
+
41
+
42
+ class MemoryType(StrEnum):
43
+ """Frontmatter ``metadata.type`` classification for a memory file."""
44
+
45
+ USER = "user"
46
+ FEEDBACK = "feedback"
47
+ PROJECT = "project"
48
+ REFERENCE = "reference"
49
+
50
+
51
# Comma-separated list of the valid ``metadata.type`` values, interpolated
# into the protocol text below.
_TYPE_LIST = ", ".join(kind.value for kind in MemoryType)

# Usage protocol for the ``memory`` tool. One tuple element per output line;
# adjacent string literals inside an element are concatenated by the parser.
MEMORY_PROTOCOL = "\n".join(
    (
        "<memory-protocol>",
        "You have a persistent, cross-session memory stored as Markdown files in a "
        "private memory directory. Use the `memory` tool to read and maintain it.",
        "Commands: view, create, str_replace, insert, delete, rename. Paths are "
        'relative to the memory root (e.g. "MEMORY.md", "user/role.md").',
        "Protocol:",
        "- Before a non-trivial task, `view` the memory directory and read any "
        "relevant memory files.",
        "- Persist durable facts the user will rely on across sessions; skip secrets "
        "and details that only matter to the current conversation.",
        "- Each memory is one .md file with YAML frontmatter: name, description, and "
        f"metadata.type (one of: {_TYPE_LIST}).",
        "- Keep MEMORY.md as the index: one line per memory "
        "(`- [title](file.md) — hook`). Update it whenever you create, rename, or "
        "delete a memory.",
        "</memory-protocol>",
    )
)
71
+
72
+
73
def derive_memory_slug(workspace: Path) -> str:
    """Turn a workspace path into a name usable as a single directory component.

    The path is expanded and resolved, every run of ``:``, ``/`` or ``\\`` is
    collapsed into one ``-``, and leading/trailing ``-`` characters are trimmed
    (so ``D:\\code\\BareAgent`` becomes ``D-code-BareAgent``). When nothing is
    left after trimming, ``"default"`` is returned.
    """
    absolute = workspace.expanduser().resolve()
    collapsed = re.sub(r"[:/\\]+", "-", str(absolute))
    trimmed = collapsed.strip("-")
    if not trimmed:
        return "default"
    return trimmed
82
+
83
+
84
def default_memory_root(workspace: Path) -> Path:
    """Return ``~/.bareagent/projects/<slug>/memory`` for ``workspace``.

    ``<slug>`` comes from :func:`derive_memory_slug`, which gives every
    workspace its own directory under the user's home.
    """
    project_dir = Path.home().joinpath(
        ".bareagent", "projects", derive_memory_slug(workspace)
    )
    return project_dir / "memory"
87
+
88
+
89
def resolve_memory_root(workspace: Path, configured_dir: str) -> Path:
    """Pick the memory root directory from the configured override.

    * blank or whitespace-only ``configured_dir``: the per-project default
      from :func:`default_memory_root`;
    * absolute override (after ``~`` expansion): returned unchanged;
    * relative override: joined onto ``workspace``.
    """
    override = configured_dir.strip()
    if override == "":
        return default_memory_root(workspace)
    location = Path(override).expanduser()
    return location if location.is_absolute() else workspace / location
103
+
104
+
105
def parse_frontmatter(text: str) -> dict[str, str]:
    """Read the flat ``key: value`` pairs from a leading ``---`` fenced block.

    A tiny hand-rolled parser (no YAML library is involved). The text must
    begin with ``---`` followed by a newline, and the block ends at the first
    later occurrence of a newline followed by ``---``. Inside the block:

    * empty lines and lines starting with a space or tab are skipped, so
      nested mappings such as the children of ``metadata:`` are not returned;
    * a line is split at its first ``:``; both sides are stripped;
    * pairs with an empty key or an empty value are dropped.

    Returns an empty dict when the opening fence is missing or the block is
    never closed; this function never raises on malformed input.
    """
    opening = "---\n"
    if not text.startswith(opening):
        return {}
    body = text[len(opening) :]
    close_at = body.find("\n---")
    if close_at < 0:
        return {}

    fields: dict[str, str] = {}
    for raw in body[:close_at].split("\n"):
        # Blank lines carry nothing; indented lines belong to a nested block.
        if raw == "" or raw.startswith((" ", "\t")):
            continue
        name, colon, payload = raw.partition(":")
        name, payload = name.strip(), payload.strip()
        if colon and name and payload:
            fields[name] = payload
    return fields
138
+
139
+
140
+ _CJK_RE = re.compile(r"[一-鿿]+")
141
+ _ASCII_WORD_RE = re.compile(r"[a-z0-9]+")
142
+
143
+
144
+ def _lexical_terms(s: str) -> set[str]:
145
+ """Tokenize a string into a bag of lexical terms for relevance scoring.
146
+
147
+ ASCII words are lowercased whole tokens; CJK runs are split into sliding
148
+ bigrams (single-character runs are kept as-is) so that Chinese queries match
149
+ without a real word segmenter.
150
+ """
151
+ lowered = s.lower()
152
+ terms: set[str] = set(_ASCII_WORD_RE.findall(lowered))
153
+ for run in _CJK_RE.findall(s):
154
+ if len(run) < 2:
155
+ terms.add(run)
156
+ continue
157
+ for i in range(len(run) - 1):
158
+ terms.add(run[i : i + 2])
159
+ return terms
160
+
161
+
162
def _relevance(query: str, text: str) -> int:
    """Count the distinct lexical terms that ``query`` and ``text`` share."""
    query_terms = _lexical_terms(query)
    text_terms = _lexical_terms(text)
    return len(query_terms.intersection(text_terms))
165
+
166
+
167
+ @dataclass(frozen=True, slots=True)
168
+ class RecalledMemory:
169
+ """A memory file selected by relevance to a query.
170
+
171
+ ``score`` is an integer lexical-overlap count on the lexical path and a
172
+ float cosine similarity on the semantic path; callers only sort on it.
173
+ """
174
+
175
+ path: str
176
+ description: str
177
+ score: float
178
+
179
+
180
class MemoryManager:
    """Sandboxed file store backing the ``memory`` tool.

    Every model-supplied path is normalized and confined to the memory root by
    :meth:`_resolve`. Methods raise stdlib exceptions (``FileNotFoundError``,
    ``ValueError``, ``PermissionError``, ...) on predictable failures; the tool
    handler (:func:`bareagent.core.handlers.memory.run_memory`) translates
    them into ``Error:`` strings for the LLM.
    """

    def __init__(
        self,
        root: Path,
        *,
        max_index_lines: int = 200,
        embedder: Embedder | None = None,
    ) -> None:
        """Create the store, creating ``root`` on disk if it does not exist.

        Args:
            root: Memory root directory; expanded and resolved to an absolute path.
            max_index_lines: Maximum number of ``MEMORY.md`` lines injected by
                :meth:`system_prompt_section`.
            embedder: Optional semantic-recall backend; ``None`` keeps
                :meth:`recall` on the lexical-only path.
        """
        self._root = root.expanduser().resolve()
        self._max_index_lines = max_index_lines
        self._embedder = embedder
        # A single lock serializes the mutating commands. The agent's memory
        # working set is tiny, so a per-store lock is simpler than per-file
        # locking and never a bottleneck.
        self._lock = threading.Lock()
        self._root.mkdir(parents=True, exist_ok=True)

    @property
    def root(self) -> Path:
        """Absolute, resolved path of the memory root directory."""
        return self._root

    # -- path handling ----------------------------------------------------

    def _resolve(self, path: str) -> Path:
        """Normalize a model-supplied path and confine it to the memory root.

        Backslashes become forward slashes, one habitual prefix from
        ``_STRIP_PREFIXES`` is removed (case-insensitively), leading slashes
        are dropped, and the remainder is validated by ``safe_path``. An empty
        remainder means the root itself.
        """
        normalized = path.strip().replace("\\", "/")
        lowered = normalized.lower()
        if lowered.rstrip("/") + "/" in _STRIP_PREFIXES:
            # The bare alias ("/memories", "memory", ...) names the root
            # directory itself. Without this, "/memories" would resolve to a
            # non-existent entry called "memories" inside the root.
            normalized = ""
        else:
            for prefix in _STRIP_PREFIXES:
                if lowered.startswith(prefix):
                    normalized = normalized[len(prefix) :]
                    break
        normalized = normalized.lstrip("/").strip()
        return safe_path(normalized or ".", self._root)

    def _relative(self, resolved: Path) -> str:
        """Return ``resolved`` as a POSIX path relative to the root.

        The root itself is rendered as ``"."``; a path that is not under the
        root falls back to its bare file name.
        """
        try:
            rel = resolved.relative_to(self._root).as_posix()
        except ValueError:
            rel = resolved.name
        return rel or "."

    # -- commands ---------------------------------------------------------

    def view(self, path: str, view_range: list[int] | None = None) -> str:
        """List a directory, or return a file's (optionally ranged) numbered lines.

        Raises:
            FileNotFoundError: The path does not exist.
            ValueError: ``view_range`` is malformed or out of range.
        """
        resolved = self._resolve(path)
        rel = self._relative(resolved)
        if resolved.is_dir():
            return self._list_dir(resolved, rel)
        if not resolved.exists():
            raise FileNotFoundError(f"memory path not found: {rel}")
        return self._read_file(resolved, rel, view_range)

    def create(self, path: str, file_text: str) -> str:
        """Write ``file_text`` to ``path`` via ``atomic_write_text``.

        Raises:
            ValueError: ``path`` resolves to the memory root itself.
        """
        resolved = self._resolve(path)
        if resolved == self._root:
            raise ValueError("cannot create over the memory root")
        with self._lock:
            atomic_write_text(resolved, file_text)
        return f"Created {self._relative(resolved)} ({len(file_text)} chars)"

    def str_replace(self, path: str, old_str: str, new_str: str) -> str:
        """Replace the single occurrence of ``old_str`` in a file with ``new_str``.

        Raises:
            FileNotFoundError: ``path`` is not an existing file.
            ValueError: ``old_str`` is empty, absent, or occurs more than once.
        """
        resolved = self._resolve(path)
        rel = self._relative(resolved)
        if not resolved.is_file():
            raise FileNotFoundError(f"memory file not found: {rel}")
        if not old_str:
            # An empty needle "matches" at every position; on an empty file it
            # would count as exactly one match and silently prepend new_str.
            raise ValueError("old_str must not be empty")
        with self._lock:
            content = resolved.read_text(encoding="utf-8")
            count = content.count(old_str)
            if count == 0:
                raise ValueError(f"old_str not found in {rel}")
            if count > 1:
                raise ValueError(
                    f"old_str is not unique in {rel} (found {count} occurrences); "
                    "add surrounding context to make it unique"
                )
            atomic_write_text(resolved, content.replace(old_str, new_str, 1))
        return f"Edited {rel}"

    def insert(self, path: str, insert_line: int, insert_text: str) -> str:
        """Insert ``insert_text`` after line ``insert_line`` (``0`` = before line 1).

        Lines are counted by splitting the file on ``"\\n"``, so a trailing
        newline contributes a final empty line to the valid range.

        Raises:
            FileNotFoundError: ``path`` is not an existing file.
            ValueError: ``insert_line`` is outside ``0..len(lines)``.
        """
        resolved = self._resolve(path)
        rel = self._relative(resolved)
        if not resolved.is_file():
            raise FileNotFoundError(f"memory file not found: {rel}")
        with self._lock:
            lines = resolved.read_text(encoding="utf-8").split("\n")
            if insert_line < 0 or insert_line > len(lines):
                raise ValueError(
                    f"insert_line {insert_line} out of range for {rel} (0..{len(lines)})"
                )
            new_lines = insert_text.split("\n")
            updated = lines[:insert_line] + new_lines + lines[insert_line:]
            atomic_write_text(resolved, "\n".join(updated))
        return f"Inserted {len(new_lines)} line(s) into {rel} after line {insert_line}"

    def delete(self, path: str) -> str:
        """Delete a file, or a directory together with everything under it.

        Raises:
            ValueError: ``path`` resolves to the memory root itself.
            FileNotFoundError: The path does not exist.
        """
        resolved = self._resolve(path)
        rel = self._relative(resolved)
        if resolved == self._root:
            raise ValueError("cannot delete the memory root")
        if not resolved.exists():
            raise FileNotFoundError(f"memory path not found: {rel}")
        with self._lock:
            if resolved.is_dir():
                shutil.rmtree(resolved)
            else:
                resolved.unlink()
        return f"Deleted {rel}"

    def rename(self, old_path: str, new_path: str) -> str:
        """Move ``old_path`` to ``new_path``, creating missing parent directories.

        Raises:
            FileNotFoundError: The source does not exist.
            ValueError: Either side is the memory root, or the destination exists.
        """
        source = self._resolve(old_path)
        target = self._resolve(new_path)
        source_rel = self._relative(source)
        target_rel = self._relative(target)
        # Validate under the lock: Path.replace overwrites silently, so the
        # "destination already exists" guard only protects anything when the
        # check and the move cannot be interleaved with another command.
        with self._lock:
            if not source.exists():
                raise FileNotFoundError(f"memory path not found: {source_rel}")
            if self._root in (source, target):
                raise ValueError("cannot rename the memory root")
            if target.exists():
                raise ValueError(f"destination already exists: {target_rel}")
            target.parent.mkdir(parents=True, exist_ok=True)
            source.replace(target)
        return f"Renamed {source_rel} -> {target_rel}"

    # -- system-prompt injection -----------------------------------------

    def system_prompt_section(self) -> str:
        """Return the ``<memory>`` block injected at session start.

        The block contains the memory protocol followed by at most
        ``max_index_lines`` lines from the head of ``MEMORY.md``. An index
        that is missing, unreadable, not valid UTF-8, or blank is rendered as
        a "(no memories saved yet)" placeholder instead of raising.
        """
        index_path = self._root / _INDEX_FILE
        try:
            raw = index_path.read_text(encoding="utf-8").strip()
        except (OSError, UnicodeDecodeError):
            # A corrupt index must not prevent the session from starting.
            raw = ""
        if raw:
            index_block = "\n".join(raw.split("\n")[: self._max_index_lines])
        else:
            index_block = "(no memories saved yet)"
        return (
            "<memory>\n"
            f"{MEMORY_PROTOCOL}\n"
            f'<memory-index file="{_INDEX_FILE}">\n'
            f"{index_block}\n"
            "</memory-index>\n"
            "</memory>"
        )

    # -- recall (semantic with lexical fallback) --------------------------

    def recall(self, query: str, k: int = 5) -> list[RecalledMemory]:
        """Return up to ``k`` memories most relevant to ``query``.

        When an ``embedder`` is configured, results are ranked by embedding
        cosine similarity; otherwise, or if the semantic path raises, ranking
        falls back to lexical term overlap. The whole store is rescanned on
        every call. A blank query or a non-positive ``k`` yields ``[]``.
        """
        if k <= 0 or not query.strip():
            # Guard k explicitly: a negative k would otherwise slice
            # ``scored[:k]`` and return a wrong, non-empty result.
            return []
        if self._embedder is not None:
            try:
                return self._semantic_recall(query, k)
            except Exception:
                # Deliberate best-effort boundary: any embedding failure
                # degrades to lexical recall rather than failing the request.
                logger.warning(
                    "Semantic recall failed; falling back to lexical recall.",
                    exc_info=True,
                )
        return self._lexical_recall(query, k)

    def _collect_scoring_docs(self) -> list[tuple[str, str, str]]:
        """Scan the store into ``(relpath, scoring_text, description)`` tuples.

        The index file is excluded. The scoring text is the frontmatter
        ``name`` and ``description`` when either is present, otherwise the
        first 200 characters of the file. Files that cannot be read or decoded
        as UTF-8 are skipped.
        """
        docs: list[tuple[str, str, str]] = []
        for file in self._root.rglob("*.md"):
            if not file.is_file() or file.name == _INDEX_FILE:
                continue
            try:
                content = file.read_text(encoding="utf-8")
            except (OSError, UnicodeDecodeError):
                # One unreadable or mis-encoded file must not break recall
                # for every other memory in the store.
                continue
            meta = parse_frontmatter(content)
            name = meta.get("name", "")
            description = meta.get("description", "")
            if name or description:
                scoring_text = f"{name} {description}".strip()
            else:
                scoring_text = content[:200]
            rel = file.relative_to(self._root).as_posix()
            docs.append((rel, scoring_text, description or name))
        return docs

    def _lexical_recall(self, query: str, k: int) -> list[RecalledMemory]:
        """Rank memories by the number of lexical terms shared with ``query``."""
        scored: list[RecalledMemory] = []
        for rel, scoring_text, description in self._collect_scoring_docs():
            score = _relevance(query, scoring_text)
            if score <= 0:
                continue
            scored.append(
                RecalledMemory(path=rel, description=description, score=float(score))
            )
        # Highest score first; the secondary sort on path keeps the output
        # deterministic when scores tie.
        scored.sort(key=lambda m: (-m.score, m.path))
        return scored[:k]

    def _semantic_recall(self, query: str, k: int) -> list[RecalledMemory]:
        """Rank memories by embedding cosine similarity to ``query``.

        Exceptions from the embedder or the cache propagate; :meth:`recall`
        catches them and falls back to the lexical path.
        """
        docs = self._collect_scoring_docs()
        if not docs:
            return []
        cache = EmbeddingCache(self._root / _EMBED_CACHE_FILE, self._embedder.identity)
        # Embed only cache misses and files whose scoring text changed, in one batch.
        pending: list[tuple[str, str]] = []  # (relpath, content_hash)
        pending_texts: list[str] = []
        for rel, scoring_text, _desc in docs:
            digest = text_hash(scoring_text)
            cached = cache.get(rel)
            if cached is None or cached[0] != digest:
                pending.append((rel, digest))
                pending_texts.append(scoring_text)
        if pending_texts:
            vectors = self._embedder.embed(pending_texts)
            for (rel, digest), vector in zip(pending, vectors, strict=True):
                cache.put(rel, digest, vector)
        # Forget entries for files that no longer exist, then persist the cache.
        cache.prune({rel for rel, _t, _d in docs})
        cache.save()
        query_vector = self._embedder.embed([query])[0]
        scored: list[RecalledMemory] = []
        for rel, _scoring_text, description in docs:
            entry = cache.get(rel)
            if entry is None:
                continue
            score = cosine(query_vector, entry[1])
            if score <= 0:
                continue
            scored.append(
                RecalledMemory(path=rel, description=description, score=score)
            )
        scored.sort(key=lambda m: (-m.score, m.path))
        return scored[:k]

    def recall_section(self, query: str, k: int = 5) -> str:
        """Render :meth:`recall` results as a ``<memory-recall>`` block.

        Returns ``""`` when nothing is relevant, so callers can skip injection.
        """
        hits = self.recall(query, k)
        if not hits:
            return ""
        lines = [
            "<memory-recall>",
            "Memories relevant to the current request — read the file with the "
            "memory tool's view command for full content:",
        ]
        for hit in hits:
            lines.append(f"- {hit.path} — {hit.description}")
        lines.append("</memory-recall>")
        return "\n".join(lines)

    # -- helpers ----------------------------------------------------------

    def _list_dir(self, resolved: Path, rel: str) -> str:
        """Render a directory listing: sub-directories first, then files, by name."""
        header = "Memory root:" if rel == "." else f"Directory {rel}:"
        # ``is_file()`` is False for directories, so they sort ahead of files.
        entries = sorted(resolved.iterdir(), key=lambda p: (p.is_file(), p.name))
        if not entries:
            return f"{header}\n(empty)"
        lines = [header]
        for entry in entries:
            suffix = "/" if entry.is_dir() else ""
            lines.append(f"- {entry.name}{suffix}")
        return "\n".join(lines)

    def _read_file(self, resolved: Path, rel: str, view_range: list[int] | None) -> str:
        """Return the file's lines prefixed with 1-based numbers and a tab.

        ``view_range`` is ``[start, end]`` (inclusive). An ``end`` of ``-1``,
        or one past the last line, means "through the end of the file".

        Raises:
            ValueError: ``view_range`` does not have two items, ``start`` is
                outside ``1..len(lines)``, or ``end`` is before ``start``.
        """
        lines = resolved.read_text(encoding="utf-8").split("\n")
        start, end = 1, len(lines)
        if view_range is not None:
            if len(view_range) != 2:
                raise ValueError("view_range must be [start, end]")
            start, end = int(view_range[0]), int(view_range[1])
            if start < 1 or start > len(lines):
                raise ValueError(
                    f"view_range start {start} out of range for {rel} (1..{len(lines)})"
                )
            if end != -1 and end < start:
                raise ValueError("view_range end must be >= start")
            if end == -1 or end > len(lines):
                end = len(lines)
        numbered = [f"{i}\t{lines[i - 1]}" for i in range(start, end + 1)]
        return "\n".join(numbered)
482
+
483
+
484
def build_remember_instruction(text: str) -> str:
    """Compose the user-turn prompt sent for the ``/remember`` command.

    With a non-empty ``text`` the model is told to persist exactly that
    information; with an empty ``text`` it is told to review the recent
    conversation and persist whatever is worth keeping.
    """
    if not text:
        return (
            "Review our recent conversation and use the `memory` tool to persist anything "
            "worth remembering across sessions (user preferences, feedback, project "
            "context, references). Create concise memory files with frontmatter and update "
            "the MEMORY.md index."
        )
    directive = (
        "Use the `memory` tool to persist the following information to long-term "
        "memory. Distill it into a concise entry, classify it "
        "(user/feedback/project/reference), create the .md file with proper "
        "frontmatter, and update the MEMORY.md index."
    )
    return f"{directive}\n\nInformation to remember:\n{text}"
500
+
501
+
502
def build_forget_instruction(text: str) -> str:
    """Compose the user-turn prompt sent for the ``/forget`` command.

    With a non-empty ``text`` the model is told to locate and delete the
    matching memories; with an empty ``text`` it is told to review the store
    and remove entries that are outdated or incorrect.
    """
    if not text:
        return (
            "Use the `memory` tool to review the memory directory (`view` MEMORY.md) and "
            "remove any memories that are now outdated or incorrect, updating the MEMORY.md "
            "index accordingly."
        )
    directive = (
        "Use the `memory` tool to forget the following. First `view` MEMORY.md and "
        "the memory directory to locate matching entries, then `delete` the matching "
        "memory file(s) and remove their lines from the MEMORY.md index."
    )
    return f"{directive}\n\nWhat to forget:\n{text}"
@@ -0,0 +1,67 @@
1
+ from __future__ import annotations
2
+
3
+ import math
4
+ import re
5
+ from typing import Any
6
+
7
+ from bareagent.core.fileutil import stringify
8
+
9
+ _CJK_PATTERN = re.compile(
10
+ "["
11
+ "\u3000-\u303f"
12
+ "\u3040-\u309f"
13
+ "\u30a0-\u30ff"
14
+ "\u3400-\u4dbf"
15
+ "\u4e00-\u9fff"
16
+ "\uac00-\ud7af"
17
+ "\uf900-\ufaff"
18
+ "\U00020000-\U0002a6df"
19
+ "\U0002f800-\U0002fa1f"
20
+ "]"
21
+ )
22
+ _ASCII_ALNUM_PATTERN = re.compile(r"[A-Za-z0-9]")
23
+ _WHITESPACE_PATTERN = re.compile(r"\s")
24
+
25
+
26
def estimate_tokens(messages: list[dict[str, Any]]) -> int:
    """Return a rough token count for ``messages``.

    Only each message's ``"content"`` value is measured; the per-message
    estimates are added up and the total is rounded up to a whole number.
    """
    per_message = (_estimate_value(entry.get("content")) for entry in messages)
    return int(math.ceil(sum(per_message)))
32
+
33
+
34
def _estimate_value(value: Any) -> float:
    """Recursively estimate the token weight of one content value.

    * ``None`` weighs nothing;
    * a string is measured directly;
    * a list is the sum of its items;
    * a ``tool_use`` dict counts its ``name`` and its ``input`` only;
    * any other dict counts whichever of ``text`` / ``content`` / ``input`` it
      has, plus ``name`` unless the dict is a ``tool_result``;
    * anything else is converted with ``stringify`` and measured as text.
    """
    if value is None:
        return 0.0
    if isinstance(value, str):
        return _estimate_text(value)
    if isinstance(value, list):
        return sum(_estimate_value(element) for element in value)
    if not isinstance(value, dict):
        return _estimate_text(stringify(value))

    kind = value.get("type")
    if kind == "tool_use":
        name_cost = _estimate_text(str(value.get("name", "")))
        return name_cost + _estimate_value(value.get("input"))

    subtotal = 0.0
    for field in ("text", "content", "input"):
        if field in value:
            subtotal += _estimate_value(value[field])
    if kind != "tool_result" and "name" in value:
        subtotal += _estimate_text(str(value["name"]))
    return subtotal
60
+
61
+
62
def _estimate_text(text: str) -> float:
    """Estimate the token weight of ``text`` from per-character classes.

    Each character falls into exactly one class, weighted as follows:
    CJK 1.5, ASCII letter/digit 0.25, whitespace 0.25, anything else 0.5.

    Returns:
        The weighted sum as a float (``0.0`` for an empty string).
    """
    cjk = len(_CJK_PATTERN.findall(text))
    ascii_alnum = len(_ASCII_ALNUM_PATTERN.findall(text))
    # ``\s`` also matches U+3000 (ideographic space), which lies inside the
    # CJK range. Count such characters as CJK only; otherwise they are
    # weighted twice and subtracted twice from ``other``, which can push
    # ``other`` below zero.
    whitespace = sum(
        1 for ch in _WHITESPACE_PATTERN.findall(text) if not _CJK_PATTERN.match(ch)
    )
    other = len(text) - cjk - ascii_alnum - whitespace
    return cjk * 1.5 + ascii_alnum * 0.25 + whitespace * 0.25 + other * 0.5