mnemo-dev 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mnemo/__init__.py +3 -0
- mnemo/analyzers/__init__.py +108 -0
- mnemo/api_discovery/__init__.py +248 -0
- mnemo/chunking.py +136 -0
- mnemo/cli.py +186 -0
- mnemo/clients.py +147 -0
- mnemo/code_review/__init__.py +68 -0
- mnemo/config.py +30 -0
- mnemo/dependency_graph/__init__.py +126 -0
- mnemo/doctor.py +118 -0
- mnemo/embeddings/__init__.py +47 -0
- mnemo/errors/__init__.py +81 -0
- mnemo/health/__init__.py +103 -0
- mnemo/incidents/__init__.py +90 -0
- mnemo/init.py +167 -0
- mnemo/intelligence/__init__.py +323 -0
- mnemo/knowledge/__init__.py +118 -0
- mnemo/mcp_server.py +458 -0
- mnemo/memory.py +250 -0
- mnemo/onboarding/__init__.py +86 -0
- mnemo/repo_map.py +357 -0
- mnemo/retrieval.py +31 -0
- mnemo/sprint/__init__.py +102 -0
- mnemo/storage.py +215 -0
- mnemo/team_graph/__init__.py +96 -0
- mnemo/test_intel/__init__.py +111 -0
- mnemo/vector_index/__init__.py +180 -0
- mnemo/workspace/__init__.py +224 -0
- mnemo_dev-0.1.0.dist-info/METADATA +644 -0
- mnemo_dev-0.1.0.dist-info/RECORD +33 -0
- mnemo_dev-0.1.0.dist-info/WHEEL +5 -0
- mnemo_dev-0.1.0.dist-info/entry_points.txt +3 -0
- mnemo_dev-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
"""Local vector index with optional ChromaDB backend."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
import subprocess
|
|
7
|
+
import sys
|
|
8
|
+
from dataclasses import dataclass
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any, Protocol
|
|
11
|
+
|
|
12
|
+
from ..chunking import Chunk
|
|
13
|
+
from ..embeddings import KeywordEmbeddingProvider
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# Process-wide latch: set to True by _auto_install_chromadb() on its first call
# so the pip install is attempted at most once per process.
_CHROMA_INSTALL_ATTEMPTED = False
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _auto_install_chromadb() -> bool:
    """Attempt to pip-install chromadb into the running interpreter.

    Only tries once per process. The install is opt-in: it runs only when the
    ``MNEMO_AUTO_INSTALL`` environment variable equals ``"1"`` and never inside
    a frozen executable (``sys.frozen``).

    Returns:
        True if pip exited successfully; False if the install was skipped,
        was already attempted in this process, or failed.
    """
    global _CHROMA_INSTALL_ATTEMPTED
    if _CHROMA_INSTALL_ATTEMPTED:
        return False
    # Latch before doing any work so a failed attempt is never retried.
    _CHROMA_INSTALL_ATTEMPTED = True

    if getattr(sys, "frozen", False):
        return False

    # Only auto-install if triggered by `mnemo init`, not on every recall
    if os.environ.get("MNEMO_AUTO_INSTALL") != "1":
        return False

    # sys.executable may be empty or None when the interpreter path is unknown;
    # there is nothing to run pip with in that case.
    if not sys.executable:
        return False

    try:
        subprocess.check_call(
            [sys.executable, "-m", "pip", "install", "chromadb>=0.5", "--quiet"],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except (subprocess.SubprocessError, OSError):
        # Non-zero pip exit or the interpreter could not be launched.
        return False

    # The caller re-imports chromadb right away in this same process; the
    # import system's directory caches must be refreshed to see new packages.
    import importlib

    importlib.invalidate_caches()
    return True
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class VectorIndex(Protocol):
    """Structural interface for a namespaced index of code chunks."""

    def available(self) -> bool:
        """Report whether the index backend is usable."""

    def upsert(self, namespace: str, chunks: list[Chunk]) -> None:
        """Store ``chunks`` under ``namespace``."""

    def query(
        self,
        namespace: str,
        query: str,
        limit: int = 10,
        filters: dict[str, Any] | None = None,
    ) -> list[dict[str, Any]]:
        """Return up to ``limit`` result dicts for ``query`` within ``namespace``.

        ``filters`` optionally restricts results by metadata key/value pairs.
        """

    def clear(self, namespace: str | None = None) -> None:
        """Drop one namespace, or every namespace when ``namespace`` is None."""
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
@dataclass
class _MemoryRecord:
    """One indexed chunk as held by the in-memory fallback store."""

    # Chunk identifier; surfaced as "id" in fallback query results.
    id: str
    # Text that the keyword fallback scores queries against.
    text: str
    # Key/value pairs compared against query filters.
    metadata: dict[str, Any]
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class LocalVectorIndex:
    """Chroma-first index with in-memory keyword fallback.

    Every upsert is mirrored into an in-memory store so that queries can still
    be answered by keyword scoring when ChromaDB is unavailable or a Chroma
    call fails.
    """

    def __init__(self, repo_root: Path):
        """Create the index directory and try to open a Chroma client.

        Args:
            repo_root: Repository root; index data lives under
                ``<repo_root>/.mnemo/index``.
        """
        self.repo_root = repo_root
        self.index_dir = repo_root / ".mnemo" / "index"
        self.index_dir.mkdir(parents=True, exist_ok=True)
        self._fallback = KeywordEmbeddingProvider()
        self._memory_store: dict[str, list[_MemoryRecord]] = {}
        self._chroma_collection_cache: dict[str, Any] = {}
        self._chroma_client: Any = None
        self._chroma_ready = False
        self._init_chroma()

    def _init_chroma(self) -> None:
        """Import chromadb (auto-installing when allowed) and open a persistent client.

        On any failure ``_chroma_ready`` stays False and the index silently
        operates on the in-memory fallback only.
        """
        self._chroma_ready = False
        try:
            import chromadb
        except ImportError:
            if not _auto_install_chromadb():
                return
            try:
                import chromadb
            except Exception:
                # Still missing, or the fresh install is broken.
                return
        except Exception:
            # A present-but-broken chromadb can fail with non-ImportError errors.
            return
        try:
            self._chroma_client = chromadb.PersistentClient(path=str(self.index_dir / "chroma"))
            self._chroma_ready = True
        except Exception:
            self._chroma_ready = False

    def _collection(self, namespace: str) -> Any:
        """Return the (cached) Chroma collection for ``namespace``, or None without Chroma."""
        if not self._chroma_ready or self._chroma_client is None:
            return None
        cached = self._chroma_collection_cache.get(namespace)
        if cached is None:
            cached = self._chroma_client.get_or_create_collection(name=namespace)
            self._chroma_collection_cache[namespace] = cached
        return cached

    def available(self) -> bool:
        """Return True when the Chroma backend was opened successfully."""
        return self._chroma_ready

    @staticmethod
    def _document(chunk: Chunk) -> str:
        """Build the searchable text stored for a chunk."""
        return f"{chunk.path} {chunk.symbol}\n{chunk.content}"

    @staticmethod
    def _metadata(chunk: Chunk) -> dict[str, Any]:
        """Build the metadata stored for a chunk.

        The chunk's own fields are written last so they take precedence over
        same-named keys in ``chunk.metadata``; both stores use this one shape
        so filters behave the same with and without Chroma.
        """
        return {
            **chunk.metadata,
            "path": chunk.path,
            "language": chunk.language,
            "symbol": chunk.symbol,
            "chunk_type": chunk.chunk_type,
        }

    def upsert(self, namespace: str, chunks: list[Chunk]) -> None:
        """Index ``chunks`` under ``namespace``.

        The in-memory store for the namespace is replaced by ``chunks``; when
        Chroma is available the chunks are additionally upserted into the
        namespace's collection.
        """
        documents = [self._document(chunk) for chunk in chunks]
        metadatas = [self._metadata(chunk) for chunk in chunks]
        self._memory_store[namespace] = [
            _MemoryRecord(id=chunk.id, text=document, metadata=metadata)
            for chunk, document, metadata in zip(chunks, documents, metadatas)
        ]
        collection = self._collection(namespace)
        if collection is None or not chunks:
            return
        collection.upsert(
            ids=[chunk.id for chunk in chunks],
            documents=documents,
            # Separate dict objects so the backend never shares state with the memory store.
            metadatas=[dict(metadata) for metadata in metadatas],
        )

    def _query_fallback(self, namespace: str, query: str, limit: int, filters: dict[str, Any] | None) -> list[dict[str, Any]]:
        """Score in-memory records with the keyword provider and return the best ``limit``.

        Records whose metadata does not equal every ``filters`` entry are
        skipped, as are records with a non-positive score.
        """
        query_emb = self._fallback.embed(query)
        scored: list[tuple[float, _MemoryRecord]] = []
        for record in self._memory_store.get(namespace, []):
            if filters and any(record.metadata.get(key) != value for key, value in filters.items()):
                continue
            score = query_emb.score(self._fallback.embed(record.text))
            if score > 0:
                scored.append((score, record))
        scored.sort(key=lambda item: item[0], reverse=True)
        return [
            {"id": record.id, "score": score, "content": record.text, "metadata": record.metadata}
            for score, record in scored[:limit]
        ]

    def query(self, namespace: str, query: str, limit: int = 10, filters: dict[str, Any] | None = None) -> list[dict[str, Any]]:
        """Return up to ``limit`` hits for ``query`` in ``namespace``.

        Uses Chroma when available and falls back to in-memory keyword scoring
        when Chroma is missing or any Chroma call raises. Each hit is a dict
        with ``id``, ``score``, ``content`` and ``metadata``; Chroma scores are
        reported as ``1.0 - distance``.
        """
        try:
            collection = self._collection(namespace)
        except Exception:
            # Opening the collection failed; answer from memory instead.
            collection = None

        if collection is not None:
            try:
                result = collection.query(query_texts=[query], n_results=limit, where=filters or None)
                ids = result.get("ids", [[]])[0]
                docs = result.get("documents", [[]])[0]
                distances = result.get("distances", [[]])[0] if result.get("distances") else [0.0] * len(ids)
                metas = result.get("metadatas", [[]])[0] if result.get("metadatas") else [{} for _ in ids]
                return [
                    {"id": doc_id, "score": 1.0 - float(dist), "content": doc, "metadata": meta or {}}
                    for doc_id, doc, dist, meta in zip(ids, docs, distances, metas)
                ]
            except Exception:
                # Deliberate best effort: any backend error degrades to the fallback below.
                pass

        return self._query_fallback(namespace, query, limit, filters)

    def _persisted_namespaces(self) -> list[str]:
        """List collection names known to the Chroma client (empty without Chroma)."""
        if not self._chroma_ready or self._chroma_client is None:
            return []
        try:
            listed = self._chroma_client.list_collections()
        except Exception:
            return []
        # Entries may be plain names or collection objects with a ``name`` attribute.
        return [str(getattr(item, "name", item)) for item in listed]

    def _drop_collection(self, name: str) -> None:
        """Delete one persisted collection; a missing collection or backend error is ignored."""
        if not self._chroma_ready or self._chroma_client is None:
            return
        try:
            self._chroma_client.delete_collection(name)
        except Exception:
            # Best effort: the collection may never have been created.
            pass

    def clear(self, namespace: str | None = None) -> None:
        """Remove indexed data for ``namespace``, or for every namespace when None.

        Clears the in-memory store and the collection cache, and also deletes
        the persisted Chroma collection(s) so stale entries are not served
        again once the collection is reopened.
        """
        if namespace is None:
            self._memory_store.clear()
            self._chroma_collection_cache.clear()
            for name in self._persisted_namespaces():
                self._drop_collection(name)
            return
        self._memory_store.pop(namespace, None)
        self._chroma_collection_cache.pop(namespace, None)
        self._drop_collection(namespace)
|
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
"""Multi-repo workspace — link, discover, and query across repositories."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from ..config import mnemo_path
|
|
10
|
+
|
|
11
|
+
# File name of the JSON link registry; _links_path() joins it onto mnemo_path(repo_root).
LINKS_FILE = "links.json"
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _links_path(repo_root: Path) -> Path:
    """Return the location of the link registry file for ``repo_root``."""
    mnemo_dir = mnemo_path(repo_root)
    return mnemo_dir / LINKS_FILE
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def get_linked_repos(repo_root: Path) -> list[Path]:
    """Return resolved paths of all linked repos that exist and are initialized.

    A linked repo counts as initialized when it contains a ``.mnemo`` entry.
    The repo itself is never returned, and registry entries that are not
    objects with a non-empty string ``path`` are ignored.

    Returns:
        Resolved paths in registry order; an empty list when the registry is
        missing, unreadable, not valid JSON, or not a JSON array.
    """
    path = _links_path(repo_root)
    if not path.exists():
        return []
    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except (ValueError, OSError):
        # ValueError covers both invalid JSON and undecodable bytes.
        return []
    if not isinstance(data, list):
        return []

    own_root = repo_root.resolve()
    linked: list[Path] = []
    for entry in data:
        raw_path = entry.get("path") if isinstance(entry, dict) else None
        if not isinstance(raw_path, str) or not raw_path:
            continue
        repo_path = Path(raw_path).expanduser().resolve()
        if repo_path != own_root and (repo_path / ".mnemo").exists():
            linked.append(repo_path)
    return linked
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _save_links(repo_root: Path, links: list[dict[str, str]]) -> None:
    """Write ``links`` to the registry file as indented JSON, creating its directory if needed."""
    destination = _links_path(repo_root)
    destination.parent.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(links, indent=2)
    destination.write_text(payload, encoding="utf-8")
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def link_repo(repo_root: Path, target: Path) -> str:
    """Link a sibling repo for cross-repo queries.

    Args:
        repo_root: Repo whose link registry is updated.
        target: Directory to link; must contain ``.git`` or ``.mnemo``.

    Returns:
        A human-readable status line: an error for a missing or non-repo
        path, ``Already linked: ...`` when the registry already has an entry
        resolving to ``target``, otherwise ``Linked: ...`` with the target's
        index status.
    """
    target = target.expanduser().resolve()
    if not target.exists():
        return f"Path does not exist: {target}"
    if not (target / ".git").exists() and not (target / ".mnemo").exists():
        return f"Not a repo: {target} (no .git or .mnemo found)"

    # Load the raw registry; an unreadable or malformed file is treated as empty.
    path = _links_path(repo_root)
    data: list[dict[str, str]] = []
    if path.exists():
        try:
            loaded = json.loads(path.read_text(encoding="utf-8"))
        except (ValueError, OSError):
            loaded = []
        if isinstance(loaded, list):
            data = loaded

    # Check duplicates against the raw entries, so a repo that was linked
    # before it had a ``.mnemo`` directory is still recognised.
    for entry in data:
        raw_path = entry.get("path") if isinstance(entry, dict) else None
        if isinstance(raw_path, str) and raw_path and Path(raw_path).expanduser().resolve() == target:
            return f"Already linked: {target.name}"

    data.append({"name": target.name, "path": str(target)})
    _save_links(repo_root, data)

    initialized = (target / ".mnemo").exists()
    status = "✓ indexed" if initialized else "⚠ needs `mnemo init`"
    return f"Linked: {target.name} ({status})"
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def unlink_repo(repo_root: Path, name: str) -> str:
    """Remove a linked repo by name or path.

    An entry is removed when ``name`` equals its registered name, or when the
    components of ``name`` equal the trailing components of its registered
    path (so ``svc``, ``team/svc`` and the full path all match
    ``/work/team/svc``). Comparison is case-insensitive. Whole path components
    are compared rather than substrings, so unlinking ``api`` does not remove
    ``/work/rapid-svc``.

    Returns:
        ``Unlinked: <name>`` when at least one entry was removed, otherwise a
        message saying nothing matched or that there are no linked repos.
    """
    path = _links_path(repo_root)
    if not path.exists():
        return "No linked repos."
    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except (ValueError, OSError):
        return "No linked repos."
    if not isinstance(data, list):
        return "No linked repos."

    needle = name.strip()
    if not needle:
        # An empty query must never match (and thereby delete) every link.
        return f"No linked repo matching '{name}'."
    needle_lower = needle.lower()
    try:
        needle_path = Path(needle).expanduser()
    except RuntimeError:
        # expanduser() raises when a "~user" home directory cannot be resolved.
        needle_path = Path(needle)
    needle_parts = tuple(part.lower() for part in needle_path.parts)

    def _matches(entry: Any) -> bool:
        if not isinstance(entry, dict):
            return False
        if str(entry.get("name", "")).lower() == needle_lower:
            return True
        entry_parts = tuple(part.lower() for part in Path(str(entry.get("path", ""))).parts)
        return bool(needle_parts) and entry_parts[-len(needle_parts):] == needle_parts

    filtered = [entry for entry in data if not _matches(entry)]
    if len(filtered) == len(data):
        return f"No linked repo matching '{name}'."
    _save_links(repo_root, filtered)
    return f"Unlinked: {name}"
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def discover_repos(repo_root: Path, search_dir: Path, auto_init: bool = False) -> str:
    """Auto-discover and link all repos under a directory. Optionally init uninitialized ones.

    Only direct children of ``search_dir`` that contain ``.git`` are
    considered, and ``repo_root`` itself is skipped.

    Args:
        repo_root: Repo whose link registry receives the discovered repos.
        search_dir: Directory whose immediate subdirectories are scanned.
        auto_init: When True, run ``init`` on each discovered repo that has no
            ``.mnemo`` directory.

    Returns:
        One status line per action joined by newlines, or a single message
        when ``search_dir`` is not a directory or holds no git repos.
    """
    import sys

    search_dir = search_dir.expanduser().resolve()
    # is_dir() also rejects an existing regular file, which iterdir() cannot scan.
    if not search_dir.is_dir():
        return f"Directory not found: {search_dir}"

    own_root = repo_root.resolve()
    found = sorted(
        child
        for child in search_dir.iterdir()
        if child.is_dir() and child.resolve() != own_root and (child / ".git").exists()
    )
    if not found:
        return f"No git repos found under {search_dir}"

    results: list[str] = []
    for repo in found:
        results.append(link_repo(repo_root, repo))

        # Auto-init if requested and not yet initialized
        if auto_init and not (repo / ".mnemo").exists():
            try:
                from ..init import init

                sys.stderr.write(f"  Initializing {repo.name}...\n")
                init(repo)
                results.append(f"  ✓ Initialized {repo.name}")
            except Exception as e:
                # Report the failure for this repo and keep going with the rest.
                results.append(f"  ✗ Failed to init {repo.name}: {e}")

    return "\n".join(results)
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def format_links(repo_root: Path) -> str:
    """Show all linked repos with status.

    Each registry entry is rendered as one Markdown bullet with its name,
    path and one of three statuses: path not found, indexed (a ``.mnemo``
    entry exists), or needs ``mnemo init``. Entries that are not JSON objects
    are skipped, and an entry without a path is reported as not found.

    Returns:
        The rendered list, or a short hint when there is nothing to show.
    """
    path = _links_path(repo_root)
    if not path.exists():
        return "No linked repos. Use `mnemo link <path>` or `mnemo link --discover <dir>`."

    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except (ValueError, OSError):
        return "No linked repos."

    entries = [entry for entry in data if isinstance(entry, dict)] if isinstance(data, list) else []
    if not entries:
        return "No linked repos."

    lines = [f"# Linked Repos ({len(entries)})\n"]
    for entry in entries:
        name = entry.get("name", "unknown")
        raw_path = str(entry.get("path", "") or "")
        repo_path = Path(raw_path)
        # Path("") is the current directory, so an empty path must not be probed.
        exists = bool(raw_path) and repo_path.exists()

        if not exists:
            status = "✗ path not found"
        elif (repo_path / ".mnemo").exists():
            status = "✓ indexed"
        else:
            status = "⚠ needs `mnemo init`"

        lines.append(f"- **{name}** {repo_path} {status}")

    return "\n".join(lines)
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def cross_repo_semantic_query(
    repo_root: Path, namespace: str, query: str, limit: int = 10
) -> list[dict[str, Any]]:
    """Query this repo + all linked repos, merge and rank results.

    Every result dict is tagged with a ``repo`` key holding the directory name
    of the repo it came from. Linked repos are each asked for half of
    ``limit`` (at least one result); a linked repo whose query fails is
    skipped.

    Returns:
        At most ``limit`` results, highest ``score`` first, keeping only the
        best-scoring result for each non-empty ``id``.
    """
    from ..retrieval import semantic_query

    # Query local repo first; copy so the callee's list is not extended in place.
    results = list(semantic_query(repo_root, namespace, query, limit=limit))
    for r in results:
        r["repo"] = repo_root.name

    # ``limit // 2`` is 0 for limit=1, which would ask linked repos for nothing.
    linked_limit = max(1, limit // 2)
    for linked in get_linked_repos(repo_root):
        try:
            linked_results = semantic_query(linked, namespace, query, limit=linked_limit)
            for r in linked_results:
                r["repo"] = linked.name
            results.extend(linked_results)
        except Exception:
            # Best effort: an unindexed or broken linked repo must not fail the query.
            continue

    # Sort by score descending, deduplicate by id
    seen: set[str] = set()
    unique: list[dict[str, Any]] = []
    for r in sorted(results, key=lambda x: x.get("score", 0), reverse=True):
        rid = r.get("id", "")
        if rid:
            if rid in seen:
                continue
            seen.add(rid)
        # Results without an id cannot be compared, so they are all kept.
        unique.append(r)

    return unique[:limit]
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def _impact_hit_lines(hits: list[dict[str, Any]], empty_message: str) -> list[str]:
    """Render query hits as ``path :: symbol`` bullets, or ``empty_message`` when there are none."""
    if not hits:
        return [empty_message]
    rendered: list[str] = []
    for hit in hits:
        # Metadata may be absent or None; treat both as empty.
        meta = hit.get("metadata") or {}
        rendered.append(f"- `{meta.get('path', '')}` :: `{meta.get('symbol', '')}`")
    return rendered


def cross_repo_impact(repo_root: Path, query: str) -> str:
    """Check all linked repos for code that depends on the queried service/file.

    Runs the query against the ``code`` namespace of this repo and of every
    linked repo (five hits each) and renders one Markdown section per repo.
    A linked repo whose query raises gets a warning line instead of hits.

    Returns:
        The Markdown report as a single string.
    """
    from ..retrieval import semantic_query

    lines = [f"# Cross-Repo Impact: '{query}'\n", "## This Repo"]
    local_hits = semantic_query(repo_root, "code", query, limit=5)
    lines.extend(_impact_hit_lines(local_hits, "- No matches in this repo"))

    linked = get_linked_repos(repo_root)
    if not linked:
        lines.append("\nNo linked repos. Use `mnemo link` to enable cross-repo impact analysis.")
        return "\n".join(lines)

    for linked_repo in linked:
        lines.append(f"\n## {linked_repo.name}")
        try:
            hits = semantic_query(linked_repo, "code", query, limit=5)
            section = _impact_hit_lines(hits, "- No matches")
        except Exception:
            section = ["- ⚠ Could not query (run `mnemo init` in that repo)"]
        lines.extend(section)

    return "\n".join(lines)
|