mnemo-dev 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,180 @@
1
+ """Local vector index with optional ChromaDB backend."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ import subprocess
7
+ import sys
8
+ from dataclasses import dataclass
9
+ from pathlib import Path
10
+ from typing import Any, Protocol
11
+
12
+ from ..chunking import Chunk
13
+ from ..embeddings import KeywordEmbeddingProvider
14
+
15
+
16
# Process-wide latch: flipped on the first call so the install is attempted
# at most once per interpreter, whatever the outcome.
_CHROMA_INSTALL_ATTEMPTED = False


def _auto_install_chromadb() -> bool:
    """Attempt to install chromadb automatically. Only tries once per process.

    The attempt is skipped, returning ``False``, when:

    * a previous call in this process already attempted it,
    * the interpreter is a frozen executable (``sys.frozen`` is truthy), or
    * the ``MNEMO_AUTO_INSTALL`` environment variable is not exactly ``"1"``.

    Returns:
        ``True`` if ``pip install "chromadb>=0.5"`` completed successfully,
        ``False`` if the attempt was skipped or failed.
    """
    global _CHROMA_INSTALL_ATTEMPTED
    if _CHROMA_INSTALL_ATTEMPTED:
        return False
    _CHROMA_INSTALL_ATTEMPTED = True

    if getattr(sys, "frozen", False):
        return False

    # Only auto-install if triggered by `mnemo init`, not on every recall
    if os.environ.get("MNEMO_AUTO_INSTALL") != "1":
        return False

    try:
        subprocess.check_call(
            [sys.executable, "-m", "pip", "install", "chromadb>=0.5", "--quiet"],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except Exception:
        # Deliberately best-effort: a failed install must never break the
        # caller, which simply continues without chromadb.
        return False

    # The package was created after this interpreter started; the path
    # finders may have cached the old directory listings, so refresh them
    # before the caller retries `import chromadb`.
    import importlib

    importlib.invalidate_caches()
    return True
42
+
43
+
44
class VectorIndex(Protocol):
    """Structural interface for a namespaced vector index.

    Only the method signatures are defined here; the behaviour of each
    operation is up to the implementing class.
    """

    def available(self) -> bool:
        ...

    def upsert(self, namespace: str, chunks: list[Chunk]) -> None:
        ...

    def query(
        self,
        namespace: str,
        query: str,
        limit: int = 10,
        filters: dict[str, Any] | None = None,
    ) -> list[dict[str, Any]]:
        ...

    def clear(self, namespace: str | None = None) -> None:
        ...
56
+
57
+
58
@dataclass
class _MemoryRecord:
    """A single entry of the in-memory fallback store."""

    # Identifier copied from the chunk the record was built from.
    id: str
    # Text that fallback queries are scored against.
    text: str
    # Key/value pairs compared against query filters.
    metadata: dict[str, Any]
63
+
64
+
65
class LocalVectorIndex:
    """Chroma-first index with in-memory keyword fallback.

    Every upsert is mirrored into an in-process store so that queries can be
    answered by ``KeywordEmbeddingProvider`` scoring whenever ChromaDB is not
    importable, cannot be opened, or fails at query time. When ChromaDB is
    usable, data is persisted under ``<repo_root>/.mnemo/index/chroma``.
    """

    def __init__(self, repo_root: Path):
        """Create the index directory and try to bring up the Chroma backend.

        Args:
            repo_root: Repository root; the index lives in ``.mnemo/index``
                beneath it.
        """
        self.repo_root = repo_root
        self.index_dir = repo_root / ".mnemo" / "index"
        self.index_dir.mkdir(parents=True, exist_ok=True)
        self._fallback = KeywordEmbeddingProvider()
        self._memory_store: dict[str, list[_MemoryRecord]] = {}
        self._chroma_collection_cache: dict[str, Any] = {}
        self._chroma_client: Any = None
        self._chroma_ready = False
        self._init_chroma()

    def _init_chroma(self) -> None:
        """Import chromadb (auto-installing if permitted) and open the client.

        Any failure leaves ``_chroma_ready`` as ``False`` so the instance
        silently operates on the in-memory fallback.
        """
        self._chroma_ready = False
        try:
            import chromadb
        except ImportError:
            if not _auto_install_chromadb():
                return
            try:
                import chromadb
            except Exception:
                # The freshly installed package can fail at import time for
                # reasons other than ImportError; treat that exactly like
                # the first-import failure path instead of crashing.
                return
        except Exception:
            return
        try:
            self._chroma_client = chromadb.PersistentClient(path=str(self.index_dir / "chroma"))
        except Exception:
            return
        self._chroma_ready = True

    def _collection(self, namespace: str):
        """Return the cached Chroma collection for ``namespace``, or ``None``.

        ``None`` means the Chroma backend is not usable; otherwise the
        collection is created on first use.
        """
        if not self._chroma_ready or self._chroma_client is None:
            return None
        if namespace not in self._chroma_collection_cache:
            self._chroma_collection_cache[namespace] = self._chroma_client.get_or_create_collection(
                name=namespace
            )
        return self._chroma_collection_cache[namespace]

    def available(self) -> bool:
        """Return ``True`` when the Chroma client was opened successfully."""
        return self._chroma_ready

    @staticmethod
    def _document(chunk: Chunk) -> str:
        """Build the searchable text stored for ``chunk`` in both backends."""
        return f"{chunk.path} {chunk.symbol}\n{chunk.content}"

    @staticmethod
    def _metadata(chunk: Chunk) -> dict[str, Any]:
        """Build the metadata stored for ``chunk`` in both backends.

        The chunk's own attributes are written last so they win over
        same-named keys in ``chunk.metadata``. Using one builder for both
        backends keeps ``filters`` behaving identically on either path.
        """
        return {
            **chunk.metadata,
            "path": chunk.path,
            "language": chunk.language,
            "symbol": chunk.symbol,
            "chunk_type": chunk.chunk_type,
        }

    def upsert(self, namespace: str, chunks: list[Chunk]) -> None:
        """Store ``chunks`` under ``namespace``.

        The in-memory store for the namespace is replaced by exactly these
        chunks; the Chroma collection, when available, is upserted by id.
        """
        self._memory_store[namespace] = [
            _MemoryRecord(
                id=chunk.id,
                text=self._document(chunk),
                metadata=self._metadata(chunk),
            )
            for chunk in chunks
        ]
        collection = self._collection(namespace)
        if collection is None or not chunks:
            return
        collection.upsert(
            ids=[chunk.id for chunk in chunks],
            documents=[self._document(chunk) for chunk in chunks],
            metadatas=[self._metadata(chunk) for chunk in chunks],
        )

    def _query_fallback(self, namespace: str, query: str, limit: int, filters: dict[str, Any] | None) -> list[dict[str, Any]]:
        """Answer a query from the in-memory store using keyword scoring.

        Records must equal every ``filters`` value exactly; records scoring
        ``0`` or less are dropped. Results are ordered best-first.
        """
        query_emb = self._fallback.embed(query)
        scored: list[tuple[float, _MemoryRecord]] = []
        for record in self._memory_store.get(namespace, []):
            if filters and any(record.metadata.get(k) != v for k, v in filters.items()):
                continue
            score = query_emb.score(self._fallback.embed(record.text))
            if score > 0:
                scored.append((score, record))
        scored.sort(key=lambda item: item[0], reverse=True)
        return [
            {"id": record.id, "score": score, "content": record.text, "metadata": record.metadata}
            for score, record in scored[:limit]
        ]

    def query(self, namespace: str, query: str, limit: int = 10, filters: dict[str, Any] | None = None) -> list[dict[str, Any]]:
        """Return up to ``limit`` matches for ``query`` in ``namespace``.

        Uses Chroma when available and falls back to the in-memory store when
        Chroma is unavailable or the Chroma query raises.

        Returns:
            Dicts with ``id``, ``score``, ``content`` and ``metadata`` keys.
            For Chroma results ``score`` is ``1.0 - distance``.
        """
        collection = self._collection(namespace)
        if collection is None:
            return self._query_fallback(namespace, query, limit, filters)
        try:
            # An empty filter dict is passed as None, i.e. no `where` clause.
            result = collection.query(query_texts=[query], n_results=limit, where=filters or None)
            ids = result.get("ids", [[]])[0]
            docs = result.get("documents", [[]])[0]
            distances = result.get("distances", [[]])[0] if result.get("distances") else [0.0] * len(ids)
            metas = result.get("metadatas", [[]])[0] if result.get("metadatas") else [{} for _ in ids]
            return [
                {"id": doc_id, "score": 1.0 - float(dist), "content": doc, "metadata": meta}
                for doc_id, doc, dist, meta in zip(ids, docs, distances, metas)
            ]
        except Exception:
            return self._query_fallback(namespace, query, limit, filters)

    def _chroma_collection_names(self) -> list[str]:
        """List the names of all collections known to the Chroma client.

        Returns an empty list when Chroma is unavailable or listing fails.
        Entries may be plain names or objects exposing ``.name`` depending on
        the installed chromadb version, so both shapes are accepted.
        """
        if not self._chroma_ready or self._chroma_client is None:
            return []
        try:
            listed = self._chroma_client.list_collections()
        except Exception:
            return []
        names: list[str] = []
        for item in listed:
            name = str(item) if isinstance(item, str) else getattr(item, "name", None)
            if name:
                names.append(name)
        return names

    def clear(self, namespace: str | None = None) -> None:
        """Remove indexed data for ``namespace``, or for everything if ``None``.

        Both the in-memory store and the persistent Chroma collections are
        cleared; dropping only the Python-side caches would let later queries
        keep returning stale Chroma results. Chroma deletion is best-effort.
        """
        if namespace is None:
            doomed = self._chroma_collection_names()
            self._memory_store.clear()
            self._chroma_collection_cache.clear()
        else:
            doomed = [namespace]
            self._memory_store.pop(namespace, None)
            self._chroma_collection_cache.pop(namespace, None)

        if not self._chroma_ready or self._chroma_client is None:
            return
        for name in doomed:
            try:
                self._chroma_client.delete_collection(name=name)
            except Exception:
                # e.g. the collection was never created; nothing to remove.
                continue
@@ -0,0 +1,224 @@
1
+ """Multi-repo workspace — link, discover, and query across repositories."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from pathlib import Path
7
+ from typing import Any
8
+
9
+ from ..config import mnemo_path
10
+
11
# Name of the JSON file, inside the directory returned by mnemo_path(),
# that stores the list of linked repos.
LINKS_FILE = "links.json"


def _links_path(repo_root: Path) -> Path:
    """Return the path of the links file belonging to ``repo_root``."""
    mnemo_dir = mnemo_path(repo_root)
    return mnemo_dir / LINKS_FILE
16
+
17
+
18
def get_linked_repos(repo_root: Path) -> list[Path]:
    """Return resolved paths of all linked repos that exist and are initialized.

    A linked repo is reported only when its resolved path differs from
    ``repo_root`` and contains a ``.mnemo`` directory. A missing, unreadable
    or malformed links file yields an empty list, and individual entries that
    are not dicts or lack a usable ``"path"`` string are skipped rather than
    raising.

    Args:
        repo_root: Repository whose links file is read.

    Returns:
        Resolved paths in the order they appear in the links file.
    """
    path = _links_path(repo_root)
    if not path.exists():
        return []
    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers both JSONDecodeError and UnicodeDecodeError.
        return []
    if not isinstance(data, list):
        return []

    own_root = repo_root.resolve()
    linked: list[Path] = []
    for entry in data:
        raw = entry.get("path") if isinstance(entry, dict) else None
        if not isinstance(raw, str) or not raw:
            # An empty path would resolve to the current working directory.
            continue
        repo_path = Path(raw).expanduser().resolve()
        if repo_path != own_root and (repo_path / ".mnemo").exists():
            linked.append(repo_path)
    return linked
33
+
34
+
35
def _save_links(repo_root: Path, links: list[dict[str, str]]) -> None:
    """Write ``links`` to the links file of ``repo_root`` as indented JSON.

    The parent directory is created first if it does not exist yet.
    """
    destination = _links_path(repo_root)
    destination.parent.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(links, indent=2)
    destination.write_text(payload, encoding="utf-8")
39
+
40
+
41
def link_repo(repo_root: Path, target: Path) -> str:
    """Link a sibling repo for cross-repo queries.

    ``target`` must exist and contain either ``.git`` or ``.mnemo``. The
    duplicate check is made against the raw entries of the links file, so a
    repo that is linked but not yet initialized is also recognised as
    already linked instead of being appended again.

    Args:
        repo_root: Repository whose links file is updated.
        target: Repository to link; ``~`` is expanded and the path resolved.

    Returns:
        A human-readable status message.
    """
    target = target.expanduser().resolve()
    if not target.exists():
        return f"Path does not exist: {target}"
    initialized = (target / ".mnemo").exists()
    if not initialized and not (target / ".git").exists():
        return f"Not a repo: {target} (no .git or .mnemo found)"

    # Load raw data to append
    path = _links_path(repo_root)
    data: list[dict[str, str]] = []
    if path.exists():
        try:
            loaded = json.loads(path.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            loaded = []
        # Anything other than a list cannot be appended to; start fresh.
        if isinstance(loaded, list):
            data = loaded

    for entry in data:
        raw = entry.get("path") if isinstance(entry, dict) else None
        if isinstance(raw, str) and raw and Path(raw).expanduser().resolve() == target:
            return f"Already linked: {target.name}"

    data.append({"name": target.name, "path": str(target)})
    _save_links(repo_root, data)

    status = "✓ indexed" if initialized else "⚠ needs `mnemo init`"
    return f"Linked: {target.name} ({status})"
68
+
69
+
70
def unlink_repo(repo_root: Path, name: str) -> str:
    """Remove a linked repo by name or path.

    An entry is removed when ``name`` equals its ``"name"``
    (case-insensitively) or when the path components of ``name`` are a
    trailing run of the entry's ``"path"`` components (so ``api``,
    ``work/api`` and ``/work/api`` all match ``/work/api``). Matching is done
    on whole components: a raw substring test would also remove unrelated
    repos such as ``/work/capital`` for ``api``, and every repo for ``""``.

    Args:
        repo_root: Repository whose links file is updated.
        name: Repo name or (partial) path to remove.

    Returns:
        A human-readable status message.
    """
    path = _links_path(repo_root)
    if not path.exists():
        return "No linked repos."
    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        return "No linked repos."
    if not isinstance(data, list):
        return "No linked repos."

    wanted = name.strip()
    wanted_name = wanted.casefold()
    wanted_parts = (
        tuple(part.casefold() for part in Path(wanted).expanduser().parts) if wanted else ()
    )

    def _matches(entry: Any) -> bool:
        # Malformed (non-dict) entries never match and are left in place.
        if not wanted or not isinstance(entry, dict):
            return False
        if str(entry.get("name", "")).casefold() == wanted_name:
            return True
        if not wanted_parts:
            return False
        entry_parts = tuple(part.casefold() for part in Path(str(entry.get("path", ""))).parts)
        return entry_parts[-len(wanted_parts):] == wanted_parts

    filtered = [entry for entry in data if not _matches(entry)]
    if len(filtered) == len(data):
        return f"No linked repo matching '{name}'."
    _save_links(repo_root, filtered)
    return f"Unlinked: {name}"
86
+
87
+
88
def discover_repos(repo_root: Path, search_dir: Path, auto_init: bool = False) -> str:
    """Auto-discover and link all repos under a directory. Optionally init uninitialized ones.

    Only the direct children of ``search_dir`` are inspected; a child counts
    as a repo when it is a directory containing ``.git``. ``repo_root``
    itself is skipped. Each discovered repo is passed to ``link_repo`` in
    sorted order.

    Args:
        repo_root: Repository that the discovered repos are linked to.
        search_dir: Directory to scan; ``~`` is expanded and the path resolved.
        auto_init: When true, repos without ``.mnemo`` are initialized after
            linking; failures are reported in the output, not raised.

    Returns:
        One status line per action, joined with newlines, or a single
        message when the directory is unusable or holds no git repos.
    """
    search_dir = search_dir.expanduser().resolve()
    # is_dir() is False for missing paths and for plain files, on which
    # iterdir() would raise.
    if not search_dir.is_dir():
        return f"Directory not found: {search_dir}"

    own_root = repo_root.resolve()
    found: list[Path] = [
        child
        for child in search_dir.iterdir()
        if child.is_dir() and child.resolve() != own_root and (child / ".git").exists()
    ]

    if not found:
        return f"No git repos found under {search_dir}"

    results = []
    for repo in sorted(found):
        results.append(link_repo(repo_root, repo))

        # Auto-init if requested and not yet initialized
        if auto_init and not (repo / ".mnemo").exists():
            try:
                from ..init import init
                import sys
                sys.stderr.write(f"  Initializing {repo.name}...\n")
                init(repo)
                results.append(f"  ✓ Initialized {repo.name}")
            except Exception as e:
                results.append(f"  ✗ Failed to init {repo.name}: {e}")

    return "\n".join(results)
123
+
124
+
125
def format_links(repo_root: Path) -> str:
    """Show all linked repos with status.

    Produces a Markdown-style listing with one bullet per entry of the links
    file. Each bullet is tagged as missing on disk, indexed (has ``.mnemo``)
    or still needing ``mnemo init``. A hint or "No linked repos." is returned
    when the file is absent, unreadable or empty.
    """
    links_file = _links_path(repo_root)
    if not links_file.exists():
        return "No linked repos. Use `mnemo link <path>` or `mnemo link --discover <dir>`."

    try:
        entries = json.loads(links_file.read_text(encoding="utf-8"))
    except (json.JSONDecodeError, OSError):
        return "No linked repos."

    if not entries:
        return "No linked repos."

    def _status(location: Path) -> str:
        # Order matters: a missing path can never count as indexed.
        if not location.exists():
            return "✗ path not found"
        if (location / ".mnemo").exists():
            return "✓ indexed"
        return "⚠ needs `mnemo init`"

    report = [f"# Linked Repos ({len(entries)})\n"]
    for entry in entries:
        label = entry.get("name", "unknown")
        location = Path(entry.get("path", ""))
        report.append(f"- **{label}** {location} {_status(location)}")

    return "\n".join(report)
156
+
157
+
158
def cross_repo_semantic_query(
    repo_root: Path, namespace: str, query: str, limit: int = 10
) -> list[dict[str, Any]]:
    """Query this repo + all linked repos, merge and rank results.

    The local repo is queried with the full ``limit`` and every linked repo
    with half of it, but never fewer than one result, so that small limits
    still consult linked repos. Each hit is tagged with a ``"repo"`` key
    holding the directory name of the repo it came from. Linked repos that
    fail to answer are skipped.

    Args:
        repo_root: Repository the query starts from.
        namespace: Index namespace forwarded to ``semantic_query``.
        query: Query text.
        limit: Maximum number of results; values ``<= 0`` yield ``[]``.

    Returns:
        Hits sorted by descending ``"score"``, de-duplicated by ``"id"``
        (the best-scoring occurrence is kept), truncated to ``limit``.
    """
    # Nothing can be returned, and a negative limit would make the final
    # slice drop results from the wrong end.
    if limit <= 0:
        return []

    from ..retrieval import semantic_query

    # Query local repo first
    results = list(semantic_query(repo_root, namespace, query, limit=limit))
    for r in results:
        r["repo"] = repo_root.name

    # Query linked repos
    per_linked_limit = max(1, limit // 2)
    for linked in get_linked_repos(repo_root):
        try:
            linked_results = semantic_query(linked, namespace, query, limit=per_linked_limit)
            for r in linked_results:
                r["repo"] = linked.name
            results.extend(linked_results)
        except Exception:
            continue

    # Sort by score descending, deduplicate by id
    seen: set[str] = set()
    unique: list[dict[str, Any]] = []
    for r in sorted(results, key=lambda x: x.get("score", 0), reverse=True):
        rid = r.get("id", "")
        if rid not in seen:
            seen.add(rid)
            unique.append(r)

    return unique[:limit]
189
+
190
+
191
def cross_repo_impact(repo_root: Path, query: str) -> str:
    """Check all linked repos for code that depends on the queried service/file.

    Builds a Markdown-style report: a section for this repo followed by one
    section per linked repo, each listing up to five ``"code"`` namespace
    hits as ``path :: symbol`` bullets. A linked repo whose query raises gets
    a warning bullet instead.
    """
    from ..retrieval import semantic_query

    report = [f"# Cross-Repo Impact: '{query}'\n", "## This Repo"]

    def _emit(hits: list[dict[str, Any]]) -> None:
        # Appends one bullet per hit, in order, directly to the report.
        for hit in hits:
            meta = hit.get("metadata", {})
            report.append(f"- `{meta.get('path', '')}` :: `{meta.get('symbol', '')}`")

    own_hits = semantic_query(repo_root, "code", query, limit=5)
    if not own_hits:
        report.append("- No matches in this repo")
    else:
        _emit(own_hits)

    siblings = get_linked_repos(repo_root)
    if not siblings:
        report.append("\nNo linked repos. Use `mnemo link` to enable cross-repo impact analysis.")
        return "\n".join(report)

    for sibling in siblings:
        report.append(f"\n## {sibling.name}")
        try:
            sibling_hits = semantic_query(sibling, "code", query, limit=5)
            if not sibling_hits:
                report.append("- No matches")
            else:
                _emit(sibling_hits)
        except Exception:
            report.append("- ⚠ Could not query (run `mnemo init` in that repo)")

    return "\n".join(report)