mnemo-dev 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mnemo/__init__.py +3 -0
- mnemo/analyzers/__init__.py +108 -0
- mnemo/api_discovery/__init__.py +248 -0
- mnemo/chunking.py +136 -0
- mnemo/cli.py +186 -0
- mnemo/clients.py +147 -0
- mnemo/code_review/__init__.py +68 -0
- mnemo/config.py +30 -0
- mnemo/dependency_graph/__init__.py +126 -0
- mnemo/doctor.py +118 -0
- mnemo/embeddings/__init__.py +47 -0
- mnemo/errors/__init__.py +81 -0
- mnemo/health/__init__.py +103 -0
- mnemo/incidents/__init__.py +90 -0
- mnemo/init.py +167 -0
- mnemo/intelligence/__init__.py +323 -0
- mnemo/knowledge/__init__.py +118 -0
- mnemo/mcp_server.py +458 -0
- mnemo/memory.py +250 -0
- mnemo/onboarding/__init__.py +86 -0
- mnemo/repo_map.py +357 -0
- mnemo/retrieval.py +31 -0
- mnemo/sprint/__init__.py +102 -0
- mnemo/storage.py +215 -0
- mnemo/team_graph/__init__.py +96 -0
- mnemo/test_intel/__init__.py +111 -0
- mnemo/vector_index/__init__.py +180 -0
- mnemo/workspace/__init__.py +224 -0
- mnemo_dev-0.1.0.dist-info/METADATA +644 -0
- mnemo_dev-0.1.0.dist-info/RECORD +33 -0
- mnemo_dev-0.1.0.dist-info/WHEEL +5 -0
- mnemo_dev-0.1.0.dist-info/entry_points.txt +3 -0
- mnemo_dev-0.1.0.dist-info/top_level.txt +1 -0
mnemo/memory.py
ADDED
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
"""Persistent memory store for decisions, context, and notes."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import time
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
from .config import CONTEXT_FILE, DECISIONS_FILE, MEMORY_FILE, mnemo_path
|
|
11
|
+
from .retrieval import semantic_query
|
|
12
|
+
from .storage import Collections, get_storage
|
|
13
|
+
|
|
14
|
+
# Hard upper bound, in characters, on the text lookup() returns in an MCP response.
MAX_OUTPUT_CHARS = 75000
# Target size, in characters, of the document built by recall() (~3K tokens).
RECALL_BUDGET = 12000
# Compaction threshold: once the memory list grows past this many entries,
# _compact_memory keeps the newest 30 and folds older ones into one summary entry.
MAX_MEMORY_ENTRIES = 50
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _refresh_rule(repo_root: Path) -> None:
    """Regenerate the installed client context files for *repo_root*."""
    # Imported at call time rather than at module import time.
    from .init import refresh_context_files as _refresh

    _refresh(repo_root)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _as_list(data: Any) -> list[dict[str, Any]]:
|
|
27
|
+
return data if isinstance(data, list) else []
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _as_dict(data: Any) -> dict[str, Any]:
|
|
31
|
+
return data if isinstance(data, dict) else {}
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _compact_memory(entries: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Collapse old memory entries into one summary entry once the list is too long.

    Lists with at most MAX_MEMORY_ENTRIES items are returned untouched.
    Otherwise the newest 30 entries are kept, and the contents of the last 20
    of the older entries are joined into a single ``summary`` entry (capped at
    500 characters) that is placed in front of the kept entries.
    """
    if len(entries) <= MAX_MEMORY_ENTRIES:
        return entries

    older, recent = entries[:-30], entries[-30:]

    fragments = []
    for item in older[-20:]:
        if item.get("content"):
            fragments.append(item["content"])
    digest = "Previous context: " + "; ".join(fragments)

    return [
        {
            "id": 0,
            # The summary takes the timestamp of the newest entry it replaces.
            "timestamp": older[-1]["timestamp"],
            "category": "summary",
            "content": digest[:500],
        },
        *recent,
    ]
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def add_memory(repo_root: Path, content: str, category: str = "general") -> dict[str, Any]:
    """Append a memory entry, compact the store if needed, and refresh context files.

    Args:
        repo_root: Repository whose storage receives the entry.
        content: Text of the memory.
        category: Label stored with the entry; defaults to ``"general"``.

    Returns:
        The newly created entry (``id``, ``timestamp``, ``category``, ``content``).
    """
    storage = get_storage(repo_root)
    entries = _as_list(storage.read_collection(Collections.MEMORY))

    # A purely length-based id is unsafe: compaction shrinks the list to a
    # summary plus 30 entries while those entries keep their higher ids, so
    # len(entries) + 1 would collide with an existing id. Number from the
    # highest id seen instead; fall back to the length for id-less data.
    highest_id = max(
        (
            item["id"]
            for item in entries
            if isinstance(item, dict) and isinstance(item.get("id"), int)
        ),
        default=0,
    )
    entry = {
        "id": max(highest_id, len(entries)) + 1,
        "timestamp": time.time(),
        "category": category,
        "content": content,
    }
    entries.append(entry)
    storage.write_collection(Collections.MEMORY, _compact_memory(entries))
    _refresh_rule(repo_root)
    return entry
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def add_decision(repo_root: Path, decision: str, reasoning: str = "") -> dict[str, Any]:
    """Store a decision (with optional reasoning) and refresh the context files.

    Returns the stored record, whose ``id`` is the new length of the decision log.
    """
    store = get_storage(repo_root)
    log = _as_list(store.read_collection(Collections.DECISIONS))

    record = dict(
        id=len(log) + 1,
        timestamp=time.time(),
        decision=decision,
        reasoning=reasoning,
    )
    log.append(record)

    store.write_collection(Collections.DECISIONS, log)
    _refresh_rule(repo_root)
    return record
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def save_context(repo_root: Path, context: dict[str, Any]) -> None:
    """Merge *context* into the stored project context and refresh context files.

    Keys in *context* overwrite stored keys; ``last_updated`` is always set to
    the current time.
    """
    store = get_storage(repo_root)
    merged = _as_dict(store.read_collection(Collections.CONTEXT))
    # The keyword argument is applied after the mapping, so last_updated wins
    # even if *context* carries its own value for it.
    merged.update(context, last_updated=time.time())
    store.write_collection(Collections.CONTEXT, merged)
    _refresh_rule(repo_root)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def lookup(repo_root: Path, query: str) -> str:
    """Return a markdown outline of supported source files whose path contains *query*.

    Files are parsed on demand. Matching is a case-insensitive substring test
    on the path relative to *repo_root*. Everything that was parsed is also
    fed to the code index (best effort) so later semantic queries can find it.

    Args:
        repo_root: Root directory to scan.
        query: File or folder fragment; leading/trailing ``/`` are ignored.

    Returns:
        Markdown text, truncated to at most MAX_OUTPUT_CHARS characters, or a
        "No files matching ..." message when nothing matched.
    """
    from .repo_map import MAX_FILE_SIZE, SUPPORTED_EXTENSIONS, _extract_file, _should_ignore
    from .chunking import make_code_chunks
    from .retrieval import index_chunks

    query_lower = query.lower().strip("/")
    matches: list[tuple[str, dict[str, Any]]] = []
    discovered_chunks = []

    for ext, language in SUPPORTED_EXTENSIONS.items():
        for filepath in repo_root.rglob(f"*{ext}"):
            if _should_ignore(filepath):
                continue
            rel_path = filepath.relative_to(repo_root)
            rel = str(rel_path)
            # Also test the forward-slash form so a query such as "pkg/mod"
            # matches on platforms whose native separator is a backslash.
            if query_lower not in rel.lower() and query_lower not in rel_path.as_posix().lower():
                continue
            try:
                # stat() can fail too (e.g. dangling symlink), so it is guarded
                # together with the read; only matching files are stat'ed.
                if filepath.stat().st_size > MAX_FILE_SIZE:
                    continue
                source = filepath.read_bytes()
            except OSError:
                continue
            info = _extract_file(source, language)
            if info:
                matches.append((rel, info))
                discovered_chunks.extend(make_code_chunks(rel, language, info))

    # Feed discovered code into the index for future queries.
    if discovered_chunks:
        try:
            index_chunks(repo_root, "code", discovered_chunks)
        except Exception:
            # Indexing is a deliberate best-effort side effect; a failure here
            # must not prevent the lookup result from being returned.
            pass

    if not matches:
        return f"No files matching '{query}' found."

    lines = [f"# Details for '{query}'\n"]
    # Sort on the path only, so equal paths never fall through to comparing dicts.
    for filepath, info in sorted(matches, key=lambda match: match[0]):
        lines.append(f"## {filepath}")
        if info.get("imports"):
            lines.append("**Imports:** " + ", ".join(info["imports"]))
        for cls in info.get("classes", []):
            impl = f" : {cls['implements']}" if cls.get("implements") else ""
            lines.append(f"### `{cls['name']}{impl}`")
            for method in cls.get("methods", []):
                lines.append(f"- {method}")
        for function in info.get("functions", []):
            lines.append(f"- {function}")
        lines.append("")

    result = "\n".join(lines)
    if len(result) > MAX_OUTPUT_CHARS:
        notice = "\n... (narrow your query for more details)"
        # Reserve room for the notice so the final text respects the hard limit.
        result = result[: MAX_OUTPUT_CHARS - len(notice)]
        last_nl = result.rfind("\n")
        if last_nl > 0:
            result = result[:last_nl]
        result += notice
    return result
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def recall(repo_root: Path) -> str:
    """Recall project memory as a compact markdown document.

    Sections are emitted in this order, each only when it has content:
    project context, decisions, memory, the most recent active task (with up
    to five semantically related code symbols), the last five changelog
    entries, and the saved repo-map summary trimmed to what is left of
    RECALL_BUDGET.

    If the repository changed since the last map generation, the repo map is
    regenerated first (without re-indexing).

    Returns:
        The markdown text, or ``""`` when the mnemo directory does not exist.
    """
    from .repo_map import CHANGELOG_FILE, has_changes, save_repo_map

    base = mnemo_path(repo_root)
    if not base.exists():
        return ""

    if has_changes(repo_root):
        save_repo_map(repo_root, index=False)

    storage = get_storage(repo_root)
    sections: list[str] = []

    # Copy before popping so the stored collection object is not mutated.
    context = dict(_as_dict(storage.read_collection(Collections.CONTEXT)))
    context.pop("last_updated", None)
    if context:
        sections.append("# Project Context")
        for key, value in context.items():
            sections.append(f"- **{key}**: {value}")
        sections.append("")

    decisions = _as_list(storage.read_collection(Collections.DECISIONS))
    if decisions:
        sections.append("# Decisions")
        for decision in decisions:
            reasoning = f" - {decision['reasoning']}" if decision.get("reasoning") else ""
            sections.append(f"- {decision['decision']}{reasoning}")
        sections.append("")

    memory = _as_list(storage.read_collection(Collections.MEMORY))
    if memory:
        sections.append("# Memory")
        for item in memory:
            cat = f" [{item['category']}]" if item.get("category") != "general" else ""
            sections.append(f"- {item['content']}{cat}")
        sections.append("")

    tasks = _as_list(storage.read_collection(Collections.TASKS))
    active_tasks = [task for task in tasks if task.get("status") == "active"]
    if active_tasks:
        sections.append("# Active Task Context")
        active = active_tasks[-1]
        sections.append(f"- **{active.get('task_id', '')}**: {active.get('description', '')}")
        task_query = " ".join(
            filter(
                None,
                [
                    str(active.get("task_id", "")),
                    str(active.get("description", "")),
                    # "files" may be stored as null; treat that like an empty list.
                    " ".join(active.get("files") or []),
                    str(active.get("notes", "")),
                ],
            )
        )
        hits = semantic_query(repo_root, "code", task_query, limit=5)
        for hit in hits:
            meta = hit.get("metadata", {})
            sections.append(f"- Relevant: `{meta.get('path', '')}` :: `{meta.get('symbol', '')}`")
        sections.append("")

    changelog_path = base / CHANGELOG_FILE
    if changelog_path.exists():
        try:
            changelog = json.loads(changelog_path.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # An unreadable or corrupt changelog must not break recall;
            # json.JSONDecodeError is a ValueError.
            changelog = []
        if isinstance(changelog, list) and changelog:
            sections.append("# Recent Changes")
            for entry in changelog[-5:]:
                if entry.get("added"):
                    sections.append(f"- Added: {', '.join(entry['added'])}")
                if entry.get("modified"):
                    sections.append(f"- Modified: {', '.join(entry['modified'])}")
                if entry.get("deleted"):
                    sections.append(f"- Deleted: {', '.join(entry['deleted'])}")
                if entry.get("renamed"):
                    for new, old in entry["renamed"].items():
                        sections.append(f"- Renamed: {old} -> {new}")
            sections.append("")

    summary_path = base / "summary.md"
    if summary_path.exists():
        summary = summary_path.read_text(encoding="utf-8")
        sections.append("# Repo Map")
        sections.append("(use mnemo_lookup for method-level details)\n")

        header_size = len("\n".join(sections))
        # Clamp at zero: a negative budget would make summary[:budget] slice
        # from the END of the string and keep almost the whole summary.
        budget = max(0, RECALL_BUDGET - header_size)
        if len(summary) > budget:
            summary = summary[:budget]
            last_nl = summary.rfind("\n")
            if last_nl > 0:
                summary = summary[:last_nl]
            summary += "\n... (use mnemo_lookup for details)"
        sections.append(summary)

    return "\n".join(sections)
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"""Onboarding Mode — generate a complete project overview for new developers."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from ..config import mnemo_path
|
|
8
|
+
from ..intelligence import detect_patterns, detect_dependencies, detect_service_calls
|
|
9
|
+
from ..api_discovery import discover_apis
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def generate_onboarding(repo_root: Path) -> str:
    """Generate a markdown onboarding guide for new team members.

    The guide lists detected services (top-level directories that contain a
    ``Program.cs``), detected patterns, the service call graph, selected key
    dependencies, a pointer to API discovery, fixed getting-started steps and
    the markdown files found in the knowledge directory.

    Args:
        repo_root: Root directory of the repository to describe.

    Returns:
        The guide as a single newline-joined markdown string.
    """
    # Imported once per call instead of once per discovered file.
    from ..config import IGNORE_DIRS

    lines = ["# Project Onboarding Guide\n"]

    # Project basics
    lines.append("## Overview")
    lines.append(f"Repository: `{repo_root.name}`")

    # Detect services
    services = set()
    for program in repo_root.rglob("Program.cs"):
        parts = program.relative_to(repo_root).parts
        if any(part in IGNORE_DIRS for part in parts):
            continue
        # A Program.cs directly in the repo root has no service directory; use
        # the repository name rather than reporting a service named "Program.cs".
        services.add(parts[0] if len(parts) > 1 else (repo_root.name or "."))
    if services:
        lines.append(f"Services: {len(services)} microservices")
        for svc in sorted(services):
            lines.append(f"- **{svc}**")
    lines.append("")

    # Patterns
    patterns = detect_patterns(repo_root)
    if patterns:
        lines.append("## Tech Stack & Patterns")
        for p in patterns:
            lines.append(f"- {p}")
        lines.append("")

    # Architecture
    graph = detect_service_calls(repo_root)
    if graph:
        lines.append("## Service Architecture")
        lines.append("How services communicate:")
        for svc, targets in sorted(graph.items()):
            lines.append(f"- **{svc}** calls → {', '.join(targets)}")
        lines.append("")

    # Key dependencies
    deps = detect_dependencies(repo_root)
    if deps:
        key_markers = ("Cosmos", "Azure", "Identity", "Polly", "Swagger")
        dep_lines = []
        for project, pkgs in sorted(deps.items()):
            key_pkgs = [p for p in pkgs if any(k in p for k in key_markers)]
            if key_pkgs:
                dep_lines.append(f"**{project}:** {', '.join(key_pkgs[:5])}")
        # Only emit the section when at least one project has a key package,
        # so the guide never shows an empty heading.
        if dep_lines:
            lines.append("## Key Dependencies")
            lines.extend(dep_lines)
            lines.append("")

    # API endpoints summary
    lines.append("## API Endpoints")
    lines.append("Use `mnemo_discover_apis` for full details. Key services:")
    for svc in sorted(services):
        lines.append(f"- {svc}")
    lines.append("")

    # Getting started
    lines.append("## Getting Started")
    lines.append("1. Clone the repo")
    lines.append("2. Run `mnemo init` to set up project memory")
    lines.append("3. Ask Amazon Q: \"What does this project do?\"")
    lines.append("4. Ask: \"Show me the architecture\"")
    lines.append("5. Ask: \"I need to add a new [feature], show me similar implementations\"")
    lines.append("")

    # Knowledge base
    kb_path = mnemo_path(repo_root) / "knowledge"
    if kb_path.exists():
        # Sorted so the output does not depend on filesystem iteration order.
        kb_files = sorted(kb_path.rglob("*.md"))
        if kb_files:
            lines.append("## Team Knowledge")
            lines.append("Check `.mnemo/knowledge/` for:")
            for f in kb_files:
                lines.append(f"- {f.name}")
            lines.append("")

    return "\n".join(lines)
|
mnemo/repo_map.py
ADDED
|
@@ -0,0 +1,357 @@
|
|
|
1
|
+
"""Lightweight repo map — stores a hash index and pre-built markdown summary."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import hashlib
|
|
6
|
+
import time
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
from .config import IGNORE_DIRS, SUPPORTED_EXTENSIONS, mnemo_path, REPO_MAP_FILE
|
|
11
|
+
from .chunking import make_code_chunks
|
|
12
|
+
from .retrieval import index_chunks
|
|
13
|
+
from .storage import Collections, get_storage
|
|
14
|
+
|
|
15
|
+
# Name of the JSON changelog file looked up under mnemo_path(repo_root).
CHANGELOG_FILE = "changelog.json"
# NOTE(review): not referenced by the code visible here (hashes are persisted
# through Collections.HASHES) - presumably a legacy name; confirm before removing.
HASH_INDEX_FILE = "hashes.json"
# Files whose size in bytes exceeds this are skipped when scanning the repo.
MAX_FILE_SIZE = 100000
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _should_ignore(path: Path) -> bool:
    """Tell whether any component of *path* is listed in IGNORE_DIRS."""
    for component in path.parts:
        if component in IGNORE_DIRS:
            return True
    return False
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _file_hash(path: Path) -> str:
|
|
25
|
+
return hashlib.md5(path.read_bytes()).hexdigest()
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _get_parser(language: str):
|
|
29
|
+
import tree_sitter_python
|
|
30
|
+
import tree_sitter_javascript
|
|
31
|
+
import tree_sitter_typescript
|
|
32
|
+
import tree_sitter_go
|
|
33
|
+
import tree_sitter_c_sharp
|
|
34
|
+
from tree_sitter import Language, Parser
|
|
35
|
+
|
|
36
|
+
lang_map = {
|
|
37
|
+
"python": tree_sitter_python.language(),
|
|
38
|
+
"javascript": tree_sitter_javascript.language(),
|
|
39
|
+
"typescript": tree_sitter_typescript.language_typescript(),
|
|
40
|
+
"go": tree_sitter_go.language(),
|
|
41
|
+
"csharp": tree_sitter_c_sharp.language(),
|
|
42
|
+
}
|
|
43
|
+
lang = lang_map.get(language)
|
|
44
|
+
if not lang:
|
|
45
|
+
return None
|
|
46
|
+
return Parser(Language(lang))
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _get_node_text(node) -> str:
|
|
50
|
+
return node.text.decode() if node else ""
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
# --- Extractors (return compact dict per file) ---
|
|
54
|
+
|
|
55
|
+
def _extract_csharp(source: bytes, parser) -> dict:
    """Extract using directives and type declarations from C# source.

    Args:
        source: Raw file contents.
        parser: A tree-sitter parser configured for C#.

    Returns:
        A dict with the non-empty keys among ``imports`` (using directives as
        text) and ``classes`` (one dict per class/interface/struct/record with
        ``name`` and, when present, ``implements`` and ``methods``).
    """
    tree = parser.parse(source)
    result: dict = {"imports": [], "classes": []}

    # Node types that may represent a member's return type.
    return_type_kinds = (
        "predefined_type", "identifier", "generic_name",
        "nullable_type", "array_type", "void_keyword", "qualified_name",
    )

    def extract_class(node) -> dict:
        name = _get_node_text(node.child_by_field_name("name"))
        base = ""
        methods = []
        for child in node.children:
            if child.type == "base_list":
                base = child.text.decode(errors="replace").lstrip(": ").strip()
            if child.type == "declaration_list":
                for member in child.children:
                    if member.type not in ("method_declaration", "constructor_declaration"):
                        continue
                    # Looked up once per member instead of once per child part.
                    name_node = member.child_by_field_name("name")
                    mname = _get_node_text(name_node)
                    if not mname:
                        continue
                    params = ""
                    ret = ""
                    for part in member.children:
                        if part.type == "parameter_list":
                            params = part.text.decode(errors="replace")
                        elif part.type in return_type_kinds:
                            # Only a type-like node located before the member
                            # name can be the return type; the last such node wins.
                            if part.end_byte < name_node.start_byte:
                                ret = part.text.decode(errors="replace")
                    sig = f"{ret + ' ' if ret else ''}{mname}{params}".strip()
                    methods.append(sig)
        entry = {"name": name}
        if base:
            entry["implements"] = base
        if methods:
            entry["methods"] = methods
        return entry

    def walk(node):
        for child in node.children:
            if child.type == "using_directive":
                result["imports"].append(child.text.decode(errors="replace").strip())
            elif child.type in ("class_declaration", "interface_declaration",
                                "struct_declaration", "record_declaration"):
                result["classes"].append(extract_class(child))
            elif child.type in ("file_scoped_namespace_declaration",
                                "namespace_declaration", "declaration_list"):
                # Descend into namespaces; type bodies are not descended into.
                walk(child)

    walk(tree.root_node)
    return {k: v for k, v in result.items() if v}
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _extract_python(source: bytes, parser) -> dict:
|
|
107
|
+
tree = parser.parse(source)
|
|
108
|
+
root = tree.root_node
|
|
109
|
+
result: dict = {"imports": [], "classes": [], "functions": []}
|
|
110
|
+
|
|
111
|
+
for node in root.children:
|
|
112
|
+
if node.type in ("import_statement", "import_from_statement"):
|
|
113
|
+
result["imports"].append(node.text.decode().strip())
|
|
114
|
+
|
|
115
|
+
actual = node
|
|
116
|
+
decs = []
|
|
117
|
+
if node.type == "decorated_definition":
|
|
118
|
+
for child in node.children:
|
|
119
|
+
if child.type == "decorator":
|
|
120
|
+
decs.append(child.text.decode().strip())
|
|
121
|
+
elif child.type in ("function_definition", "class_definition"):
|
|
122
|
+
actual = child
|
|
123
|
+
|
|
124
|
+
if actual.type == "function_definition":
|
|
125
|
+
name = _get_node_text(actual.child_by_field_name("name"))
|
|
126
|
+
params = _get_node_text(actual.child_by_field_name("parameters"))
|
|
127
|
+
ret = _get_node_text(actual.child_by_field_name("return_type"))
|
|
128
|
+
sig = f"def {name}{params}"
|
|
129
|
+
if ret:
|
|
130
|
+
sig += f" -> {ret}"
|
|
131
|
+
if decs:
|
|
132
|
+
sig = f"{decs[0]} {sig}"
|
|
133
|
+
result["functions"].append(sig)
|
|
134
|
+
|
|
135
|
+
elif actual.type == "class_definition":
|
|
136
|
+
cname = _get_node_text(actual.child_by_field_name("name"))
|
|
137
|
+
methods = []
|
|
138
|
+
body = actual.child_by_field_name("body")
|
|
139
|
+
if body:
|
|
140
|
+
for child in body.children:
|
|
141
|
+
fn = child
|
|
142
|
+
if child.type == "decorated_definition":
|
|
143
|
+
for sub in child.children:
|
|
144
|
+
if sub.type == "function_definition":
|
|
145
|
+
fn = sub
|
|
146
|
+
if fn.type == "function_definition":
|
|
147
|
+
mname = _get_node_text(fn.child_by_field_name("name"))
|
|
148
|
+
mparams = _get_node_text(fn.child_by_field_name("parameters"))
|
|
149
|
+
methods.append(f"{mname}{mparams}")
|
|
150
|
+
entry = {"name": cname}
|
|
151
|
+
if methods:
|
|
152
|
+
entry["methods"] = methods
|
|
153
|
+
result["classes"].append(entry)
|
|
154
|
+
|
|
155
|
+
return {k: v for k, v in result.items() if v}
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def _extract_js(source: bytes, parser) -> dict:
|
|
159
|
+
tree = parser.parse(source)
|
|
160
|
+
result: dict = {"functions": [], "classes": []}
|
|
161
|
+
|
|
162
|
+
def walk(node):
|
|
163
|
+
if node.type == "function_declaration":
|
|
164
|
+
name = _get_node_text(node.child_by_field_name("name"))
|
|
165
|
+
if name:
|
|
166
|
+
result["functions"].append(name)
|
|
167
|
+
elif node.type == "lexical_declaration":
|
|
168
|
+
text = node.text.decode(errors="replace")
|
|
169
|
+
if "=>" in text:
|
|
170
|
+
result["functions"].append(text.split("=")[0].replace("const", "").replace("let", "").replace("var", "").strip())
|
|
171
|
+
elif node.type == "method_definition":
|
|
172
|
+
name = _get_node_text(node.child_by_field_name("name"))
|
|
173
|
+
if name:
|
|
174
|
+
result["functions"].append(name)
|
|
175
|
+
elif node.type == "class_declaration":
|
|
176
|
+
cname = _get_node_text(node.child_by_field_name("name")) or "AnonymousClass"
|
|
177
|
+
methods = []
|
|
178
|
+
for child in node.children:
|
|
179
|
+
if child.type == "class_body":
|
|
180
|
+
for member in child.children:
|
|
181
|
+
if member.type == "method_definition":
|
|
182
|
+
mname = _get_node_text(member.child_by_field_name("name"))
|
|
183
|
+
if mname:
|
|
184
|
+
methods.append(mname)
|
|
185
|
+
result["classes"].append({"name": cname, "methods": methods})
|
|
186
|
+
for child in node.children:
|
|
187
|
+
walk(child)
|
|
188
|
+
|
|
189
|
+
walk(tree.root_node)
|
|
190
|
+
result["functions"] = sorted(set(result["functions"]))
|
|
191
|
+
return {k: v for k, v in result.items() if v}
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def _extract_go(source: bytes, parser) -> dict:
|
|
195
|
+
tree = parser.parse(source)
|
|
196
|
+
result: dict = {"functions": [], "classes": []}
|
|
197
|
+
for node in tree.root_node.children:
|
|
198
|
+
if node.type == "function_declaration":
|
|
199
|
+
name = _get_node_text(node.child_by_field_name("name"))
|
|
200
|
+
if name:
|
|
201
|
+
result["functions"].append(name)
|
|
202
|
+
elif node.type == "method_declaration":
|
|
203
|
+
name = _get_node_text(node.child_by_field_name("name"))
|
|
204
|
+
receiver = _get_node_text(node.child_by_field_name("receiver"))
|
|
205
|
+
if name:
|
|
206
|
+
symbol = f"{receiver}.{name}" if receiver else name
|
|
207
|
+
result["functions"].append(symbol)
|
|
208
|
+
elif node.type == "type_declaration":
|
|
209
|
+
text = node.text.decode(errors="replace")
|
|
210
|
+
for line in text.splitlines():
|
|
211
|
+
line = line.strip()
|
|
212
|
+
if line.startswith("type ") and " struct" in line:
|
|
213
|
+
parts = line.split()
|
|
214
|
+
if len(parts) >= 2:
|
|
215
|
+
result["classes"].append({"name": parts[1], "methods": []})
|
|
216
|
+
return {k: v for k, v in result.items() if v}
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def _extract_file(source: bytes, language: str) -> dict | None:
    """Run the extractor for *language* over *source*.

    Returns the extractor's dict, or None when no parser or extractor exists
    for the language or when extraction raises.
    """
    parser = _get_parser(language)
    if not parser:
        return None

    extractors = {
        "csharp": _extract_csharp,
        "python": _extract_python,
        "javascript": _extract_js,
        "typescript": _extract_js,
        "go": _extract_go,
    }
    extractor = extractors.get(language)
    if extractor is None:
        return None

    try:
        return extractor(source, parser)
    except Exception:
        # Any failure while parsing or extracting means "no info for this file".
        return None
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
# --- Hash index for change detection ---
|
|
238
|
+
|
|
239
|
+
def _load_hashes(repo_root: Path) -> dict[str, str]:
    """Load the stored path-to-hash mapping, coercing keys and values to str.

    Returns an empty dict when the stored collection is not a dict.
    """
    stored = get_storage(repo_root).read_collection(Collections.HASHES)
    if isinstance(stored, dict):
        return {str(name): str(digest) for name, digest in stored.items()}
    return {}
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def _save_hashes(repo_root: Path, hashes: dict[str, str]):
    """Persist the path-to-hash mapping in the HASHES collection."""
    store = get_storage(repo_root)
    store.write_collection(Collections.HASHES, hashes)
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
def has_changes(repo_root: Path) -> bool:
    """Report whether any tracked file changed since the last map generation.

    Returns True when no hashes are stored yet, when a supported, non-ignored
    file within the size limit has a hash different from the stored one (this
    includes new files), or when a previously hashed path no longer exists.
    Files that cannot be stat'ed or read are skipped.
    """
    old_hashes = _load_hashes(repo_root)
    if not old_hashes:
        return True

    for ext in SUPPORTED_EXTENSIONS:
        for filepath in repo_root.rglob(f"*{ext}"):
            if _should_ignore(filepath):
                continue
            try:
                # stat() can raise as well (e.g. dangling symlink), so it is
                # guarded together with the read done by _file_hash.
                if filepath.stat().st_size > MAX_FILE_SIZE:
                    continue
                current = _file_hash(filepath)
            except OSError:
                continue
            if old_hashes.get(str(filepath.relative_to(repo_root))) != current:
                return True

    # Check for deletions
    return any(not (repo_root / rel).exists() for rel in old_hashes)
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
# --- Summary generation ---
|
|
277
|
+
|
|
278
|
+
def generate_summary(repo_root: Path, index: bool = True) -> str:
    """Generate a compact markdown summary of the repo structure.

    Every supported, non-ignored file within the size limit is hashed, parsed
    (Roslyn output is preferred when available for the file, otherwise
    tree-sitter) and grouped by its first two path components. As side effects
    the hash index is saved and, when *index* is true, the extracted code
    chunks are sent to the code index.

    Args:
        repo_root: Root directory to scan.
        index: Whether to index the extracted chunks.

    Returns:
        The markdown summary text.
    """
    tree: dict[str, dict[str, list[str]]] = {}
    hashes: dict[str, str] = {}
    all_chunks = []

    # Try Roslyn for C# if .NET SDK is available
    from .analyzers import roslyn_available, run_roslyn_analyzer, roslyn_to_mnemo_format
    roslyn_data: dict[str, dict] = {}
    if roslyn_available(repo_root):
        results = run_roslyn_analyzer(repo_root)
        if results:
            roslyn_data = roslyn_to_mnemo_format(results, repo_root)

    for ext, language in SUPPORTED_EXTENSIONS.items():
        for filepath in repo_root.rglob(f"*{ext}"):
            if _should_ignore(filepath):
                continue
            try:
                # stat() may fail too (e.g. dangling symlink); skip such files
                # instead of aborting the whole summary.
                if filepath.stat().st_size > MAX_FILE_SIZE:
                    continue
                source = filepath.read_bytes()
            except OSError:
                continue

            rel_path = filepath.relative_to(repo_root)
            # The hash key stays str(relative path) so it matches has_changes().
            rel = str(rel_path)
            hashes[rel] = hashlib.md5(source).hexdigest()

            # Use the path components rather than splitting on "/", which does
            # not separate components where the native separator is "\".
            parts = rel_path.parts
            module = parts[0] if len(parts) > 1 else "."
            submodule = parts[1] if len(parts) > 2 else "_root"
            bucket = tree.setdefault(module, {}).setdefault(submodule, [])

            # Use Roslyn data if available for this file, else tree-sitter
            info = roslyn_data.get(rel) or _extract_file(source, language)
            rel_short = "/".join(parts[1:]) or rel
            if info:
                all_chunks.extend(make_code_chunks(rel, language, info))

            if info and info.get("classes"):
                class_names = []
                for cls in info["classes"]:
                    name = cls["name"]
                    impl = f" : {cls['implements']}" if cls.get("implements") else ""
                    class_names.append(f"`{name}{impl}`")
                bucket.append(f"  {rel_short} → {', '.join(class_names)}")
            elif info and info.get("functions"):
                bucket.append(f"  {rel_short} ({len(info['functions'])} functions)")
            else:
                bucket.append(f"  {rel_short}")

    # Save hashes
    _save_hashes(repo_root, hashes)

    # Build markdown
    lines = []
    for module in sorted(tree.keys()):
        lines.append(f"**{module}/**")
        for submodule in sorted(tree[module].keys()):
            if submodule != "_root":
                lines.append(f"  - {submodule}/")
            for entry in sorted(tree[module][submodule]):
                lines.append(entry)
    if index and all_chunks:
        index_chunks(repo_root, "code", all_chunks)
    return "\n".join(lines)
|
|
345
|
+
|
|
346
|
+
|
|
347
|
+
def save_summary(repo_root: Path, index: bool = True) -> Path:
    """Build the markdown summary and write it to ``summary.md``.

    Returns the path of the written file, located under mnemo_path(repo_root).
    """
    markdown = generate_summary(repo_root, index=index)
    destination = mnemo_path(repo_root).joinpath("summary.md")
    destination.write_text(markdown, encoding="utf-8")
    return destination
|
|
353
|
+
|
|
354
|
+
|
|
355
|
+
def save_repo_map(repo_root: Path, index: bool = True) -> Path:
    """Regenerate the repo map; this now delegates to save_summary().

    Returns the path that save_summary() wrote.
    """
    written = save_summary(repo_root, index=index)
    return written
|