knowledge-master 0.1.0__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {knowledge_master-0.1.0 → knowledge_master-0.3.0}/PKG-INFO +52 -3
- {knowledge_master-0.1.0 → knowledge_master-0.3.0}/README.md +45 -2
- knowledge_master-0.3.0/knowledge_master/api.py +92 -0
- {knowledge_master-0.1.0 → knowledge_master-0.3.0}/knowledge_master/cli.py +168 -48
- knowledge_master-0.3.0/knowledge_master/connectors.py +134 -0
- {knowledge_master-0.1.0 → knowledge_master-0.3.0}/knowledge_master/embeddings.py +7 -3
- {knowledge_master-0.1.0 → knowledge_master-0.3.0}/knowledge_master/intelligence.py +98 -42
- {knowledge_master-0.1.0 → knowledge_master-0.3.0}/knowledge_master/parsers/git_repo.py +5 -1
- knowledge_master-0.3.0/knowledge_master/rerank.py +60 -0
- {knowledge_master-0.1.0 → knowledge_master-0.3.0}/knowledge_master/server.py +1 -1
- knowledge_master-0.3.0/knowledge_master/static_analysis.py +306 -0
- {knowledge_master-0.1.0 → knowledge_master-0.3.0}/knowledge_master/store.py +15 -3
- knowledge_master-0.3.0/knowledge_master/ts_parsers.py +192 -0
- {knowledge_master-0.1.0 → knowledge_master-0.3.0}/knowledge_master/web.py +33 -4
- {knowledge_master-0.1.0 → knowledge_master-0.3.0}/knowledge_master.egg-info/PKG-INFO +52 -3
- {knowledge_master-0.1.0 → knowledge_master-0.3.0}/knowledge_master.egg-info/SOURCES.txt +9 -1
- {knowledge_master-0.1.0 → knowledge_master-0.3.0}/knowledge_master.egg-info/requires.txt +6 -0
- {knowledge_master-0.1.0 → knowledge_master-0.3.0}/pyproject.toml +7 -1
- knowledge_master-0.3.0/tests/test_api.py +42 -0
- knowledge_master-0.3.0/tests/test_cli.py +35 -0
- knowledge_master-0.3.0/tests/test_static_analysis.py +48 -0
- {knowledge_master-0.1.0 → knowledge_master-0.3.0}/LICENSE +0 -0
- {knowledge_master-0.1.0 → knowledge_master-0.3.0}/knowledge_master/__init__.py +0 -0
- {knowledge_master-0.1.0 → knowledge_master-0.3.0}/knowledge_master/__main__.py +0 -0
- {knowledge_master-0.1.0 → knowledge_master-0.3.0}/knowledge_master/chunking.py +0 -0
- {knowledge_master-0.1.0 → knowledge_master-0.3.0}/knowledge_master/parsers/__init__.py +0 -0
- {knowledge_master-0.1.0 → knowledge_master-0.3.0}/knowledge_master/parsers/markdown.py +0 -0
- {knowledge_master-0.1.0 → knowledge_master-0.3.0}/knowledge_master/watcher.py +0 -0
- {knowledge_master-0.1.0 → knowledge_master-0.3.0}/knowledge_master.egg-info/dependency_links.txt +0 -0
- {knowledge_master-0.1.0 → knowledge_master-0.3.0}/knowledge_master.egg-info/entry_points.txt +0 -0
- {knowledge_master-0.1.0 → knowledge_master-0.3.0}/knowledge_master.egg-info/top_level.txt +0 -0
- {knowledge_master-0.1.0 → knowledge_master-0.3.0}/setup.cfg +0 -0
- {knowledge_master-0.1.0 → knowledge_master-0.3.0}/tests/test_chunking.py +0 -0
- {knowledge_master-0.1.0 → knowledge_master-0.3.0}/tests/test_intelligence.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: knowledge-master
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: Local-first knowledge graph for developers. Your AI agent's permanent memory.
|
|
5
5
|
Author: Milenko Mitrovic
|
|
6
6
|
License: MIT
|
|
@@ -27,6 +27,12 @@ Requires-Dist: gitpython<4.0,>=3.1.0
|
|
|
27
27
|
Requires-Dist: rich<15.0,>=14.0.0
|
|
28
28
|
Requires-Dist: fastapi<1.0,>=0.115.0
|
|
29
29
|
Requires-Dist: uvicorn<1.0,>=0.34.0
|
|
30
|
+
Requires-Dist: pyyaml>=6.0
|
|
31
|
+
Requires-Dist: tree-sitter>=0.23.0
|
|
32
|
+
Requires-Dist: tree-sitter-javascript>=0.23.0
|
|
33
|
+
Requires-Dist: tree-sitter-typescript>=0.23.0
|
|
34
|
+
Requires-Dist: tree-sitter-go>=0.23.0
|
|
35
|
+
Requires-Dist: tree-sitter-rust>=0.23.0
|
|
30
36
|
Provides-Extra: office
|
|
31
37
|
Requires-Dist: python-docx<2.0,>=1.1.0; extra == "office"
|
|
32
38
|
Requires-Dist: openpyxl<4.0,>=3.1.0; extra == "office"
|
|
@@ -41,6 +47,10 @@ Dynamic: license-file
|
|
|
41
47
|
**Your codebase's memory.** A local knowledge graph that gives AI agents real understanding of your architecture — not just text search.
|
|
42
48
|
|
|
43
49
|
[](LICENSE)
|
|
50
|
+

|
|
51
|
+

|
|
52
|
+
|
|
53
|
+
> ⚠️ **Alpha software.** Core features work (search, graph, CLI, MCP server) but some capabilities are early-stage. See [Feature Status](#feature-status) below.
|
|
44
54
|
|
|
45
55
|
---
|
|
46
56
|
|
|
@@ -210,9 +220,11 @@ Your AI agent gets these tools:
|
|
|
210
220
|
| `km start` | Boot Docker containers + pull embedding model |
|
|
211
221
|
| `km stop` | Stop containers |
|
|
212
222
|
| `km index <path>` | Index a git repo or docs directory |
|
|
213
|
-
| `km search <query>` | Semantic search with
|
|
214
|
-
| `km blast-radius <target>` |
|
|
223
|
+
| `km search <query>` | Semantic search with re-ranking |
|
|
224
|
+
| `km blast-radius <target>` | Multi-layer dependency analysis (imports → services → people) |
|
|
225
|
+
| `km who-owns <file>` | File ownership from git blame (weighted by recency) |
|
|
215
226
|
| `km check-conventions <path>` | Verify code follows detected patterns |
|
|
227
|
+
| `km connect <source>` | Pull from external MCP (email, Slack) |
|
|
216
228
|
| `km list` | Show indexed repos, techs, stats |
|
|
217
229
|
| `km remove <name>` | Remove a source from the knowledge base |
|
|
218
230
|
| `km serve` | Start web UI at http://127.0.0.1:9999 |
|
|
@@ -231,6 +243,26 @@ When you index a repo, Knowledge Master detects:
|
|
|
231
243
|
| **People** | Git commit authors and file ownership |
|
|
232
244
|
| **Code structure** | Functions, classes, chunked by AST-aware boundaries |
|
|
233
245
|
|
|
246
|
+
## Feature Status
|
|
247
|
+
|
|
248
|
+
| Feature | Status | Notes |
|
|
249
|
+
|---|---|---|
|
|
250
|
+
| Semantic search + re-ranking | ✅ Stable | Core retrieval works well |
|
|
251
|
+
| Knowledge graph (FalkorDB) | ✅ Stable | Node/edge storage, vector index |
|
|
252
|
+
| CLI commands | ✅ Stable | All commands functional |
|
|
253
|
+
| MCP server | ✅ Stable | search, blast_radius, check_conventions |
|
|
254
|
+
| Web UI + graph viz | ✅ Stable | htmx + D3, no build step |
|
|
255
|
+
| Git repo indexing | ✅ Stable | Parses code, extracts authors |
|
|
256
|
+
| Tech stack detection | ⚡ Basic | Regex over dependency files — works for common cases |
|
|
257
|
+
| Service topology | ⚡ Basic | docker-compose parsing — limited YAML support |
|
|
258
|
+
| Convention detection | ⚡ Basic | Folder structure + file naming patterns |
|
|
259
|
+
| Blast radius | ⚡ Basic | Graph traversal on stored edges (file imports → services → people) — doesn't trace function calls |
|
|
260
|
+
| Email connector (ms-365) | 🧪 Experimental | Works but requires ms-365-mcp setup |
|
|
261
|
+
| Re-ranking | 🧪 Experimental | Novel approach, not benchmarked against cross-encoders |
|
|
262
|
+
| Incremental indexing | 🧪 Experimental | File watcher + git hooks, needs more testing |
|
|
263
|
+
|
|
264
|
+
**Legend:** ✅ Stable — ⚡ Basic (works, limited scope) — 🧪 Experimental (may change)
|
|
265
|
+
|
|
234
266
|
## Comparison
|
|
235
267
|
|
|
236
268
|
| Feature | Knowledge Master | Generic RAG | GitHub Copilot | Glean |
|
|
@@ -259,6 +291,23 @@ python -m knowledge_master.server
|
|
|
259
291
|
python -m knowledge_master.cli status
|
|
260
292
|
```
|
|
261
293
|
|
|
294
|
+
## Security
|
|
295
|
+
|
|
296
|
+
Knowledge Master runs **entirely on your machine**. No data leaves localhost.
|
|
297
|
+
|
|
298
|
+
- All ports bound to `127.0.0.1` (not accessible from LAN)
|
|
299
|
+
- Ollama runs locally — no cloud API calls
|
|
300
|
+
- MCP server uses stdio (no network exposure)
|
|
301
|
+
- Optional API key auth for REST endpoints
|
|
302
|
+
|
|
303
|
+
```bash
|
|
304
|
+
# Enable API key auth
|
|
305
|
+
export KM_API_KEY=$(openssl rand -hex 32)
|
|
306
|
+
km serve
|
|
307
|
+
```
|
|
308
|
+
|
|
309
|
+
See [SECURITY.md](SECURITY.md) for full security model, risks, and hardening guide.
|
|
310
|
+
|
|
262
311
|
## Troubleshooting
|
|
263
312
|
|
|
264
313
|
| Issue | Fix |
|
|
@@ -3,6 +3,10 @@
|
|
|
3
3
|
**Your codebase's memory.** A local knowledge graph that gives AI agents real understanding of your architecture — not just text search.
|
|
4
4
|
|
|
5
5
|
[](LICENSE)
|
|
6
|
+

|
|
7
|
+

|
|
8
|
+
|
|
9
|
+
> ⚠️ **Alpha software.** Core features work (search, graph, CLI, MCP server) but some capabilities are early-stage. See [Feature Status](#feature-status) below.
|
|
6
10
|
|
|
7
11
|
---
|
|
8
12
|
|
|
@@ -172,9 +176,11 @@ Your AI agent gets these tools:
|
|
|
172
176
|
| `km start` | Boot Docker containers + pull embedding model |
|
|
173
177
|
| `km stop` | Stop containers |
|
|
174
178
|
| `km index <path>` | Index a git repo or docs directory |
|
|
175
|
-
| `km search <query>` | Semantic search with
|
|
176
|
-
| `km blast-radius <target>` |
|
|
179
|
+
| `km search <query>` | Semantic search with re-ranking |
|
|
180
|
+
| `km blast-radius <target>` | Multi-layer dependency analysis (imports → services → people) |
|
|
181
|
+
| `km who-owns <file>` | File ownership from git blame (weighted by recency) |
|
|
177
182
|
| `km check-conventions <path>` | Verify code follows detected patterns |
|
|
183
|
+
| `km connect <source>` | Pull from external MCP (email, Slack) |
|
|
178
184
|
| `km list` | Show indexed repos, techs, stats |
|
|
179
185
|
| `km remove <name>` | Remove a source from the knowledge base |
|
|
180
186
|
| `km serve` | Start web UI at http://127.0.0.1:9999 |
|
|
@@ -193,6 +199,26 @@ When you index a repo, Knowledge Master detects:
|
|
|
193
199
|
| **People** | Git commit authors and file ownership |
|
|
194
200
|
| **Code structure** | Functions, classes, chunked by AST-aware boundaries |
|
|
195
201
|
|
|
202
|
+
## Feature Status
|
|
203
|
+
|
|
204
|
+
| Feature | Status | Notes |
|
|
205
|
+
|---|---|---|
|
|
206
|
+
| Semantic search + re-ranking | ✅ Stable | Core retrieval works well |
|
|
207
|
+
| Knowledge graph (FalkorDB) | ✅ Stable | Node/edge storage, vector index |
|
|
208
|
+
| CLI commands | ✅ Stable | All commands functional |
|
|
209
|
+
| MCP server | ✅ Stable | search, blast_radius, check_conventions |
|
|
210
|
+
| Web UI + graph viz | ✅ Stable | htmx + D3, no build step |
|
|
211
|
+
| Git repo indexing | ✅ Stable | Parses code, extracts authors |
|
|
212
|
+
| Tech stack detection | ⚡ Basic | Regex over dependency files — works for common cases |
|
|
213
|
+
| Service topology | ⚡ Basic | docker-compose parsing — limited YAML support |
|
|
214
|
+
| Convention detection | ⚡ Basic | Folder structure + file naming patterns |
|
|
215
|
+
| Blast radius | ⚡ Basic | Graph traversal on stored edges (file imports → services → people) — doesn't trace function calls |
|
|
216
|
+
| Email connector (ms-365) | 🧪 Experimental | Works but requires ms-365-mcp setup |
|
|
217
|
+
| Re-ranking | 🧪 Experimental | Novel approach, not benchmarked against cross-encoders |
|
|
218
|
+
| Incremental indexing | 🧪 Experimental | File watcher + git hooks, needs more testing |
|
|
219
|
+
|
|
220
|
+
**Legend:** ✅ Stable — ⚡ Basic (works, limited scope) — 🧪 Experimental (may change)
|
|
221
|
+
|
|
196
222
|
## Comparison
|
|
197
223
|
|
|
198
224
|
| Feature | Knowledge Master | Generic RAG | GitHub Copilot | Glean |
|
|
@@ -221,6 +247,23 @@ python -m knowledge_master.server
|
|
|
221
247
|
python -m knowledge_master.cli status
|
|
222
248
|
```
|
|
223
249
|
|
|
250
|
+
## Security
|
|
251
|
+
|
|
252
|
+
Knowledge Master runs **entirely on your machine**. No data leaves localhost.
|
|
253
|
+
|
|
254
|
+
- All ports bound to `127.0.0.1` (not accessible from LAN)
|
|
255
|
+
- Ollama runs locally — no cloud API calls
|
|
256
|
+
- MCP server uses stdio (no network exposure)
|
|
257
|
+
- Optional API key auth for REST endpoints
|
|
258
|
+
|
|
259
|
+
```bash
|
|
260
|
+
# Enable API key auth
|
|
261
|
+
export KM_API_KEY=$(openssl rand -hex 32)
|
|
262
|
+
km serve
|
|
263
|
+
```
|
|
264
|
+
|
|
265
|
+
See [SECURITY.md](SECURITY.md) for full security model, risks, and hardening guide.
|
|
266
|
+
|
|
224
267
|
## Troubleshooting
|
|
225
268
|
|
|
226
269
|
| Issue | Fix |
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
"""REST API — JSON endpoints for external tool integration."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from fastapi import APIRouter
|
|
6
|
+
|
|
7
|
+
from . import embeddings, store
|
|
8
|
+
from .parsers import git_repo, markdown
|
|
9
|
+
|
|
10
|
+
# Shared router: every endpoint registered on it is served under the /api/v1 prefix.
router = APIRouter(prefix="/api/v1")
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@router.get("/search")
async def search(q: str, top_k: int = 10, source_type: str = None):
    """Run a semantic search for ``q`` and return the hits as JSON.

    The query text is embedded and matched against the graph. When
    ``source_type`` is given, hits are narrowed to that type *after*
    retrieval, so fewer than ``top_k`` results may be returned.

    Returns:
        ``{"query": q, "results": [...]}``.
    """
    graph = store.get_graph()
    hits = store.graph_context_search(graph, embeddings.embed(q), top_k, query=q)
    if source_type:
        # Post-filter: keep only hits whose "source_type" matches exactly.
        hits = [hit for hit in hits if hit.get("source_type") == source_type]
    return {"query": q, "results": hits}
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@router.get("/blast-radius/{target}")
async def blast_radius(target: str):
    """List the graph entities that depend on ``target``.

    ``target`` is first looked up as a Service (anything reaching it within
    three hops). If that yields no named node, it is looked up as a Tech and
    the repos using it are returned instead.

    Returns:
        ``{"target": ..., "affected_count": int, "affected": [{"type", "name"}]}``.
    """
    graph = store.get_graph()

    # Interpretation 1: the target is a Service.
    rows = graph.query(
        """MATCH (t:Service {name: $name})
           OPTIONAL MATCH (other)-[*1..3]->(t)
           WHERE other <> t
           RETURN labels(other)[0] AS type, other.name AS name""",
        params={"name": target},
    ).result_set

    found_named_node = bool(rows) and any(row[1] is not None for row in rows)
    if not found_named_node:
        # Interpretation 2: the target is a Tech; report the repos that use it.
        rows = graph.query(
            """MATCH (t:Tech {name: $name})
               OPTIONAL MATCH (r:Repo)-[:USES_TECH]->(t)
               RETURN 'Repo' AS type, r.name AS name""",
            params={"name": target},
        ).result_set

    affected = []
    for row in rows or []:
        # OPTIONAL MATCH can yield rows with a null name; skip those.
        if row[1]:
            affected.append({"type": row[0], "name": row[1]})
    return {"target": target, "affected_count": len(affected), "affected": affected}
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@router.get("/conventions/check")
async def check_conventions(path: str = "."):
    """Evaluate the stored conventions against a filesystem path.

    Conventions linked to the repo named after the resolved directory are
    used; when that repo has none, every stored convention is checked.

    Returns:
        ``{"path": <resolved path>, "checks": [{"convention", "category", "passed"}]}``.
    """
    resolved = Path(path).expanduser().resolve()
    path = str(resolved)
    graph = store.get_graph()

    rows = graph.query(
        "MATCH (r:Repo)-[:FOLLOWS]->(c:Convention) WHERE r.name = $name RETURN c.name, c.category",
        params={"name": resolved.name},
    ).result_set
    if not rows:
        # No repo-specific conventions: fall back to all known conventions.
        rows = graph.query("MATCH (c:Convention) RETURN c.name, c.category").result_set

    # Imported lazily, as in the CLI-facing code path.
    from .cli import _check_convention

    checks = [
        {
            "convention": conv_name,
            "category": category,
            "passed": _check_convention(path, conv_name),
        }
        for conv_name, category in (rows or [])
    ]
    return {"path": path, "checks": checks}
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
@router.post("/index")
async def index_source(path: str, type: str = "auto"):
    """Index a git repository or a docs directory into the graph.

    With ``type="auto"`` the path counts as a repo when it contains a
    ``.git`` entry and as docs otherwise. A missing path produces an
    ``{"error": ...}`` payload rather than an exception.

    Returns:
        Whatever the selected indexer returns, or the error payload.
    """
    location = Path(path).expanduser().resolve()
    path = str(location)
    if not location.exists():
        return {"error": f"Path not found: {path}"}

    graph = store.get_graph()
    store.init_schema(graph)

    if type == "auto":
        resolved_type = "repo" if (location / ".git").exists() else "docs"
    else:
        resolved_type = type

    if resolved_type != "repo":
        return markdown.index_directory(path, graph)
    return git_repo.index_repo(path, graph)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
@router.get("/status")
async def status():
    """Return the statistics that ``store.get_stats`` reports for the graph."""
    return store.get_stats(store.get_graph())
|
|
@@ -108,7 +108,7 @@ def search(
|
|
|
108
108
|
"""Semantic search across the knowledge base."""
|
|
109
109
|
graph = store.get_graph()
|
|
110
110
|
vec = embeddings.embed(query)
|
|
111
|
-
results = store.graph_context_search(graph, vec, top_k)
|
|
111
|
+
results = store.graph_context_search(graph, vec, top_k, query=query)
|
|
112
112
|
|
|
113
113
|
table = Table(title=f"Results for: {query}")
|
|
114
114
|
table.add_column("Score", width=6)
|
|
@@ -134,62 +134,135 @@ def search(
|
|
|
134
134
|
|
|
135
135
|
@app.command()
|
|
136
136
|
def blast_radius(
|
|
137
|
-
target: str = typer.Argument(..., help="Service, file, or tech name
|
|
138
|
-
depth: int = typer.Option(
|
|
137
|
+
target: str = typer.Argument(..., help="Service, file, function, or tech name"),
|
|
138
|
+
depth: int = typer.Option(4, "--depth", "-d", help="Traversal depth"),
|
|
139
139
|
):
|
|
140
|
-
"""Show what depends on a target —
|
|
140
|
+
"""Show what depends on a target — multi-layer blast radius analysis."""
|
|
141
141
|
graph = store.get_graph()
|
|
142
|
+
results = _compute_blast_radius(graph, target, depth)
|
|
142
143
|
|
|
143
|
-
|
|
144
|
-
result = graph.query(
|
|
145
|
-
"""MATCH (target:Service {name: $name})
|
|
146
|
-
OPTIONAL MATCH path = (other)-[*1..3]->(target)
|
|
147
|
-
WHERE other <> target
|
|
148
|
-
RETURN labels(other)[0] AS type, other.name AS name,
|
|
149
|
-
length(path) AS distance, type(last(relationships(path))) AS rel
|
|
150
|
-
ORDER BY distance""",
|
|
151
|
-
params={"name": target},
|
|
152
|
-
)
|
|
153
|
-
|
|
154
|
-
if not result.result_set:
|
|
155
|
-
# Try as Tech
|
|
156
|
-
result = graph.query(
|
|
157
|
-
"""MATCH (target:Tech {name: $name})
|
|
158
|
-
OPTIONAL MATCH (r:Repo)-[:USES_TECH]->(target)
|
|
159
|
-
RETURN 'Repo' AS type, r.name AS name, 1 AS distance, 'USES_TECH' AS rel""",
|
|
160
|
-
params={"name": target},
|
|
161
|
-
)
|
|
162
|
-
|
|
163
|
-
if not result.result_set:
|
|
164
|
-
# Try as file/document
|
|
165
|
-
result = graph.query(
|
|
166
|
-
"""MATCH (target:Document) WHERE target.path CONTAINS $name
|
|
167
|
-
OPTIONAL MATCH (c:Chunk)-[:PART_OF]->(target)
|
|
168
|
-
OPTIONAL MATCH (p:Person)-[:AUTHORED]->(target)
|
|
169
|
-
OPTIONAL MATCH (target)-[:IN_REPO]->(r:Repo)
|
|
170
|
-
RETURN 'Repo' AS type, r.name AS name, 1 AS distance, 'CONTAINS' AS rel
|
|
171
|
-
UNION
|
|
172
|
-
MATCH (target:Document) WHERE target.path CONTAINS $name
|
|
173
|
-
OPTIONAL MATCH (p:Person)-[:AUTHORED]->(target)
|
|
174
|
-
RETURN 'Person' AS type, p.name AS name, 1 AS distance, 'AUTHORED' AS rel""",
|
|
175
|
-
params={"name": target},
|
|
176
|
-
)
|
|
177
|
-
|
|
178
|
-
if not result.result_set or all(r[1] is None for r in result.result_set):
|
|
144
|
+
if not results:
|
|
179
145
|
console.print(f"[yellow]No dependencies found for:[/] {target}")
|
|
180
|
-
console.print("[dim]Try: a
|
|
146
|
+
console.print("[dim]Try: a file path, function name, service, or technology[/]")
|
|
181
147
|
return
|
|
182
148
|
|
|
183
149
|
tree = Tree(f"[bold red]💥 Blast radius: {target}[/]")
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
150
|
+
|
|
151
|
+
# Group by confidence
|
|
152
|
+
definite = [r for r in results if r["confidence"] == "definite"]
|
|
153
|
+
likely = [r for r in results if r["confidence"] == "likely"]
|
|
154
|
+
possible = [r for r in results if r["confidence"] == "possible"]
|
|
155
|
+
|
|
156
|
+
if definite:
|
|
157
|
+
branch = tree.add("[bold]Definite impact[/]")
|
|
158
|
+
for r in definite:
|
|
159
|
+
icon = _icon(r["type"])
|
|
160
|
+
branch.add(f"{icon} [bold]{r['name']}[/] [dim]({r['type']}, {r['rel']})[/]")
|
|
161
|
+
|
|
162
|
+
if likely:
|
|
163
|
+
branch = tree.add("[yellow]Likely affected[/]")
|
|
164
|
+
for r in likely:
|
|
165
|
+
icon = _icon(r["type"])
|
|
166
|
+
branch.add(f"{icon} {r['name']} [dim]({r['type']}, {r['rel']})[/]")
|
|
167
|
+
|
|
168
|
+
if possible:
|
|
169
|
+
branch = tree.add("[dim]Possibly affected[/]")
|
|
170
|
+
for r in possible:
|
|
171
|
+
icon = _icon(r["type"])
|
|
172
|
+
branch.add(f"{icon} {r['name']} [dim]({r['type']}, {r['rel']})[/]")
|
|
190
173
|
|
|
191
174
|
console.print(tree)
|
|
192
|
-
console.print(f"\n[dim]{len(
|
|
175
|
+
console.print(f"\n[dim]{len(results)} entities: {len(definite)} definite, {len(likely)} likely, {len(possible)} possible[/]")
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def _compute_blast_radius(graph, target: str, depth: int = 4) -> list[dict]:
|
|
179
|
+
"""Multi-layer blast radius: Symbol → File → Service → Person."""
|
|
180
|
+
results = []
|
|
181
|
+
seen = set()
|
|
182
|
+
|
|
183
|
+
# Layer 1: File-level imports (who imports this file?)
|
|
184
|
+
r = graph.query(
|
|
185
|
+
"""MATCH (src:Document)-[:IMPORTS]->(dst:Document)
|
|
186
|
+
WHERE dst.path CONTAINS $name
|
|
187
|
+
RETURN 'Document' AS type, src.path AS name, 'IMPORTS' AS rel""",
|
|
188
|
+
params={"name": target},
|
|
189
|
+
)
|
|
190
|
+
for row in (r.result_set or []):
|
|
191
|
+
if row[1] and row[1] not in seen:
|
|
192
|
+
seen.add(row[1])
|
|
193
|
+
results.append({"type": row[0], "name": row[1], "rel": row[2], "confidence": "definite"})
|
|
194
|
+
|
|
195
|
+
# Layer 1b: Symbol-level (who defines/uses this function?)
|
|
196
|
+
r = graph.query(
|
|
197
|
+
"""MATCH (f:Function {name: $name})-[:DEFINED_IN]->(d:Document)
|
|
198
|
+
OPTIONAL MATCH (importer:Document)-[:IMPORTS]->(d)
|
|
199
|
+
RETURN 'Document' AS type, importer.path AS name, 'IMPORTS function' AS rel""",
|
|
200
|
+
params={"name": target},
|
|
201
|
+
)
|
|
202
|
+
for row in (r.result_set or []):
|
|
203
|
+
if row[1] and row[1] not in seen:
|
|
204
|
+
seen.add(row[1])
|
|
205
|
+
results.append({"type": row[0], "name": row[1], "rel": row[2], "confidence": "definite"})
|
|
206
|
+
|
|
207
|
+
# Layer 2: Service-level (which service owns affected files?)
|
|
208
|
+
affected_files = [r["name"] for r in results if r["type"] == "Document"]
|
|
209
|
+
affected_files.append(target) # include the target itself
|
|
210
|
+
|
|
211
|
+
r = graph.query(
|
|
212
|
+
"""MATCH (d:Document)-[:IN_REPO]->(repo:Repo)-[:DEFINES_SERVICE]->(svc:Service)
|
|
213
|
+
WHERE any(f IN $files WHERE d.path CONTAINS f)
|
|
214
|
+
RETURN 'Service' AS type, svc.name AS name, 'owns affected file' AS rel""",
|
|
215
|
+
params={"files": affected_files},
|
|
216
|
+
)
|
|
217
|
+
for row in (r.result_set or []):
|
|
218
|
+
if row[1] and row[1] not in seen:
|
|
219
|
+
seen.add(row[1])
|
|
220
|
+
results.append({"type": row[0], "name": row[1], "rel": row[2], "confidence": "likely"})
|
|
221
|
+
|
|
222
|
+
# Layer 2b: Services that depend on affected services
|
|
223
|
+
affected_services = [r["name"] for r in results if r["type"] == "Service"]
|
|
224
|
+
if affected_services:
|
|
225
|
+
r = graph.query(
|
|
226
|
+
"""MATCH (upstream:Service)-[:DEPENDS_ON]->(downstream:Service)
|
|
227
|
+
WHERE downstream.name IN $services
|
|
228
|
+
RETURN 'Service' AS type, upstream.name AS name, 'DEPENDS_ON' AS rel""",
|
|
229
|
+
params={"services": affected_services},
|
|
230
|
+
)
|
|
231
|
+
for row in (r.result_set or []):
|
|
232
|
+
if row[1] and row[1] not in seen:
|
|
233
|
+
seen.add(row[1])
|
|
234
|
+
results.append({"type": row[0], "name": row[1], "rel": row[2], "confidence": "likely"})
|
|
235
|
+
|
|
236
|
+
# Layer 3: Tech-level
|
|
237
|
+
r = graph.query(
|
|
238
|
+
"""MATCH (t:Tech {name: $name})
|
|
239
|
+
OPTIONAL MATCH (repo:Repo)-[:USES_TECH]->(t)
|
|
240
|
+
RETURN 'Repo' AS type, repo.name AS name, 'USES_TECH' AS rel""",
|
|
241
|
+
params={"name": target},
|
|
242
|
+
)
|
|
243
|
+
for row in (r.result_set or []):
|
|
244
|
+
if row[1] and row[1] not in seen:
|
|
245
|
+
seen.add(row[1])
|
|
246
|
+
results.append({"type": row[0], "name": row[1], "rel": row[2], "confidence": "possible"})
|
|
247
|
+
|
|
248
|
+
# Layer 4: People (who authored affected files?)
|
|
249
|
+
r = graph.query(
|
|
250
|
+
"""MATCH (p:Person)-[:AUTHORED]->(d:Document)
|
|
251
|
+
WHERE any(f IN $files WHERE d.path = f)
|
|
252
|
+
RETURN 'Person' AS type, p.name AS name, 'AUTHORED affected file' AS rel""",
|
|
253
|
+
params={"files": affected_files},
|
|
254
|
+
)
|
|
255
|
+
for row in (r.result_set or []):
|
|
256
|
+
if row[1] and row[1] not in seen:
|
|
257
|
+
seen.add(row[1])
|
|
258
|
+
results.append({"type": row[0], "name": row[1], "rel": row[2], "confidence": "possible"})
|
|
259
|
+
|
|
260
|
+
return results
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
def _icon(node_type: str) -> str:
|
|
264
|
+
return {"Repo": "📦", "Service": "⚙️", "Person": "👤", "Document": "📄",
|
|
265
|
+
"Tech": "🔧", "Function": "🔧", "Class": "🏗️"}.get(node_type, "•")
|
|
193
266
|
|
|
194
267
|
|
|
195
268
|
@app.command()
|
|
@@ -313,6 +386,33 @@ def remove(source: str = typer.Argument(..., help="Repo name or doc path to remo
|
|
|
313
386
|
console.print(f"[yellow]Not found:[/] {source}")
|
|
314
387
|
|
|
315
388
|
|
|
389
|
+
|
|
390
|
+
@app.command()
def connect(
    source: str = typer.Argument(..., help="Source to pull from: outlook, slack, or a custom name (with --command and --tool)"),
    command: str = typer.Option(None, "--command", "-c", help="Custom MCP server command"),
    tool: str = typer.Option(None, "--tool", "-t", help="Tool name to call on the MCP server"),
):
    """Pull and index data from an external MCP server (email, Slack, etc.)."""
    # NOTE: the docstring above is the CLI help text, so details live in comments.
    # When both --command and --tool are given, `source` is registered as a
    # custom source first; otherwise it must already exist in SOURCES.
    import os
    import shlex

    from .connectors import sync_pull_and_index, add_custom_source, SOURCES

    if command and tool:
        # shlex keeps quoted arguments together ('--name "My Box"'), which a
        # plain str.split() would break apart. Non-POSIX mode on Windows so
        # backslashes in paths are not treated as escape characters.
        argv = shlex.split(command, posix=os.name != "nt")
        if not argv:
            console.print("[red]✗ Failed:[/] --command is empty")
            raise typer.Exit(1)
        add_custom_source(source, argv, tool)

    if source not in SOURCES:
        console.print(f"[yellow]Unknown source:[/] {source}")
        console.print(f"[dim]Available: {', '.join(SOURCES.keys())}[/]")
        console.print("[dim]Or use --command and --tool for custom MCP servers[/]")
        raise typer.Exit(1)

    console.print(f"[bold blue]Connecting to {source}...[/]")
    try:
        result = sync_pull_and_index(source)
        console.print(f"[green]✓ Done![/] {json.dumps(result)}")
    except Exception as e:
        # Top-level CLI boundary: report any failure and exit non-zero.
        console.print(f"[red]✗ Failed:[/] {e}")
        raise typer.Exit(1)
|
|
415
|
+
|
|
316
416
|
@app.command()
|
|
317
417
|
def status():
|
|
318
418
|
"""Check system health."""
|
|
@@ -340,5 +440,25 @@ def serve(port: int = typer.Option(9999, help="Port for web UI")):
|
|
|
340
440
|
uvicorn.run(create_app(), host="127.0.0.1", port=port)
|
|
341
441
|
|
|
342
442
|
|
|
443
|
+
@app.command(name="who-owns")
def who_owns(file: str = typer.Argument(..., help="File path to check ownership")):
    """Show who owns a file based on git blame analysis."""
    # Only the single highest-weight OWNS edge among documents whose path
    # contains `file` is reported.
    rows = store.get_graph().query(
        """MATCH (p:Person)-[r:OWNS]->(d:Document)
           WHERE d.path CONTAINS $file
           RETURN p.name, r.weight, d.path
           ORDER BY r.weight DESC LIMIT 1""",
        params={"file": file},
    ).result_set

    if not rows:
        console.print(f"[yellow]No ownership data for:[/] {file}")
        console.print("[dim]Run 'km index <repo>' first to extract ownership.[/]")
        return

    owner, weight, doc_path = rows[0]
    console.print(f"[bold]{doc_path}[/]")
    console.print(f" Owner: [green]{owner}[/] (weight: {weight:.2f})")
|
|
461
|
+
|
|
462
|
+
|
|
343
463
|
if __name__ == "__main__":
|
|
344
464
|
app()
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
"""MCP Connector — index data from external MCP servers (email, Slack, etc.)."""
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import json
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
|
|
7
|
+
from . import chunking, embeddings, store
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
class MCPSource:
    """Describes one external MCP server and the tool call used to fetch data.

    Fields, in constructor order:
        name: Human-readable label for the source.
        command: Argument vector used to launch the server (program first).
        tool_name: Tool to call on the server to get data.
        tool_args: Arguments passed to that tool.
        source_type: Category tag such as ``email``, ``slack`` or ``docs``.
    """

    name: str
    command: list[str]
    tool_name: str
    tool_args: dict
    source_type: str
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# Built-in source registry, keyed by the name used on the command line.
# The launch commands below are not bundled; they must be installed separately.
SOURCES = dict(
    outlook=MCPSource(
        name="Microsoft 365 Emails",
        command=["npx", "@subzone81/ms-365-mcp", "--preset", "mail"],
        tool_name="list-mail-messages",
        tool_args={"top": 50},
        source_type="email",
    ),
    slack=MCPSource(
        name="Slack Messages",
        command=["npx", "@modelcontextprotocol/server-slack"],
        tool_name="slack_search_messages",
        tool_args={"query": ""},
        source_type="slack",
    ),
)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _as_text(value) -> str:
    """Coerce an MCP item field to a plain string ('' when unusable)."""
    if isinstance(value, str):
        return value
    if isinstance(value, dict):
        # Some servers nest the text, e.g. Microsoft Graph-style messages with
        # body={"content": ...} and from={"emailAddress": {"address", "name"}}.
        # NOTE(review): shape assumed from the Graph message schema — confirm
        # against the actual output of the configured MCP server.
        for key in ("content", "text", "address", "name", "emailAddress"):
            nested = _as_text(value.get(key))
            if nested.strip():
                return nested
    return ""


def _first_text(item: dict, keys) -> str:
    """Return the first non-blank textual value among ``keys`` of ``item``."""
    for key in keys:
        text = _as_text(item.get(key))
        if text.strip():
            return text
    return ""


async def pull_and_index(source: MCPSource, graph=None):
    """Connect to an MCP server, pull data, and index it into our graph.

    Launches ``source.command`` over stdio, calls ``source.tool_name`` with
    ``source.tool_args``, and stores every returned item that carries at
    least 20 characters of text as a document with embedded chunks. When an
    author is present, a person node is upserted and linked to the document.

    Args:
        source: The MCP server and tool call to use.
        graph: Graph to write to; when ``None`` the default graph is opened
            and its schema initialised.

    Returns:
        ``{"source": source.name, "items_indexed": <count>}``.
    """
    from mcp import ClientSession
    from mcp.client.stdio import stdio_client, StdioServerParameters

    if graph is None:
        graph = store.get_graph()
        store.init_schema(graph)

    params = StdioServerParameters(command=source.command[0], args=source.command[1:])

    async with stdio_client(params) as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()

            # Call the tool to get data
            result = await session.call_tool(source.tool_name, source.tool_args)

            items = _parse_mcp_result(result)
            indexed = 0

            for item in items:
                # Parsed JSON lists may contain scalars; only objects are indexable.
                if not isinstance(item, dict):
                    continue

                text = _first_text(item, ("text", "content", "body"))
                if len(text.strip()) < 20:
                    continue  # too short to be worth embedding

                title = _first_text(item, ("subject", "title", "name"))
                author = _first_text(item, ("from", "author", "user"))
                # `or` (not a .get default) so a null id/url falls through
                # instead of producing a "<type>/None" path.
                source_id = item.get("id") or item.get("url") or title

                # Chunk and embed
                chunks = chunking.chunk_text(text)
                vectors = embeddings.embed_batch(chunks)

                # Store document
                doc_path = f"{source.source_type}/{source_id}"
                store.upsert_document(graph, doc_path, source.source_type, {"title": title})

                # Store person if we have author info
                if author:
                    email = author if "@" in author else ""
                    store.upsert_person(graph, author, email)
                    store.link_person_authored(graph, email or author, doc_path)

                # Store chunks
                for i, (chunk_text, vector) in enumerate(zip(chunks, vectors)):
                    cid = chunking.chunk_id(doc_path, i)
                    store.upsert_chunk(graph, cid, chunk_text, vector,
                                       {"source": doc_path, "source_type": source.source_type})
                    store.link_chunk_to_document(graph, cid, doc_path)

                indexed += 1

            return {"source": source.name, "items_indexed": indexed}
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def _parse_mcp_result(result) -> list[dict]:
|
|
96
|
+
"""Parse MCP tool result into a list of items."""
|
|
97
|
+
items = []
|
|
98
|
+
for content in result.content:
|
|
99
|
+
if hasattr(content, "text"):
|
|
100
|
+
try:
|
|
101
|
+
data = json.loads(content.text)
|
|
102
|
+
if isinstance(data, list):
|
|
103
|
+
items.extend(data)
|
|
104
|
+
elif isinstance(data, dict):
|
|
105
|
+
if "results" in data:
|
|
106
|
+
items.extend(data["results"])
|
|
107
|
+
elif "messages" in data:
|
|
108
|
+
items.extend(data["messages"])
|
|
109
|
+
elif "items" in data:
|
|
110
|
+
items.extend(data["items"])
|
|
111
|
+
else:
|
|
112
|
+
items.append(data)
|
|
113
|
+
except json.JSONDecodeError:
|
|
114
|
+
# Plain text — treat as single item
|
|
115
|
+
items.append({"text": content.text, "title": "mcp-result"})
|
|
116
|
+
return items
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def sync_pull_and_index(source_key: str, graph=None):
    """Blocking entry point for the CLI: pull and index a registered source.

    Raises:
        ValueError: If ``source_key`` is not a key of ``SOURCES``.
    """
    source = SOURCES.get(source_key)
    if source is None:
        available = ", ".join(SOURCES.keys())
        raise ValueError(f"Unknown source: {source_key}. Available: {available}")
    # Drive the async implementation to completion on a fresh event loop.
    return asyncio.run(pull_and_index(source, graph))
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def add_custom_source(name: str, command: list[str], tool_name: str,
                      tool_args: dict = None, source_type: str = "external"):
    """Register a custom MCP source under ``name`` in ``SOURCES``.

    An existing entry with the same name is replaced. A missing or empty
    ``tool_args`` is stored as a fresh empty dict.
    """
    custom = MCPSource(
        name=name,
        command=command,
        tool_name=tool_name,
        tool_args=tool_args if tool_args else {},
        source_type=source_type,
    )
    SOURCES[name] = custom
|
|
@@ -1,13 +1,17 @@
|
|
|
1
1
|
"""Embedding client using Ollama local models."""
|
|
2
2
|
|
|
3
|
-
import
|
|
3
|
+
from ollama import Client
|
|
4
4
|
|
|
5
5
|
MODEL = "nomic-embed-text"
|
|
6
|
+
TIMEOUT = 30 # seconds
|
|
7
|
+
|
|
8
|
+
# Create client with timeout
|
|
9
|
+
_client = Client(timeout=TIMEOUT)
|
|
6
10
|
|
|
7
11
|
|
|
8
12
|
def embed(text: str) -> list[float]:
|
|
9
13
|
"""Embed a single text string, returns vector."""
|
|
10
|
-
response =
|
|
14
|
+
response = _client.embed(model=MODEL, input=text)
|
|
11
15
|
return response["embeddings"][0]
|
|
12
16
|
|
|
13
17
|
|
|
@@ -16,6 +20,6 @@ def embed_batch(texts: list[str], batch_size: int = 64) -> list[list[float]]:
|
|
|
16
20
|
vectors = []
|
|
17
21
|
for i in range(0, len(texts), batch_size):
|
|
18
22
|
batch = texts[i : i + batch_size]
|
|
19
|
-
response =
|
|
23
|
+
response = _client.embed(model=MODEL, input=batch)
|
|
20
24
|
vectors.extend(response["embeddings"])
|
|
21
25
|
return vectors
|