kg-mcp 0.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kg_mcp/__init__.py +5 -0
- kg_mcp/__main__.py +8 -0
- kg_mcp/cli/__init__.py +3 -0
- kg_mcp/cli/setup.py +1100 -0
- kg_mcp/cli/status.py +344 -0
- kg_mcp/codegraph/__init__.py +3 -0
- kg_mcp/codegraph/indexer.py +296 -0
- kg_mcp/codegraph/model.py +170 -0
- kg_mcp/config.py +83 -0
- kg_mcp/kg/__init__.py +3 -0
- kg_mcp/kg/apply_schema.py +93 -0
- kg_mcp/kg/ingest.py +253 -0
- kg_mcp/kg/neo4j.py +155 -0
- kg_mcp/kg/repo.py +756 -0
- kg_mcp/kg/retrieval.py +225 -0
- kg_mcp/kg/schema.cypher +176 -0
- kg_mcp/llm/__init__.py +4 -0
- kg_mcp/llm/client.py +291 -0
- kg_mcp/llm/prompts/__init__.py +8 -0
- kg_mcp/llm/prompts/extractor.py +84 -0
- kg_mcp/llm/prompts/linker.py +117 -0
- kg_mcp/llm/schemas.py +248 -0
- kg_mcp/main.py +195 -0
- kg_mcp/mcp/__init__.py +3 -0
- kg_mcp/mcp/change_schemas.py +140 -0
- kg_mcp/mcp/prompts.py +223 -0
- kg_mcp/mcp/resources.py +218 -0
- kg_mcp/mcp/tools.py +537 -0
- kg_mcp/security/__init__.py +3 -0
- kg_mcp/security/auth.py +121 -0
- kg_mcp/security/origin.py +112 -0
- kg_mcp/utils.py +100 -0
- kg_mcp-0.1.8.dist-info/METADATA +86 -0
- kg_mcp-0.1.8.dist-info/RECORD +36 -0
- kg_mcp-0.1.8.dist-info/WHEEL +4 -0
- kg_mcp-0.1.8.dist-info/entry_points.txt +4 -0
kg_mcp/cli/status.py
ADDED
@@ -0,0 +1,344 @@
#!/usr/bin/env python3
"""
Status and health check command for MCP-KG-Memory.

Provides a quick overview of system health:
- Docker status
- Neo4j container health
- Configuration summary
"""

import json
import os
import subprocess
import sys
from pathlib import Path
from typing import Optional, Tuple

try:
    from rich.console import Console
    from rich.panel import Panel
    from rich.table import Table
except ImportError:
    print("Installing rich...")
    subprocess.check_call([sys.executable, "-m", "pip", "install", "rich", "-q"])
    from rich.console import Console
    from rich.panel import Panel
    from rich.table import Table

console = Console()


def check_docker_running() -> Tuple[bool, str]:
    """Check if Docker daemon is running."""
    try:
        result = subprocess.run(
            ["docker", "info"],
            capture_output=True,
            text=True,
            timeout=10,
        )
        if result.returncode == 0:
            return True, "Running"
        return False, "Not running"
    except FileNotFoundError:
        return False, "Not installed"
    except subprocess.TimeoutExpired:
        return False, "Timeout"
    except Exception as e:
        return False, str(e)


def check_neo4j_container() -> Tuple[bool, str, Optional[str]]:
    """Check Neo4j container status."""
    try:
        result = subprocess.run(
            ["docker", "ps", "--filter", "name=kg-memory-neo4j", "--format", "{{.Status}}"],
            capture_output=True,
            text=True,
            timeout=10,
        )
        if result.returncode == 0 and result.stdout.strip():
            status = result.stdout.strip()
            is_healthy = "healthy" in status.lower() or "Up" in status
            return is_healthy, status, None

        # Check if container exists but is stopped
        result = subprocess.run(
            ["docker", "ps", "-a", "--filter", "name=kg-memory-neo4j", "--format", "{{.Status}}"],
            capture_output=True,
            text=True,
            timeout=10,
        )
        if result.stdout.strip():
            return False, f"Stopped: {result.stdout.strip()}", "Run: docker compose up -d neo4j"

        return False, "Not created", "Run: docker compose up -d neo4j"
    except Exception as e:
        return False, str(e), None


def get_neo4j_ports() -> Tuple[Optional[str], Optional[str]]:
    """Get Neo4j port mappings."""
    try:
        result = subprocess.run(
            ["docker", "port", "kg-memory-neo4j"],
            capture_output=True,
            text=True,
            timeout=5,
        )
        if result.returncode == 0:
            browser_port = "7474"
            bolt_port = "7687"
            for line in result.stdout.strip().split("\n"):
                if "7474" in line:
                    browser_port = line.split(":")[-1]
                elif "7687" in line:
                    bolt_port = line.split(":")[-1]
            return browser_port, bolt_port
    except Exception:
        pass
    return "7474", "7687"


def get_config() -> dict:
    """Load configuration from environment or config file."""
    config = {
        "neo4j_uri": os.environ.get("NEO4J_URI", "bolt://localhost:7687"),
        "neo4j_user": os.environ.get("NEO4J_USER", "neo4j"),
        "neo4j_password": os.environ.get("NEO4J_PASSWORD", ""),
        "llm_model": os.environ.get("LLM_MODEL", "gemini/gemini-2.5-pro-preview-05-06"),
        "has_gemini_key": bool(os.environ.get("GEMINI_API_KEY")),
        "has_litellm_key": bool(os.environ.get("LITELLM_API_KEY")),
    }

    # Try to load from .env in common locations
    for env_path in [
        Path.cwd() / ".env",
        Path.cwd().parent / ".env",
        Path.home() / ".kg-mcp" / "config.json",
    ]:
        if env_path.exists():
            if env_path.suffix == ".json":
                try:
                    with open(env_path) as f:
                        config.update(json.load(f))
                except Exception:
                    pass
            else:
                # Parse .env file
                try:
                    with open(env_path) as f:
                        for line in f:
                            line = line.strip()
                            if line and not line.startswith("#") and "=" in line:
                                key, value = line.split("=", 1)
                                key = key.strip().lower()
                                value = value.strip().strip('"').strip("'")
                                if key == "neo4j_uri":
                                    config["neo4j_uri"] = value
                                elif key == "neo4j_user":
                                    config["neo4j_user"] = value
                                elif key == "neo4j_password":
                                    config["neo4j_password"] = value
                                elif key == "llm_model":
                                    config["llm_model"] = value
                                elif key == "gemini_api_key" and value:
                                    config["has_gemini_key"] = True
                                elif key == "litellm_api_key" and value:
                                    config["has_litellm_key"] = True
                except Exception:
                    pass

    return config


def check_docker_autostart() -> Tuple[bool, str]:
    """Check if Docker Desktop is configured to start on login (macOS)."""
    if sys.platform != "darwin":
        return True, "N/A (non-macOS)"

    # Check Docker Desktop plist
    plist_path = Path.home() / "Library" / "LaunchAgents" / "com.docker.docker.plist"
    docker_app_settings = Path.home() / "Library" / "Group Containers" / "group.com.docker" / "settings.json"

    try:
        if docker_app_settings.exists():
            with open(docker_app_settings) as f:
                settings = json.load(f)
                if settings.get("openAtStartup", False):
                    return True, "Enabled"
    except Exception:
        pass

    return False, "Disabled (enable in Docker Desktop → Settings → General → Start Docker Desktop when you sign in)"


def print_status():
    """Print comprehensive status."""
    console.print()
    console.print(Panel("[bold]🧠 MCP-KG-Memory Status[/]", style="blue"))
    console.print()

    # Docker status
    docker_ok, docker_status = check_docker_running()
    docker_icon = "✅" if docker_ok else "❌"
    console.print(f" {docker_icon} Docker: [{'green' if docker_ok else 'red'}]{docker_status}[/]")

    if not docker_ok:
        console.print()
        console.print(" [yellow]💡 Tip: Start Docker Desktop and try again[/]")
        console.print()
        return

    # Docker auto-start
    autostart_ok, autostart_status = check_docker_autostart()
    autostart_icon = "✅" if autostart_ok else "⚠️"
    console.print(f" {autostart_icon} Docker Auto-Start: [{'green' if autostart_ok else 'yellow'}]{autostart_status}[/]")

    # Neo4j container
    neo4j_ok, neo4j_status, neo4j_fix = check_neo4j_container()
    neo4j_icon = "✅" if neo4j_ok else "❌"
    console.print(f" {neo4j_icon} Neo4j Container: [{'green' if neo4j_ok else 'red'}]{neo4j_status}[/]")

    if neo4j_fix:
        console.print(f"   └─ [yellow]Fix: {neo4j_fix}[/]")

    # Neo4j endpoints
    if neo4j_ok:
        browser_port, bolt_port = get_neo4j_ports()
        console.print(f"   └─ Browser: [cyan]http://localhost:{browser_port}[/]")
        console.print(f"   └─ Bolt: [cyan]bolt://localhost:{bolt_port}[/]")

    # Configuration
    config = get_config()
    console.print()
    console.print(" [bold]Configuration:[/]")

    llm_ok = config["has_gemini_key"] or config["has_litellm_key"]
    llm_icon = "✅" if llm_ok else "❌"
    llm_provider = "Gemini" if config["has_gemini_key"] else ("LiteLLM" if config["has_litellm_key"] else "Not configured")
    console.print(f" {llm_icon} LLM Provider: [{'green' if llm_ok else 'red'}]{llm_provider}[/]")
    console.print(f"   └─ Model: [dim]{config['llm_model']}[/]")

    if config["neo4j_password"]:
        pwd_display = config["neo4j_password"][:4] + "..." if len(config["neo4j_password"]) > 4 else "***"
    else:
        pwd_display = "[not set]"
    console.print(f" ℹ️ Neo4j User: [dim]{config['neo4j_user']}[/] / Password: [dim]{pwd_display}[/]")

    # Overall status
    console.print()
    all_ok = docker_ok and neo4j_ok and llm_ok
    if all_ok:
        console.print(Panel("[bold green]✓ All systems operational![/]", style="green"))
    else:
        issues = []
        if not docker_ok:
            issues.append("Docker not running")
        if not neo4j_ok:
            issues.append("Neo4j not healthy")
        if not llm_ok:
            issues.append("LLM not configured")
        console.print(Panel(f"[bold yellow]⚠️ Issues: {', '.join(issues)}[/]", style="yellow"))

    console.print()


def doctor():
    """Run diagnostics and attempt to fix common issues."""
    console.print()
    console.print(Panel("[bold]🩺 MCP-KG-Memory Doctor[/]", style="blue"))
    console.print()

    issues_found = 0
    issues_fixed = 0

    # Check Docker
    docker_ok, docker_status = check_docker_running()
    if not docker_ok:
        issues_found += 1
        console.print(" ❌ Docker is not running")
        if sys.platform == "darwin":
            console.print("   Attempting to start Docker Desktop...")
            try:
                subprocess.run(["open", "-a", "Docker"], check=True)
                console.print("   [green]✓ Docker Desktop launched. Please wait ~30 seconds.[/]")
                issues_fixed += 1
            except Exception as e:
                console.print(f"   [red]Failed: {e}[/]")
    else:
        console.print(" ✅ Docker is running")

    if docker_ok:
        # Check Neo4j container
        neo4j_ok, neo4j_status, _ = check_neo4j_container()
        if not neo4j_ok:
            issues_found += 1
            console.print(" ❌ Neo4j container is not running")
            console.print("   Attempting to start Neo4j...")
            try:
                # Find project root
                for check_dir in [Path.cwd(), Path.cwd().parent]:
                    if (check_dir / "docker-compose.yml").exists():
                        result = subprocess.run(
                            ["docker", "compose", "up", "-d", "neo4j"],
                            cwd=check_dir,
                            capture_output=True,
                            text=True,
                        )
                        if result.returncode == 0:
                            console.print("   [green]✓ Neo4j container started[/]")
                            issues_fixed += 1
                        else:
                            console.print(f"   [red]Failed: {result.stderr}[/]")
                        break
                else:
                    console.print("   [yellow]Could not find docker-compose.yml[/]")
            except Exception as e:
                console.print(f"   [red]Failed: {e}[/]")
        else:
            console.print(" ✅ Neo4j container is healthy")

    # Check LLM configuration
    config = get_config()
    if not config["has_gemini_key"] and not config["has_litellm_key"]:
        issues_found += 1
        console.print(" ❌ LLM not configured")
        console.print("   [yellow]Run 'kg-mcp-setup' to configure LLM API key[/]")
    else:
        console.print(" ✅ LLM is configured")

    # Summary
    console.print()
    if issues_found == 0:
        console.print(Panel("[bold green]✓ No issues found![/]", style="green"))
    elif issues_fixed == issues_found:
        console.print(Panel(f"[bold green]✓ Fixed {issues_fixed}/{issues_found} issues![/]", style="green"))
    else:
        console.print(Panel(f"[bold yellow]⚠️ Fixed {issues_fixed}/{issues_found} issues. Some require manual intervention.[/]", style="yellow"))

    console.print()


def main():
    """Entry point for kg-mcp-status command."""
    import argparse

    parser = argparse.ArgumentParser(description="MCP-KG-Memory status and diagnostics")
    parser.add_argument(
        "--doctor", "-d",
        action="store_true",
        help="Run diagnostics and attempt to fix common issues"
    )

    args = parser.parse_args()

    if args.doctor:
        doctor()
    else:
        print_status()


if __name__ == "__main__":
    main()
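
For orientation, the checks above can also be reused programmatically, not only through the `kg-mcp-status` entry point. A minimal sketch, not part of the published wheel, assuming the package is installed and importable under the paths shown above:

# Sketch: reuse the individual health checks without the rich-formatted report.
from kg_mcp.cli.status import check_docker_running, check_neo4j_container, get_config

docker_ok, docker_status = check_docker_running()
neo4j_ok, neo4j_status, neo4j_fix = check_neo4j_container()
config = get_config()

print(f"Docker: {docker_status}")
print(f"Neo4j:  {neo4j_status}" + (f" (fix: {neo4j_fix})" if neo4j_fix else ""))
print(f"LLM configured: {config['has_gemini_key'] or config['has_litellm_key']}")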
kg_mcp/codegraph/indexer.py
ADDED
@@ -0,0 +1,296 @@
"""
Code indexer for building the code graph.
V1 implementation using basic AST parsing.

For production use, consider integrating:
- Tree-sitter for multi-language parsing
- LSP integration for IDE data
- Scip/LSIF for pre-computed indices
"""

import hashlib
import logging
import os
from datetime import datetime
from pathlib import Path
from typing import List, Optional, Set

from kg_mcp.codegraph.model import (
    FileInfo,
    Symbol,
    SymbolKind,
    SymbolReference,
    ReferenceKind,
    SourceLocation,
    CodeGraphSnapshot,
    detect_language,
)
from kg_mcp.kg.repo import get_repository

logger = logging.getLogger(__name__)


# File patterns to ignore
IGNORE_PATTERNS = {
    "__pycache__",
    ".git",
    ".svn",
    ".hg",
    "node_modules",
    ".venv",
    "venv",
    ".env",
    "dist",
    "build",
    ".tox",
    ".mypy_cache",
    ".pytest_cache",
    ".coverage",
    "*.pyc",
    "*.pyo",
    "*.egg-info",
}


class CodeIndexer:
    """
    Indexes source code to build a code graph.

    This V1 implementation uses basic file parsing.
    For production, integrate tree-sitter or LSP.
    """

    def __init__(self, project_id: str, root_path: str):
        self.project_id = project_id
        self.root_path = Path(root_path).resolve()
        self.repo = get_repository()

    async def index_codebase(
        self,
        extensions: Optional[List[str]] = None,
        incremental: bool = True,
    ) -> CodeGraphSnapshot:
        """
        Index the entire codebase.

        Args:
            extensions: Optional list of file extensions to index (e.g., [".py", ".js"])
            incremental: If True, only index changed files

        Returns:
            CodeGraphSnapshot with indexed files and symbols
        """
        logger.info(f"Indexing codebase at {self.root_path}")

        # Default to common code extensions
        if extensions is None:
            extensions = [".py", ".js", ".ts", ".jsx", ".tsx", ".java", ".go", ".rs"]

        files: List[FileInfo] = []
        references: List[SymbolReference] = []

        # Walk directory tree
        file_count = 0
        for root, dirs, filenames in os.walk(self.root_path):
            # Filter out ignored directories
            dirs[:] = [d for d in dirs if not self._should_ignore(d)]

            for filename in filenames:
                file_path = Path(root) / filename

                # Check extension
                if extensions and file_path.suffix.lower() not in extensions:
                    continue

                if self._should_ignore(filename):
                    continue

                try:
                    file_info = await self._index_file(file_path)
                    if file_info:
                        files.append(file_info)
                        file_count += 1

                        # Save to graph
                        await self._save_file_to_graph(file_info)
                except Exception as e:
                    logger.warning(f"Failed to index {file_path}: {e}")

        logger.info(f"Indexed {file_count} files with {sum(len(f.symbols) for f in files)} symbols")

        return CodeGraphSnapshot(
            project_id=self.project_id,
            timestamp=datetime.utcnow(),
            files=files,
            references=references,
        )

    async def _index_file(self, file_path: Path) -> Optional[FileInfo]:
        """Index a single file."""
        try:
            content = file_path.read_text(encoding="utf-8", errors="replace")
        except Exception as e:
            logger.debug(f"Could not read {file_path}: {e}")
            return None

        # Compute content hash
        content_hash = hashlib.sha256(content.encode()).hexdigest()[:16]

        # Get file stats
        stat = file_path.stat()
        line_count = content.count("\n") + 1

        # Detect language
        language = detect_language(str(file_path))

        # Create file info
        file_info = FileInfo(
            path=str(file_path.relative_to(self.root_path)),
            language=language,
            content_hash=content_hash,
            size_bytes=stat.st_size,
            line_count=line_count,
            last_modified=datetime.fromtimestamp(stat.st_mtime),
        )

        # Extract symbols based on language
        if language == "python":
            symbols = self._extract_python_symbols(content, file_info.path)
            for symbol in symbols:
                file_info.add_symbol(symbol)

        return file_info

    def _extract_python_symbols(self, content: str, file_path: str) -> List[Symbol]:
        """Extract symbols from Python code using AST."""
        symbols = []

        try:
            import ast

            tree = ast.parse(content)
        except SyntaxError as e:
            logger.debug(f"Syntax error in {file_path}: {e}")
            return symbols

        # Visit all nodes
        for node in ast.walk(tree):
            if isinstance(node, ast.FunctionDef):
                symbols.append(
                    Symbol(
                        fqn=f"{file_path}:{node.name}",
                        name=node.name,
                        kind=SymbolKind.FUNCTION,
                        location=SourceLocation(
                            file_path=file_path,
                            start_line=node.lineno,
                            end_line=node.end_lineno,
                        ),
                        signature=self._get_python_function_signature(node),
                        docstring=ast.get_docstring(node),
                    )
                )
            elif isinstance(node, ast.AsyncFunctionDef):
                symbols.append(
                    Symbol(
                        fqn=f"{file_path}:{node.name}",
                        name=node.name,
                        kind=SymbolKind.FUNCTION,
                        location=SourceLocation(
                            file_path=file_path,
                            start_line=node.lineno,
                            end_line=node.end_lineno,
                        ),
                        signature=self._get_python_function_signature(node),
                        docstring=ast.get_docstring(node),
                        modifiers=["async"],
                    )
                )
            elif isinstance(node, ast.ClassDef):
                symbols.append(
                    Symbol(
                        fqn=f"{file_path}:{node.name}",
                        name=node.name,
                        kind=SymbolKind.CLASS,
                        location=SourceLocation(
                            file_path=file_path,
                            start_line=node.lineno,
                            end_line=node.end_lineno,
                        ),
                        docstring=ast.get_docstring(node),
                    )
                )

        return symbols

    def _get_python_function_signature(self, node) -> str:
        """Extract function signature from AST node."""
        import ast

        args = []
        for arg in node.args.args:
            arg_str = arg.arg
            if arg.annotation:
                try:
                    arg_str += f": {ast.unparse(arg.annotation)}"
                except:
                    pass
            args.append(arg_str)

        returns = ""
        if node.returns:
            try:
                returns = f" -> {ast.unparse(node.returns)}"
            except:
                pass

        return f"def {node.name}({', '.join(args)}){returns}"

    async def _save_file_to_graph(self, file_info: FileInfo) -> None:
        """Save file and its symbols to Neo4j."""
        # Save file as CodeArtifact
        artifact = await self.repo.upsert_code_artifact(
            project_id=self.project_id,
            path=file_info.path,
            kind="file",
            language=file_info.language,
            content_hash=file_info.content_hash,
        )

        # Save symbols
        for symbol in file_info.symbols:
            await self.repo.upsert_symbol(
                artifact_id=artifact["id"],
                fqn=symbol.fqn,
                kind=symbol.kind.value,
            )

    def _should_ignore(self, name: str) -> bool:
        """Check if a file/directory should be ignored."""
        for pattern in IGNORE_PATTERNS:
            if pattern.startswith("*"):
                if name.endswith(pattern[1:]):
                    return True
            elif name == pattern:
                return True
        return False


async def index_project(
    project_id: str,
    root_path: str,
    extensions: Optional[List[str]] = None,
) -> CodeGraphSnapshot:
    """
    Convenience function to index a project.

    Args:
        project_id: Project ID in the knowledge graph
        root_path: Root path of the codebase
        extensions: Optional list of extensions to index

    Returns:
        CodeGraphSnapshot with indexed content
    """
    indexer = CodeIndexer(project_id, root_path)
    return await indexer.index_codebase(extensions=extensions)
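
For orientation, a minimal driver for the indexer above; this sketch is not part of the published wheel, assumes a Neo4j instance reachable through `get_repository()`, and uses a hypothetical project id and path:

# Sketch: drive index_project() from an asyncio entry point.
import asyncio

from kg_mcp.codegraph.indexer import index_project


async def run() -> None:
    # "demo-project" is a hypothetical id; root_path "." indexes the current directory.
    snapshot = await index_project(
        project_id="demo-project",
        root_path=".",
        extensions=[".py"],
    )
    print(f"Indexed {len(snapshot.files)} files")


if __name__ == "__main__":
    asyncio.run(run())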