codegraphy 0.1.1__tar.gz → 2.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codegraphy-0.1.1 → codegraphy-2.0.0}/PKG-INFO +34 -1
- {codegraphy-0.1.1 → codegraphy-2.0.0}/README.md +33 -0
- {codegraphy-0.1.1 → codegraphy-2.0.0}/codegraphy/__init__.py +1 -1
- {codegraphy-0.1.1 → codegraphy-2.0.0}/codegraphy/cli.py +77 -47
- codegraphy-2.0.0/codegraphy/db/store.py +204 -0
- {codegraphy-0.1.1 → codegraphy-2.0.0}/codegraphy/indexer/python.py +10 -2
- codegraphy-2.0.0/codegraphy/indexer/walker.py +104 -0
- {codegraphy-0.1.1 → codegraphy-2.0.0}/codegraphy/mcp/server.py +33 -15
- {codegraphy-0.1.1 → codegraphy-2.0.0}/codegraphy.egg-info/PKG-INFO +34 -1
- codegraphy-0.1.1/codegraphy/db/store.py +0 -162
- codegraphy-0.1.1/codegraphy/indexer/walker.py +0 -77
- {codegraphy-0.1.1 → codegraphy-2.0.0}/LICENSE +0 -0
- {codegraphy-0.1.1 → codegraphy-2.0.0}/codegraphy/config.py +0 -0
- {codegraphy-0.1.1 → codegraphy-2.0.0}/codegraphy/db/__init__.py +0 -0
- {codegraphy-0.1.1 → codegraphy-2.0.0}/codegraphy/db/schema.py +0 -0
- {codegraphy-0.1.1 → codegraphy-2.0.0}/codegraphy/indexer/__init__.py +0 -0
- {codegraphy-0.1.1 → codegraphy-2.0.0}/codegraphy/indexer/base.py +0 -0
- {codegraphy-0.1.1 → codegraphy-2.0.0}/codegraphy/mcp/__init__.py +0 -0
- {codegraphy-0.1.1 → codegraphy-2.0.0}/codegraphy/plugins/__init__.py +0 -0
- {codegraphy-0.1.1 → codegraphy-2.0.0}/codegraphy/plugins/base.py +0 -0
- {codegraphy-0.1.1 → codegraphy-2.0.0}/codegraphy/plugins/django.py +0 -0
- {codegraphy-0.1.1 → codegraphy-2.0.0}/codegraphy.egg-info/SOURCES.txt +0 -0
- {codegraphy-0.1.1 → codegraphy-2.0.0}/codegraphy.egg-info/dependency_links.txt +0 -0
- {codegraphy-0.1.1 → codegraphy-2.0.0}/codegraphy.egg-info/entry_points.txt +0 -0
- {codegraphy-0.1.1 → codegraphy-2.0.0}/codegraphy.egg-info/requires.txt +0 -0
- {codegraphy-0.1.1 → codegraphy-2.0.0}/codegraphy.egg-info/top_level.txt +0 -0
- {codegraphy-0.1.1 → codegraphy-2.0.0}/pyproject.toml +0 -0
- {codegraphy-0.1.1 → codegraphy-2.0.0}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: codegraphy
|
|
3
|
-
Version: 0.1.1
|
|
3
|
+
Version: 2.0.0
|
|
4
4
|
Summary: SQLite/PostgreSQL codebase knowledge graph and MCP server for Claude Code
|
|
5
5
|
Author: Charan Kulal
|
|
6
6
|
License-Expression: MIT
|
|
@@ -80,6 +80,37 @@ The base PyPI package keeps SQLite support in the standard library path, so Post
|
|
|
80
80
|
|
|
81
81
|
---
|
|
82
82
|
|
|
83
|
+
## PostgreSQL
|
|
84
|
+
|
|
85
|
+
Install PostgreSQL support:
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
pip install 'codegraphy[postgres]'
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
Initialize with a PostgreSQL URL:
|
|
92
|
+
|
|
93
|
+
```bash
|
|
94
|
+
codegraphy init --db postgresql://USER:PASSWORD@HOST:PORT/DBNAME
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
Example:
|
|
98
|
+
|
|
99
|
+
```bash
|
|
100
|
+
codegraphy init --db postgresql://postgres:postgres@localhost:5432/codegraphy
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
Or set `DATABASE_URL` once and reuse it:
|
|
104
|
+
|
|
105
|
+
```bash
|
|
106
|
+
export DATABASE_URL=postgresql://postgres:postgres@localhost:5432/codegraphy
|
|
107
|
+
codegraphy init
|
|
108
|
+
codegraphy index .
|
|
109
|
+
codegraphy serve
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
---
|
|
113
|
+
|
|
83
114
|
## Quickstart
|
|
84
115
|
|
|
85
116
|
```bash
|
|
@@ -109,6 +140,8 @@ codegraphy usages QUALIFIED_NAME # Find usages (debug, not MCP)
|
|
|
109
140
|
codegraphy stats # Show graph statistics
|
|
110
141
|
```
|
|
111
142
|
|
|
143
|
+
`codegraphy index` and `codegraphy update` show progress while they run, then print a summary with files scanned, files indexed, and elapsed time. `codegraphy serve` also shows startup progress and then reports when the MCP server is ready and waiting for a stdio client.
|
|
144
|
+
|
|
112
145
|
---
|
|
113
146
|
|
|
114
147
|
## MCP Tools
|
|
@@ -40,6 +40,37 @@ The base PyPI package keeps SQLite support in the standard library path, so Post
|
|
|
40
40
|
|
|
41
41
|
---
|
|
42
42
|
|
|
43
|
+
## PostgreSQL
|
|
44
|
+
|
|
45
|
+
Install PostgreSQL support:
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
pip install 'codegraphy[postgres]'
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
Initialize with a PostgreSQL URL:
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
codegraphy init --db postgresql://USER:PASSWORD@HOST:PORT/DBNAME
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
Example:
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
codegraphy init --db postgresql://postgres:postgres@localhost:5432/codegraphy
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
Or set `DATABASE_URL` once and reuse it:
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
export DATABASE_URL=postgresql://postgres:postgres@localhost:5432/codegraphy
|
|
67
|
+
codegraphy init
|
|
68
|
+
codegraphy index .
|
|
69
|
+
codegraphy serve
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
---
|
|
73
|
+
|
|
43
74
|
## Quickstart
|
|
44
75
|
|
|
45
76
|
```bash
|
|
@@ -69,6 +100,8 @@ codegraphy usages QUALIFIED_NAME # Find usages (debug, not MCP)
|
|
|
69
100
|
codegraphy stats # Show graph statistics
|
|
70
101
|
```
|
|
71
102
|
|
|
103
|
+
`codegraphy index` and `codegraphy update` show progress while they run, then print a summary with files scanned, files indexed, and elapsed time. `codegraphy serve` also shows startup progress and then reports when the MCP server is ready and waiting for a stdio client.
|
|
104
|
+
|
|
72
105
|
---
|
|
73
106
|
|
|
74
107
|
## MCP Tools
|
|
@@ -1,5 +1,31 @@
|
|
|
1
1
|
import click
|
|
2
2
|
from .config import load_config
|
|
3
|
+
import time
|
|
4
|
+
|
|
5
|
+
def _format_elapsed(seconds: float) -> str:
|
|
6
|
+
if seconds < 60:
|
|
7
|
+
return f"{seconds:.1f}s"
|
|
8
|
+
minutes, remaining = divmod(seconds, 60)
|
|
9
|
+
return f"{int(minutes)}m {remaining:.1f}s"
|
|
10
|
+
|
|
11
|
+
def _run_with_progress(label: str, files: list[str], runner):
|
|
12
|
+
total_files = len(files)
|
|
13
|
+
start = time.monotonic()
|
|
14
|
+
|
|
15
|
+
if total_files == 0:
|
|
16
|
+
click.echo(f"{label}...")
|
|
17
|
+
click.echo("Scanned 0 files, indexed 0 files in 0.0s.")
|
|
18
|
+
return 0
|
|
19
|
+
|
|
20
|
+
with click.progressbar(length=total_files, label=label, show_eta=True, show_percent=True) as bar:
|
|
21
|
+
def progress_callback(path, scanned_count, indexed_count, total_count):
|
|
22
|
+
bar.update(scanned_count - bar.pos)
|
|
23
|
+
|
|
24
|
+
indexed_count = runner(progress_callback)
|
|
25
|
+
|
|
26
|
+
elapsed = _format_elapsed(time.monotonic() - start)
|
|
27
|
+
click.echo(f"Scanned {total_files} files, indexed {indexed_count} files in {elapsed}.")
|
|
28
|
+
return indexed_count
|
|
3
29
|
|
|
4
30
|
@click.group()
|
|
5
31
|
def cli():
|
|
@@ -26,26 +52,30 @@ def index(path, exclude):
|
|
|
26
52
|
"""Index a directory into the graph."""
|
|
27
53
|
import codegraphy.config as config
|
|
28
54
|
from codegraphy.db.store import Store
|
|
29
|
-
from codegraphy.indexer.walker import
|
|
30
|
-
|
|
31
|
-
click.echo(f"Indexing {path}...")
|
|
55
|
+
from codegraphy.indexer.walker import DEFAULT_EXCLUDE, get_files_to_index, index_files
|
|
56
|
+
|
|
32
57
|
store = Store(config.DATABASE_URL)
|
|
33
|
-
exclude_list = exclude.split(',') if exclude else
|
|
34
|
-
|
|
58
|
+
exclude_list = exclude.split(',') if exclude else DEFAULT_EXCLUDE
|
|
59
|
+
files = get_files_to_index(path, exclude_list)
|
|
60
|
+
|
|
35
61
|
# Load plugins
|
|
36
62
|
plugins = [] # TODO: instantiate from config.CODEGRAPHY_PLUGINS
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
63
|
+
|
|
64
|
+
_run_with_progress(
|
|
65
|
+
f"Indexing {path}",
|
|
66
|
+
files,
|
|
67
|
+
lambda progress_callback: index_files(files, store, plugins, progress_callback=progress_callback),
|
|
68
|
+
)
|
|
40
69
|
|
|
41
70
|
@cli.command()
|
|
42
71
|
def update():
|
|
43
72
|
"""Update index incrementally based on git diff."""
|
|
44
73
|
import subprocess
|
|
74
|
+
import os
|
|
45
75
|
import codegraphy.config as config
|
|
46
76
|
from codegraphy.db.store import Store
|
|
77
|
+
from codegraphy.indexer.walker import index_files
|
|
47
78
|
|
|
48
|
-
click.echo("Updating index...")
|
|
49
79
|
try:
|
|
50
80
|
res = subprocess.run(['git', 'diff', '--name-only', 'HEAD'], capture_output=True, text=True)
|
|
51
81
|
changed_files = res.stdout.splitlines()
|
|
@@ -55,49 +85,49 @@ def update():
|
|
|
55
85
|
|
|
56
86
|
store = Store(config.DATABASE_URL)
|
|
57
87
|
plugins = [] # TODO
|
|
58
|
-
|
|
59
|
-
|
|
88
|
+
|
|
89
|
+
paths = []
|
|
60
90
|
for file_path in changed_files:
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
indexer = next((i for i in INDEXERS if i.can_handle(path)), None)
|
|
72
|
-
if not indexer:
|
|
73
|
-
continue
|
|
74
|
-
|
|
75
|
-
try:
|
|
76
|
-
with open(path, 'rb') as f:
|
|
77
|
-
content_bytes = f.read()
|
|
78
|
-
file_hash = sha256(content_bytes)
|
|
79
|
-
if store.get_file_hash(path) == file_hash:
|
|
80
|
-
continue
|
|
81
|
-
|
|
82
|
-
content_str = content_bytes.decode('utf-8', errors='replace')
|
|
83
|
-
symbols, edges = indexer.index_file(path, content_str)
|
|
84
|
-
|
|
85
|
-
for plugin in plugins:
|
|
86
|
-
symbols = [plugin.on_symbol(s) for s in symbols]
|
|
87
|
-
edges.extend(plugin.extra_edges(symbols))
|
|
88
|
-
|
|
89
|
-
store.upsert_file(path, file_hash, symbols, edges)
|
|
90
|
-
count += 1
|
|
91
|
-
except Exception:
|
|
92
|
-
pass
|
|
93
|
-
|
|
94
|
-
click.echo(f"Updated {count} files.")
|
|
91
|
+
path = os.path.join('.', file_path)
|
|
92
|
+
if os.path.exists(path):
|
|
93
|
+
paths.append(path)
|
|
94
|
+
|
|
95
|
+
_run_with_progress(
|
|
96
|
+
"Updating index",
|
|
97
|
+
paths,
|
|
98
|
+
lambda progress_callback: index_files(paths, store, plugins, progress_callback=progress_callback),
|
|
99
|
+
)
|
|
95
100
|
|
|
96
101
|
@cli.command()
|
|
97
102
|
def serve():
|
|
98
103
|
"""Start the MCP server over stdio."""
|
|
99
|
-
from codegraphy.mcp.server import start_server
|
|
100
|
-
|
|
104
|
+
from codegraphy.mcp.server import prepare_server, start_server
|
|
105
|
+
|
|
106
|
+
startup_steps = [
|
|
107
|
+
"Loading MCP tool registry",
|
|
108
|
+
"Connecting to graph database",
|
|
109
|
+
"Starting stdio transport",
|
|
110
|
+
]
|
|
111
|
+
server_info = None
|
|
112
|
+
|
|
113
|
+
with click.progressbar(
|
|
114
|
+
length=len(startup_steps),
|
|
115
|
+
label="Starting MCP server",
|
|
116
|
+
show_eta=False,
|
|
117
|
+
show_percent=True,
|
|
118
|
+
file=click.get_text_stream('stderr'),
|
|
119
|
+
) as bar:
|
|
120
|
+
bar.update(1)
|
|
121
|
+
server_info = prepare_server()
|
|
122
|
+
bar.update(1)
|
|
123
|
+
bar.update(1)
|
|
124
|
+
|
|
125
|
+
click.echo(
|
|
126
|
+
f"MCP server ready on stdio "
|
|
127
|
+
f"(backend: {server_info['backend']}, files: {server_info['files']}, symbols: {server_info['symbols']}). "
|
|
128
|
+
f"Waiting for client...",
|
|
129
|
+
err=True,
|
|
130
|
+
)
|
|
101
131
|
start_server()
|
|
102
132
|
|
|
103
133
|
@cli.command()
|
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import sqlite3
|
|
3
|
+
from urllib.parse import urlparse
|
|
4
|
+
from contextlib import contextmanager
|
|
5
|
+
|
|
6
|
+
from .schema import get_schema
|
|
7
|
+
|
|
8
|
+
try:
|
|
9
|
+
import psycopg2
|
|
10
|
+
from psycopg2.extras import Json
|
|
11
|
+
except ImportError:
|
|
12
|
+
psycopg2 = None
|
|
13
|
+
|
|
14
|
+
class Store:
|
|
15
|
+
def __init__(self, db_url: str):
|
|
16
|
+
self.db_url = db_url
|
|
17
|
+
self.is_postgres = db_url.startswith("postgres")
|
|
18
|
+
|
|
19
|
+
if self.is_postgres and psycopg2 is None:
|
|
20
|
+
raise ImportError("psycopg2-binary is required for postgres support")
|
|
21
|
+
|
|
22
|
+
if self.is_postgres:
|
|
23
|
+
parsed = urlparse(db_url)
|
|
24
|
+
self.conn_kwargs = {
|
|
25
|
+
'dbname': parsed.path[1:],
|
|
26
|
+
'user': parsed.username,
|
|
27
|
+
'password': parsed.password,
|
|
28
|
+
'host': parsed.hostname,
|
|
29
|
+
'port': parsed.port,
|
|
30
|
+
}
|
|
31
|
+
# Remove None values
|
|
32
|
+
self.conn_kwargs = {k: v for k, v in self.conn_kwargs.items() if v is not None}
|
|
33
|
+
else:
|
|
34
|
+
# Handle sqlite:///path
|
|
35
|
+
self.db_path = db_url.replace("sqlite:///", "")
|
|
36
|
+
if not self.db_path:
|
|
37
|
+
self.db_path = "codegraphy.db"
|
|
38
|
+
|
|
39
|
+
@contextmanager
|
|
40
|
+
def get_connection(self):
|
|
41
|
+
if self.is_postgres:
|
|
42
|
+
conn = psycopg2.connect(**self.conn_kwargs)
|
|
43
|
+
else:
|
|
44
|
+
conn = sqlite3.connect(self.db_path)
|
|
45
|
+
conn.execute("PRAGMA foreign_keys = ON")
|
|
46
|
+
|
|
47
|
+
try:
|
|
48
|
+
yield conn
|
|
49
|
+
conn.commit()
|
|
50
|
+
except Exception:
|
|
51
|
+
conn.rollback()
|
|
52
|
+
raise
|
|
53
|
+
finally:
|
|
54
|
+
conn.close()
|
|
55
|
+
|
|
56
|
+
def init_schema(self):
|
|
57
|
+
schema_sql = get_schema(self.db_url)
|
|
58
|
+
with self.get_connection() as conn:
|
|
59
|
+
cursor = conn.cursor()
|
|
60
|
+
if self.is_postgres:
|
|
61
|
+
cursor.execute(schema_sql)
|
|
62
|
+
else:
|
|
63
|
+
# sqlite3 executescript for multiple statements
|
|
64
|
+
cursor.executescript(schema_sql)
|
|
65
|
+
|
|
66
|
+
def _placeholder(self) -> str:
|
|
67
|
+
return "%s" if self.is_postgres else "?"
|
|
68
|
+
|
|
69
|
+
def _cursor(self, conn):
|
|
70
|
+
return conn.cursor()
|
|
71
|
+
|
|
72
|
+
def get_file_hash(self, file_path: str, conn=None) -> str:
|
|
73
|
+
if conn is None:
|
|
74
|
+
with self.get_connection() as managed_conn:
|
|
75
|
+
return self.get_file_hash(file_path, managed_conn)
|
|
76
|
+
|
|
77
|
+
cursor = self._cursor(conn)
|
|
78
|
+
cursor.execute(
|
|
79
|
+
f"SELECT git_hash FROM cg_files WHERE file_path = {self._placeholder()}",
|
|
80
|
+
(file_path,),
|
|
81
|
+
)
|
|
82
|
+
row = cursor.fetchone()
|
|
83
|
+
return row[0] if row else None
|
|
84
|
+
|
|
85
|
+
def get_file_hashes(self, file_paths: list[str], conn=None) -> dict[str, str]:
|
|
86
|
+
if not file_paths:
|
|
87
|
+
return {}
|
|
88
|
+
|
|
89
|
+
if conn is None:
|
|
90
|
+
with self.get_connection() as managed_conn:
|
|
91
|
+
return self.get_file_hashes(file_paths, managed_conn)
|
|
92
|
+
|
|
93
|
+
cursor = self._cursor(conn)
|
|
94
|
+
placeholder = self._placeholder()
|
|
95
|
+
file_hashes = {}
|
|
96
|
+
|
|
97
|
+
batch_size = 500
|
|
98
|
+
for i in range(0, len(file_paths), batch_size):
|
|
99
|
+
batch = file_paths[i:i + batch_size]
|
|
100
|
+
placeholders = ",".join([placeholder] * len(batch))
|
|
101
|
+
cursor.execute(
|
|
102
|
+
f"SELECT file_path, git_hash FROM cg_files WHERE file_path IN ({placeholders})",
|
|
103
|
+
tuple(batch),
|
|
104
|
+
)
|
|
105
|
+
for file_path, git_hash in cursor.fetchall():
|
|
106
|
+
file_hashes[file_path] = git_hash
|
|
107
|
+
|
|
108
|
+
return file_hashes
|
|
109
|
+
|
|
110
|
+
def _dedupe_symbols(self, symbols: list) -> list:
|
|
111
|
+
deduped = []
|
|
112
|
+
seen = set()
|
|
113
|
+
for symbol in symbols:
|
|
114
|
+
if symbol.qualified_name in seen:
|
|
115
|
+
continue
|
|
116
|
+
seen.add(symbol.qualified_name)
|
|
117
|
+
deduped.append(symbol)
|
|
118
|
+
return deduped
|
|
119
|
+
|
|
120
|
+
def _upsert_file_with_cursor(self, cursor, file_path: str, git_hash: str, symbols: list, edges: list):
|
|
121
|
+
placeholder = self._placeholder()
|
|
122
|
+
|
|
123
|
+
# Upsert file
|
|
124
|
+
if self.is_postgres:
|
|
125
|
+
cursor.execute(f"""
|
|
126
|
+
INSERT INTO cg_files (file_path, git_hash, symbol_count, last_indexed)
|
|
127
|
+
VALUES ({placeholder}, {placeholder}, {placeholder}, NOW())
|
|
128
|
+
ON CONFLICT (file_path) DO UPDATE
|
|
129
|
+
SET git_hash = EXCLUDED.git_hash, symbol_count = EXCLUDED.symbol_count, last_indexed = NOW()
|
|
130
|
+
""", (file_path, git_hash, len(symbols)))
|
|
131
|
+
else:
|
|
132
|
+
cursor.execute(f"""
|
|
133
|
+
INSERT INTO cg_files (file_path, git_hash, symbol_count, last_indexed)
|
|
134
|
+
VALUES ({placeholder}, {placeholder}, {placeholder}, CURRENT_TIMESTAMP)
|
|
135
|
+
ON CONFLICT(file_path) DO UPDATE
|
|
136
|
+
SET git_hash=excluded.git_hash, symbol_count=excluded.symbol_count, last_indexed=CURRENT_TIMESTAMP
|
|
137
|
+
""", (file_path, git_hash, len(symbols)))
|
|
138
|
+
|
|
139
|
+
# Delete old symbols (cascade deletes edges)
|
|
140
|
+
cursor.execute(f"DELETE FROM cg_symbols WHERE file_path = {placeholder}", (file_path,))
|
|
141
|
+
|
|
142
|
+
# Insert new symbols
|
|
143
|
+
if symbols:
|
|
144
|
+
symbol_records = []
|
|
145
|
+
for s in symbols:
|
|
146
|
+
extra_val = Json(s.extra) if self.is_postgres else json.dumps(s.extra)
|
|
147
|
+
symbol_records.append((
|
|
148
|
+
s.name, s.qualified_name, s.kind, s.file_path,
|
|
149
|
+
s.line_start, s.line_end, s.summary, s.raw_signature, extra_val
|
|
150
|
+
))
|
|
151
|
+
|
|
152
|
+
cursor.executemany(f"""
|
|
153
|
+
INSERT INTO cg_symbols (name, qualified_name, kind, file_path, line_start, line_end, summary, raw_signature, extra)
|
|
154
|
+
VALUES ({placeholder}, {placeholder}, {placeholder}, {placeholder}, {placeholder}, {placeholder}, {placeholder}, {placeholder}, {placeholder})
|
|
155
|
+
""", symbol_records)
|
|
156
|
+
|
|
157
|
+
if edges:
|
|
158
|
+
quals = set()
|
|
159
|
+
for e in edges:
|
|
160
|
+
quals.add(e.from_qualified)
|
|
161
|
+
quals.add(e.to_qualified)
|
|
162
|
+
|
|
163
|
+
if quals:
|
|
164
|
+
quals_list = list(quals)
|
|
165
|
+
qual_to_id = {}
|
|
166
|
+
|
|
167
|
+
batch_size = 500
|
|
168
|
+
for i in range(0, len(quals_list), batch_size):
|
|
169
|
+
batch = quals_list[i:i + batch_size]
|
|
170
|
+
placeholders = ",".join([placeholder] * len(batch))
|
|
171
|
+
cursor.execute(
|
|
172
|
+
f"SELECT id, qualified_name FROM cg_symbols WHERE qualified_name IN ({placeholders})",
|
|
173
|
+
tuple(batch),
|
|
174
|
+
)
|
|
175
|
+
for row in cursor.fetchall():
|
|
176
|
+
qual_to_id[row[1]] = row[0]
|
|
177
|
+
|
|
178
|
+
edge_records = []
|
|
179
|
+
for e in edges:
|
|
180
|
+
from_id = qual_to_id.get(e.from_qualified)
|
|
181
|
+
to_id = qual_to_id.get(e.to_qualified)
|
|
182
|
+
if from_id and to_id:
|
|
183
|
+
edge_records.append((from_id, to_id, e.relation))
|
|
184
|
+
|
|
185
|
+
if edge_records:
|
|
186
|
+
cursor.executemany(f"""
|
|
187
|
+
INSERT INTO cg_edges (from_id, to_id, relation)
|
|
188
|
+
VALUES ({placeholder}, {placeholder}, {placeholder})
|
|
189
|
+
ON CONFLICT DO NOTHING
|
|
190
|
+
""", edge_records)
|
|
191
|
+
|
|
192
|
+
def upsert_file(self, file_path: str, git_hash: str, symbols: list, edges: list, conn=None):
|
|
193
|
+
"""
|
|
194
|
+
Replace symbols and edges for a file.
|
|
195
|
+
"""
|
|
196
|
+
symbols = self._dedupe_symbols(symbols)
|
|
197
|
+
if conn is None:
|
|
198
|
+
with self.get_connection() as managed_conn:
|
|
199
|
+
cursor = self._cursor(managed_conn)
|
|
200
|
+
self._upsert_file_with_cursor(cursor, file_path, git_hash, symbols, edges)
|
|
201
|
+
return
|
|
202
|
+
|
|
203
|
+
cursor = self._cursor(conn)
|
|
204
|
+
self._upsert_file_with_cursor(cursor, file_path, git_hash, symbols, edges)
|
|
@@ -44,6 +44,14 @@ class PythonIndexer(BaseIndexer):
|
|
|
44
44
|
return f"{module_path}.{name}" if module_path else name
|
|
45
45
|
return f"{module_path}.{'.'.join(self.current_scope)}.{name}"
|
|
46
46
|
|
|
47
|
+
def get_import_qualname(self, node, imported_name, source_module):
|
|
48
|
+
scope = ".".join(self.current_scope)
|
|
49
|
+
parts = [module_path] if module_path else []
|
|
50
|
+
if scope:
|
|
51
|
+
parts.append(scope)
|
|
52
|
+
parts.extend(["__import__", str(node.lineno), str(node.col_offset), source_module, imported_name])
|
|
53
|
+
return ".".join(parts)
|
|
54
|
+
|
|
47
55
|
def visit_ClassDef(self, node):
|
|
48
56
|
qualname = self.get_qualname(node.name)
|
|
49
57
|
summary = ast.get_docstring(node) or ""
|
|
@@ -108,7 +116,7 @@ class PythonIndexer(BaseIndexer):
|
|
|
108
116
|
for alias in node.names:
|
|
109
117
|
# module level import
|
|
110
118
|
# e.g., import os
|
|
111
|
-
qualname = self.
|
|
119
|
+
qualname = self.get_import_qualname(node, alias.asname or alias.name, alias.name)
|
|
112
120
|
symbols.append(Symbol(
|
|
113
121
|
name=alias.asname or alias.name,
|
|
114
122
|
qualified_name=qualname,
|
|
@@ -129,7 +137,7 @@ class PythonIndexer(BaseIndexer):
|
|
|
129
137
|
if node.module:
|
|
130
138
|
for alias in node.names:
|
|
131
139
|
name = alias.asname or alias.name
|
|
132
|
-
qualname = self.
|
|
140
|
+
qualname = self.get_import_qualname(node, name, node.module)
|
|
133
141
|
symbols.append(Symbol(
|
|
134
142
|
name=name,
|
|
135
143
|
qualified_name=qualname,
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import hashlib
|
|
3
|
+
import subprocess
|
|
4
|
+
from .python import PythonIndexer
|
|
5
|
+
from ..db.store import Store
|
|
6
|
+
|
|
7
|
+
INDEXERS = [PythonIndexer()]
|
|
8
|
+
DEFAULT_EXCLUDE = [
|
|
9
|
+
'.git',
|
|
10
|
+
'node_modules',
|
|
11
|
+
'__pycache__',
|
|
12
|
+
'.venv',
|
|
13
|
+
'dist',
|
|
14
|
+
'build',
|
|
15
|
+
'.tox',
|
|
16
|
+
'.pytest_cache',
|
|
17
|
+
'migrations',
|
|
18
|
+
]
|
|
19
|
+
|
|
20
|
+
def sha256(content: bytes) -> str:
|
|
21
|
+
return hashlib.sha256(content).hexdigest()
|
|
22
|
+
|
|
23
|
+
def get_files_to_index(root: str, exclude: list[str]) -> list[str]:
|
|
24
|
+
# Use git ls-files if possible
|
|
25
|
+
try:
|
|
26
|
+
result = subprocess.run(
|
|
27
|
+
['git', 'ls-files'],
|
|
28
|
+
cwd=root,
|
|
29
|
+
capture_output=True,
|
|
30
|
+
text=True,
|
|
31
|
+
check=True
|
|
32
|
+
)
|
|
33
|
+
files = result.stdout.splitlines()
|
|
34
|
+
# Make paths absolute
|
|
35
|
+
files = [os.path.join(root, f) for f in files]
|
|
36
|
+
except (subprocess.CalledProcessError, FileNotFoundError):
|
|
37
|
+
# Fallback to os.walk
|
|
38
|
+
files = []
|
|
39
|
+
for dirpath, dirnames, filenames in os.walk(root):
|
|
40
|
+
# rudimentary exclude
|
|
41
|
+
dirnames[:] = [d for d in dirnames if d not in exclude and not d.startswith('.')]
|
|
42
|
+
for f in filenames:
|
|
43
|
+
files.append(os.path.join(dirpath, f))
|
|
44
|
+
|
|
45
|
+
# Filter excludes
|
|
46
|
+
if exclude:
|
|
47
|
+
filtered = []
|
|
48
|
+
for f in files:
|
|
49
|
+
if not any(ex in f for ex in exclude):
|
|
50
|
+
filtered.append(f)
|
|
51
|
+
files = filtered
|
|
52
|
+
|
|
53
|
+
return files
|
|
54
|
+
|
|
55
|
+
def index_files(files: list[str], store: Store, plugins: list, progress_callback=None):
|
|
56
|
+
indexed_count = 0
|
|
57
|
+
if not files:
|
|
58
|
+
return indexed_count
|
|
59
|
+
|
|
60
|
+
with store.get_connection() as conn:
|
|
61
|
+
existing_hashes = store.get_file_hashes(files, conn=conn)
|
|
62
|
+
|
|
63
|
+
total_files = len(files)
|
|
64
|
+
for scanned_count, path in enumerate(files, start=1):
|
|
65
|
+
indexer = next((i for i in INDEXERS if i.can_handle(path)), None)
|
|
66
|
+
if not indexer:
|
|
67
|
+
if progress_callback:
|
|
68
|
+
progress_callback(path, scanned_count, indexed_count, total_files)
|
|
69
|
+
continue
|
|
70
|
+
|
|
71
|
+
try:
|
|
72
|
+
with open(path, 'rb') as f:
|
|
73
|
+
content_bytes = f.read()
|
|
74
|
+
except OSError:
|
|
75
|
+
if progress_callback:
|
|
76
|
+
progress_callback(path, scanned_count, indexed_count, total_files)
|
|
77
|
+
continue
|
|
78
|
+
|
|
79
|
+
file_hash = sha256(content_bytes)
|
|
80
|
+
if existing_hashes.get(path) == file_hash:
|
|
81
|
+
if progress_callback:
|
|
82
|
+
progress_callback(path, scanned_count, indexed_count, total_files)
|
|
83
|
+
continue
|
|
84
|
+
|
|
85
|
+
content_str = content_bytes.decode('utf-8', errors='replace')
|
|
86
|
+
symbols, edges = indexer.index_file(path, content_str)
|
|
87
|
+
|
|
88
|
+
for plugin in plugins:
|
|
89
|
+
symbols = [plugin.on_symbol(s) for s in symbols]
|
|
90
|
+
edges.extend(plugin.extra_edges(symbols))
|
|
91
|
+
|
|
92
|
+
store.upsert_file(path, file_hash, symbols, edges, conn=conn)
|
|
93
|
+
existing_hashes[path] = file_hash
|
|
94
|
+
indexed_count += 1
|
|
95
|
+
|
|
96
|
+
if progress_callback:
|
|
97
|
+
progress_callback(path, scanned_count, indexed_count, total_files)
|
|
98
|
+
|
|
99
|
+
return indexed_count
|
|
100
|
+
|
|
101
|
+
def index_path(root: str, store: Store, plugins: list, exclude: list[str] = None, progress_callback=None):
|
|
102
|
+
exclude = exclude or DEFAULT_EXCLUDE
|
|
103
|
+
files = get_files_to_index(root, exclude)
|
|
104
|
+
return index_files(files, store, plugins, progress_callback=progress_callback)
|
|
@@ -4,13 +4,40 @@ from ..config import DATABASE_URL, CODEGRAPHY_ROOT
|
|
|
4
4
|
import subprocess
|
|
5
5
|
|
|
6
6
|
mcp = FastMCP("codegraphy")
|
|
7
|
-
|
|
7
|
+
_store = None
|
|
8
|
+
|
|
9
|
+
def get_store() -> Store:
|
|
10
|
+
global _store
|
|
11
|
+
if _store is None:
|
|
12
|
+
_store = Store(DATABASE_URL)
|
|
13
|
+
return _store
|
|
14
|
+
|
|
15
|
+
def _graph_stats(store: Store) -> dict:
|
|
16
|
+
with store.get_connection() as conn:
|
|
17
|
+
cursor = conn.cursor()
|
|
18
|
+
cursor.execute("SELECT COUNT(*) FROM cg_files")
|
|
19
|
+
files = cursor.fetchone()[0]
|
|
20
|
+
cursor.execute("SELECT COUNT(*) FROM cg_symbols")
|
|
21
|
+
symbols = cursor.fetchone()[0]
|
|
22
|
+
cursor.execute("SELECT COUNT(*) FROM cg_edges")
|
|
23
|
+
edges = cursor.fetchone()[0]
|
|
24
|
+
return {
|
|
25
|
+
"files": files,
|
|
26
|
+
"symbols": symbols,
|
|
27
|
+
"edges": edges,
|
|
28
|
+
"backend": "postgres" if store.is_postgres else "sqlite"
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
def prepare_server() -> dict:
|
|
32
|
+
store = get_store()
|
|
33
|
+
return _graph_stats(store)
|
|
8
34
|
|
|
9
35
|
@mcp.tool()
|
|
10
36
|
def search_symbol(name: str, kind: str = None, limit: int = 10, fallback_grep: bool = True) -> list[dict]:
|
|
11
37
|
"""
|
|
12
38
|
Find symbols by name (exact, prefix, or substring match).
|
|
13
39
|
"""
|
|
40
|
+
store = get_store()
|
|
14
41
|
results = []
|
|
15
42
|
|
|
16
43
|
with store.get_connection() as conn:
|
|
@@ -75,6 +102,7 @@ def get_file_summary(file_path: str) -> dict:
|
|
|
75
102
|
"""
|
|
76
103
|
One-shot summary of a file: classes, functions, imports.
|
|
77
104
|
"""
|
|
105
|
+
store = get_store()
|
|
78
106
|
with store.get_connection() as conn:
|
|
79
107
|
cursor = conn.cursor()
|
|
80
108
|
p = "%s" if store.is_postgres else "?"
|
|
@@ -114,6 +142,7 @@ def find_usages(qualified_name: str, limit: int = 20, fallback_grep: bool = True
|
|
|
114
142
|
"""
|
|
115
143
|
Find every symbol that imports, calls, or references this symbol.
|
|
116
144
|
"""
|
|
145
|
+
store = get_store()
|
|
117
146
|
results = []
|
|
118
147
|
with store.get_connection() as conn:
|
|
119
148
|
cursor = conn.cursor()
|
|
@@ -187,6 +216,7 @@ def path_between(from_qualified: str, to_qualified: str, max_depth: int = 6) ->
|
|
|
187
216
|
"""
|
|
188
217
|
BFS shortest path through the edge graph between two symbols.
|
|
189
218
|
"""
|
|
219
|
+
store = get_store()
|
|
190
220
|
with store.get_connection() as conn:
|
|
191
221
|
cursor = conn.cursor()
|
|
192
222
|
p = "%s" if store.is_postgres else "?"
|
|
@@ -239,6 +269,7 @@ def search_semantic(query: str, limit: int = 10) -> list[dict]:
|
|
|
239
269
|
pgvector semantic search over symbol summaries.
|
|
240
270
|
No-ops on SQLite.
|
|
241
271
|
"""
|
|
272
|
+
store = get_store()
|
|
242
273
|
if not store.is_postgres:
|
|
243
274
|
return []
|
|
244
275
|
|
|
@@ -248,20 +279,7 @@ def search_semantic(query: str, limit: int = 10) -> list[dict]:
|
|
|
248
279
|
@mcp.tool()
|
|
249
280
|
def graph_stats() -> dict:
|
|
250
281
|
"""Quick health check."""
|
|
251
|
-
|
|
252
|
-
cursor = conn.cursor()
|
|
253
|
-
cursor.execute("SELECT COUNT(*) FROM cg_files")
|
|
254
|
-
files = cursor.fetchone()[0]
|
|
255
|
-
cursor.execute("SELECT COUNT(*) FROM cg_symbols")
|
|
256
|
-
symbols = cursor.fetchone()[0]
|
|
257
|
-
cursor.execute("SELECT COUNT(*) FROM cg_edges")
|
|
258
|
-
edges = cursor.fetchone()[0]
|
|
259
|
-
return {
|
|
260
|
-
"files": files,
|
|
261
|
-
"symbols": symbols,
|
|
262
|
-
"edges": edges,
|
|
263
|
-
"backend": "postgres" if store.is_postgres else "sqlite"
|
|
264
|
-
}
|
|
282
|
+
return _graph_stats(get_store())
|
|
265
283
|
|
|
266
284
|
@mcp.tool()
|
|
267
285
|
def grep_search(pattern: str, include: list[str] = None, exclude: list[str] = None, limit: int = 30) -> list[dict]:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: codegraphy
|
|
3
|
-
Version: 0.1.1
|
|
3
|
+
Version: 2.0.0
|
|
4
4
|
Summary: SQLite/PostgreSQL codebase knowledge graph and MCP server for Claude Code
|
|
5
5
|
Author: Charan Kulal
|
|
6
6
|
License-Expression: MIT
|
|
@@ -80,6 +80,37 @@ The base PyPI package keeps SQLite support in the standard library path, so Post
|
|
|
80
80
|
|
|
81
81
|
---
|
|
82
82
|
|
|
83
|
+
## PostgreSQL
|
|
84
|
+
|
|
85
|
+
Install PostgreSQL support:
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
pip install 'codegraphy[postgres]'
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
Initialize with a PostgreSQL URL:
|
|
92
|
+
|
|
93
|
+
```bash
|
|
94
|
+
codegraphy init --db postgresql://USER:PASSWORD@HOST:PORT/DBNAME
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
Example:
|
|
98
|
+
|
|
99
|
+
```bash
|
|
100
|
+
codegraphy init --db postgresql://postgres:postgres@localhost:5432/codegraphy
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
Or set `DATABASE_URL` once and reuse it:
|
|
104
|
+
|
|
105
|
+
```bash
|
|
106
|
+
export DATABASE_URL=postgresql://postgres:postgres@localhost:5432/codegraphy
|
|
107
|
+
codegraphy init
|
|
108
|
+
codegraphy index .
|
|
109
|
+
codegraphy serve
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
---
|
|
113
|
+
|
|
83
114
|
## Quickstart
|
|
84
115
|
|
|
85
116
|
```bash
|
|
@@ -109,6 +140,8 @@ codegraphy usages QUALIFIED_NAME # Find usages (debug, not MCP)
|
|
|
109
140
|
codegraphy stats # Show graph statistics
|
|
110
141
|
```
|
|
111
142
|
|
|
143
|
+
`codegraphy index` and `codegraphy update` show progress while they run, then print a summary with files scanned, files indexed, and elapsed time. `codegraphy serve` also shows startup progress and then reports when the MCP server is ready and waiting for a stdio client.
|
|
144
|
+
|
|
112
145
|
---
|
|
113
146
|
|
|
114
147
|
## MCP Tools
|
|
@@ -1,162 +0,0 @@
|
|
|
1
|
-
import json
|
|
2
|
-
import sqlite3
|
|
3
|
-
from urllib.parse import urlparse
|
|
4
|
-
from contextlib import contextmanager
|
|
5
|
-
|
|
6
|
-
from .schema import get_schema
|
|
7
|
-
|
|
8
|
-
try:
|
|
9
|
-
import psycopg2
|
|
10
|
-
from psycopg2.extras import Json
|
|
11
|
-
except ImportError:
|
|
12
|
-
psycopg2 = None
|
|
13
|
-
|
|
14
|
-
class Store:
|
|
15
|
-
def __init__(self, db_url: str):
|
|
16
|
-
self.db_url = db_url
|
|
17
|
-
self.is_postgres = db_url.startswith("postgres")
|
|
18
|
-
|
|
19
|
-
if self.is_postgres and psycopg2 is None:
|
|
20
|
-
raise ImportError("psycopg2-binary is required for postgres support")
|
|
21
|
-
|
|
22
|
-
if self.is_postgres:
|
|
23
|
-
parsed = urlparse(db_url)
|
|
24
|
-
self.conn_kwargs = {
|
|
25
|
-
'dbname': parsed.path[1:],
|
|
26
|
-
'user': parsed.username,
|
|
27
|
-
'password': parsed.password,
|
|
28
|
-
'host': parsed.hostname,
|
|
29
|
-
'port': parsed.port,
|
|
30
|
-
}
|
|
31
|
-
# Remove None values
|
|
32
|
-
self.conn_kwargs = {k: v for k, v in self.conn_kwargs.items() if v is not None}
|
|
33
|
-
else:
|
|
34
|
-
# Handle sqlite:///path
|
|
35
|
-
self.db_path = db_url.replace("sqlite:///", "")
|
|
36
|
-
if not self.db_path:
|
|
37
|
-
self.db_path = "codegraphy.db"
|
|
38
|
-
|
|
39
|
-
@contextmanager
|
|
40
|
-
def get_connection(self):
|
|
41
|
-
if self.is_postgres:
|
|
42
|
-
conn = psycopg2.connect(**self.conn_kwargs)
|
|
43
|
-
else:
|
|
44
|
-
conn = sqlite3.connect(self.db_path)
|
|
45
|
-
conn.execute("PRAGMA foreign_keys = ON")
|
|
46
|
-
|
|
47
|
-
try:
|
|
48
|
-
yield conn
|
|
49
|
-
conn.commit()
|
|
50
|
-
except Exception:
|
|
51
|
-
conn.rollback()
|
|
52
|
-
raise
|
|
53
|
-
finally:
|
|
54
|
-
conn.close()
|
|
55
|
-
|
|
56
|
-
def init_schema(self):
|
|
57
|
-
schema_sql = get_schema(self.db_url)
|
|
58
|
-
with self.get_connection() as conn:
|
|
59
|
-
cursor = conn.cursor()
|
|
60
|
-
if self.is_postgres:
|
|
61
|
-
cursor.execute(schema_sql)
|
|
62
|
-
else:
|
|
63
|
-
# sqlite3 executescript for multiple statements
|
|
64
|
-
cursor.executescript(schema_sql)
|
|
65
|
-
|
|
66
|
-
def get_file_hash(self, file_path: str) -> str:
|
|
67
|
-
with self.get_connection() as conn:
|
|
68
|
-
cursor = conn.cursor()
|
|
69
|
-
if self.is_postgres:
|
|
70
|
-
cursor.execute("SELECT git_hash FROM cg_files WHERE file_path = %s", (file_path,))
|
|
71
|
-
else:
|
|
72
|
-
cursor.execute("SELECT git_hash FROM cg_files WHERE file_path = ?", (file_path,))
|
|
73
|
-
row = cursor.fetchone()
|
|
74
|
-
return row[0] if row else None
|
|
75
|
-
|
|
76
|
-
def upsert_file(self, file_path: str, git_hash: str, symbols: list, edges: list):
|
|
77
|
-
"""
|
|
78
|
-
Replace symbols and edges for a file.
|
|
79
|
-
"""
|
|
80
|
-
with self.get_connection() as conn:
|
|
81
|
-
cursor = conn.cursor()
|
|
82
|
-
|
|
83
|
-
# Param style
|
|
84
|
-
p = "%s" if self.is_postgres else "?"
|
|
85
|
-
|
|
86
|
-
# Upsert file
|
|
87
|
-
if self.is_postgres:
|
|
88
|
-
cursor.execute(f"""
|
|
89
|
-
INSERT INTO cg_files (file_path, git_hash, symbol_count, last_indexed)
|
|
90
|
-
VALUES ({p}, {p}, {p}, NOW())
|
|
91
|
-
ON CONFLICT (file_path) DO UPDATE
|
|
92
|
-
SET git_hash = EXCLUDED.git_hash, symbol_count = EXCLUDED.symbol_count, last_indexed = NOW()
|
|
93
|
-
""", (file_path, git_hash, len(symbols)))
|
|
94
|
-
else:
|
|
95
|
-
cursor.execute(f"""
|
|
96
|
-
INSERT INTO cg_files (file_path, git_hash, symbol_count, last_indexed)
|
|
97
|
-
VALUES ({p}, {p}, {p}, CURRENT_TIMESTAMP)
|
|
98
|
-
ON CONFLICT(file_path) DO UPDATE
|
|
99
|
-
SET git_hash=excluded.git_hash, symbol_count=excluded.symbol_count, last_indexed=CURRENT_TIMESTAMP
|
|
100
|
-
""", (file_path, git_hash, len(symbols)))
|
|
101
|
-
|
|
102
|
-
# Delete old symbols (cascade deletes edges)
|
|
103
|
-
cursor.execute(f"DELETE FROM cg_symbols WHERE file_path = {p}", (file_path,))
|
|
104
|
-
|
|
105
|
-
# Insert new symbols
|
|
106
|
-
if symbols:
|
|
107
|
-
symbol_records = []
|
|
108
|
-
for s in symbols:
|
|
109
|
-
extra_val = Json(s.extra) if self.is_postgres else json.dumps(s.extra)
|
|
110
|
-
symbol_records.append((
|
|
111
|
-
s.name, s.qualified_name, s.kind, s.file_path,
|
|
112
|
-
s.line_start, s.line_end, s.summary, s.raw_signature, extra_val
|
|
113
|
-
))
|
|
114
|
-
|
|
115
|
-
cursor.executemany(f"""
|
|
116
|
-
INSERT INTO cg_symbols (name, qualified_name, kind, file_path, line_start, line_end, summary, raw_signature, extra)
|
|
117
|
-
VALUES ({p}, {p}, {p}, {p}, {p}, {p}, {p}, {p}, {p})
|
|
118
|
-
""", symbol_records)
|
|
119
|
-
|
|
120
|
-
# To insert edges, we need their IDs. The simplest way is to map qualified_name -> id
|
|
121
|
-
# Note: For edges where the target doesn't exist yet, we might have missing IDs.
|
|
122
|
-
# To handle this robustly without failing, we only insert edges where both from and to exist.
|
|
123
|
-
# However, the spec says from_id, to_id.
|
|
124
|
-
# We must get IDs for all symbols first.
|
|
125
|
-
|
|
126
|
-
# For simplicity in this step, we will bulk insert edges later or inside a second pass?
|
|
127
|
-
# Actually we can just look up ids.
|
|
128
|
-
# If to_qualified doesn't exist in DB, the edge is dropped.
|
|
129
|
-
if edges:
|
|
130
|
-
quals = set()
|
|
131
|
-
for e in edges:
|
|
132
|
-
quals.add(e.from_qualified)
|
|
133
|
-
quals.add(e.to_qualified)
|
|
134
|
-
|
|
135
|
-
if quals:
|
|
136
|
-
# SQLite limit for variables is 999, but a single file rarely exceeds that.
|
|
137
|
-
# For safety, we can query in batches, or use placeholders.
|
|
138
|
-
quals_list = list(quals)
|
|
139
|
-
qual_to_id = {}
|
|
140
|
-
|
|
141
|
-
# Batch fetch to avoid hitting sqlite limits
|
|
142
|
-
batch_size = 500
|
|
143
|
-
for i in range(0, len(quals_list), batch_size):
|
|
144
|
-
batch = quals_list[i:i+batch_size]
|
|
145
|
-
placeholders = ",".join([p] * len(batch))
|
|
146
|
-
cursor.execute(f"SELECT id, qualified_name FROM cg_symbols WHERE qualified_name IN ({placeholders})", tuple(batch))
|
|
147
|
-
for row in cursor.fetchall():
|
|
148
|
-
qual_to_id[row[1]] = row[0]
|
|
149
|
-
|
|
150
|
-
edge_records = []
|
|
151
|
-
for e in edges:
|
|
152
|
-
from_id = qual_to_id.get(e.from_qualified)
|
|
153
|
-
to_id = qual_to_id.get(e.to_qualified)
|
|
154
|
-
if from_id and to_id:
|
|
155
|
-
edge_records.append((from_id, to_id, e.relation))
|
|
156
|
-
|
|
157
|
-
if edge_records:
|
|
158
|
-
cursor.executemany(f"""
|
|
159
|
-
INSERT INTO cg_edges (from_id, to_id, relation)
|
|
160
|
-
VALUES ({p}, {p}, {p})
|
|
161
|
-
ON CONFLICT DO NOTHING
|
|
162
|
-
""", edge_records)
|
|
@@ -1,77 +0,0 @@
|
|
|
1
|
-
import os
|
|
2
|
-
import hashlib
|
|
3
|
-
import subprocess
|
|
4
|
-
from .python import PythonIndexer
|
|
5
|
-
from ..db.store import Store
|
|
6
|
-
|
|
7
|
-
INDEXERS = [PythonIndexer()]
|
|
8
|
-
|
|
9
|
-
def sha256(content: bytes) -> str:
|
|
10
|
-
return hashlib.sha256(content).hexdigest()
|
|
11
|
-
|
|
12
|
-
def get_files_to_index(root: str, exclude: list[str]) -> list[str]:
|
|
13
|
-
# Use git ls-files if possible
|
|
14
|
-
try:
|
|
15
|
-
result = subprocess.run(
|
|
16
|
-
['git', 'ls-files'],
|
|
17
|
-
cwd=root,
|
|
18
|
-
capture_output=True,
|
|
19
|
-
text=True,
|
|
20
|
-
check=True
|
|
21
|
-
)
|
|
22
|
-
files = result.stdout.splitlines()
|
|
23
|
-
# Make paths absolute
|
|
24
|
-
files = [os.path.join(root, f) for f in files]
|
|
25
|
-
except (subprocess.CalledProcessError, FileNotFoundError):
|
|
26
|
-
# Fallback to os.walk
|
|
27
|
-
files = []
|
|
28
|
-
for dirpath, dirnames, filenames in os.walk(root):
|
|
29
|
-
# rudimentary exclude
|
|
30
|
-
dirnames[:] = [d for d in dirnames if d not in exclude and not d.startswith('.')]
|
|
31
|
-
for f in filenames:
|
|
32
|
-
files.append(os.path.join(dirpath, f))
|
|
33
|
-
|
|
34
|
-
# Filter excludes
|
|
35
|
-
if exclude:
|
|
36
|
-
filtered = []
|
|
37
|
-
for f in files:
|
|
38
|
-
if not any(ex in f for ex in exclude):
|
|
39
|
-
filtered.append(f)
|
|
40
|
-
files = filtered
|
|
41
|
-
|
|
42
|
-
return files
|
|
43
|
-
|
|
44
|
-
def index_path(root: str, store: Store, plugins: list, exclude: list[str] = None):
|
|
45
|
-
exclude = exclude or ['.git', 'node_modules', '__pycache__', '.venv']
|
|
46
|
-
files = get_files_to_index(root, exclude)
|
|
47
|
-
|
|
48
|
-
indexed_count = 0
|
|
49
|
-
for path in files:
|
|
50
|
-
indexer = next((i for i in INDEXERS if i.can_handle(path)), None)
|
|
51
|
-
if not indexer:
|
|
52
|
-
continue
|
|
53
|
-
|
|
54
|
-
try:
|
|
55
|
-
with open(path, 'rb') as f:
|
|
56
|
-
content_bytes = f.read()
|
|
57
|
-
except OSError:
|
|
58
|
-
continue
|
|
59
|
-
|
|
60
|
-
file_hash = sha256(content_bytes)
|
|
61
|
-
|
|
62
|
-
# Check if unchanged
|
|
63
|
-
if store.get_file_hash(path) == file_hash:
|
|
64
|
-
continue
|
|
65
|
-
|
|
66
|
-
content_str = content_bytes.decode('utf-8', errors='replace')
|
|
67
|
-
symbols, edges = indexer.index_file(path, content_str)
|
|
68
|
-
|
|
69
|
-
# Apply plugins
|
|
70
|
-
for plugin in plugins:
|
|
71
|
-
symbols = [plugin.on_symbol(s) for s in symbols]
|
|
72
|
-
edges.extend(plugin.extra_edges(symbols))
|
|
73
|
-
|
|
74
|
-
store.upsert_file(path, file_hash, symbols, edges)
|
|
75
|
-
indexed_count += 1
|
|
76
|
-
|
|
77
|
-
return indexed_count
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|