codegraphy 0.1.1__tar.gz → 2.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28) hide show
  1. {codegraphy-0.1.1 → codegraphy-2.0.0}/PKG-INFO +34 -1
  2. {codegraphy-0.1.1 → codegraphy-2.0.0}/README.md +33 -0
  3. {codegraphy-0.1.1 → codegraphy-2.0.0}/codegraphy/__init__.py +1 -1
  4. {codegraphy-0.1.1 → codegraphy-2.0.0}/codegraphy/cli.py +77 -47
  5. codegraphy-2.0.0/codegraphy/db/store.py +204 -0
  6. {codegraphy-0.1.1 → codegraphy-2.0.0}/codegraphy/indexer/python.py +10 -2
  7. codegraphy-2.0.0/codegraphy/indexer/walker.py +104 -0
  8. {codegraphy-0.1.1 → codegraphy-2.0.0}/codegraphy/mcp/server.py +33 -15
  9. {codegraphy-0.1.1 → codegraphy-2.0.0}/codegraphy.egg-info/PKG-INFO +34 -1
  10. codegraphy-0.1.1/codegraphy/db/store.py +0 -162
  11. codegraphy-0.1.1/codegraphy/indexer/walker.py +0 -77
  12. {codegraphy-0.1.1 → codegraphy-2.0.0}/LICENSE +0 -0
  13. {codegraphy-0.1.1 → codegraphy-2.0.0}/codegraphy/config.py +0 -0
  14. {codegraphy-0.1.1 → codegraphy-2.0.0}/codegraphy/db/__init__.py +0 -0
  15. {codegraphy-0.1.1 → codegraphy-2.0.0}/codegraphy/db/schema.py +0 -0
  16. {codegraphy-0.1.1 → codegraphy-2.0.0}/codegraphy/indexer/__init__.py +0 -0
  17. {codegraphy-0.1.1 → codegraphy-2.0.0}/codegraphy/indexer/base.py +0 -0
  18. {codegraphy-0.1.1 → codegraphy-2.0.0}/codegraphy/mcp/__init__.py +0 -0
  19. {codegraphy-0.1.1 → codegraphy-2.0.0}/codegraphy/plugins/__init__.py +0 -0
  20. {codegraphy-0.1.1 → codegraphy-2.0.0}/codegraphy/plugins/base.py +0 -0
  21. {codegraphy-0.1.1 → codegraphy-2.0.0}/codegraphy/plugins/django.py +0 -0
  22. {codegraphy-0.1.1 → codegraphy-2.0.0}/codegraphy.egg-info/SOURCES.txt +0 -0
  23. {codegraphy-0.1.1 → codegraphy-2.0.0}/codegraphy.egg-info/dependency_links.txt +0 -0
  24. {codegraphy-0.1.1 → codegraphy-2.0.0}/codegraphy.egg-info/entry_points.txt +0 -0
  25. {codegraphy-0.1.1 → codegraphy-2.0.0}/codegraphy.egg-info/requires.txt +0 -0
  26. {codegraphy-0.1.1 → codegraphy-2.0.0}/codegraphy.egg-info/top_level.txt +0 -0
  27. {codegraphy-0.1.1 → codegraphy-2.0.0}/pyproject.toml +0 -0
  28. {codegraphy-0.1.1 → codegraphy-2.0.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codegraphy
3
- Version: 0.1.1
3
+ Version: 2.0.0
4
4
  Summary: SQLite/PostgreSQL codebase knowledge graph and MCP server for Claude Code
5
5
  Author: Charan Kulal
6
6
  License-Expression: MIT
@@ -80,6 +80,37 @@ The base PyPI package keeps SQLite support in the standard library path, so Post
80
80
 
81
81
  ---
82
82
 
83
+ ## PostgreSQL
84
+
85
+ Install PostgreSQL support:
86
+
87
+ ```bash
88
+ pip install 'codegraphy[postgres]'
89
+ ```
90
+
91
+ Initialize with a PostgreSQL URL:
92
+
93
+ ```bash
94
+ codegraphy init --db postgresql://USER:PASSWORD@HOST:PORT/DBNAME
95
+ ```
96
+
97
+ Example:
98
+
99
+ ```bash
100
+ codegraphy init --db postgresql://postgres:postgres@localhost:5432/codegraphy
101
+ ```
102
+
103
+ Or set `DATABASE_URL` once and reuse it:
104
+
105
+ ```bash
106
+ export DATABASE_URL=postgresql://postgres:postgres@localhost:5432/codegraphy
107
+ codegraphy init
108
+ codegraphy index .
109
+ codegraphy serve
110
+ ```
111
+
112
+ ---
113
+
83
114
  ## Quickstart
84
115
 
85
116
  ```bash
@@ -109,6 +140,8 @@ codegraphy usages QUALIFIED_NAME # Find usages (debug, not MCP)
109
140
  codegraphy stats # Show graph statistics
110
141
  ```
111
142
 
143
+ `codegraphy index` and `codegraphy update` show progress while they run, then print a summary with files scanned, files indexed, and elapsed time. `codegraphy serve` also shows startup progress and then reports when the MCP server is ready and waiting for a stdio client.
144
+
112
145
  ---
113
146
 
114
147
  ## MCP Tools
@@ -40,6 +40,37 @@ The base PyPI package keeps SQLite support in the standard library path, so Post
40
40
 
41
41
  ---
42
42
 
43
+ ## PostgreSQL
44
+
45
+ Install PostgreSQL support:
46
+
47
+ ```bash
48
+ pip install 'codegraphy[postgres]'
49
+ ```
50
+
51
+ Initialize with a PostgreSQL URL:
52
+
53
+ ```bash
54
+ codegraphy init --db postgresql://USER:PASSWORD@HOST:PORT/DBNAME
55
+ ```
56
+
57
+ Example:
58
+
59
+ ```bash
60
+ codegraphy init --db postgresql://postgres:postgres@localhost:5432/codegraphy
61
+ ```
62
+
63
+ Or set `DATABASE_URL` once and reuse it:
64
+
65
+ ```bash
66
+ export DATABASE_URL=postgresql://postgres:postgres@localhost:5432/codegraphy
67
+ codegraphy init
68
+ codegraphy index .
69
+ codegraphy serve
70
+ ```
71
+
72
+ ---
73
+
43
74
  ## Quickstart
44
75
 
45
76
  ```bash
@@ -69,6 +100,8 @@ codegraphy usages QUALIFIED_NAME # Find usages (debug, not MCP)
69
100
  codegraphy stats # Show graph statistics
70
101
  ```
71
102
 
103
+ `codegraphy index` and `codegraphy update` show progress while they run, then print a summary with files scanned, files indexed, and elapsed time. `codegraphy serve` also shows startup progress and then reports when the MCP server is ready and waiting for a stdio client.
104
+
72
105
  ---
73
106
 
74
107
  ## MCP Tools
@@ -2,4 +2,4 @@
2
2
  codegraphy: Parses a codebase into a Postgres/SQLite knowledge graph and exposes it as an MCP server.
3
3
  """
4
4
 
5
- __version__ = "0.1.1"
5
+ __version__ = "2.0.0"
@@ -1,5 +1,31 @@
1
1
  import click
2
2
  from .config import load_config
3
+ import time
4
+
5
+ def _format_elapsed(seconds: float) -> str:
6
+ if seconds < 60:
7
+ return f"{seconds:.1f}s"
8
+ minutes, remaining = divmod(seconds, 60)
9
+ return f"{int(minutes)}m {remaining:.1f}s"
10
+
11
def _run_with_progress(label: str, files: list[str], runner):
    """Run *runner* behind a click progress bar and print a summary line.

    Args:
        label: Text shown next to the progress bar.
        files: The files about to be processed; only the length is used here.
        runner: Callable that receives a progress callback
            ``(path, scanned_count, indexed_count, total_count)`` and returns
            the number of files it indexed.

    Returns:
        The value returned by *runner*, or ``0`` when *files* is empty.
    """
    file_count = len(files)

    # Nothing to do: skip the bar entirely and report an empty run.
    if file_count == 0:
        click.echo(f"{label}...")
        click.echo("Scanned 0 files, indexed 0 files in 0.0s.")
        return 0

    started_at = time.monotonic()

    progress = click.progressbar(
        length=file_count,
        label=label,
        show_eta=True,
        show_percent=True,
    )
    with progress as bar:

        def progress_callback(path, scanned_count, indexed_count, total_count):
            # The runner reports an absolute position; the bar wants a delta.
            bar.update(scanned_count - bar.pos)

        indexed_count = runner(progress_callback)

    duration = _format_elapsed(time.monotonic() - started_at)
    click.echo(f"Scanned {file_count} files, indexed {indexed_count} files in {duration}.")
    return indexed_count
3
29
 
4
30
  @click.group()
5
31
  def cli():
@@ -26,26 +52,30 @@ def index(path, exclude):
26
52
  """Index a directory into the graph."""
27
53
  import codegraphy.config as config
28
54
  from codegraphy.db.store import Store
29
- from codegraphy.indexer.walker import index_path
30
-
31
- click.echo(f"Indexing {path}...")
55
+ from codegraphy.indexer.walker import DEFAULT_EXCLUDE, get_files_to_index, index_files
56
+
32
57
  store = Store(config.DATABASE_URL)
33
- exclude_list = exclude.split(',') if exclude else None
34
-
58
+ exclude_list = exclude.split(',') if exclude else DEFAULT_EXCLUDE
59
+ files = get_files_to_index(path, exclude_list)
60
+
35
61
  # Load plugins
36
62
  plugins = [] # TODO: instantiate from config.CODEGRAPHY_PLUGINS
37
-
38
- count = index_path(path, store, plugins, exclude_list)
39
- click.echo(f"Indexed {count} files.")
63
+
64
+ _run_with_progress(
65
+ f"Indexing {path}",
66
+ files,
67
+ lambda progress_callback: index_files(files, store, plugins, progress_callback=progress_callback),
68
+ )
40
69
 
41
70
  @cli.command()
42
71
  def update():
43
72
  """Update index incrementally based on git diff."""
44
73
  import subprocess
74
+ import os
45
75
  import codegraphy.config as config
46
76
  from codegraphy.db.store import Store
77
+ from codegraphy.indexer.walker import index_files
47
78
 
48
- click.echo("Updating index...")
49
79
  try:
50
80
  res = subprocess.run(['git', 'diff', '--name-only', 'HEAD'], capture_output=True, text=True)
51
81
  changed_files = res.stdout.splitlines()
@@ -55,49 +85,49 @@ def update():
55
85
 
56
86
  store = Store(config.DATABASE_URL)
57
87
  plugins = [] # TODO
58
-
59
- count = 0
88
+
89
+ paths = []
60
90
  for file_path in changed_files:
61
- # Instead of calling index_path, we should just index the specific files
62
- # Re-using index_path is tricky since it takes a root.
63
- # We can just write a small loop here for the changed files.
64
- import os
65
- from codegraphy.indexer.walker import INDEXERS, sha256
66
-
67
- path = os.path.abspath(file_path)
68
- if not os.path.exists(path):
69
- continue
70
-
71
- indexer = next((i for i in INDEXERS if i.can_handle(path)), None)
72
- if not indexer:
73
- continue
74
-
75
- try:
76
- with open(path, 'rb') as f:
77
- content_bytes = f.read()
78
- file_hash = sha256(content_bytes)
79
- if store.get_file_hash(path) == file_hash:
80
- continue
81
-
82
- content_str = content_bytes.decode('utf-8', errors='replace')
83
- symbols, edges = indexer.index_file(path, content_str)
84
-
85
- for plugin in plugins:
86
- symbols = [plugin.on_symbol(s) for s in symbols]
87
- edges.extend(plugin.extra_edges(symbols))
88
-
89
- store.upsert_file(path, file_hash, symbols, edges)
90
- count += 1
91
- except Exception:
92
- pass
93
-
94
- click.echo(f"Updated {count} files.")
91
+ path = os.path.join('.', file_path)
92
+ if os.path.exists(path):
93
+ paths.append(path)
94
+
95
+ _run_with_progress(
96
+ "Updating index",
97
+ paths,
98
+ lambda progress_callback: index_files(paths, store, plugins, progress_callback=progress_callback),
99
+ )
95
100
 
96
101
@cli.command()
def serve():
    """Start the MCP server over stdio."""
    from codegraphy.mcp.server import prepare_server, start_server

    # All status output is written to stderr (presumably because stdout is
    # reserved for the stdio transport — confirm against start_server).
    stderr_stream = click.get_text_stream('stderr')

    # Only the number of steps is used; the bar advances once per step.
    startup_steps = (
        "Loading MCP tool registry",
        "Connecting to graph database",
        "Starting stdio transport",
    )

    progress = click.progressbar(
        length=len(startup_steps),
        label="Starting MCP server",
        show_eta=False,
        show_percent=True,
        file=stderr_stream,
    )
    with progress as bar:
        bar.update(1)
        server_info = prepare_server()
        bar.update(1)
        bar.update(1)

    backend = server_info['backend']
    file_total = server_info['files']
    symbol_total = server_info['symbols']
    click.echo(
        f"MCP server ready on stdio "
        f"(backend: {backend}, files: {file_total}, symbols: {symbol_total}). "
        f"Waiting for client...",
        err=True,
    )
    start_server()
102
132
 
103
133
  @cli.command()
@@ -0,0 +1,204 @@
1
+ import json
2
+ import sqlite3
3
+ from urllib.parse import urlparse
4
+ from contextlib import contextmanager
5
+
6
+ from .schema import get_schema
7
+
8
+ try:
9
+ import psycopg2
10
+ from psycopg2.extras import Json
11
+ except ImportError:
12
+ psycopg2 = None
13
+
14
class Store:
    """Persistence layer for the code graph, backed by SQLite or PostgreSQL.

    The backend is chosen from the URL given to the constructor: URLs that
    start with ``postgres`` use psycopg2, anything else is treated as a
    ``sqlite:///path`` URL. Read/write methods accept an optional ``conn`` so
    a caller can run many operations on one connection (one transaction);
    without it each call opens, commits and closes its own connection.
    """

    def __init__(self, db_url: str):
        """Parse *db_url* and remember how to connect; no connection is opened.

        Raises:
            ImportError: If a postgres URL is given but psycopg2 is missing.
        """
        self.db_url = db_url
        self.is_postgres = db_url.startswith("postgres")

        if self.is_postgres and psycopg2 is None:
            raise ImportError("psycopg2-binary is required for postgres support")

        if self.is_postgres:
            parsed = urlparse(db_url)
            candidates = {
                'dbname': parsed.path[1:],
                'user': parsed.username,
                'password': parsed.password,
                'host': parsed.hostname,
                'port': parsed.port,
            }
            # Omit unset parts so psycopg2 falls back to its own defaults.
            self.conn_kwargs = {k: v for k, v in candidates.items() if v is not None}
        else:
            # Handle sqlite:///path; an empty path means the default file.
            self.db_path = db_url.replace("sqlite:///", "") or "codegraphy.db"

    @contextmanager
    def get_connection(self):
        """Yield a new connection; commit on success, roll back on error.

        The connection is always closed on exit, including when the SQLite
        ``PRAGMA`` setup itself fails.
        """
        if self.is_postgres:
            conn = psycopg2.connect(**self.conn_kwargs)
        else:
            conn = sqlite3.connect(self.db_path)

        try:
            if not self.is_postgres:
                # SQLite only enforces foreign keys when asked, per connection.
                conn.execute("PRAGMA foreign_keys = ON")
            yield conn
            conn.commit()
        except Exception:
            conn.rollback()
            raise
        finally:
            conn.close()

    def init_schema(self):
        """Create the schema returned by ``get_schema`` for this database URL."""
        schema_sql = get_schema(self.db_url)
        with self.get_connection() as conn:
            cursor = conn.cursor()
            if self.is_postgres:
                cursor.execute(schema_sql)
            else:
                # sqlite3 needs executescript to run multiple statements.
                cursor.executescript(schema_sql)

    def _placeholder(self) -> str:
        """Return the DB-API parameter marker for the active backend."""
        return "%s" if self.is_postgres else "?"

    def _cursor(self, conn):
        """Return a new cursor on *conn*."""
        return conn.cursor()

    def get_file_hash(self, file_path: str, conn=None) -> "str | None":
        """Return the stored hash for *file_path*, or ``None`` if unknown."""
        if conn is None:
            with self.get_connection() as managed_conn:
                return self.get_file_hash(file_path, managed_conn)

        cursor = self._cursor(conn)
        cursor.execute(
            f"SELECT git_hash FROM cg_files WHERE file_path = {self._placeholder()}",
            (file_path,),
        )
        row = cursor.fetchone()
        return row[0] if row else None

    def get_file_hashes(self, file_paths: list[str], conn=None) -> dict[str, str]:
        """Return ``{file_path: hash}`` for those *file_paths* that are stored.

        Paths without a row in ``cg_files`` are absent from the result.
        """
        if not file_paths:
            return {}

        if conn is None:
            with self.get_connection() as managed_conn:
                return self.get_file_hashes(file_paths, managed_conn)

        cursor = self._cursor(conn)
        placeholder = self._placeholder()
        file_hashes = {}

        # Query in batches to keep the number of bound parameters bounded.
        batch_size = 500
        for start in range(0, len(file_paths), batch_size):
            batch = file_paths[start:start + batch_size]
            placeholders = ",".join([placeholder] * len(batch))
            cursor.execute(
                f"SELECT file_path, git_hash FROM cg_files WHERE file_path IN ({placeholders})",
                tuple(batch),
            )
            file_hashes.update(cursor.fetchall())

        return file_hashes

    def _dedupe_symbols(self, symbols: list) -> list:
        """Keep only the first symbol for each ``qualified_name``, in order."""
        deduped = []
        seen = set()
        for symbol in symbols:
            if symbol.qualified_name in seen:
                continue
            seen.add(symbol.qualified_name)
            deduped.append(symbol)
        return deduped

    def _insert_symbols(self, cursor, symbols: list):
        """Bulk-insert *symbols* into ``cg_symbols``."""
        if not symbols:
            return

        placeholder = self._placeholder()
        symbol_records = [
            (
                s.name, s.qualified_name, s.kind, s.file_path,
                s.line_start, s.line_end, s.summary, s.raw_signature,
                # psycopg2 needs the Json adapter; SQLite stores JSON text.
                Json(s.extra) if self.is_postgres else json.dumps(s.extra),
            )
            for s in symbols
        ]
        values = ", ".join([placeholder] * 9)
        cursor.executemany(f"""
            INSERT INTO cg_symbols (name, qualified_name, kind, file_path, line_start, line_end, summary, raw_signature, extra)
            VALUES ({values})
        """, symbol_records)

    def _insert_edges(self, cursor, edges: list):
        """Insert *edges* whose two endpoints both exist in ``cg_symbols``.

        Edges that reference a qualified name with no stored symbol are
        dropped. An endpoint counts as present when a row was found for it,
        whatever its id value is (an id of 0 is a valid id).
        """
        if not edges:
            return

        placeholder = self._placeholder()
        qualified_names = list(
            {name for e in edges for name in (e.from_qualified, e.to_qualified)}
        )

        # Resolve qualified names to ids in batches.
        ids_by_name = {}
        batch_size = 500
        for start in range(0, len(qualified_names), batch_size):
            batch = qualified_names[start:start + batch_size]
            placeholders = ",".join([placeholder] * len(batch))
            cursor.execute(
                f"SELECT id, qualified_name FROM cg_symbols WHERE qualified_name IN ({placeholders})",
                tuple(batch),
            )
            for symbol_id, qualified_name in cursor.fetchall():
                ids_by_name[qualified_name] = symbol_id

        edge_records = [
            (ids_by_name[e.from_qualified], ids_by_name[e.to_qualified], e.relation)
            for e in edges
            if e.from_qualified in ids_by_name and e.to_qualified in ids_by_name
        ]
        if edge_records:
            cursor.executemany(f"""
                INSERT INTO cg_edges (from_id, to_id, relation)
                VALUES ({placeholder}, {placeholder}, {placeholder})
                ON CONFLICT DO NOTHING
            """, edge_records)

    def _upsert_file_with_cursor(self, cursor, file_path: str, git_hash: str, symbols: list, edges: list):
        """Write the file row, then replace its symbols and edges via *cursor*."""
        placeholder = self._placeholder()
        # Only the "current time" expression differs between the backends.
        now_sql = "NOW()" if self.is_postgres else "CURRENT_TIMESTAMP"

        cursor.execute(f"""
            INSERT INTO cg_files (file_path, git_hash, symbol_count, last_indexed)
            VALUES ({placeholder}, {placeholder}, {placeholder}, {now_sql})
            ON CONFLICT (file_path) DO UPDATE
            SET git_hash = EXCLUDED.git_hash, symbol_count = EXCLUDED.symbol_count, last_indexed = {now_sql}
        """, (file_path, git_hash, len(symbols)))

        # Delete old symbols (the original comment notes edges go by cascade).
        cursor.execute(f"DELETE FROM cg_symbols WHERE file_path = {placeholder}", (file_path,))

        self._insert_symbols(cursor, symbols)
        self._insert_edges(cursor, edges)

    def upsert_file(self, file_path: str, git_hash: str, symbols: list, edges: list, conn=None):
        """
        Replace symbols and edges for a file.

        Symbols are de-duplicated by ``qualified_name`` first. When *conn* is
        given the work joins that connection's transaction; otherwise a
        connection is opened and committed just for this call.
        """
        symbols = self._dedupe_symbols(symbols)
        if conn is None:
            with self.get_connection() as managed_conn:
                cursor = self._cursor(managed_conn)
                self._upsert_file_with_cursor(cursor, file_path, git_hash, symbols, edges)
            return

        cursor = self._cursor(conn)
        self._upsert_file_with_cursor(cursor, file_path, git_hash, symbols, edges)
@@ -44,6 +44,14 @@ class PythonIndexer(BaseIndexer):
44
44
  return f"{module_path}.{name}" if module_path else name
45
45
  return f"{module_path}.{'.'.join(self.current_scope)}.{name}"
46
46
 
47
def get_import_qualname(self, node, imported_name, source_module):
    """Build a qualified name for an import statement's symbol.

    The name is ``<module_path>.<scope>.__import__.<line>.<col>.<source>.<name>``,
    with the module path and the scope left out when they are empty. The
    line and column of *node* are part of the name.
    """
    enclosing_scope = ".".join(self.current_scope)
    prefix = [segment for segment in (module_path, enclosing_scope) if segment]
    location = [str(node.lineno), str(node.col_offset)]
    return ".".join([*prefix, "__import__", *location, source_module, imported_name])
54
+
47
55
  def visit_ClassDef(self, node):
48
56
  qualname = self.get_qualname(node.name)
49
57
  summary = ast.get_docstring(node) or ""
@@ -108,7 +116,7 @@ class PythonIndexer(BaseIndexer):
108
116
  for alias in node.names:
109
117
  # module level import
110
118
  # e.g., import os
111
- qualname = self.get_qualname(alias.asname or alias.name)
119
+ qualname = self.get_import_qualname(node, alias.asname or alias.name, alias.name)
112
120
  symbols.append(Symbol(
113
121
  name=alias.asname or alias.name,
114
122
  qualified_name=qualname,
@@ -129,7 +137,7 @@ class PythonIndexer(BaseIndexer):
129
137
  if node.module:
130
138
  for alias in node.names:
131
139
  name = alias.asname or alias.name
132
- qualname = self.get_qualname(name)
140
+ qualname = self.get_import_qualname(node, name, node.module)
133
141
  symbols.append(Symbol(
134
142
  name=name,
135
143
  qualified_name=qualname,
@@ -0,0 +1,104 @@
1
+ import os
2
+ import hashlib
3
+ import subprocess
4
+ from .python import PythonIndexer
5
+ from ..db.store import Store
6
+
7
+ INDEXERS = [PythonIndexer()]
8
+ DEFAULT_EXCLUDE = [
9
+ '.git',
10
+ 'node_modules',
11
+ '__pycache__',
12
+ '.venv',
13
+ 'dist',
14
+ 'build',
15
+ '.tox',
16
+ '.pytest_cache',
17
+ 'migrations',
18
+ ]
19
+
20
def sha256(content: bytes) -> str:
    """Return the hexadecimal SHA-256 digest of *content*."""
    digest = hashlib.sha256()
    digest.update(content)
    return digest.hexdigest()
22
+
23
def _path_components(path: str) -> list[str]:
    """Split *path* into its non-empty components ('/' and os.sep both split)."""
    return [part for part in path.replace(os.sep, '/').split('/') if part and part != '.']


def _matches_exclude(components: list[str], patterns: list[list[str]]) -> bool:
    """Return True when any pattern occurs as a contiguous run of *components*."""
    for pattern in patterns:
        width = len(pattern)
        for start in range(len(components) - width + 1):
            if components[start:start + width] == pattern:
                return True
    return False


def get_files_to_index(root: str, exclude: list[str]) -> list[str]:
    """Return the files under *root* that should be indexed.

    Files come from ``git ls-files`` run in *root*. When that fails (not a
    repository, git missing, *root* not a usable directory) the tree is
    walked with ``os.walk`` instead, skipping hidden directories.

    Exclusion is by whole path component, evaluated relative to *root*:
    ``"build"`` drops ``build/x.py`` but keeps ``rebuild.py``, and the name
    of the directory that contains *root* never matters. An entry with a
    separator (``"pkg/dist"``) must match consecutive components.

    Args:
        root: Directory to scan.
        exclude: Directory/file names (or sub-paths) to leave out.

    Returns:
        Paths built as ``os.path.join(root, <relative path>)``; they are
        absolute only when *root* is.
    """
    patterns = [
        parts
        for parts in (_path_components(entry.strip()) for entry in (exclude or []))
        if parts
    ]
    # Single-name entries also prune whole directories during the walk.
    pruned_dir_names = {parts[0] for parts in patterns if len(parts) == 1}

    candidates = []  # (path to return, path relative to root)
    try:
        # -z gives NUL-separated, unquoted names, so paths with spaces or
        # non-ASCII characters come back verbatim.
        result = subprocess.run(
            ['git', 'ls-files', '-z'],
            cwd=root,
            capture_output=True,
            text=True,
            check=True,
        )
    except (subprocess.CalledProcessError, OSError):
        # Fallback to os.walk
        for dirpath, dirnames, filenames in os.walk(root):
            dirnames[:] = [
                d for d in dirnames
                if d not in pruned_dir_names and not d.startswith('.')
            ]
            for filename in filenames:
                full_path = os.path.join(dirpath, filename)
                candidates.append((full_path, os.path.relpath(full_path, root)))
    else:
        for relative in result.stdout.split('\0'):
            if relative:
                candidates.append((os.path.join(root, relative), relative))

    return [
        full_path
        for full_path, relative in candidates
        if not _matches_exclude(_path_components(relative), patterns)
    ]
54
+
55
def index_files(files: list[str], store: Store, plugins: list, progress_callback=None):
    """Index *files* into *store* on a single connection and count the writes.

    A file is skipped when no indexer handles it, when it cannot be read, or
    when its SHA-256 matches the hash already stored. After every file,
    skipped or not, ``progress_callback(path, scanned, indexed, total)`` is
    called if one was given.

    Returns:
        The number of files whose symbols and edges were written.
    """
    if not files:
        return 0

    written = 0
    file_total = len(files)

    with store.get_connection() as conn:
        known_hashes = store.get_file_hashes(files, conn=conn)

        def index_one(path) -> bool:
            """Index a single path; return True when it was written."""
            handler = next((i for i in INDEXERS if i.can_handle(path)), None)
            if not handler:
                return False

            try:
                with open(path, 'rb') as source:
                    raw = source.read()
            except OSError:
                return False

            digest = sha256(raw)
            if known_hashes.get(path) == digest:
                return False

            text = raw.decode('utf-8', errors='replace')
            symbols, edges = handler.index_file(path, text)

            for plugin in plugins:
                symbols = [plugin.on_symbol(s) for s in symbols]
                edges.extend(plugin.extra_edges(symbols))

            store.upsert_file(path, digest, symbols, edges, conn=conn)
            # Remember the new hash so a repeated path is not indexed twice.
            known_hashes[path] = digest
            return True

        for position, path in enumerate(files, start=1):
            if index_one(path):
                written += 1
            if progress_callback:
                progress_callback(path, position, written, file_total)

    return written
100
+
101
def index_path(root: str, store: Store, plugins: list, exclude: list[str] = None, progress_callback=None):
    """Collect the indexable files under *root* and index them.

    An empty or missing *exclude* falls back to ``DEFAULT_EXCLUDE``.
    Returns whatever ``index_files`` returns for the collected files.
    """
    effective_exclude = exclude if exclude else DEFAULT_EXCLUDE
    return index_files(
        get_files_to_index(root, effective_exclude),
        store,
        plugins,
        progress_callback=progress_callback,
    )
@@ -4,13 +4,40 @@ from ..config import DATABASE_URL, CODEGRAPHY_ROOT
4
4
  import subprocess
5
5
 
6
6
  mcp = FastMCP("codegraphy")
7
- store = Store(DATABASE_URL)
7
+ _store = None
8
+
9
def get_store() -> Store:
    """Return the module-level Store, creating it from DATABASE_URL on first use."""
    global _store
    if _store is not None:
        return _store
    _store = Store(DATABASE_URL)
    return _store
14
+
15
def _graph_stats(store: Store) -> dict:
    """Count the rows of the three graph tables and name the backend.

    Returns:
        A dict with the keys ``files``, ``symbols``, ``edges`` (row counts)
        and ``backend`` (``"postgres"`` or ``"sqlite"``).
    """
    count_queries = (
        ("files", "SELECT COUNT(*) FROM cg_files"),
        ("symbols", "SELECT COUNT(*) FROM cg_symbols"),
        ("edges", "SELECT COUNT(*) FROM cg_edges"),
    )

    stats = {}
    with store.get_connection() as conn:
        cursor = conn.cursor()
        for key, sql in count_queries:
            cursor.execute(sql)
            stats[key] = cursor.fetchone()[0]

    stats["backend"] = "postgres" if store.is_postgres else "sqlite"
    return stats
30
+
31
def prepare_server() -> dict:
    """Obtain the shared Store and return its current graph statistics."""
    return _graph_stats(get_store())
8
34
 
9
35
  @mcp.tool()
10
36
  def search_symbol(name: str, kind: str = None, limit: int = 10, fallback_grep: bool = True) -> list[dict]:
11
37
  """
12
38
  Find symbols by name (exact, prefix, or substring match).
13
39
  """
40
+ store = get_store()
14
41
  results = []
15
42
 
16
43
  with store.get_connection() as conn:
@@ -75,6 +102,7 @@ def get_file_summary(file_path: str) -> dict:
75
102
  """
76
103
  One-shot summary of a file: classes, functions, imports.
77
104
  """
105
+ store = get_store()
78
106
  with store.get_connection() as conn:
79
107
  cursor = conn.cursor()
80
108
  p = "%s" if store.is_postgres else "?"
@@ -114,6 +142,7 @@ def find_usages(qualified_name: str, limit: int = 20, fallback_grep: bool = True
114
142
  """
115
143
  Find every symbol that imports, calls, or references this symbol.
116
144
  """
145
+ store = get_store()
117
146
  results = []
118
147
  with store.get_connection() as conn:
119
148
  cursor = conn.cursor()
@@ -187,6 +216,7 @@ def path_between(from_qualified: str, to_qualified: str, max_depth: int = 6) ->
187
216
  """
188
217
  BFS shortest path through the edge graph between two symbols.
189
218
  """
219
+ store = get_store()
190
220
  with store.get_connection() as conn:
191
221
  cursor = conn.cursor()
192
222
  p = "%s" if store.is_postgres else "?"
@@ -239,6 +269,7 @@ def search_semantic(query: str, limit: int = 10) -> list[dict]:
239
269
  pgvector semantic search over symbol summaries.
240
270
  No-ops on SQLite.
241
271
  """
272
+ store = get_store()
242
273
  if not store.is_postgres:
243
274
  return []
244
275
 
@@ -248,20 +279,7 @@ def search_semantic(query: str, limit: int = 10) -> list[dict]:
248
279
@mcp.tool()
def graph_stats() -> dict:
    """Quick health check."""
    # The counting logic lives in _graph_stats; this only supplies the store.
    active_store = get_store()
    return _graph_stats(active_store)
265
283
 
266
284
  @mcp.tool()
267
285
  def grep_search(pattern: str, include: list[str] = None, exclude: list[str] = None, limit: int = 30) -> list[dict]:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codegraphy
3
- Version: 0.1.1
3
+ Version: 2.0.0
4
4
  Summary: SQLite/PostgreSQL codebase knowledge graph and MCP server for Claude Code
5
5
  Author: Charan Kulal
6
6
  License-Expression: MIT
@@ -80,6 +80,37 @@ The base PyPI package keeps SQLite support in the standard library path, so Post
80
80
 
81
81
  ---
82
82
 
83
+ ## PostgreSQL
84
+
85
+ Install PostgreSQL support:
86
+
87
+ ```bash
88
+ pip install 'codegraphy[postgres]'
89
+ ```
90
+
91
+ Initialize with a PostgreSQL URL:
92
+
93
+ ```bash
94
+ codegraphy init --db postgresql://USER:PASSWORD@HOST:PORT/DBNAME
95
+ ```
96
+
97
+ Example:
98
+
99
+ ```bash
100
+ codegraphy init --db postgresql://postgres:postgres@localhost:5432/codegraphy
101
+ ```
102
+
103
+ Or set `DATABASE_URL` once and reuse it:
104
+
105
+ ```bash
106
+ export DATABASE_URL=postgresql://postgres:postgres@localhost:5432/codegraphy
107
+ codegraphy init
108
+ codegraphy index .
109
+ codegraphy serve
110
+ ```
111
+
112
+ ---
113
+
83
114
  ## Quickstart
84
115
 
85
116
  ```bash
@@ -109,6 +140,8 @@ codegraphy usages QUALIFIED_NAME # Find usages (debug, not MCP)
109
140
  codegraphy stats # Show graph statistics
110
141
  ```
111
142
 
143
+ `codegraphy index` and `codegraphy update` show progress while they run, then print a summary with files scanned, files indexed, and elapsed time. `codegraphy serve` also shows startup progress and then reports when the MCP server is ready and waiting for a stdio client.
144
+
112
145
  ---
113
146
 
114
147
  ## MCP Tools
@@ -1,162 +0,0 @@
1
- import json
2
- import sqlite3
3
- from urllib.parse import urlparse
4
- from contextlib import contextmanager
5
-
6
- from .schema import get_schema
7
-
8
- try:
9
- import psycopg2
10
- from psycopg2.extras import Json
11
- except ImportError:
12
- psycopg2 = None
13
-
14
- class Store:
15
- def __init__(self, db_url: str):
16
- self.db_url = db_url
17
- self.is_postgres = db_url.startswith("postgres")
18
-
19
- if self.is_postgres and psycopg2 is None:
20
- raise ImportError("psycopg2-binary is required for postgres support")
21
-
22
- if self.is_postgres:
23
- parsed = urlparse(db_url)
24
- self.conn_kwargs = {
25
- 'dbname': parsed.path[1:],
26
- 'user': parsed.username,
27
- 'password': parsed.password,
28
- 'host': parsed.hostname,
29
- 'port': parsed.port,
30
- }
31
- # Remove None values
32
- self.conn_kwargs = {k: v for k, v in self.conn_kwargs.items() if v is not None}
33
- else:
34
- # Handle sqlite:///path
35
- self.db_path = db_url.replace("sqlite:///", "")
36
- if not self.db_path:
37
- self.db_path = "codegraphy.db"
38
-
39
- @contextmanager
40
- def get_connection(self):
41
- if self.is_postgres:
42
- conn = psycopg2.connect(**self.conn_kwargs)
43
- else:
44
- conn = sqlite3.connect(self.db_path)
45
- conn.execute("PRAGMA foreign_keys = ON")
46
-
47
- try:
48
- yield conn
49
- conn.commit()
50
- except Exception:
51
- conn.rollback()
52
- raise
53
- finally:
54
- conn.close()
55
-
56
- def init_schema(self):
57
- schema_sql = get_schema(self.db_url)
58
- with self.get_connection() as conn:
59
- cursor = conn.cursor()
60
- if self.is_postgres:
61
- cursor.execute(schema_sql)
62
- else:
63
- # sqlite3 executescript for multiple statements
64
- cursor.executescript(schema_sql)
65
-
66
- def get_file_hash(self, file_path: str) -> str:
67
- with self.get_connection() as conn:
68
- cursor = conn.cursor()
69
- if self.is_postgres:
70
- cursor.execute("SELECT git_hash FROM cg_files WHERE file_path = %s", (file_path,))
71
- else:
72
- cursor.execute("SELECT git_hash FROM cg_files WHERE file_path = ?", (file_path,))
73
- row = cursor.fetchone()
74
- return row[0] if row else None
75
-
76
- def upsert_file(self, file_path: str, git_hash: str, symbols: list, edges: list):
77
- """
78
- Replace symbols and edges for a file.
79
- """
80
- with self.get_connection() as conn:
81
- cursor = conn.cursor()
82
-
83
- # Param style
84
- p = "%s" if self.is_postgres else "?"
85
-
86
- # Upsert file
87
- if self.is_postgres:
88
- cursor.execute(f"""
89
- INSERT INTO cg_files (file_path, git_hash, symbol_count, last_indexed)
90
- VALUES ({p}, {p}, {p}, NOW())
91
- ON CONFLICT (file_path) DO UPDATE
92
- SET git_hash = EXCLUDED.git_hash, symbol_count = EXCLUDED.symbol_count, last_indexed = NOW()
93
- """, (file_path, git_hash, len(symbols)))
94
- else:
95
- cursor.execute(f"""
96
- INSERT INTO cg_files (file_path, git_hash, symbol_count, last_indexed)
97
- VALUES ({p}, {p}, {p}, CURRENT_TIMESTAMP)
98
- ON CONFLICT(file_path) DO UPDATE
99
- SET git_hash=excluded.git_hash, symbol_count=excluded.symbol_count, last_indexed=CURRENT_TIMESTAMP
100
- """, (file_path, git_hash, len(symbols)))
101
-
102
- # Delete old symbols (cascade deletes edges)
103
- cursor.execute(f"DELETE FROM cg_symbols WHERE file_path = {p}", (file_path,))
104
-
105
- # Insert new symbols
106
- if symbols:
107
- symbol_records = []
108
- for s in symbols:
109
- extra_val = Json(s.extra) if self.is_postgres else json.dumps(s.extra)
110
- symbol_records.append((
111
- s.name, s.qualified_name, s.kind, s.file_path,
112
- s.line_start, s.line_end, s.summary, s.raw_signature, extra_val
113
- ))
114
-
115
- cursor.executemany(f"""
116
- INSERT INTO cg_symbols (name, qualified_name, kind, file_path, line_start, line_end, summary, raw_signature, extra)
117
- VALUES ({p}, {p}, {p}, {p}, {p}, {p}, {p}, {p}, {p})
118
- """, symbol_records)
119
-
120
- # To insert edges, we need their IDs. The simplest way is to map qualified_name -> id
121
- # Note: For edges where the target doesn't exist yet, we might have missing IDs.
122
- # To handle this robustly without failing, we only insert edges where both from and to exist.
123
- # However, the spec says from_id, to_id.
124
- # We must get IDs for all symbols first.
125
-
126
- # For simplicity in this step, we will bulk insert edges later or inside a second pass?
127
- # Actually we can just look up ids.
128
- # If to_qualified doesn't exist in DB, the edge is dropped.
129
- if edges:
130
- quals = set()
131
- for e in edges:
132
- quals.add(e.from_qualified)
133
- quals.add(e.to_qualified)
134
-
135
- if quals:
136
- # SQLite limit for variables is 999, but a single file rarely exceeds that.
137
- # For safety, we can query in batches, or use placeholders.
138
- quals_list = list(quals)
139
- qual_to_id = {}
140
-
141
- # Batch fetch to avoid hitting sqlite limits
142
- batch_size = 500
143
- for i in range(0, len(quals_list), batch_size):
144
- batch = quals_list[i:i+batch_size]
145
- placeholders = ",".join([p] * len(batch))
146
- cursor.execute(f"SELECT id, qualified_name FROM cg_symbols WHERE qualified_name IN ({placeholders})", tuple(batch))
147
- for row in cursor.fetchall():
148
- qual_to_id[row[1]] = row[0]
149
-
150
- edge_records = []
151
- for e in edges:
152
- from_id = qual_to_id.get(e.from_qualified)
153
- to_id = qual_to_id.get(e.to_qualified)
154
- if from_id and to_id:
155
- edge_records.append((from_id, to_id, e.relation))
156
-
157
- if edge_records:
158
- cursor.executemany(f"""
159
- INSERT INTO cg_edges (from_id, to_id, relation)
160
- VALUES ({p}, {p}, {p})
161
- ON CONFLICT DO NOTHING
162
- """, edge_records)
@@ -1,77 +0,0 @@
1
- import os
2
- import hashlib
3
- import subprocess
4
- from .python import PythonIndexer
5
- from ..db.store import Store
6
-
7
- INDEXERS = [PythonIndexer()]
8
-
9
- def sha256(content: bytes) -> str:
10
- return hashlib.sha256(content).hexdigest()
11
-
12
- def get_files_to_index(root: str, exclude: list[str]) -> list[str]:
13
- # Use git ls-files if possible
14
- try:
15
- result = subprocess.run(
16
- ['git', 'ls-files'],
17
- cwd=root,
18
- capture_output=True,
19
- text=True,
20
- check=True
21
- )
22
- files = result.stdout.splitlines()
23
- # Make paths absolute
24
- files = [os.path.join(root, f) for f in files]
25
- except (subprocess.CalledProcessError, FileNotFoundError):
26
- # Fallback to os.walk
27
- files = []
28
- for dirpath, dirnames, filenames in os.walk(root):
29
- # rudimentary exclude
30
- dirnames[:] = [d for d in dirnames if d not in exclude and not d.startswith('.')]
31
- for f in filenames:
32
- files.append(os.path.join(dirpath, f))
33
-
34
- # Filter excludes
35
- if exclude:
36
- filtered = []
37
- for f in files:
38
- if not any(ex in f for ex in exclude):
39
- filtered.append(f)
40
- files = filtered
41
-
42
- return files
43
-
44
- def index_path(root: str, store: Store, plugins: list, exclude: list[str] = None):
45
- exclude = exclude or ['.git', 'node_modules', '__pycache__', '.venv']
46
- files = get_files_to_index(root, exclude)
47
-
48
- indexed_count = 0
49
- for path in files:
50
- indexer = next((i for i in INDEXERS if i.can_handle(path)), None)
51
- if not indexer:
52
- continue
53
-
54
- try:
55
- with open(path, 'rb') as f:
56
- content_bytes = f.read()
57
- except OSError:
58
- continue
59
-
60
- file_hash = sha256(content_bytes)
61
-
62
- # Check if unchanged
63
- if store.get_file_hash(path) == file_hash:
64
- continue
65
-
66
- content_str = content_bytes.decode('utf-8', errors='replace')
67
- symbols, edges = indexer.index_file(path, content_str)
68
-
69
- # Apply plugins
70
- for plugin in plugins:
71
- symbols = [plugin.on_symbol(s) for s in symbols]
72
- edges.extend(plugin.extra_edges(symbols))
73
-
74
- store.upsert_file(path, file_hash, symbols, edges)
75
- indexed_count += 1
76
-
77
- return indexed_count
File without changes
File without changes
File without changes