codegraphy 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codegraphy-0.1.0.dist-info/METADATA +310 -0
- codegraphy-0.1.0.dist-info/RECORD +21 -0
- codegraphy-0.1.0.dist-info/WHEEL +5 -0
- codegraphy-0.1.0.dist-info/entry_points.txt +2 -0
- codegraphy-0.1.0.dist-info/licenses/LICENSE +21 -0
- codegraphy-0.1.0.dist-info/top_level.txt +1 -0
- repolens/__init__.py +5 -0
- repolens/cli.py +141 -0
- repolens/config.py +13 -0
- repolens/db/__init__.py +3 -0
- repolens/db/schema.py +84 -0
- repolens/db/store.py +162 -0
- repolens/indexer/__init__.py +5 -0
- repolens/indexer/base.py +27 -0
- repolens/indexer/python.py +177 -0
- repolens/indexer/walker.py +77 -0
- repolens/mcp/__init__.py +3 -0
- repolens/mcp/server.py +306 -0
- repolens/plugins/__init__.py +3 -0
- repolens/plugins/base.py +10 -0
- repolens/plugins/django.py +24 -0
repolens/db/store.py
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import sqlite3
|
|
3
|
+
from urllib.parse import urlparse
|
|
4
|
+
from contextlib import contextmanager
|
|
5
|
+
|
|
6
|
+
from .schema import get_schema
|
|
7
|
+
|
|
8
|
+
try:
|
|
9
|
+
import psycopg2
|
|
10
|
+
from psycopg2.extras import Json
|
|
11
|
+
except ImportError:
|
|
12
|
+
psycopg2 = None
|
|
13
|
+
|
|
14
|
+
class Store:
    """Persist indexed files, symbols and edges in SQLite or PostgreSQL.

    The backend is picked from ``db_url``: a URL starting with ``postgres``
    selects psycopg2, anything else is treated as a ``sqlite:///path`` URL.
    Each public method opens its own connection through
    :meth:`get_connection` and closes it before returning.
    """

    # Qualified names resolved per ``IN (...)`` lookup. Kept well below 999,
    # the bound-variable limit cited for SQLite.
    _ID_LOOKUP_BATCH = 500

    def __init__(self, db_url: str):
        """Parse ``db_url`` and remember how to connect; nothing is opened yet.

        Args:
            db_url: ``postgres...://user:pass@host:port/dbname`` or
                ``sqlite:///path``. An empty SQLite path falls back to
                ``codegraphy.db``.

        Raises:
            ImportError: if a postgres URL is given but psycopg2 is missing.
        """
        self.db_url = db_url
        self.is_postgres = db_url.startswith("postgres")

        if self.is_postgres and psycopg2 is None:
            raise ImportError("psycopg2-binary is required for postgres support")

        if self.is_postgres:
            # Function-scope import: the file only imports ``urlparse``.
            from urllib.parse import unquote

            parsed = urlparse(db_url)
            # urlparse leaves every component percent-encoded, so a password
            # written as ``p%40ss`` must be decoded before psycopg2 sees it.
            encoded = {
                'dbname': parsed.path[1:],
                'user': parsed.username,
                'password': parsed.password,
                'host': parsed.hostname,
            }
            conn_kwargs = {
                key: unquote(value)
                for key, value in encoded.items()
                if value is not None
            }
            if parsed.port is not None:
                conn_kwargs['port'] = parsed.port
            self.conn_kwargs = conn_kwargs
        else:
            # Handle sqlite:///path
            self.db_path = db_url.replace("sqlite:///", "")
            if not self.db_path:
                self.db_path = "codegraphy.db"

    def _placeholder(self) -> str:
        """Return the bound-parameter marker of the active driver."""
        return "%s" if self.is_postgres else "?"

    @contextmanager
    def get_connection(self):
        """Yield a connection; commit on success, roll back on error, always close."""
        if self.is_postgres:
            conn = psycopg2.connect(**self.conn_kwargs)
        else:
            conn = sqlite3.connect(self.db_path)

        try:
            if not self.is_postgres:
                # Inside the try so the connection is closed even if this fails.
                conn.execute("PRAGMA foreign_keys = ON")
            yield conn
            conn.commit()
        except Exception:
            conn.rollback()
            raise
        finally:
            conn.close()

    def init_schema(self):
        """Create the tables by running the SQL returned by ``get_schema``."""
        schema_sql = get_schema(self.db_url)
        with self.get_connection() as conn:
            cursor = conn.cursor()
            if self.is_postgres:
                cursor.execute(schema_sql)
            else:
                # sqlite3 needs executescript to run several statements at once.
                cursor.executescript(schema_sql)

    def get_file_hash(self, file_path: str) -> "str | None":
        """Return the stored ``git_hash`` for ``file_path``, or ``None`` if unknown."""
        p = self._placeholder()
        with self.get_connection() as conn:
            cursor = conn.cursor()
            cursor.execute(
                f"SELECT git_hash FROM cg_files WHERE file_path = {p}",
                (file_path,),
            )
            row = cursor.fetchone()
        return row[0] if row else None

    def _fetch_symbol_ids(self, cursor, qualified_names) -> dict:
        """Map each qualified name that exists in ``cg_symbols`` to its id."""
        p = self._placeholder()
        names = list(qualified_names)
        found = {}
        # Query in batches so one statement never carries too many variables.
        for start in range(0, len(names), self._ID_LOOKUP_BATCH):
            batch = names[start:start + self._ID_LOOKUP_BATCH]
            marks = ",".join([p] * len(batch))
            cursor.execute(
                f"SELECT id, qualified_name FROM cg_symbols WHERE qualified_name IN ({marks})",
                tuple(batch),
            )
            for symbol_id, qualified_name in cursor.fetchall():
                found[qualified_name] = symbol_id
        return found

    def upsert_file(self, file_path: str, git_hash: str, symbols: list, edges: list):
        """Replace the stored symbols and edges of one file in a single transaction.

        Args:
            file_path: key of the file row in ``cg_files``.
            git_hash: content hash stored alongside the file.
            symbols: objects exposing the ``Symbol`` attributes
                (``name``, ``qualified_name``, ``kind``, ``file_path``,
                ``line_start``, ``line_end``, ``summary``, ``raw_signature``,
                ``extra``).
            edges: objects exposing ``from_qualified``, ``to_qualified`` and
                ``relation``. An edge is stored only when both endpoints
                resolve to an existing symbol id; otherwise it is dropped.
        """
        p = self._placeholder()
        now = "NOW()" if self.is_postgres else "CURRENT_TIMESTAMP"

        with self.get_connection() as conn:
            cursor = conn.cursor()

            # Upsert the file row.
            cursor.execute(f"""
                INSERT INTO cg_files (file_path, git_hash, symbol_count, last_indexed)
                VALUES ({p}, {p}, {p}, {now})
                ON CONFLICT (file_path) DO UPDATE
                SET git_hash = EXCLUDED.git_hash,
                    symbol_count = EXCLUDED.symbol_count,
                    last_indexed = {now}
            """, (file_path, git_hash, len(symbols)))

            # Drop the file's previous symbols. The original comment states
            # that edges are removed by cascade; the schema is not visible
            # here, so that relies on the foreign keys it defines.
            cursor.execute(f"DELETE FROM cg_symbols WHERE file_path = {p}", (file_path,))

            if symbols:
                symbol_records = [
                    (
                        s.name, s.qualified_name, s.kind, s.file_path,
                        s.line_start, s.line_end, s.summary, s.raw_signature,
                        # Postgres gets a JSON adapter, SQLite a JSON string.
                        Json(s.extra) if self.is_postgres else json.dumps(s.extra),
                    )
                    for s in symbols
                ]
                cursor.executemany(f"""
                    INSERT INTO cg_symbols (name, qualified_name, kind, file_path, line_start, line_end, summary, raw_signature, extra)
                    VALUES ({p}, {p}, {p}, {p}, {p}, {p}, {p}, {p}, {p})
                """, symbol_records)

            if edges:
                endpoints = set()
                for e in edges:
                    endpoints.add(e.from_qualified)
                    endpoints.add(e.to_qualified)
                qual_to_id = self._fetch_symbol_ids(cursor, endpoints)

                # Membership test rather than truthiness: an id of 0 is valid.
                edge_records = [
                    (qual_to_id[e.from_qualified], qual_to_id[e.to_qualified], e.relation)
                    for e in edges
                    if e.from_qualified in qual_to_id and e.to_qualified in qual_to_id
                ]

                if edge_records:
                    cursor.executemany(f"""
                        INSERT INTO cg_edges (from_id, to_id, relation)
                        VALUES ({p}, {p}, {p})
                        ON CONFLICT DO NOTHING
                    """, edge_records)
|
repolens/indexer/base.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
from dataclasses import dataclass, field
|
|
2
|
+
|
|
3
|
+
@dataclass
class Symbol:
    """A named code entity recorded by an indexer (class, function, import, file...)."""

    # Short name as written in the source.
    name: str
    # Dotted name used as the lookup key when edges are resolved to ids.
    qualified_name: str
    # Category label chosen by the indexer, e.g. "class", "method", "function",
    # "import" or "file".
    kind: str
    # Path of the file the symbol was found in.
    file_path: str
    # First and last source line of the definition.
    line_start: int
    line_end: int
    # Short description; empty when none is available.
    summary: str = ""
    # Textual signature; empty when the indexer does not supply one.
    raw_signature: str = ""
    # Free-form metadata; a fresh dict per instance via default_factory.
    extra: dict = field(default_factory=dict)
|
|
14
|
+
|
|
15
|
+
@dataclass
class Edge:
    """A directed relation between two symbols, identified by qualified name."""

    # Qualified name of the symbol the relation starts from.
    from_qualified: str
    # Qualified name of the symbol the relation points to.
    to_qualified: str
    # Relation label, e.g. "inherits", "imports" or "calls".
    relation: str
|
|
20
|
+
|
|
21
|
+
class BaseIndexer:
    """Interface for language indexers; subclasses override both methods."""

    def can_handle(self, file_path: str) -> bool:
        """Return True when this indexer knows how to process ``file_path``."""
        raise NotImplementedError

    def index_file(self, file_path: str, source: str) -> tuple[list[Symbol], list[Edge]]:
        """Return the symbols and edges found in the file.

        ``source`` is the already-decoded text of ``file_path``.
        """
        raise NotImplementedError
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
import ast
|
|
2
|
+
import os
|
|
3
|
+
from .base import BaseIndexer, Symbol, Edge
|
|
4
|
+
|
|
5
|
+
def _get_module_path(file_path: str) -> str:
|
|
6
|
+
# Basic conversion from file path to dotted module path
|
|
7
|
+
# e.g., src/foo/bar.py -> src.foo.bar
|
|
8
|
+
# This is a simple approximation.
|
|
9
|
+
base = os.path.splitext(file_path)[0]
|
|
10
|
+
return base.replace(os.sep, '.')
|
|
11
|
+
|
|
12
|
+
class _SymbolVisitor(ast.NodeVisitor):
    """Walk one module's AST and record the symbols and edges it declares."""

    def __init__(self, file_path, module_path):
        self.file_path = file_path
        self.module_path = module_path
        self.symbols = []
        self.edges = []
        # Names of the enclosing classes/functions, outermost first.
        self.current_scope = []
        # Name of the class whose body is being visited directly, else None.
        self.current_class = None

    def _dotted_name(self, node):
        """Return ``a.b.c`` for a Name/Attribute chain, or None for anything else."""
        if isinstance(node, ast.Name):
            return node.id
        if isinstance(node, ast.Attribute):
            prefix = self._dotted_name(node.value)
            if prefix:
                return f"{prefix}.{node.attr}"
        return None

    def _qualify(self, name=""):
        """Join module path, current scope and ``name``, skipping empty parts."""
        parts = [self.module_path] if self.module_path else []
        parts.extend(self.current_scope)
        if name:
            parts.append(name)
        return ".".join(parts)

    @staticmethod
    def _summary(node):
        """Return the first line of the node's docstring, or an empty string."""
        lines = (ast.get_docstring(node) or "").splitlines()
        return lines[0] if lines else ""

    def _add_symbol(self, node, name, kind, summary="", extra=None):
        """Record a symbol located at ``node`` and return its qualified name."""
        qualname = self._qualify(name)
        self.symbols.append(Symbol(
            name=name,
            qualified_name=qualname,
            kind=kind,
            file_path=self.file_path,
            line_start=node.lineno,
            line_end=node.end_lineno,
            summary=summary,
            extra={} if extra is None else extra,
        ))
        return qualname

    def _add_edge(self, source, target, relation):
        self.edges.append(Edge(
            from_qualified=source,
            to_qualified=target,
            relation=relation,
        ))

    def visit_ClassDef(self, node):
        qualname = self._add_symbol(node, node.name, "class", self._summary(node))

        # Base names are recorded as written; they are not resolved to modules.
        for base in node.bases:
            target = self._dotted_name(base)
            if target:
                self._add_edge(qualname, target, "inherits")

        enclosing_class = self.current_class
        self.current_scope.append(node.name)
        self.current_class = node.name
        self.generic_visit(node)
        self.current_class = enclosing_class
        self.current_scope.pop()

    def visit_FunctionDef(self, node):
        self._visit_func(node)

    def visit_AsyncFunctionDef(self, node):
        self._visit_func(node)

    def _visit_func(self, node):
        kind = "method" if self.current_class else "function"
        self._add_symbol(node, node.name, kind, self._summary(node))

        # Definitions nested in this function's body are not members of the
        # enclosing class, so clear the class marker while visiting them.
        enclosing_class = self.current_class
        self.current_scope.append(node.name)
        self.current_class = None
        self.generic_visit(node)
        self.current_class = enclosing_class
        self.current_scope.pop()

    def visit_Import(self, node):
        for alias in node.names:
            local_name = alias.asname or alias.name
            self._add_symbol(node, local_name, "import", extra={"module": alias.name})
            self._add_edge(self.module_path, alias.name, "imports")

    def visit_ImportFrom(self, node):
        # ``from . import x`` has no module name and is skipped. The relative
        # level of ``from .pkg import x`` is ignored, so targets are approximate.
        if not node.module:
            return
        for alias in node.names:
            self._add_symbol(
                node,
                alias.asname or alias.name,
                "import",
                extra={"module": node.module, "original_name": alias.name},
            )
            self._add_edge(self.module_path, f"{node.module}.{alias.name}", "imports")

    def visit_Call(self, node):
        # Only calls made inside a class or function are recorded, and only
        # when the callee is a plain dotted name; the target is unresolved.
        target = self._dotted_name(node.func)
        if target and self.current_scope:
            self._add_edge(self._qualify(), target, "calls")
        self.generic_visit(node)


class PythonIndexer(BaseIndexer):
    """Indexer that extracts symbols and edges from Python source via ``ast``."""

    def can_handle(self, file_path: str) -> bool:
        """Return True for paths ending in ``.py``."""
        return file_path.endswith('.py')

    def index_file(self, file_path: str, source: str) -> tuple[list[Symbol], list[Edge]]:
        """Parse ``source`` and return the symbols and edges it declares.

        Classes, functions/methods and imports each become a symbol, followed
        by one ``file`` symbol for the module itself. Edges cover inheritance,
        imports and calls made inside a scope. Source that cannot be parsed
        yields two empty lists.
        """
        try:
            tree = ast.parse(source, filename=file_path)
        except (SyntaxError, ValueError):
            # ValueError: older interpreters reject NUL bytes this way.
            return [], []

        module_path = _get_module_path(file_path)
        visitor = _SymbolVisitor(file_path, module_path)
        visitor.visit(tree)
        symbols, edges = visitor.symbols, visitor.edges

        # Add a symbol for the file itself.
        symbols.append(Symbol(
            name=os.path.basename(file_path),
            qualified_name=module_path,
            kind="file",
            file_path=file_path,
            line_start=1,
            line_end=len(source.splitlines()),
            summary="",
        ))

        return symbols, edges
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import hashlib
|
|
3
|
+
import subprocess
|
|
4
|
+
from .python import PythonIndexer
|
|
5
|
+
from ..db.store import Store
|
|
6
|
+
|
|
7
|
+
# Registered indexers; index_path picks the first one whose can_handle()
# accepts a given path.
INDEXERS = [PythonIndexer()]
|
|
8
|
+
|
|
9
|
+
def sha256(content: bytes) -> str:
    """Return the hexadecimal SHA-256 digest of ``content``."""
    hasher = hashlib.sha256()
    hasher.update(content)
    return hasher.hexdigest()
|
|
11
|
+
|
|
12
|
+
def _walk_files(root: str, exclude: list[str]) -> list[str]:
    """List files under ``root`` with os.walk, pruning excluded and dot directories."""
    found = []
    for dirpath, dirnames, filenames in os.walk(root):
        # Assigning in place tells os.walk not to descend into pruned dirs.
        dirnames[:] = [d for d in dirnames if d not in exclude and not d.startswith('.')]
        found.extend(os.path.join(dirpath, f) for f in filenames)
    return found


def get_files_to_index(root: str, exclude: list[str]) -> list[str]:
    """Return candidate file paths under ``root``, each prefixed with ``root``.

    Files come from ``git ls-files`` when ``root`` is inside a git work tree.
    If git is missing, fails, or cannot be started in ``root``, the directory
    is walked instead. A file is then dropped when any ``exclude`` entry is a
    substring of its path relative to ``root``.
    """
    try:
        result = subprocess.run(
            # -z gives NUL-separated, unquoted names; without it git C-quotes
            # unusual filenames and the quoted form cannot be opened.
            ['git', 'ls-files', '-z'],
            cwd=root,
            capture_output=True,
            text=True,
            check=True,
        )
    except (subprocess.CalledProcessError, OSError):
        # OSError covers a missing git binary as well as an unusable ``root``.
        files = _walk_files(root, exclude)
    else:
        files = [os.path.join(root, rel) for rel in result.stdout.split('\0') if rel]

    if exclude:
        # Match against the path relative to root, so the name of the root
        # directory itself can never exclude every file.
        files = [
            f for f in files
            if not any(ex in os.path.relpath(f, root) for ex in exclude)
        ]

    return files
|
|
43
|
+
|
|
44
|
+
def index_path(root: str, store: Store, plugins: list, exclude: list[str] = None):
    """Index every changed, supported file under ``root`` and return how many were stored.

    A file is skipped when no registered indexer handles it, when it cannot be
    read, or when its SHA-256 matches the hash already held by ``store``.
    Each plugin first rewrites the symbols and then contributes extra edges
    before the result is written with ``store.upsert_file``.
    """
    if not exclude:
        exclude = ['.git', 'node_modules', '__pycache__', '.venv']

    indexed_count = 0
    for path in get_files_to_index(root, exclude):
        # First registered indexer that accepts the path wins.
        handler = None
        for candidate in INDEXERS:
            if candidate.can_handle(path):
                handler = candidate
                break
        if not handler:
            continue

        try:
            with open(path, 'rb') as fh:
                raw = fh.read()
        except OSError:
            continue

        digest = sha256(raw)
        if store.get_file_hash(path) == digest:
            # Unchanged since the last run.
            continue

        text = raw.decode('utf-8', errors='replace')
        symbols, edges = handler.index_file(path, text)

        for plugin in plugins:
            symbols = list(map(plugin.on_symbol, symbols))
            edges.extend(plugin.extra_edges(symbols))

        store.upsert_file(path, digest, symbols, edges)
        indexed_count += 1

    return indexed_count
|
repolens/mcp/__init__.py
ADDED