codegraphy 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codegraphy-0.1.0.dist-info/METADATA +310 -0
- codegraphy-0.1.0.dist-info/RECORD +21 -0
- codegraphy-0.1.0.dist-info/WHEEL +5 -0
- codegraphy-0.1.0.dist-info/entry_points.txt +2 -0
- codegraphy-0.1.0.dist-info/licenses/LICENSE +21 -0
- codegraphy-0.1.0.dist-info/top_level.txt +1 -0
- repolens/__init__.py +5 -0
- repolens/cli.py +141 -0
- repolens/config.py +13 -0
- repolens/db/__init__.py +3 -0
- repolens/db/schema.py +84 -0
- repolens/db/store.py +162 -0
- repolens/indexer/__init__.py +5 -0
- repolens/indexer/base.py +27 -0
- repolens/indexer/python.py +177 -0
- repolens/indexer/walker.py +77 -0
- repolens/mcp/__init__.py +3 -0
- repolens/mcp/server.py +306 -0
- repolens/plugins/__init__.py +3 -0
- repolens/plugins/base.py +10 -0
- repolens/plugins/django.py +24 -0
|
@@ -0,0 +1,310 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: codegraphy
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: SQLite/PostgreSQL codebase knowledge graph and MCP server for Claude Code
|
|
5
|
+
Author: Charan Kulal
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Keywords: mcp,code-analysis,knowledge-graph,claude-code,sqlite,postgresql
|
|
8
|
+
Classifier: Development Status :: 3 - Alpha
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
15
|
+
Classifier: Topic :: Software Development :: Documentation
|
|
16
|
+
Requires-Python: >=3.10
|
|
17
|
+
Description-Content-Type: text/markdown
|
|
18
|
+
License-File: LICENSE
|
|
19
|
+
Requires-Dist: click>=8.0
|
|
20
|
+
Requires-Dist: mcp>=1.0
|
|
21
|
+
Provides-Extra: postgres
|
|
22
|
+
Requires-Dist: psycopg2-binary; extra == "postgres"
|
|
23
|
+
Provides-Extra: pgvector
|
|
24
|
+
Requires-Dist: pgvector; extra == "pgvector"
|
|
25
|
+
Provides-Extra: js
|
|
26
|
+
Requires-Dist: tree-sitter; extra == "js"
|
|
27
|
+
Requires-Dist: tree-sitter-javascript; extra == "js"
|
|
28
|
+
Requires-Dist: tree-sitter-typescript; extra == "js"
|
|
29
|
+
Provides-Extra: html
|
|
30
|
+
Requires-Dist: tree-sitter; extra == "html"
|
|
31
|
+
Requires-Dist: tree-sitter-html; extra == "html"
|
|
32
|
+
Provides-Extra: all
|
|
33
|
+
Requires-Dist: psycopg2-binary; extra == "all"
|
|
34
|
+
Requires-Dist: pgvector; extra == "all"
|
|
35
|
+
Requires-Dist: tree-sitter; extra == "all"
|
|
36
|
+
Requires-Dist: tree-sitter-javascript; extra == "all"
|
|
37
|
+
Requires-Dist: tree-sitter-typescript; extra == "all"
|
|
38
|
+
Requires-Dist: tree-sitter-html; extra == "all"
|
|
39
|
+
Dynamic: license-file
|
|
40
|
+
|
|
41
|
+
# codegraphy
|
|
42
|
+
|
|
43
|
+
Standalone Python package that parses a codebase into a knowledge graph (PostgreSQL or SQLite) and exposes it as an [MCP](https://modelcontextprotocol.io/) server for Claude Code. Claude calls graph tools instead of `Read` + `Bash(grep)` — cuts exploration token cost by 5–10×.
|
|
44
|
+
|
|
45
|
+
**Python:** 3.10+
|
|
46
|
+
**License:** MIT
|
|
47
|
+
|
|
48
|
+
---
|
|
49
|
+
|
|
50
|
+
## Why
|
|
51
|
+
|
|
52
|
+
Claude exploring an unfamiliar codebase today:
|
|
53
|
+
|
|
54
|
+
| Task | Without codegraphy | With codegraphy |
|
|
55
|
+
|------|-------------------|----------------|
|
|
56
|
+
| Find where `Something` is defined | Read 10 files (~15k tokens) | `search_symbol("Something")` (~200 tokens) |
|
|
57
|
+
| Understand a file's structure | Read full file (~3k tokens) | `get_file_summary("views.py")` (~300 tokens) |
|
|
58
|
+
|
|
59
|
+
---
|
|
60
|
+
|
|
61
|
+
## Installation
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
# SQLite-only install (default, zero config):
|
|
65
|
+
pip install codegraphy
|
|
66
|
+
|
|
67
|
+
# For PostgreSQL support:
|
|
68
|
+
pip install codegraphy[postgres]
|
|
69
|
+
|
|
70
|
+
# For JS/TS parsing (planned):
|
|
71
|
+
pip install codegraphy[js]
|
|
72
|
+
|
|
73
|
+
# Everything:
|
|
74
|
+
pip install codegraphy[all]
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
The base PyPI package needs only Python's standard-library `sqlite3` module for SQLite storage; PostgreSQL support is an opt-in extra.
|
|
78
|
+
|
|
79
|
+
---
|
|
80
|
+
|
|
81
|
+
## Quickstart
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
# 1. Initialize the database (SQLite by default)
|
|
85
|
+
codegraphy init
|
|
86
|
+
|
|
87
|
+
# 2. Index your project
|
|
88
|
+
codegraphy index .
|
|
89
|
+
|
|
90
|
+
# 3. Start the MCP server (stdio, for Claude Code)
|
|
91
|
+
codegraphy serve
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
That's it. Claude can now query your codebase graph instead of reading files.
|
|
95
|
+
|
|
96
|
+
---
|
|
97
|
+
|
|
98
|
+
## CLI Reference
|
|
99
|
+
|
|
100
|
+
```bash
|
|
101
|
+
codegraphy init [--db URL] # Create tables (SQLite default, or pass Postgres URL)
|
|
102
|
+
codegraphy index [PATH] [--exclude DIRS] # Full index of a directory (PATH defaults to "."; DIRS is a comma-separated list)
|
|
103
|
+
codegraphy update # Incremental re-index via git diff
|
|
104
|
+
codegraphy serve # Start MCP server over stdio
|
|
105
|
+
codegraphy search NAME # Search symbols (debug, not MCP)
|
|
106
|
+
codegraphy usages QUALIFIED_NAME # Find usages (debug, not MCP)
|
|
107
|
+
codegraphy stats # Show graph statistics
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
---
|
|
111
|
+
|
|
112
|
+
## MCP Tools
|
|
113
|
+
|
|
114
|
+
When running as an MCP server, codegraphy exposes these tools to Claude:
|
|
115
|
+
|
|
116
|
+
| Tool | Description |
|
|
117
|
+
|------|-------------|
|
|
118
|
+
| `search_symbol(name, kind?, limit?, fallback_grep?)` | Find symbols by name — exact, then substring, then grep fallback |
|
|
119
|
+
| `get_file_summary(file_path)` | Classes, functions, imports in a file without reading it |
|
|
120
|
+
| `find_usages(qualified_name, limit?, fallback_grep?)` | Who imports/calls/references this symbol |
|
|
121
|
+
| `get_context(file_path, line, radius?)` | Read N lines around a line number |
|
|
122
|
+
| `path_between(from_qualified, to_qualified, max_depth?)` | BFS shortest path between two symbols |
|
|
123
|
+
| `grep_search(pattern, include?, exclude?, limit?)` | Direct grep — bypass the graph |
|
|
124
|
+
| `graph_stats()` | File/symbol/edge counts, backend type |
|
|
125
|
+
| `what_touches_model(model_name)` | Django: views, admin, signals referencing a model |
|
|
126
|
+
| `search_semantic(query, limit?)` | pgvector semantic search (Postgres only, planned) |
|
|
127
|
+
|
|
128
|
+
All tools return a `source` field (`"graph"` or `"grep"`) so Claude can gauge confidence.
|
|
129
|
+
|
|
130
|
+
---
|
|
131
|
+
|
|
132
|
+
## Configuration
|
|
133
|
+
|
|
134
|
+
Priority: CLI flag → environment variable → `codegraphy.toml` → defaults.
|
|
135
|
+
|
|
136
|
+
### Environment Variables
|
|
137
|
+
|
|
138
|
+
```bash
|
|
139
|
+
DATABASE_URL=sqlite:///codegraphy.db # or postgresql://localhost/codegraphy
|
|
140
|
+
REPOLENS_ROOT=. # project root for grep fallback
|
|
141
|
+
REPOLENS_PLUGINS=repolens.plugins.django
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
### Config File (optional)
|
|
145
|
+
|
|
146
|
+
```toml
|
|
147
|
+
# codegraphy.toml (place at project root)
|
|
148
|
+
database_url = "postgresql://localhost/codegraphy"
|
|
149
|
+
root = "."
|
|
150
|
+
exclude = ["migrations", "node_modules", ".venv", "__pycache__"]
|
|
151
|
+
plugins = ["repolens.plugins.django"]
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
---
|
|
155
|
+
|
|
156
|
+
## Claude Code Integration
|
|
157
|
+
|
|
158
|
+
### Register the MCP server
|
|
159
|
+
|
|
160
|
+
```json
|
|
161
|
+
// .claude/settings.json
|
|
162
|
+
{
|
|
163
|
+
"mcpServers": {
|
|
164
|
+
"codegraphy": {
|
|
165
|
+
"command": "codegraphy",
|
|
166
|
+
"args": ["serve"],
|
|
167
|
+
"env": {
|
|
168
|
+
"DATABASE_URL": "sqlite:///codegraphy.db"
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
}
|
|
172
|
+
}
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
### Auto-update on session end (optional)
|
|
176
|
+
|
|
177
|
+
```json
|
|
178
|
+
// .claude/settings.json
|
|
179
|
+
{
|
|
180
|
+
"hooks": {
|
|
181
|
+
"Stop": [{
|
|
182
|
+
"type": "command",
|
|
183
|
+
"command": "codegraphy update"
|
|
184
|
+
}]
|
|
185
|
+
}
|
|
186
|
+
}
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
---
|
|
190
|
+
|
|
191
|
+
## Architecture
|
|
192
|
+
|
|
193
|
+
```
|
|
194
|
+
repolens/
|
|
195
|
+
├── cli.py # Click CLI entry points
|
|
196
|
+
├── config.py # DATABASE_URL, REPOLENS_ROOT, plugin list
|
|
197
|
+
├── db/
|
|
198
|
+
│ ├── schema.py # CREATE TABLE statements (PG + SQLite)
|
|
199
|
+
│ └── store.py # upsert_symbol, upsert_edge, query helpers
|
|
200
|
+
├── indexer/
|
|
201
|
+
│ ├── base.py # BaseIndexer ABC, Symbol/Edge dataclasses
|
|
202
|
+
│ ├── python.py # ast-based Python indexer
|
|
203
|
+
│ └── walker.py # Filesystem walk + git-diff incremental
|
|
204
|
+
├── mcp/
|
|
205
|
+
│ └── server.py # FastMCP server + all tool definitions
|
|
206
|
+
├── plugins/
|
|
207
|
+
│ ├── base.py # BasePlugin ABC
|
|
208
|
+
│ └── django.py # Django-aware: models, views, signals
|
|
209
|
+
└── session/ # (planned) git-diff hook + memory write
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
### Database Schema
|
|
213
|
+
|
|
214
|
+
Three tables power the graph:
|
|
215
|
+
|
|
216
|
+
- **`cg_files`** — indexed files, each with a content hash (stored in the `git_hash` column) used to skip unchanged files on re-index
|
|
217
|
+
- **`cg_symbols`** — every class, function, method, import with location + summary
|
|
218
|
+
- **`cg_edges`** — relationships: `imports`, `calls`, `inherits`, `references`, `registers`, `handles_signal`
|
|
219
|
+
|
|
220
|
+
### Indexing Strategy
|
|
221
|
+
|
|
222
|
+
1. Walk files via `git ls-files` (falls back to `os.walk`)
|
|
223
|
+
2. SHA-256 content hash skips unchanged files
|
|
224
|
+
3. AST parsing extracts symbols and edges
|
|
225
|
+
4. Plugins post-process symbols (e.g., Django re-tags `class` → `model`)
|
|
226
|
+
5. Upsert into database with cascade delete for clean re-indexing
|
|
227
|
+
|
|
228
|
+
---
|
|
229
|
+
|
|
230
|
+
## Plugin System
|
|
231
|
+
|
|
232
|
+
Plugins implement two hooks:
|
|
233
|
+
|
|
234
|
+
```python
|
|
235
|
+
class BasePlugin:
|
|
236
|
+
def on_symbol(self, symbol: Symbol) -> Symbol:
|
|
237
|
+
"""Mutate or re-tag a symbol after parsing."""
|
|
238
|
+
return symbol
|
|
239
|
+
|
|
240
|
+
def extra_edges(self, symbols: list[Symbol]) -> list[Edge]:
|
|
241
|
+
"""Derive additional edges from the symbol list."""
|
|
242
|
+
return []
|
|
243
|
+
```
|
|
244
|
+
|
|
245
|
+
### Built-in: Django Plugin
|
|
246
|
+
|
|
247
|
+
Detects Django patterns by file naming convention:
|
|
248
|
+
- Classes in `models.py` → `kind = "model"`
|
|
249
|
+
- Classes/functions in `views.py` → `kind = "view"`
|
|
250
|
+
|
|
251
|
+
Enable via environment variable:
|
|
252
|
+
```bash
|
|
253
|
+
REPOLENS_PLUGINS=repolens.plugins.django
|
|
254
|
+
```
|
|
255
|
+
|
|
256
|
+
---
|
|
257
|
+
|
|
258
|
+
## Current Status
|
|
259
|
+
|
|
260
|
+
| Milestone | Status |
|
|
261
|
+
|-----------|--------|
|
|
262
|
+
| M1 — Schema + Python indexer + `codegraphy index` | ✅ Complete |
|
|
263
|
+
| M2 — `search_symbol` + `get_file_summary` + MCP serve | ✅ Complete |
|
|
264
|
+
| M3 — `find_usages` + `path_between` + `get_context` + grep fallback | ✅ Complete |
|
|
265
|
+
| M4 — `codegraphy update` (incremental) | ✅ Complete |
|
|
266
|
+
| M5 — Django plugin | 🔶 Partial (symbol re-tagging, no admin/signal edges) |
|
|
267
|
+
| M6 — Semantic search (pgvector) | ⬜ Stub only |
|
|
268
|
+
| M7 — JS/TS indexer (tree-sitter) | ⬜ Planned |
|
|
269
|
+
| M8 — HTML/Template indexer | ⬜ Planned |
|
|
270
|
+
| M9 — `grep_search` tool + cross-language edges | 🔶 grep_search done, cross-lang edges planned |
|
|
271
|
+
|
|
272
|
+
---
|
|
273
|
+
|
|
274
|
+
## Development
|
|
275
|
+
|
|
276
|
+
```bash
|
|
277
|
+
# Clone and install in editable mode
|
|
278
|
+
git clone <repo-url> && cd codegraphy
|
|
279
|
+
python -m venv .venv && source .venv/bin/activate
|
|
280
|
+
pip install -e .
|
|
281
|
+
|
|
282
|
+
# Initialize local DB and index this project
|
|
283
|
+
codegraphy init
|
|
284
|
+
codegraphy index .
|
|
285
|
+
|
|
286
|
+
# Check stats
|
|
287
|
+
codegraphy stats
|
|
288
|
+
```
|
|
289
|
+
|
|
290
|
+
## Publishing
|
|
291
|
+
|
|
292
|
+
`codegraphy` is configured to build as a standard PyPI distribution from `pyproject.toml`.
|
|
293
|
+
|
|
294
|
+
For PyPI trusted publishing, use **`publish.yml`** as the workflow name. The workflow file lives at `.github/workflows/publish.yml`.
|
|
295
|
+
|
|
296
|
+
```bash
|
|
297
|
+
python -m pip install --upgrade build twine
|
|
298
|
+
python -m build
|
|
299
|
+
python -m twine check dist/*
|
|
300
|
+
python -m twine upload dist/*
|
|
301
|
+
```
|
|
302
|
+
|
|
303
|
+
---
|
|
304
|
+
|
|
305
|
+
## What It Is NOT
|
|
306
|
+
|
|
307
|
+
- Not a code execution sandbox
|
|
308
|
+
- Not a test runner or linter
|
|
309
|
+
- Not a replacement for LSP/IDE features
|
|
310
|
+
- Not AI-generated summaries by default (summaries come from docstrings; AI-generated summaries are a planned opt-in feature)
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
codegraphy-0.1.0.dist-info/licenses/LICENSE,sha256=Rp72yjCco7pl_tUJuWcoVaQJbavF_o1EduIzGlWgMBs,1069
|
|
2
|
+
repolens/__init__.py,sha256=oiUP7r1EzCXbTJlVA0uaqMFzlgb485PpzsgPSxc3tHU,133
|
|
3
|
+
repolens/cli.py,sha256=YHklhHZL3c48jBRVGCMiILOLXHNz-tqA9LPF-d_hR3Y,4764
|
|
4
|
+
repolens/config.py,sha256=FsBleokRpJLo6uY9Hxep8ba9Wona9SBwERbtQOfPN7Y,445
|
|
5
|
+
repolens/db/__init__.py,sha256=N0we97ndqage4suzlmrky148cqc077smh9yRW7fRmtk,46
|
|
6
|
+
repolens/db/schema.py,sha256=b0LXLgDe7pAIilraeHf8RwlSHCvXAm_zJyfWLDGLuB8,3073
|
|
7
|
+
repolens/db/store.py,sha256=K60Q4GaFDMD2UrME8h5UCEbSz7JyNTtjhohPTF7K034,6911
|
|
8
|
+
repolens/indexer/__init__.py,sha256=323iYhXcMZCFSi9BbD9eFIrvKn31hgxvx8oLt5fOawQ,185
|
|
9
|
+
repolens/indexer/base.py,sha256=Jhe_1EROe1qn2BKwiWI90HFT76fQC3fqKsuZoTXWoqc,648
|
|
10
|
+
repolens/indexer/python.py,sha256=0IbfCAxQYPpArNocdEOZRVq_PVwvGKznV6efTI0uPvU,6764
|
|
11
|
+
repolens/indexer/walker.py,sha256=Uexo2C6WUIh9F5N3hZK1f9LHIqRgAn_D4BDEM0j3AG4,2397
|
|
12
|
+
repolens/mcp/__init__.py,sha256=uJZf2o3FWA4zVaWMAOdQNMl2JF23TD2dfCrD9Rw2Syg,61
|
|
13
|
+
repolens/mcp/server.py,sha256=fdDRMSlKjlHKylijHMu5rL4YOg0sK2bCF-G3920WyGw,10534
|
|
14
|
+
repolens/plugins/__init__.py,sha256=ex5qITE4l8sI_bPiywyXw_THj1MylMagOgXK0_0w9Sw,55
|
|
15
|
+
repolens/plugins/base.py,sha256=rGZ3XRxTFSZBZ_Wad-b2EFtIjotA-TOHHAAaabvIYTY,374
|
|
16
|
+
repolens/plugins/django.py,sha256=6ZNacSklsL5S4oWPzlaM1bA295Wpo-Vry9tUUddab0g,996
|
|
17
|
+
codegraphy-0.1.0.dist-info/METADATA,sha256=0o_E0X9rU0pxr7AGlVh4nmMUkYDl_uuWScvDQAE0OTM,9266
|
|
18
|
+
codegraphy-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
19
|
+
codegraphy-0.1.0.dist-info/entry_points.txt,sha256=3lQGELX3dsARcq4DzrX1wskfIj86KCp7bXL8Qv-D8EA,48
|
|
20
|
+
codegraphy-0.1.0.dist-info/top_level.txt,sha256=7EK5ft9AlN9kOeOjzFv_PgnLxREn2EGvgu5jZdcL9CA,9
|
|
21
|
+
codegraphy-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Charan Kulal
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
repolens
|
repolens/__init__.py
ADDED
repolens/cli.py
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
import click
|
|
2
|
+
from .config import load_config
|
|
3
|
+
|
|
4
|
+
@click.group()
def cli():
    """codegraphy: Codebase knowledge graph & MCP server."""
    # NOTE: the docstring above is what Click prints for `--help`; treat it as
    # user-facing text, not as developer documentation.
    # This is the group callback, which Click invokes before the selected
    # subcommand, so configuration is loaded ahead of every command.
    load_config()
|
|
8
|
+
|
|
9
|
+
def _redact_db_url(db_url):
    """Return *db_url* with any password replaced by ``***`` for display.

    URLs without a password (e.g. ``sqlite:///codegraphy.db``) are returned
    unchanged. Only the echoed text is affected; callers keep using the
    original URL to connect.
    """
    from urllib.parse import urlsplit, urlunsplit

    try:
        parts = urlsplit(db_url)
    except ValueError:
        # Unparseable URL: show it as-is rather than fail a status message.
        return db_url
    if not parts.password:
        return db_url

    # Work on the raw netloc so host/port are reproduced exactly as typed.
    userinfo, _, host_part = parts.netloc.rpartition("@")
    user = userinfo.partition(":")[0]
    return urlunsplit(parts._replace(netloc=f"{user}:***@{host_part}"))


@cli.command()
@click.option('--db', help='Database URL (e.g. postgresql://localhost/codegraphy)')
def init(db):
    """Initialize the database schema."""
    # NOTE: the docstring above is the `--help` text shown by Click.
    import repolens.config as config
    from repolens.db.store import Store

    # An explicit --db flag wins over the configured DATABASE_URL.
    db_url = db or config.DATABASE_URL

    # Never print credentials: PostgreSQL URLs may embed a password.
    click.echo(f"Initializing schema for {_redact_db_url(db_url)}...")
    store = Store(db_url)
    store.init_schema()
    click.echo("Schema initialized.")
|
|
21
|
+
|
|
22
|
+
@cli.command()
@click.argument('path', default='.')
@click.option('--exclude', help='Comma-separated list of directories to exclude')
def index(path, exclude):
    """Index a directory into the graph."""
    # NOTE: the docstring above is the `--help` text shown by Click.
    import repolens.config as config
    from repolens.db.store import Store
    from repolens.indexer.walker import index_path

    click.echo(f"Indexing {path}...")
    store = Store(config.DATABASE_URL)

    # Tolerate "a, b," style input: trim whitespace and drop empty entries so
    # the walker never receives names like " b" or "". An option that yields
    # no usable names is treated the same as not passing --exclude at all.
    exclude_list = None
    if exclude:
        exclude_list = [name.strip() for name in exclude.split(',') if name.strip()] or None

    # Load plugins
    plugins = []  # TODO: instantiate from config.REPOLENS_PLUGINS

    count = index_path(path, store, plugins, exclude_list)
    click.echo(f"Indexed {count} files.")
|
|
40
|
+
|
|
41
|
+
@cli.command()
def update():
    """Update index incrementally based on git diff."""
    # NOTE: the docstring above is the `--help` text shown by Click.
    import os
    import subprocess
    import repolens.config as config
    from repolens.db.store import Store
    from repolens.indexer.walker import INDEXERS, sha256

    click.echo("Updating index...")

    # --relative: report paths relative to the current directory (git's default
    #             is relative to the repository root, which breaks abspath()
    #             below when the command is run from a subdirectory).
    # -z:         NUL-separated, unquoted names, so paths with spaces or
    #             non-ASCII characters come through verbatim.
    # NOTE(review): `git diff HEAD` does not list untracked files, so brand-new
    # files are only picked up after they are added to git - confirm intended.
    try:
        res = subprocess.run(
            ['git', 'diff', '--name-only', '--relative', '-z', 'HEAD'],
            capture_output=True,
            text=True,
        )
    except OSError:
        # git executable missing or not runnable.
        click.echo("Not a git repository or no HEAD.")
        return
    if res.returncode != 0:
        # subprocess.run() does not raise on a failing command; check explicitly.
        click.echo("Not a git repository or no HEAD.")
        return

    changed_files = [name for name in res.stdout.split('\0') if name]

    store = Store(config.DATABASE_URL)
    plugins = []  # TODO

    count = 0
    for file_path in changed_files:
        path = os.path.abspath(file_path)
        if not os.path.exists(path):
            # Deleted in the working tree; nothing to parse.
            # NOTE(review): its rows presumably stay in the graph - verify
            # whether the store should drop them here.
            continue

        indexer = next((i for i in INDEXERS if i.can_handle(path)), None)
        if not indexer:
            continue

        try:
            with open(path, 'rb') as f:
                content_bytes = f.read()
            file_hash = sha256(content_bytes)
            if store.get_file_hash(path) == file_hash:
                # Content unchanged since the last index run.
                continue

            content_str = content_bytes.decode('utf-8', errors='replace')
            symbols, edges = indexer.index_file(path, content_str)

            for plugin in plugins:
                symbols = [plugin.on_symbol(s) for s in symbols]
                edges.extend(plugin.extra_edges(symbols))

            store.upsert_file(path, file_hash, symbols, edges)
            count += 1
        except Exception as exc:
            # Best effort: one unreadable/unparseable file must not abort the
            # whole update, but the failure is reported instead of hidden.
            click.echo(f"Skipping {path}: {exc}", err=True)

    click.echo(f"Updated {count} files.")
|
|
96
|
+
|
|
97
|
+
@cli.command()
def serve():
    """Start the MCP server over stdio."""
    # NOTE: the docstring above is the `--help` text shown by Click.
    import repolens.mcp.server as mcp_server

    # The banner is written to stderr (err=True); presumably stdout is reserved
    # for the stdio MCP transport - confirm against the server implementation.
    click.echo("Starting MCP server...", err=True)
    mcp_server.start_server()
|
|
103
|
+
|
|
104
|
+
@cli.command()
@click.argument('name')
def search(name):
    """Search for a symbol in the graph."""
    # NOTE: the docstring above is the `--help` text shown by Click.
    from repolens.mcp.server import search_symbol

    # Print one line per hit: "[source] <qualified name or file> - <kind> @ <line>".
    for hit in search_symbol(name):
        origin = hit['source']
        # Hits without a qualified name are labelled by their file path instead.
        label = hit.get('qualified_name') or hit.get('file_path')
        kind = hit.get('kind', 'grep')
        line_no = hit['line_start']
        click.echo(f"[{origin}] {label} - {kind} @ {line_no}")
|
|
112
|
+
|
|
113
|
+
@cli.command()
@click.argument('name')
def usages(name):
    """Find usages of a symbol."""
    # NOTE: the docstring above is the `--help` text shown by Click.
    from repolens.mcp.server import find_usages

    # Print one line per usage: "[source] <referrer or file> - <relation> @ <line>".
    for hit in find_usages(name):
        origin = hit['source']
        # Usages without a referring symbol are labelled by their file path.
        referrer = hit.get('from_qualified') or hit.get('file_path')
        relation = hit.get('relation', 'grep')
        line_no = hit['line_start']
        click.echo(f"[{origin}] {referrer} - {relation} @ {line_no}")
|
|
121
|
+
|
|
122
|
+
@cli.command()
def stats():
    """Show graph statistics."""
    # NOTE: the docstring above is the `--help` text shown by Click.
    import repolens.config as config
    from repolens.db.store import Store

    # (label, query) pairs, run in order against the three graph tables.
    count_queries = (
        ("Files", "SELECT COUNT(*) FROM cg_files"),
        ("Symbols", "SELECT COUNT(*) FROM cg_symbols"),
        ("Edges", "SELECT COUNT(*) FROM cg_edges"),
    )

    totals = []
    graph_store = Store(config.DATABASE_URL)
    with graph_store.get_connection() as conn:
        cur = conn.cursor()
        for label, query in count_queries:
            cur.execute(query)
            totals.append((label, cur.fetchone()[0]))

    # Report only after every query succeeded, so output is all-or-nothing.
    for label, total in totals:
        click.echo(f"{label}: {total}")
|
|
139
|
+
|
|
140
|
+
# Run the Click group when this module is executed as a script.
if __name__ == '__main__':
    cli()
|
repolens/config.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
# Default configuration. Each value is read from the environment once, at
# import time, and falls back to the literal default shown here.
DATABASE_URL = os.environ.get("DATABASE_URL", "sqlite:///codegraphy.db")
REPOLENS_ROOT = os.environ.get("REPOLENS_ROOT", ".")

# Plugin list can be derived from env vars or TOML.
# REPOLENS_PLUGINS is a comma-separated list of dotted module paths. Names are
# trimmed and blank entries (from "a,,b", a trailing comma, or an empty
# variable) are dropped, so consumers never see an empty module name.
_plugins_env = os.environ.get("REPOLENS_PLUGINS", "")
REPOLENS_PLUGINS = [
    name for name in (part.strip() for part in _plugins_env.split(",")) if name
]
|
|
10
|
+
|
|
11
|
+
def load_config():
    """Overlay settings from ``codegraphy.toml`` onto the module-level values.

    The file is looked up in the directory named by the ``REPOLENS_ROOT``
    environment variable (``"."`` when unset). Recognised keys are
    ``database_url``, ``root`` and ``plugins``; each one is applied only when
    the matching environment variable is not set, so the environment keeps
    priority over the file. Keys with an unexpected type are ignored.

    Does nothing when the file is missing, or on Python 3.10 where the
    standard library has no TOML parser.

    Raises:
        ValueError: if the file exists but is not valid TOML.
    """
    global DATABASE_URL, REPOLENS_ROOT, REPOLENS_PLUGINS

    try:
        import tomllib  # standard library from Python 3.11 onwards
    except ImportError:
        return

    toml_path = os.path.join(os.environ.get("REPOLENS_ROOT", "."), "codegraphy.toml")
    try:
        with open(toml_path, "rb") as fh:
            settings = tomllib.load(fh)
    except FileNotFoundError:
        # The config file is optional.
        return
    except tomllib.TOMLDecodeError as err:
        raise ValueError(f"Invalid TOML in {toml_path}: {err}") from err

    db_url = settings.get("database_url")
    if isinstance(db_url, str) and db_url and "DATABASE_URL" not in os.environ:
        DATABASE_URL = db_url

    root = settings.get("root")
    if isinstance(root, str) and root and "REPOLENS_ROOT" not in os.environ:
        REPOLENS_ROOT = root

    plugins = settings.get("plugins")
    if isinstance(plugins, list) and "REPOLENS_PLUGINS" not in os.environ:
        # Same normalisation as the env-var form: trimmed, no blank names.
        REPOLENS_PLUGINS = [
            name.strip() for name in plugins if isinstance(name, str) and name.strip()
        ]
|
repolens/db/__init__.py
ADDED
repolens/db/schema.py
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
PG_SCHEMA = """
|
|
2
|
+
CREATE TABLE IF NOT EXISTS cg_files (
|
|
3
|
+
id SERIAL PRIMARY KEY,
|
|
4
|
+
file_path TEXT UNIQUE NOT NULL,
|
|
5
|
+
module_path TEXT,
|
|
6
|
+
summary TEXT,
|
|
7
|
+
symbol_count INTEGER DEFAULT 0,
|
|
8
|
+
git_hash TEXT,
|
|
9
|
+
last_indexed TIMESTAMPTZ DEFAULT NOW()
|
|
10
|
+
);
|
|
11
|
+
|
|
12
|
+
CREATE TABLE IF NOT EXISTS cg_symbols (
|
|
13
|
+
id SERIAL PRIMARY KEY,
|
|
14
|
+
name TEXT NOT NULL,
|
|
15
|
+
qualified_name TEXT UNIQUE NOT NULL,
|
|
16
|
+
kind TEXT NOT NULL,
|
|
17
|
+
file_path TEXT NOT NULL REFERENCES cg_files(file_path) ON DELETE CASCADE,
|
|
18
|
+
line_start INTEGER,
|
|
19
|
+
line_end INTEGER,
|
|
20
|
+
summary TEXT,
|
|
21
|
+
raw_signature TEXT,
|
|
22
|
+
extra JSONB,
|
|
23
|
+
last_indexed TIMESTAMPTZ DEFAULT NOW()
|
|
24
|
+
);
|
|
25
|
+
|
|
26
|
+
CREATE TABLE IF NOT EXISTS cg_edges (
|
|
27
|
+
from_id INTEGER NOT NULL REFERENCES cg_symbols(id) ON DELETE CASCADE,
|
|
28
|
+
to_id INTEGER NOT NULL REFERENCES cg_symbols(id) ON DELETE CASCADE,
|
|
29
|
+
relation TEXT NOT NULL,
|
|
30
|
+
PRIMARY KEY (from_id, to_id, relation)
|
|
31
|
+
);
|
|
32
|
+
|
|
33
|
+
CREATE INDEX IF NOT EXISTS idx_symbols_name ON cg_symbols(name);
|
|
34
|
+
CREATE INDEX IF NOT EXISTS idx_symbols_file ON cg_symbols(file_path);
|
|
35
|
+
CREATE INDEX IF NOT EXISTS idx_symbols_kind ON cg_symbols(kind);
|
|
36
|
+
CREATE INDEX IF NOT EXISTS idx_symbols_qualname ON cg_symbols(qualified_name);
|
|
37
|
+
CREATE INDEX IF NOT EXISTS idx_edges_from ON cg_edges(from_id);
|
|
38
|
+
CREATE INDEX IF NOT EXISTS idx_edges_to ON cg_edges(to_id);
|
|
39
|
+
"""
|
|
40
|
+
|
|
41
|
+
SQLITE_SCHEMA = """
|
|
42
|
+
CREATE TABLE IF NOT EXISTS cg_files (
|
|
43
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
44
|
+
file_path TEXT UNIQUE NOT NULL,
|
|
45
|
+
module_path TEXT,
|
|
46
|
+
summary TEXT,
|
|
47
|
+
symbol_count INTEGER DEFAULT 0,
|
|
48
|
+
git_hash TEXT,
|
|
49
|
+
last_indexed DATETIME DEFAULT CURRENT_TIMESTAMP
|
|
50
|
+
);
|
|
51
|
+
|
|
52
|
+
CREATE TABLE IF NOT EXISTS cg_symbols (
|
|
53
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
54
|
+
name TEXT NOT NULL,
|
|
55
|
+
qualified_name TEXT UNIQUE NOT NULL,
|
|
56
|
+
kind TEXT NOT NULL,
|
|
57
|
+
file_path TEXT NOT NULL REFERENCES cg_files(file_path) ON DELETE CASCADE,
|
|
58
|
+
line_start INTEGER,
|
|
59
|
+
line_end INTEGER,
|
|
60
|
+
summary TEXT,
|
|
61
|
+
raw_signature TEXT,
|
|
62
|
+
extra TEXT,
|
|
63
|
+
last_indexed DATETIME DEFAULT CURRENT_TIMESTAMP
|
|
64
|
+
);
|
|
65
|
+
|
|
66
|
+
CREATE TABLE IF NOT EXISTS cg_edges (
|
|
67
|
+
from_id INTEGER NOT NULL REFERENCES cg_symbols(id) ON DELETE CASCADE,
|
|
68
|
+
to_id INTEGER NOT NULL REFERENCES cg_symbols(id) ON DELETE CASCADE,
|
|
69
|
+
relation TEXT NOT NULL,
|
|
70
|
+
PRIMARY KEY (from_id, to_id, relation)
|
|
71
|
+
);
|
|
72
|
+
|
|
73
|
+
CREATE INDEX IF NOT EXISTS idx_symbols_name ON cg_symbols(name);
|
|
74
|
+
CREATE INDEX IF NOT EXISTS idx_symbols_file ON cg_symbols(file_path);
|
|
75
|
+
CREATE INDEX IF NOT EXISTS idx_symbols_kind ON cg_symbols(kind);
|
|
76
|
+
CREATE INDEX IF NOT EXISTS idx_symbols_qualname ON cg_symbols(qualified_name);
|
|
77
|
+
CREATE INDEX IF NOT EXISTS idx_edges_from ON cg_edges(from_id);
|
|
78
|
+
CREATE INDEX IF NOT EXISTS idx_edges_to ON cg_edges(to_id);
|
|
79
|
+
"""
|
|
80
|
+
|
|
81
|
+
def get_schema(db_url: str) -> str:
    """Return the DDL script matching the backend named by *db_url*.

    URLs that start with ``"postgres"`` (which covers both ``postgres://`` and
    ``postgresql://``) get the PostgreSQL script; every other URL gets the
    SQLite script. The prefix check is case-sensitive.
    """
    is_postgres = db_url.startswith("postgres")
    return PG_SCHEMA if is_postgres else SQLITE_SCHEMA
|