codegraph-ai 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codegraph_ai-0.1.0/PKG-INFO +14 -0
- codegraph_ai-0.1.0/README.md +346 -0
- codegraph_ai-0.1.0/codegraph/__init__.py +1 -0
- codegraph_ai-0.1.0/codegraph/__main__.py +5 -0
- codegraph_ai-0.1.0/codegraph/adapters/__init__.py +1 -0
- codegraph_ai-0.1.0/codegraph/adapters/base.py +38 -0
- codegraph_ai-0.1.0/codegraph/adapters/c_adapter.py +520 -0
- codegraph_ai-0.1.0/codegraph/adapters/js_adapter.py +556 -0
- codegraph_ai-0.1.0/codegraph/adapters/python_adapter.py +337 -0
- codegraph_ai-0.1.0/codegraph/analyzer.py +432 -0
- codegraph_ai-0.1.0/codegraph/cli.py +463 -0
- codegraph_ai-0.1.0/codegraph/core.py +3606 -0
- codegraph_ai-0.1.0/codegraph/mcp_server.py +588 -0
- codegraph_ai-0.1.0/codegraph/models.py +284 -0
- codegraph_ai-0.1.0/codegraph/qa.py +471 -0
- codegraph_ai-0.1.0/codegraph_ai.egg-info/PKG-INFO +14 -0
- codegraph_ai-0.1.0/codegraph_ai.egg-info/SOURCES.txt +32 -0
- codegraph_ai-0.1.0/codegraph_ai.egg-info/dependency_links.txt +1 -0
- codegraph_ai-0.1.0/codegraph_ai.egg-info/entry_points.txt +2 -0
- codegraph_ai-0.1.0/codegraph_ai.egg-info/requires.txt +11 -0
- codegraph_ai-0.1.0/codegraph_ai.egg-info/top_level.txt +1 -0
- codegraph_ai-0.1.0/pyproject.toml +33 -0
- codegraph_ai-0.1.0/setup.cfg +4 -0
- codegraph_ai-0.1.0/tests/test_adapters.py +202 -0
- codegraph_ai-0.1.0/tests/test_advanced.py +549 -0
- codegraph_ai-0.1.0/tests/test_core_schema.py +36 -0
- codegraph_ai-0.1.0/tests/test_cross_locate.py +97 -0
- codegraph_ai-0.1.0/tests/test_impact.py +68 -0
- codegraph_ai-0.1.0/tests/test_incremental.py +455 -0
- codegraph_ai-0.1.0/tests/test_indexing.py +88 -0
- codegraph_ai-0.1.0/tests/test_integration.py +195 -0
- codegraph_ai-0.1.0/tests/test_js_adapter.py +272 -0
- codegraph_ai-0.1.0/tests/test_models.py +85 -0
- codegraph_ai-0.1.0/tests/test_similar.py +64 -0
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: codegraph-ai
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Hybrid graph + vector code intelligence powered by NeuG and zvec
|
|
5
|
+
Requires-Python: >=3.10
|
|
6
|
+
Requires-Dist: neug
|
|
7
|
+
Requires-Dist: zvec
|
|
8
|
+
Requires-Dist: tree-sitter-language-pack
|
|
9
|
+
Requires-Dist: sentence-transformers
|
|
10
|
+
Requires-Dist: numpy
|
|
11
|
+
Provides-Extra: server
|
|
12
|
+
Requires-Dist: fastmcp; extra == "server"
|
|
13
|
+
Provides-Extra: dev
|
|
14
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
@@ -0,0 +1,346 @@
|
|
|
1
|
+
# CodeScope
|
|
2
|
+
|
|
3
|
+
Hybrid **graph + vector** code intelligence, powered by [NeuG](https://github.com/GraphScope/neug) (embedded graph database) and [zvec](https://github.com/alibaba/zvec) (embedded vector database).
|
|
4
|
+
|
|
5
|
+
CodeScope indexes source code into a knowledge graph of functions, calls, imports, classes, modules, commits, and semantic embeddings — enabling analyses impossible with grep, LSP, or vector search alone.
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pip install -e .
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
This installs the `codegraph` CLI. Verify:
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
codegraph --help
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
Dependencies:
|
|
20
|
+
|
|
21
|
+
| Package | Purpose |
|
|
22
|
+
|---------|---------|
|
|
23
|
+
| `neug` | Embedded graph database (Cypher) |
|
|
24
|
+
| `zvec` | Embedded vector database |
|
|
25
|
+
| `tree-sitter-language-pack` | AST parsing for Python / JavaScript / C |
|
|
26
|
+
| `sentence-transformers` | Local embedding model (`all-MiniLM-L6-v2`) |
|
|
27
|
+
| `numpy` | Vector operations |
|
|
28
|
+
|
|
29
|
+
## Quick Start
|
|
30
|
+
|
|
31
|
+
### 1. Index a repository
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
# Index the current directory (auto-detects language)
|
|
35
|
+
codegraph init
|
|
36
|
+
|
|
37
|
+
# Index a specific repo with options
|
|
38
|
+
codegraph init --repo /path/to/project --lang c --commits 500
|
|
39
|
+
|
|
40
|
+
# Index with git history and MODIFIES backfill
|
|
41
|
+
codegraph init --repo . --lang python --commits 1000 --backfill-limit 200
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
### 2. Check index status
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
codegraph status
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
### 3. Ask questions
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
codegraph query "who calls free_irq?"
|
|
54
|
+
codegraph query "find functions related to memory allocation" --json
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
### 4. Generate an architecture analysis report
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
codegraph analyze
|
|
61
|
+
codegraph analyze --output my-report.md
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
### 5. Ingest more git history
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
codegraph ingest --commits 2000
|
|
68
|
+
codegraph ingest --backfill-limit 500
|
|
69
|
+
codegraph ingest --commits 1000 --backfill-limit 300 --vectors
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
### 6. Start the MCP server (for Cursor, OpenClaw, etc.)
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
codegraph server
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
## CLI Reference
|
|
79
|
+
|
|
80
|
+
```
|
|
81
|
+
codegraph init [--repo PATH] [--db PATH] [--lang LANG] [--commits N] [--backfill-limit N]
|
|
82
|
+
Create a new index for a repository.
|
|
83
|
+
--repo Path to repository (default: .)
|
|
84
|
+
--db Database directory (default: .codegraph)
|
|
85
|
+
--lang Language: auto, python, c, javascript, typescript (default: auto)
|
|
86
|
+
--commits Number of git commits to ingest (default: 0)
|
|
87
|
+
--backfill-limit Max commits to compute MODIFIES edges (default: 0)
|
|
88
|
+
|
|
89
|
+
codegraph open [--db PATH]
|
|
90
|
+
Open an existing index and print a summary.
|
|
91
|
+
|
|
92
|
+
codegraph query "<question>" [--db PATH] [--topk N] [--json]
|
|
93
|
+
Ask a question against the index.
|
|
94
|
+
Supports structural, semantic, historical, and intent queries.
|
|
95
|
+
|
|
96
|
+
codegraph analyze [--db PATH] [--output PATH]
|
|
97
|
+
Generate a comprehensive architecture analysis report (Markdown).
|
|
98
|
+
|
|
99
|
+
codegraph status [--db PATH]
|
|
100
|
+
Show index health, counts, and backfill progress.
|
|
101
|
+
|
|
102
|
+
codegraph ingest [--repo PATH] [--db PATH] [--commits N] [--backfill-limit N] [--vectors]
|
|
103
|
+
Ingest additional git history into an existing index.
|
|
104
|
+
--commits Commits to ingest (default: 500)
|
|
105
|
+
--backfill-limit Max commits to compute MODIFIES edges (default: 0)
|
|
106
|
+
--vectors Rebuild vectors after ingestion
|
|
107
|
+
|
|
108
|
+
codegraph server [--db PATH] [--repo PATH]
|
|
109
|
+
Start the MCP server for AI coding agents.
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
## Use with AI Coding Agents
|
|
113
|
+
|
|
114
|
+
### Claude Code CLI / QwenCode
|
|
115
|
+
|
|
116
|
+
After indexing, AI agents can invoke `codegraph` subcommands directly:
|
|
117
|
+
|
|
118
|
+
```bash
|
|
119
|
+
# Agent workflow:
|
|
120
|
+
codegraph init --repo . --lang python --commits 200
|
|
121
|
+
codegraph query "what functions handle authentication?"
|
|
122
|
+
codegraph analyze
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
### Cursor (MCP Integration)
|
|
126
|
+
|
|
127
|
+
Add to `.cursor/mcp.json`:
|
|
128
|
+
|
|
129
|
+
```json
|
|
130
|
+
{
|
|
131
|
+
"mcpServers": {
|
|
132
|
+
"codegraph": {
|
|
133
|
+
"command": "codegraph",
|
|
134
|
+
"args": ["server"],
|
|
135
|
+
"env": {
|
|
136
|
+
"CODESCOPE_DB_DIR": "/absolute/path/to/.codegraph",
|
|
137
|
+
"HF_HUB_OFFLINE": "1"
|
|
138
|
+
}
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
}
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
### Agent Skill
|
|
145
|
+
|
|
146
|
+
A skill file is provided at `skill/codegraph-qa/SKILL.md` that teaches AI agents how to use CodeScope — including the full graph schema, Cypher query patterns, and composition strategies for custom analyses.
|
|
147
|
+
|
|
148
|
+
## Python API
|
|
149
|
+
|
|
150
|
+
All CLI commands are backed by the `CodeScope` class, which can be used directly:
|
|
151
|
+
|
|
152
|
+
```python
|
|
153
|
+
from codegraph.core import CodeScope
|
|
154
|
+
|
|
155
|
+
cs = CodeScope(".codegraph")
|
|
156
|
+
cs.index("/path/to/project", languages=["python"])
|
|
157
|
+
|
|
158
|
+
# Structural analysis
|
|
159
|
+
results = cs.impact("execute_query", "return type changed", max_hops=3)
|
|
160
|
+
spots = cs.hotspots(topk=10)
|
|
161
|
+
dead = cs.dead_code()
|
|
162
|
+
|
|
163
|
+
# Architecture intelligence
|
|
164
|
+
bridges = cs.bridge_functions(topk=30)
|
|
165
|
+
layers = cs.layer_discovery(topk=30)
|
|
166
|
+
coupling = cs.module_coupling(topk=10)
|
|
167
|
+
stability = cs.stability_analysis(topk=50)
|
|
168
|
+
|
|
169
|
+
# Semantic search
|
|
170
|
+
results = cs.vector_only_search("error handling pattern", topk=10)
|
|
171
|
+
|
|
172
|
+
# Evolution
|
|
173
|
+
history = cs.change_attribution("sched_fork")
|
|
174
|
+
coupled = cs.co_change("kmalloc")
|
|
175
|
+
|
|
176
|
+
# Report generation
|
|
177
|
+
from codegraph.analyzer import generate_report
|
|
178
|
+
report = generate_report(cs)
|
|
179
|
+
|
|
180
|
+
cs.close()
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
## Architecture
|
|
184
|
+
|
|
185
|
+
```
|
|
186
|
+
Source Code
|
|
187
|
+
|
|
|
188
|
+
v
|
|
189
|
+
+----------+ +---------------------------------+
|
|
190
|
+
|tree-sitter|---->| NeuG (Graph) |
|
|
191
|
+
| parsers | | File, Function, Class, Module |
|
|
192
|
+
| (AST) | | Commit, CALLS, MODIFIES, ... |
|
|
193
|
+
+----------+ +---------------------------------+
|
|
194
|
+
|
|
|
195
|
+
| embeddings +---------------------------------+
|
|
196
|
+
+------------>| zvec (Vector) |
|
|
197
|
+
| Semantic function embeddings |
|
|
198
|
+
+---------------------------------+
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
## Graph Schema
|
|
202
|
+
|
|
203
|
+
```
|
|
204
|
+
Node Tables:
|
|
205
|
+
File(id, path, language, loc, is_external)
|
|
206
|
+
Function(id, name, qualified_name, signature, file_path, is_historical)
|
|
207
|
+
Class(id, name, qualified_name, file_path)
|
|
208
|
+
Module(id, name, path_prefix)
|
|
209
|
+
Commit(id, hash, message, author, timestamp, version_tag)
|
|
210
|
+
|
|
211
|
+
Relationship Tables:
|
|
212
|
+
CALLS(Function -> Function)
|
|
213
|
+
DEFINES_FUNC(File -> Function)
|
|
214
|
+
DEFINES_CLASS(File -> Class)
|
|
215
|
+
HAS_METHOD(Class -> Function)
|
|
216
|
+
IMPORTS(File -> File)
|
|
217
|
+
BELONGS_TO(File -> Module)
|
|
218
|
+
INHERITS(Class -> Class)
|
|
219
|
+
MODIFIES(Commit -> Function) -- requires backfill
|
|
220
|
+
TOUCHES(Commit -> File)
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
| `version_tag` | Meaning |
|
|
224
|
+
|---------------|---------|
|
|
225
|
+
| `''` (empty) | Commit ingested, MODIFIES edges not yet computed |
|
|
226
|
+
| `'bf'` | Backfill completed, MODIFIES edges have been computed |
|
|
227
|
+
|
|
228
|
+
## Supported Languages
|
|
229
|
+
|
|
230
|
+
| Language | Adapter | Status |
|
|
231
|
+
|----------|---------|--------|
|
|
232
|
+
| C / C headers | `CAdapter` | Supported |
|
|
233
|
+
| Python | `PythonAdapter` | Supported |
|
|
234
|
+
| JavaScript / TypeScript | `JsAdapter` | Supported |
|
|
235
|
+
|
|
236
|
+
Adding a new language requires implementing `BaseAdapter` (see `codegraph/adapters/base.py`).
|
|
237
|
+
|
|
238
|
+
## Project Structure
|
|
239
|
+
|
|
240
|
+
```
|
|
241
|
+
codegraph/
|
|
242
|
+
codegraph/
|
|
243
|
+
cli.py # CLI entry point (codegraph command)
|
|
244
|
+
core.py # CodeScope class — graph + vector orchestration
|
|
245
|
+
analyzer.py # Architecture report generator
|
|
246
|
+
models.py # Data models (dataclasses)
|
|
247
|
+
qa.py # Natural-language Q&A classifier
|
|
248
|
+
mcp_server.py # MCP server (FastMCP)
|
|
249
|
+
__main__.py # python -m codegraph support
|
|
250
|
+
adapters/
|
|
251
|
+
base.py # BaseAdapter interface
|
|
252
|
+
c_adapter.py # C parser (tree-sitter)
|
|
253
|
+
python_adapter.py
|
|
254
|
+
js_adapter.py
|
|
255
|
+
skill/codegraph-qa/ # Agent skill files
|
|
256
|
+
sample_project/ # Demo Python codebase
|
|
257
|
+
tests/ # Unit and integration tests
|
|
258
|
+
pyproject.toml # Package metadata + CLI entry point
|
|
259
|
+
```
|
|
260
|
+
|
|
261
|
+
## Troubleshooting
|
|
262
|
+
|
|
263
|
+
### neug: "Database locked"
|
|
264
|
+
|
|
265
|
+
```
|
|
266
|
+
RuntimeError: Database locked: The database is already locked for write access
|
|
267
|
+
```
|
|
268
|
+
|
|
269
|
+
**Cause**: A previous process crashed without releasing the neug write lock.
|
|
270
|
+
|
|
271
|
+
**Fix**:
|
|
272
|
+
```bash
|
|
273
|
+
rm <db>/graph.db/neugdb.lock
|
|
274
|
+
# Example: rm .codegraph/graph.db/neugdb.lock
|
|
275
|
+
```
|
|
276
|
+
|
|
277
|
+
The CLI also removes this lock file automatically on startup if it detects a lock issue.
|
|
278
|
+
|
|
279
|
+
### zvec: "Can't open lock file"
|
|
280
|
+
|
|
281
|
+
```
|
|
282
|
+
RuntimeError: Can't open lock file
|
|
283
|
+
```
|
|
284
|
+
|
|
285
|
+
**Cause**: The zvec `LOCK` file was accidentally deleted.
|
|
286
|
+
|
|
287
|
+
**Fix**:
|
|
288
|
+
```bash
|
|
289
|
+
touch <db>/vectors/LOCK
|
|
290
|
+
# Example: touch .codegraph/vectors/LOCK
|
|
291
|
+
```
|
|
292
|
+
|
|
293
|
+
### zvec: "Can't lock read-write collection"
|
|
294
|
+
|
|
295
|
+
```
|
|
296
|
+
RuntimeError: Can't lock read-write collection
|
|
297
|
+
```
|
|
298
|
+
|
|
299
|
+
**Cause**: Another process is currently holding the zvec lock.
|
|
300
|
+
|
|
301
|
+
**Fix**:
|
|
302
|
+
```bash
|
|
303
|
+
# Find and kill the process holding the lock
|
|
304
|
+
lsof <db>/vectors/idmap.0/LOCK
|
|
305
|
+
kill <pid>
|
|
306
|
+
```
|
|
307
|
+
|
|
308
|
+
### zvec: "recovery idmap failed"
|
|
309
|
+
|
|
310
|
+
```
|
|
311
|
+
RuntimeError: recovery idmap failed
|
|
312
|
+
```
|
|
313
|
+
|
|
314
|
+
**Cause**: Stale RocksDB WAL files left by repeated crashes.
|
|
315
|
+
|
|
316
|
+
**Fix**:
|
|
317
|
+
```bash
|
|
318
|
+
# Remove empty WAL files from the idmap directory
|
|
319
|
+
find <db>/vectors/idmap.0/ -name "*.log" -size 0 -delete
|
|
320
|
+
# Example: find .codegraph/vectors/idmap.0/ -name "*.log" -size 0 -delete
|
|
321
|
+
```
|
|
322
|
+
|
|
323
|
+
### SentenceTransformer: network errors
|
|
324
|
+
|
|
325
|
+
```
|
|
326
|
+
ConnectionError / ReadTimeoutError during model download
|
|
327
|
+
```
|
|
328
|
+
|
|
329
|
+
**Cause**: The embedding model needs to be downloaded on first use.
|
|
330
|
+
|
|
331
|
+
**Fix**: Download the model once with internet access, then use offline mode:
|
|
332
|
+
```bash
|
|
333
|
+
# First run (needs internet):
|
|
334
|
+
python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('all-MiniLM-L6-v2')"
|
|
335
|
+
|
|
336
|
+
# All subsequent runs (offline):
|
|
337
|
+
export HF_HUB_OFFLINE=1
|
|
338
|
+
export TRANSFORMERS_OFFLINE=1
|
|
339
|
+
codegraph init --repo .
|
|
340
|
+
```
|
|
341
|
+
|
|
342
|
+
The CLI sets these environment variables automatically after the model is cached.
|
|
343
|
+
|
|
344
|
+
## Privacy
|
|
345
|
+
|
|
346
|
+
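All processing is fully local, and your code never leaves your machine. The only network access needed is the one-time download of the embedding model (`all-MiniLM-L6-v2`) on first use; after that the model runs locally and offline, and both NeuG and zvec are embedded databases with no network requirements.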
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""CodeScope: Graph + Vector Code Intelligence powered by neug and zvec."""
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""CodeScope language adapters for parsing source code via tree-sitter."""
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""Abstract base class for language-specific source code adapters."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from abc import ABC, abstractmethod
|
|
6
|
+
|
|
7
|
+
from codegraph.models import ParseResult
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class BaseAdapter(ABC):
    """Parse a single source file and extract structural information.

    Concrete adapters must implement :meth:`language_name`,
    :meth:`supported_extensions` and :meth:`parse_file`.  :meth:`can_handle`
    is provided here and is derived from :meth:`supported_extensions`.
    """

    @abstractmethod
    def language_name(self) -> str:
        """Return the canonical language name, e.g. ``'python'``, ``'c'``."""
        ...

    @abstractmethod
    def supported_extensions(self) -> list[str]:
        """Return file extensions this adapter handles, e.g. ``['.py']``."""
        ...

    @abstractmethod
    def parse_file(self, source: bytes, file_path: str) -> ParseResult:
        """Parse *source* bytes and return structured code elements.

        Parameters
        ----------
        source:
            Raw bytes of the source file.
        file_path:
            Repository-relative path used to generate unique IDs.

        Returns
        -------
        ParseResult
            The structured code elements extracted from *source*.
        """
        ...

    def can_handle(self, file_path: str) -> bool:
        """Return ``True`` if *file_path* has a supported extension.

        The check is a case-sensitive suffix match against every entry of
        :meth:`supported_extensions`; an empty extension list matches nothing.
        """
        # ``str.endswith`` accepts a tuple of suffixes, so a single call
        # replaces a Python-level loop over the extensions.
        return file_path.endswith(tuple(self.supported_extensions()))
|