codegraph-ai 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. codegraph_ai-0.1.0/PKG-INFO +14 -0
  2. codegraph_ai-0.1.0/README.md +346 -0
  3. codegraph_ai-0.1.0/codegraph/__init__.py +1 -0
  4. codegraph_ai-0.1.0/codegraph/__main__.py +5 -0
  5. codegraph_ai-0.1.0/codegraph/adapters/__init__.py +1 -0
  6. codegraph_ai-0.1.0/codegraph/adapters/base.py +38 -0
  7. codegraph_ai-0.1.0/codegraph/adapters/c_adapter.py +520 -0
  8. codegraph_ai-0.1.0/codegraph/adapters/js_adapter.py +556 -0
  9. codegraph_ai-0.1.0/codegraph/adapters/python_adapter.py +337 -0
  10. codegraph_ai-0.1.0/codegraph/analyzer.py +432 -0
  11. codegraph_ai-0.1.0/codegraph/cli.py +463 -0
  12. codegraph_ai-0.1.0/codegraph/core.py +3606 -0
  13. codegraph_ai-0.1.0/codegraph/mcp_server.py +588 -0
  14. codegraph_ai-0.1.0/codegraph/models.py +284 -0
  15. codegraph_ai-0.1.0/codegraph/qa.py +471 -0
  16. codegraph_ai-0.1.0/codegraph_ai.egg-info/PKG-INFO +14 -0
  17. codegraph_ai-0.1.0/codegraph_ai.egg-info/SOURCES.txt +32 -0
  18. codegraph_ai-0.1.0/codegraph_ai.egg-info/dependency_links.txt +1 -0
  19. codegraph_ai-0.1.0/codegraph_ai.egg-info/entry_points.txt +2 -0
  20. codegraph_ai-0.1.0/codegraph_ai.egg-info/requires.txt +11 -0
  21. codegraph_ai-0.1.0/codegraph_ai.egg-info/top_level.txt +1 -0
  22. codegraph_ai-0.1.0/pyproject.toml +33 -0
  23. codegraph_ai-0.1.0/setup.cfg +4 -0
  24. codegraph_ai-0.1.0/tests/test_adapters.py +202 -0
  25. codegraph_ai-0.1.0/tests/test_advanced.py +549 -0
  26. codegraph_ai-0.1.0/tests/test_core_schema.py +36 -0
  27. codegraph_ai-0.1.0/tests/test_cross_locate.py +97 -0
  28. codegraph_ai-0.1.0/tests/test_impact.py +68 -0
  29. codegraph_ai-0.1.0/tests/test_incremental.py +455 -0
  30. codegraph_ai-0.1.0/tests/test_indexing.py +88 -0
  31. codegraph_ai-0.1.0/tests/test_integration.py +195 -0
  32. codegraph_ai-0.1.0/tests/test_js_adapter.py +272 -0
  33. codegraph_ai-0.1.0/tests/test_models.py +85 -0
  34. codegraph_ai-0.1.0/tests/test_similar.py +64 -0
@@ -0,0 +1,14 @@
1
+ Metadata-Version: 2.4
2
+ Name: codegraph-ai
3
+ Version: 0.1.0
4
+ Summary: Hybrid graph + vector code intelligence powered by NeuG and zvec
5
+ Requires-Python: >=3.10
6
+ Requires-Dist: neug
7
+ Requires-Dist: zvec
8
+ Requires-Dist: tree-sitter-language-pack
9
+ Requires-Dist: sentence-transformers
10
+ Requires-Dist: numpy
11
+ Provides-Extra: server
12
+ Requires-Dist: fastmcp; extra == "server"
13
+ Provides-Extra: dev
14
+ Requires-Dist: pytest>=7.0; extra == "dev"
@@ -0,0 +1,346 @@
1
+ # CodeScope
2
+
3
+ Hybrid **graph + vector** code intelligence, powered by [NeuG](https://github.com/GraphScope/neug) (embedded graph database) and [zvec](https://github.com/alibaba/zvec) (embedded vector database).
4
+
5
+ CodeScope indexes source code into a knowledge graph of functions, calls, imports, classes, modules, commits, and semantic embeddings — enabling analyses impossible with grep, LSP, or vector search alone.
6
+
7
+ ## Installation
8
+
9
+ ```bash
10
+ pip install -e .
11
+ ```
12
+
13
+ This installs the `codegraph` CLI. Verify:
14
+
15
+ ```bash
16
+ codegraph --help
17
+ ```
18
+
19
+ Dependencies:
20
+
21
+ | Package | Purpose |
22
+ |---------|---------|
23
+ | `neug` | Embedded graph database (Cypher) |
24
+ | `zvec` | Embedded vector database |
25
+ | `tree-sitter-language-pack` | AST parsing for Python / JavaScript / C |
26
+ | `sentence-transformers` | Local embedding model (`all-MiniLM-L6-v2`) |
27
+ | `numpy` | Vector operations |
28
+
29
+ ## Quick Start
30
+
31
+ ### 1. Index a repository
32
+
33
+ ```bash
34
+ # Index the current directory (auto-detects language)
35
+ codegraph init
36
+
37
+ # Index a specific repo with options
38
+ codegraph init --repo /path/to/project --lang c --commits 500
39
+
40
+ # Index with git history and MODIFIES backfill
41
+ codegraph init --repo . --lang python --commits 1000 --backfill-limit 200
42
+ ```
43
+
44
+ ### 2. Check index status
45
+
46
+ ```bash
47
+ codegraph status
48
+ ```
49
+
50
+ ### 3. Ask questions
51
+
52
+ ```bash
53
+ codegraph query "who calls free_irq?"
54
+ codegraph query "find functions related to memory allocation" --json
55
+ ```
56
+
57
+ ### 4. Generate an architecture analysis report
58
+
59
+ ```bash
60
+ codegraph analyze
61
+ codegraph analyze --output my-report.md
62
+ ```
63
+
64
+ ### 5. Ingest more git history
65
+
66
+ ```bash
67
+ codegraph ingest --commits 2000
68
+ codegraph ingest --backfill-limit 500
69
+ codegraph ingest --commits 1000 --backfill-limit 300 --vectors
70
+ ```
71
+
72
+ ### 6. Start the MCP server (for Cursor, OpenClaw, etc.)
73
+
74
+ ```bash
75
+ codegraph server
76
+ ```
77
+
78
+ ## CLI Reference
79
+
80
+ ```
81
+ codegraph init [--repo PATH] [--db PATH] [--lang LANG] [--commits N] [--backfill-limit N]
82
+ Create a new index for a repository.
83
+ --repo Path to repository (default: .)
84
+ --db Database directory (default: .codegraph)
85
+ --lang Language: auto, python, c, javascript, typescript (default: auto)
86
+ --commits Number of git commits to ingest (default: 0)
87
+ --backfill-limit Max commits to compute MODIFIES edges (default: 0)
88
+
89
+ codegraph open [--db PATH]
90
+ Open an existing index and print a summary.
91
+
92
+ codegraph query "<question>" [--db PATH] [--topk N] [--json]
93
+ Ask a question against the index.
94
+ Supports structural, semantic, historical, and intent queries.
95
+
96
+ codegraph analyze [--db PATH] [--output PATH]
97
+ Generate a comprehensive architecture analysis report (Markdown).
98
+
99
+ codegraph status [--db PATH]
100
+ Show index health, counts, and backfill progress.
101
+
102
+ codegraph ingest [--repo PATH] [--db PATH] [--commits N] [--backfill-limit N] [--vectors]
103
+ Ingest additional git history into an existing index.
104
+ --commits Commits to ingest (default: 500)
105
+ --backfill-limit Max commits to compute MODIFIES edges (default: 0)
106
+ --vectors Rebuild vectors after ingestion
107
+
108
+ codegraph server [--db PATH] [--repo PATH]
109
+ Start the MCP server for AI coding agents (requires the optional `server` extra, which installs `fastmcp`).
110
+ ```
111
+
112
+ ## Use with AI Coding Agents
113
+
114
+ ### Claude Code CLI / QwenCode
115
+
116
+ After indexing, AI agents can invoke `codegraph` subcommands directly:
117
+
118
+ ```bash
119
+ # Agent workflow:
120
+ codegraph init --repo . --lang python --commits 200
121
+ codegraph query "what functions handle authentication?"
122
+ codegraph analyze
123
+ ```
124
+
125
+ ### Cursor (MCP Integration)
126
+
127
+ Add to `.cursor/mcp.json`:
128
+
129
+ ```json
130
+ {
131
+ "mcpServers": {
132
+ "codegraph": {
133
+ "command": "codegraph",
134
+ "args": ["server"],
135
+ "env": {
136
+ "CODESCOPE_DB_DIR": "/absolute/path/to/.codegraph",
137
+ "HF_HUB_OFFLINE": "1"
138
+ }
139
+ }
140
+ }
141
+ }
142
+ ```
143
+
144
+ ### Agent Skill
145
+
146
+ A skill file is provided at `skill/codegraph-qa/SKILL.md` that teaches AI agents how to use CodeScope — including the full graph schema, Cypher query patterns, and composition strategies for custom analyses.
147
+
148
+ ## Python API
149
+
150
+ All CLI commands are backed by the `CodeScope` class, which can be used directly:
151
+
152
+ ```python
153
+ from codegraph.core import CodeScope
154
+
155
+ cs = CodeScope(".codegraph")
156
+ cs.index("/path/to/project", languages=["python"])
157
+
158
+ # Structural analysis
159
+ results = cs.impact("execute_query", "return type changed", max_hops=3)
160
+ spots = cs.hotspots(topk=10)
161
+ dead = cs.dead_code()
162
+
163
+ # Architecture intelligence
164
+ bridges = cs.bridge_functions(topk=30)
165
+ layers = cs.layer_discovery(topk=30)
166
+ coupling = cs.module_coupling(topk=10)
167
+ stability = cs.stability_analysis(topk=50)
168
+
169
+ # Semantic search
170
+ results = cs.vector_only_search("error handling pattern", topk=10)
171
+
172
+ # Evolution
173
+ history = cs.change_attribution("sched_fork")
174
+ coupled = cs.co_change("kmalloc")
175
+
176
+ # Report generation
177
+ from codegraph.analyzer import generate_report
178
+ report = generate_report(cs)
179
+
180
+ cs.close()
181
+ ```
182
+
183
+ ## Architecture
184
+
185
+ ```
186
+ Source Code
187
+ |
188
+ v
189
+ +----------+ +---------------------------------+
190
+ |tree-sitter|---->| NeuG (Graph) |
191
+ | parsers | | File, Function, Class, Module |
192
+ | (AST) | | Commit, CALLS, MODIFIES, ... |
193
+ +----------+ +---------------------------------+
194
+ |
195
+ | embeddings +---------------------------------+
196
+ +------------>| zvec (Vector) |
197
+ | Semantic function embeddings |
198
+ +---------------------------------+
199
+ ```
200
+
201
+ ## Graph Schema
202
+
203
+ ```
204
+ Node Tables:
205
+ File(id, path, language, loc, is_external)
206
+ Function(id, name, qualified_name, signature, file_path, is_historical)
207
+ Class(id, name, qualified_name, file_path)
208
+ Module(id, name, path_prefix)
209
+ Commit(id, hash, message, author, timestamp, version_tag)
210
+
211
+ Relationship Tables:
212
+ CALLS(Function -> Function)
213
+ DEFINES_FUNC(File -> Function)
214
+ DEFINES_CLASS(File -> Class)
215
+ HAS_METHOD(Class -> Function)
216
+ IMPORTS(File -> File)
217
+ BELONGS_TO(File -> Module)
218
+ INHERITS(Class -> Class)
219
+ MODIFIES(Commit -> Function) -- requires backfill
220
+ TOUCHES(Commit -> File)
221
+ ```
222
+
223
+ | `version_tag` | Meaning |
224
+ |---------------|---------|
225
+ | `''` (empty) | Commit ingested, MODIFIES edges not yet computed |
226
+ | `'bf'` | Backfill completed, MODIFIES edges have been computed |
227
+
228
+ ## Supported Languages
229
+
230
+ | Language | Adapter | Status |
231
+ |----------|---------|--------|
232
+ | C / C headers | `CAdapter` | Supported |
233
+ | Python | `PythonAdapter` | Supported |
234
+ | JavaScript / TypeScript | `JsAdapter` | Supported |
235
+
236
+ Adding a new language requires implementing `BaseAdapter` (see `codegraph/adapters/base.py`).
237
+
238
+ ## Project Structure
239
+
240
+ ```
241
+ codegraph/
242
+ codegraph/
243
+ cli.py # CLI entry point (codegraph command)
244
+ core.py # CodeScope class — graph + vector orchestration
245
+ analyzer.py # Architecture report generator
246
+ models.py # Data models (dataclasses)
247
+ qa.py # Natural-language Q&A classifier
248
+ mcp_server.py # MCP server (FastMCP)
249
+ __main__.py # python -m codegraph support
250
+ adapters/
251
+ base.py # BaseAdapter interface
252
+ c_adapter.py # C parser (tree-sitter)
253
+ python_adapter.py
254
+ js_adapter.py
255
+ skill/codegraph-qa/ # Agent skill files
256
+ sample_project/ # Demo Python codebase
257
+ tests/ # Unit and integration tests
258
+ pyproject.toml # Package metadata + CLI entry point
259
+ ```
260
+
261
+ ## Troubleshooting
262
+
263
+ ### neug: "Database locked"
264
+
265
+ ```
266
+ RuntimeError: Database locked: The database is already locked for write access
267
+ ```
268
+
269
+ **Cause**: A previous process crashed without releasing the neug write lock.
270
+
271
+ **Fix**:
272
+ ```bash
273
+ rm <db>/graph.db/neugdb.lock
274
+ # Example: rm .codegraph/graph.db/neugdb.lock
275
+ ```
276
+
277
+ The CLI auto-cleans this on startup if it detects a lock issue.
278
+
279
+ ### zvec: "Can't open lock file"
280
+
281
+ ```
282
+ RuntimeError: Can't open lock file
283
+ ```
284
+
285
+ **Cause**: The zvec `LOCK` file was accidentally deleted.
286
+
287
+ **Fix**:
288
+ ```bash
289
+ touch <db>/vectors/LOCK
290
+ # Example: touch .codegraph/vectors/LOCK
291
+ ```
292
+
293
+ ### zvec: "Can't lock read-write collection"
294
+
295
+ ```
296
+ RuntimeError: Can't lock read-write collection
297
+ ```
298
+
299
+ **Cause**: Another process is currently holding the zvec lock.
300
+
301
+ **Fix**:
302
+ ```bash
303
+ # Find and kill the process holding the lock
304
+ lsof <db>/vectors/idmap.0/LOCK
305
+ kill <pid>
306
+ ```
307
+
308
+ ### zvec: "recovery idmap failed"
309
+
310
+ ```
311
+ RuntimeError: recovery idmap failed
312
+ ```
313
+
314
+ **Cause**: Stale RocksDB WAL files left by repeated crashes.
315
+
316
+ **Fix**:
317
+ ```bash
318
+ # Remove empty WAL files from the idmap directory
319
+ find <db>/vectors/idmap.0/ -name "*.log" -size 0 -delete
320
+ # Example: find .codegraph/vectors/idmap.0/ -name "*.log" -size 0 -delete
321
+ ```
322
+
323
+ ### SentenceTransformer: network errors
324
+
325
+ ```
326
+ ConnectionError / ReadTimeoutError during model download
327
+ ```
328
+
329
+ **Cause**: The embedding model needs to be downloaded on first use.
330
+
331
+ **Fix**: Download the model once with internet access, then use offline mode:
332
+ ```bash
333
+ # First run (needs internet):
334
+ python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('all-MiniLM-L6-v2')"
335
+
336
+ # All subsequent runs (offline):
337
+ export HF_HUB_OFFLINE=1
338
+ export TRANSFORMERS_OFFLINE=1
339
+ codegraph init --repo .
340
+ ```
341
+
342
+ The CLI sets these environment variables automatically after the model is cached.
343
+
344
+ ## Privacy
345
+
346
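+ All processing is fully local. Code never leaves your machine. The embedding model (`all-MiniLM-L6-v2`) runs locally once it has been downloaded (the one-time model download on first use is the only step that needs network access), and both NeuG and zvec are embedded databases with no network requirements.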
@@ -0,0 +1 @@
1
+ """CodeScope: Graph + Vector Code Intelligence powered by neug and zvec."""
@@ -0,0 +1,5 @@
1
+ """Allow ``python -m codegraph`` to invoke the CLI."""
2
+
3
+ from codegraph.cli import main
4
+
5
+ main()
@@ -0,0 +1 @@
1
+ """CodeScope language adapters for parsing source code via tree-sitter."""
@@ -0,0 +1,38 @@
1
+ """Abstract base class for language-specific source code adapters."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from abc import ABC, abstractmethod
6
+
7
+ from codegraph.models import ParseResult
8
+
9
+
10
class BaseAdapter(ABC):
    """Interface for adapters that parse one source file into structural data.

    Concrete adapters provide the language name, the file extensions they
    cover, and the parsing itself; extension-based matching is implemented
    here on top of :meth:`supported_extensions`.
    """

    @abstractmethod
    def language_name(self) -> str:
        """Return the canonical name of the language, e.g. ``'python'`` or ``'c'``."""

    @abstractmethod
    def supported_extensions(self) -> list[str]:
        """Return the file extensions handled by this adapter, e.g. ``['.py']``."""

    @abstractmethod
    def parse_file(self, source: bytes, file_path: str) -> ParseResult:
        """Parse *source* and return the structured code elements found in it.

        Parameters
        ----------
        source:
            Raw bytes of the source file.
        file_path:
            Repository-relative path used to generate unique IDs.
        """

    def can_handle(self, file_path: str) -> bool:
        """Tell whether *file_path* ends with one of the supported extensions."""
        # str.endswith accepts a tuple of candidates; an empty tuple yields
        # False, matching any() over an empty sequence. The comparison is
        # case-sensitive.
        suffixes = tuple(self.supported_extensions())
        return file_path.endswith(suffixes)