code-graph-rag 0.0.79__tar.gz → 0.0.88__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_graph_rag-0.0.88/PKG-INFO +219 -0
- code_graph_rag-0.0.88/PYPI_README.md +160 -0
- code_graph_rag-0.0.88/code_graph_rag.egg-info/PKG-INFO +219 -0
- {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/code_graph_rag.egg-info/SOURCES.txt +1 -0
- {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/codebase_rag/config.py +2 -0
- {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/codebase_rag/constants.py +4 -1
- {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/codebase_rag/exceptions.py +4 -0
- {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/codebase_rag/graph_updater.py +1 -1
- {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/codebase_rag/main.py +2 -0
- {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/pyproject.toml +7 -2
- code_graph_rag-0.0.79/PKG-INFO +0 -948
- code_graph_rag-0.0.79/code_graph_rag.egg-info/PKG-INFO +0 -948
- {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/LICENSE +0 -0
- {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/README.md +0 -0
- {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/cgr/__init__.py +0 -0
- {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/code_graph_rag.egg-info/dependency_links.txt +0 -0
- {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/code_graph_rag.egg-info/entry_points.txt +0 -0
- {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/code_graph_rag.egg-info/requires.txt +0 -0
- {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/code_graph_rag.egg-info/top_level.txt +0 -0
- {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/codebase_rag/__init__.py +0 -0
- {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/codebase_rag/cli.py +0 -0
- {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/codebase_rag/cli_help.py +0 -0
- {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/codebase_rag/cypher_queries.py +0 -0
- {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/codebase_rag/decorators.py +0 -0
- {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/codebase_rag/embedder.py +0 -0
- {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/codebase_rag/graph_loader.py +0 -0
- {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/codebase_rag/language_spec.py +0 -0
- {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/codebase_rag/logs.py +0 -0
- {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/codebase_rag/models.py +0 -0
- {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/codebase_rag/parser_loader.py +0 -0
- {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/codebase_rag/prompts.py +0 -0
- {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/codebase_rag/readme_sections.py +0 -0
- {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/codebase_rag/schema_builder.py +0 -0
- {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/codebase_rag/schemas.py +0 -0
- {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/codebase_rag/tool_errors.py +0 -0
- {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/codebase_rag/types_defs.py +0 -0
- {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/codebase_rag/unixcoder.py +0 -0
- {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/codebase_rag/vector_store.py +0 -0
- {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/codec/__init__.py +0 -0
- {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/codec/schema_pb2.py +0 -0
- {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/codec/schema_pb2.pyi +0 -0
- {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/setup.cfg +0 -0
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: code-graph-rag
|
|
3
|
+
Version: 0.0.88
|
|
4
|
+
Summary: The ultimate RAG for your monorepo. Query, understand, and edit multi-language codebases with the power of AI and knowledge graphs
|
|
5
|
+
License-Expression: MIT
|
|
6
|
+
Keywords: rag,retrieval-augmented-generation,knowledge-graph,code-analysis,tree-sitter,mcp,mcp-server,llm,graph-database,semantic-search,codebase,memgraph,developer-tools,monorepo
|
|
7
|
+
Classifier: Development Status :: 4 - Beta
|
|
8
|
+
Classifier: Environment :: Console
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
12
|
+
Classifier: Topic :: Software Development :: Code Generators
|
|
13
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
14
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
17
|
+
Requires-Python: >=3.12
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
License-File: LICENSE
|
|
20
|
+
Requires-Dist: loguru>=0.7.3
|
|
21
|
+
Requires-Dist: mcp>=1.21.1
|
|
22
|
+
Requires-Dist: pydantic-ai>=1.27.0
|
|
23
|
+
Requires-Dist: pydantic-settings>=2.0.0
|
|
24
|
+
Requires-Dist: pymgclient>=1.4.0
|
|
25
|
+
Requires-Dist: python-dotenv>=1.1.0
|
|
26
|
+
Requires-Dist: toml>=0.10.2
|
|
27
|
+
Requires-Dist: tree-sitter-python>=0.23.6
|
|
28
|
+
Requires-Dist: tree-sitter==0.25.0
|
|
29
|
+
Requires-Dist: watchdog>=6.0.0
|
|
30
|
+
Requires-Dist: typer>=0.12.5
|
|
31
|
+
Requires-Dist: rich>=13.7.1
|
|
32
|
+
Requires-Dist: prompt-toolkit>=3.0.0
|
|
33
|
+
Requires-Dist: diff-match-patch>=20241021
|
|
34
|
+
Requires-Dist: click>=8.0.0
|
|
35
|
+
Requires-Dist: protobuf>=5.27.0
|
|
36
|
+
Requires-Dist: defusedxml>=0.7.1
|
|
37
|
+
Requires-Dist: huggingface-hub[hf-xet]>=0.36.0
|
|
38
|
+
Provides-Extra: test
|
|
39
|
+
Requires-Dist: pytest>=8.4.1; extra == "test"
|
|
40
|
+
Requires-Dist: pytest-asyncio>=1.0.0; extra == "test"
|
|
41
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == "test"
|
|
42
|
+
Requires-Dist: pytest-xdist>=3.8.0; extra == "test"
|
|
43
|
+
Requires-Dist: testcontainers>=4.9.0; extra == "test"
|
|
44
|
+
Provides-Extra: treesitter-full
|
|
45
|
+
Requires-Dist: tree-sitter-python>=0.23.6; extra == "treesitter-full"
|
|
46
|
+
Requires-Dist: tree-sitter-javascript>=0.23.1; extra == "treesitter-full"
|
|
47
|
+
Requires-Dist: tree-sitter-typescript>=0.23.2; extra == "treesitter-full"
|
|
48
|
+
Requires-Dist: tree-sitter-rust>=0.24.0; extra == "treesitter-full"
|
|
49
|
+
Requires-Dist: tree-sitter-go>=0.23.4; extra == "treesitter-full"
|
|
50
|
+
Requires-Dist: tree-sitter-scala>=0.24.0; extra == "treesitter-full"
|
|
51
|
+
Requires-Dist: tree-sitter-java>=0.23.5; extra == "treesitter-full"
|
|
52
|
+
Requires-Dist: tree-sitter-cpp>=0.23.0; extra == "treesitter-full"
|
|
53
|
+
Requires-Dist: tree-sitter-lua>=0.0.19; extra == "treesitter-full"
|
|
54
|
+
Provides-Extra: semantic
|
|
55
|
+
Requires-Dist: qdrant-client>=1.9.0; extra == "semantic"
|
|
56
|
+
Requires-Dist: torch>=2.6.0; extra == "semantic"
|
|
57
|
+
Requires-Dist: transformers>=4.0.0; extra == "semantic"
|
|
58
|
+
Dynamic: license-file
|
|
59
|
+
|
|
60
|
+
# Code-Graph-RAG
|
|
61
|
+
|
|
62
|
+
A graph-based RAG system that parses multi-language codebases with Tree-sitter, builds knowledge graphs in Memgraph, and enables natural language querying, editing, and optimization.
|
|
63
|
+
|
|
64
|
+
## Install
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
pip install code-graph-rag
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
With all Tree-sitter grammars (Python, JS, TS, Rust, Go, Java, Scala, C++, Lua):
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
pip install 'code-graph-rag[treesitter-full]'
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
With semantic code search (UniXcoder embeddings):
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
pip install 'code-graph-rag[semantic]'
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
### Prerequisites
|
|
83
|
+
|
|
84
|
+
- Python 3.12+
|
|
85
|
+
- Docker (for Memgraph)
|
|
86
|
+
- `cmake` (for building pymgclient)
|
|
87
|
+
- `ripgrep` (`rg`) (for shell command text searching)
|
|
88
|
+
|
|
89
|
+
## CLI Quick Start
|
|
90
|
+
|
|
91
|
+
The package installs a `cgr` command.
|
|
92
|
+
|
|
93
|
+
**Start Memgraph, parse a repo, and query it:**
|
|
94
|
+
|
|
95
|
+
```bash
|
|
96
|
+
docker compose up -d # start Memgraph
|
|
97
|
+
cgr start --repo-path ./my-project \
|
|
98
|
+
--update-graph --clean # parse & launch interactive chat
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
**Index to protobuf for offline use:**
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
cgr index -o ./index-output --repo-path ./my-project
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
**Export knowledge graph to JSON:**
|
|
108
|
+
|
|
109
|
+
```bash
|
|
110
|
+
cgr export -o graph.json
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
**AI-guided optimization:**
|
|
114
|
+
|
|
115
|
+
```bash
|
|
116
|
+
cgr optimize python --repo-path ./my-project
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
**Run as an MCP server (for Claude Code):**
|
|
120
|
+
|
|
121
|
+
```bash
|
|
122
|
+
cgr mcp-server
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
**Check your setup:**
|
|
126
|
+
|
|
127
|
+
```bash
|
|
128
|
+
cgr doctor
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
## Python SDK
|
|
132
|
+
|
|
133
|
+
The `cgr` package provides short imports for programmatic use.
|
|
134
|
+
|
|
135
|
+
### Load and query an exported graph
|
|
136
|
+
|
|
137
|
+
```python
|
|
138
|
+
from cgr import load_graph
|
|
139
|
+
|
|
140
|
+
graph = load_graph("graph.json")
|
|
141
|
+
print(graph.summary())
|
|
142
|
+
|
|
143
|
+
functions = graph.find_nodes_by_label("Function")
|
|
144
|
+
for fn in functions[:5]:
|
|
145
|
+
rels = graph.get_relationships_for_node(fn.node_id)
|
|
146
|
+
print(f"{fn.properties['name']}: {len(rels)} relationships")
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
### Query Memgraph with Cypher
|
|
150
|
+
|
|
151
|
+
```python
|
|
152
|
+
from cgr import MemgraphIngestor
|
|
153
|
+
|
|
154
|
+
with MemgraphIngestor(host="localhost", port=7687) as db:
|
|
155
|
+
rows = db.fetch_all("MATCH (f:Function) RETURN f.name LIMIT 10")
|
|
156
|
+
for row in rows:
|
|
157
|
+
print(row)
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
### Generate Cypher from natural language
|
|
161
|
+
|
|
162
|
+
```python
|
|
163
|
+
import asyncio
|
|
164
|
+
from cgr import CypherGenerator
|
|
165
|
+
|
|
166
|
+
async def main():
|
|
167
|
+
gen = CypherGenerator()
|
|
168
|
+
cypher = await gen.generate("Find all classes that inherit from BaseModel")
|
|
169
|
+
print(cypher)
|
|
170
|
+
|
|
171
|
+
asyncio.run(main())
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
### Semantic code search
|
|
175
|
+
|
|
176
|
+
Requires the `semantic` extra.
|
|
177
|
+
|
|
178
|
+
```python
|
|
179
|
+
from cgr import embed_code
|
|
180
|
+
|
|
181
|
+
embedding = embed_code("def authenticate(user, password): ...")
|
|
182
|
+
print(f"Embedding dimension: {len(embedding)}")
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
### Configuration
|
|
186
|
+
|
|
187
|
+
```python
|
|
188
|
+
from cgr import settings
|
|
189
|
+
|
|
190
|
+
settings.set_orchestrator("openai", "gpt-4o", api_key="sk-...")
|
|
191
|
+
settings.set_cypher("google", "gemini-2.5-flash", api_key="your-key")
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
## Environment Variables
|
|
195
|
+
|
|
196
|
+
Configure via `.env` or environment variables:
|
|
197
|
+
|
|
198
|
+
| Variable | Default | Description |
|
|
199
|
+
|----------|---------|-------------|
|
|
200
|
+
| `MEMGRAPH_HOST` | `localhost` | Memgraph hostname |
|
|
201
|
+
| `MEMGRAPH_PORT` | `7687` | Memgraph port |
|
|
202
|
+
| `ORCHESTRATOR_PROVIDER` | | Provider: `google`, `openai`, `ollama` |
|
|
203
|
+
| `ORCHESTRATOR_MODEL` | | Model ID (e.g. `gpt-4o`, `gemini-2.5-pro`) |
|
|
204
|
+
| `ORCHESTRATOR_API_KEY` | | API key for the provider (not needed for `ollama`) |
|
|
205
|
+
| `CYPHER_PROVIDER` | | Provider for Cypher generation |
|
|
206
|
+
| `CYPHER_MODEL` | | Model ID for Cypher generation (e.g. `codellama`, `gpt-4o-mini`) |
|
|
207
|
+
| `CYPHER_API_KEY` | | API key for Cypher provider (not needed for `ollama`) |
|
|
208
|
+
| `TARGET_REPO_PATH` | `.` | Default repository path |
|
|
209
|
+
|
|
210
|
+
## Documentation
|
|
211
|
+
|
|
212
|
+
Full documentation, architecture details, and contribution guide:
|
|
213
|
+
[docs.code-graph-rag.com](https://docs.code-graph-rag.com)
|
|
214
|
+
|
|
215
|
+
## License
|
|
216
|
+
|
|
217
|
+
MIT
|
|
218
|
+
|
|
219
|
+
<!-- mcp-name: io.github.vitali87/code-graph-rag -->
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
# Code-Graph-RAG
|
|
2
|
+
|
|
3
|
+
A graph-based RAG system that parses multi-language codebases with Tree-sitter, builds knowledge graphs in Memgraph, and enables natural language querying, editing, and optimization.
|
|
4
|
+
|
|
5
|
+
## Install
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install code-graph-rag
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
With all Tree-sitter grammars (Python, JS, TS, Rust, Go, Java, Scala, C++, Lua):
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
pip install 'code-graph-rag[treesitter-full]'
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
With semantic code search (UniXcoder embeddings):
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
pip install 'code-graph-rag[semantic]'
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
### Prerequisites
|
|
24
|
+
|
|
25
|
+
- Python 3.12+
|
|
26
|
+
- Docker (for Memgraph)
|
|
27
|
+
- `cmake` (for building pymgclient)
|
|
28
|
+
- `ripgrep` (`rg`) (for shell command text searching)
|
|
29
|
+
|
|
30
|
+
## CLI Quick Start
|
|
31
|
+
|
|
32
|
+
The package installs a `cgr` command.
|
|
33
|
+
|
|
34
|
+
**Start Memgraph, parse a repo, and query it:**
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
docker compose up -d # start Memgraph
|
|
38
|
+
cgr start --repo-path ./my-project \
|
|
39
|
+
--update-graph --clean # parse & launch interactive chat
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
**Index to protobuf for offline use:**
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
cgr index -o ./index-output --repo-path ./my-project
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
**Export knowledge graph to JSON:**
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
cgr export -o graph.json
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
**AI-guided optimization:**
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
cgr optimize python --repo-path ./my-project
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
**Run as an MCP server (for Claude Code):**
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
cgr mcp-server
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
**Check your setup:**
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
cgr doctor
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
## Python SDK
|
|
73
|
+
|
|
74
|
+
The `cgr` package provides short imports for programmatic use.
|
|
75
|
+
|
|
76
|
+
### Load and query an exported graph
|
|
77
|
+
|
|
78
|
+
```python
|
|
79
|
+
from cgr import load_graph
|
|
80
|
+
|
|
81
|
+
graph = load_graph("graph.json")
|
|
82
|
+
print(graph.summary())
|
|
83
|
+
|
|
84
|
+
functions = graph.find_nodes_by_label("Function")
|
|
85
|
+
for fn in functions[:5]:
|
|
86
|
+
rels = graph.get_relationships_for_node(fn.node_id)
|
|
87
|
+
print(f"{fn.properties['name']}: {len(rels)} relationships")
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
### Query Memgraph with Cypher
|
|
91
|
+
|
|
92
|
+
```python
|
|
93
|
+
from cgr import MemgraphIngestor
|
|
94
|
+
|
|
95
|
+
with MemgraphIngestor(host="localhost", port=7687) as db:
|
|
96
|
+
rows = db.fetch_all("MATCH (f:Function) RETURN f.name LIMIT 10")
|
|
97
|
+
for row in rows:
|
|
98
|
+
print(row)
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
### Generate Cypher from natural language
|
|
102
|
+
|
|
103
|
+
```python
|
|
104
|
+
import asyncio
|
|
105
|
+
from cgr import CypherGenerator
|
|
106
|
+
|
|
107
|
+
async def main():
|
|
108
|
+
gen = CypherGenerator()
|
|
109
|
+
cypher = await gen.generate("Find all classes that inherit from BaseModel")
|
|
110
|
+
print(cypher)
|
|
111
|
+
|
|
112
|
+
asyncio.run(main())
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
### Semantic code search
|
|
116
|
+
|
|
117
|
+
Requires the `semantic` extra.
|
|
118
|
+
|
|
119
|
+
```python
|
|
120
|
+
from cgr import embed_code
|
|
121
|
+
|
|
122
|
+
embedding = embed_code("def authenticate(user, password): ...")
|
|
123
|
+
print(f"Embedding dimension: {len(embedding)}")
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
### Configuration
|
|
127
|
+
|
|
128
|
+
```python
|
|
129
|
+
from cgr import settings
|
|
130
|
+
|
|
131
|
+
settings.set_orchestrator("openai", "gpt-4o", api_key="sk-...")
|
|
132
|
+
settings.set_cypher("google", "gemini-2.5-flash", api_key="your-key")
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
## Environment Variables
|
|
136
|
+
|
|
137
|
+
Configure via `.env` or environment variables:
|
|
138
|
+
|
|
139
|
+
| Variable | Default | Description |
|
|
140
|
+
|----------|---------|-------------|
|
|
141
|
+
| `MEMGRAPH_HOST` | `localhost` | Memgraph hostname |
|
|
142
|
+
| `MEMGRAPH_PORT` | `7687` | Memgraph port |
|
|
143
|
+
| `ORCHESTRATOR_PROVIDER` | | Provider: `google`, `openai`, `ollama` |
|
|
144
|
+
| `ORCHESTRATOR_MODEL` | | Model ID (e.g. `gpt-4o`, `gemini-2.5-pro`) |
|
|
145
|
+
| `ORCHESTRATOR_API_KEY` | | API key for the provider (not needed for `ollama`) |
|
|
146
|
+
| `CYPHER_PROVIDER` | | Provider for Cypher generation |
|
|
147
|
+
| `CYPHER_MODEL` | | Model ID for Cypher generation (e.g. `codellama`, `gpt-4o-mini`) |
|
|
148
|
+
| `CYPHER_API_KEY` | | API key for Cypher provider (not needed for `ollama`) |
|
|
149
|
+
| `TARGET_REPO_PATH` | `.` | Default repository path |
|
|
150
|
+
|
|
151
|
+
## Documentation
|
|
152
|
+
|
|
153
|
+
Full documentation, architecture details, and contribution guide:
|
|
154
|
+
[docs.code-graph-rag.com](https://docs.code-graph-rag.com)
|
|
155
|
+
|
|
156
|
+
## License
|
|
157
|
+
|
|
158
|
+
MIT
|
|
159
|
+
|
|
160
|
+
<!-- mcp-name: io.github.vitali87/code-graph-rag -->
|
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: code-graph-rag
|
|
3
|
+
Version: 0.0.88
|
|
4
|
+
Summary: The ultimate RAG for your monorepo. Query, understand, and edit multi-language codebases with the power of AI and knowledge graphs
|
|
5
|
+
License-Expression: MIT
|
|
6
|
+
Keywords: rag,retrieval-augmented-generation,knowledge-graph,code-analysis,tree-sitter,mcp,mcp-server,llm,graph-database,semantic-search,codebase,memgraph,developer-tools,monorepo
|
|
7
|
+
Classifier: Development Status :: 4 - Beta
|
|
8
|
+
Classifier: Environment :: Console
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
12
|
+
Classifier: Topic :: Software Development :: Code Generators
|
|
13
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
14
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
17
|
+
Requires-Python: >=3.12
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
License-File: LICENSE
|
|
20
|
+
Requires-Dist: loguru>=0.7.3
|
|
21
|
+
Requires-Dist: mcp>=1.21.1
|
|
22
|
+
Requires-Dist: pydantic-ai>=1.27.0
|
|
23
|
+
Requires-Dist: pydantic-settings>=2.0.0
|
|
24
|
+
Requires-Dist: pymgclient>=1.4.0
|
|
25
|
+
Requires-Dist: python-dotenv>=1.1.0
|
|
26
|
+
Requires-Dist: toml>=0.10.2
|
|
27
|
+
Requires-Dist: tree-sitter-python>=0.23.6
|
|
28
|
+
Requires-Dist: tree-sitter==0.25.0
|
|
29
|
+
Requires-Dist: watchdog>=6.0.0
|
|
30
|
+
Requires-Dist: typer>=0.12.5
|
|
31
|
+
Requires-Dist: rich>=13.7.1
|
|
32
|
+
Requires-Dist: prompt-toolkit>=3.0.0
|
|
33
|
+
Requires-Dist: diff-match-patch>=20241021
|
|
34
|
+
Requires-Dist: click>=8.0.0
|
|
35
|
+
Requires-Dist: protobuf>=5.27.0
|
|
36
|
+
Requires-Dist: defusedxml>=0.7.1
|
|
37
|
+
Requires-Dist: huggingface-hub[hf-xet]>=0.36.0
|
|
38
|
+
Provides-Extra: test
|
|
39
|
+
Requires-Dist: pytest>=8.4.1; extra == "test"
|
|
40
|
+
Requires-Dist: pytest-asyncio>=1.0.0; extra == "test"
|
|
41
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == "test"
|
|
42
|
+
Requires-Dist: pytest-xdist>=3.8.0; extra == "test"
|
|
43
|
+
Requires-Dist: testcontainers>=4.9.0; extra == "test"
|
|
44
|
+
Provides-Extra: treesitter-full
|
|
45
|
+
Requires-Dist: tree-sitter-python>=0.23.6; extra == "treesitter-full"
|
|
46
|
+
Requires-Dist: tree-sitter-javascript>=0.23.1; extra == "treesitter-full"
|
|
47
|
+
Requires-Dist: tree-sitter-typescript>=0.23.2; extra == "treesitter-full"
|
|
48
|
+
Requires-Dist: tree-sitter-rust>=0.24.0; extra == "treesitter-full"
|
|
49
|
+
Requires-Dist: tree-sitter-go>=0.23.4; extra == "treesitter-full"
|
|
50
|
+
Requires-Dist: tree-sitter-scala>=0.24.0; extra == "treesitter-full"
|
|
51
|
+
Requires-Dist: tree-sitter-java>=0.23.5; extra == "treesitter-full"
|
|
52
|
+
Requires-Dist: tree-sitter-cpp>=0.23.0; extra == "treesitter-full"
|
|
53
|
+
Requires-Dist: tree-sitter-lua>=0.0.19; extra == "treesitter-full"
|
|
54
|
+
Provides-Extra: semantic
|
|
55
|
+
Requires-Dist: qdrant-client>=1.9.0; extra == "semantic"
|
|
56
|
+
Requires-Dist: torch>=2.6.0; extra == "semantic"
|
|
57
|
+
Requires-Dist: transformers>=4.0.0; extra == "semantic"
|
|
58
|
+
Dynamic: license-file
|
|
59
|
+
|
|
60
|
+
# Code-Graph-RAG
|
|
61
|
+
|
|
62
|
+
A graph-based RAG system that parses multi-language codebases with Tree-sitter, builds knowledge graphs in Memgraph, and enables natural language querying, editing, and optimization.
|
|
63
|
+
|
|
64
|
+
## Install
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
pip install code-graph-rag
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
With all Tree-sitter grammars (Python, JS, TS, Rust, Go, Java, Scala, C++, Lua):
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
pip install 'code-graph-rag[treesitter-full]'
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
With semantic code search (UniXcoder embeddings):
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
pip install 'code-graph-rag[semantic]'
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
### Prerequisites
|
|
83
|
+
|
|
84
|
+
- Python 3.12+
|
|
85
|
+
- Docker (for Memgraph)
|
|
86
|
+
- `cmake` (for building pymgclient)
|
|
87
|
+
- `ripgrep` (`rg`) (for shell command text searching)
|
|
88
|
+
|
|
89
|
+
## CLI Quick Start
|
|
90
|
+
|
|
91
|
+
The package installs a `cgr` command.
|
|
92
|
+
|
|
93
|
+
**Start Memgraph, parse a repo, and query it:**
|
|
94
|
+
|
|
95
|
+
```bash
|
|
96
|
+
docker compose up -d # start Memgraph
|
|
97
|
+
cgr start --repo-path ./my-project \
|
|
98
|
+
--update-graph --clean # parse & launch interactive chat
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
**Index to protobuf for offline use:**
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
cgr index -o ./index-output --repo-path ./my-project
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
**Export knowledge graph to JSON:**
|
|
108
|
+
|
|
109
|
+
```bash
|
|
110
|
+
cgr export -o graph.json
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
**AI-guided optimization:**
|
|
114
|
+
|
|
115
|
+
```bash
|
|
116
|
+
cgr optimize python --repo-path ./my-project
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
**Run as an MCP server (for Claude Code):**
|
|
120
|
+
|
|
121
|
+
```bash
|
|
122
|
+
cgr mcp-server
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
**Check your setup:**
|
|
126
|
+
|
|
127
|
+
```bash
|
|
128
|
+
cgr doctor
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
## Python SDK
|
|
132
|
+
|
|
133
|
+
The `cgr` package provides short imports for programmatic use.
|
|
134
|
+
|
|
135
|
+
### Load and query an exported graph
|
|
136
|
+
|
|
137
|
+
```python
|
|
138
|
+
from cgr import load_graph
|
|
139
|
+
|
|
140
|
+
graph = load_graph("graph.json")
|
|
141
|
+
print(graph.summary())
|
|
142
|
+
|
|
143
|
+
functions = graph.find_nodes_by_label("Function")
|
|
144
|
+
for fn in functions[:5]:
|
|
145
|
+
rels = graph.get_relationships_for_node(fn.node_id)
|
|
146
|
+
print(f"{fn.properties['name']}: {len(rels)} relationships")
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
### Query Memgraph with Cypher
|
|
150
|
+
|
|
151
|
+
```python
|
|
152
|
+
from cgr import MemgraphIngestor
|
|
153
|
+
|
|
154
|
+
with MemgraphIngestor(host="localhost", port=7687) as db:
|
|
155
|
+
rows = db.fetch_all("MATCH (f:Function) RETURN f.name LIMIT 10")
|
|
156
|
+
for row in rows:
|
|
157
|
+
print(row)
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
### Generate Cypher from natural language
|
|
161
|
+
|
|
162
|
+
```python
|
|
163
|
+
import asyncio
|
|
164
|
+
from cgr import CypherGenerator
|
|
165
|
+
|
|
166
|
+
async def main():
|
|
167
|
+
gen = CypherGenerator()
|
|
168
|
+
cypher = await gen.generate("Find all classes that inherit from BaseModel")
|
|
169
|
+
print(cypher)
|
|
170
|
+
|
|
171
|
+
asyncio.run(main())
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
### Semantic code search
|
|
175
|
+
|
|
176
|
+
Requires the `semantic` extra.
|
|
177
|
+
|
|
178
|
+
```python
|
|
179
|
+
from cgr import embed_code
|
|
180
|
+
|
|
181
|
+
embedding = embed_code("def authenticate(user, password): ...")
|
|
182
|
+
print(f"Embedding dimension: {len(embedding)}")
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
### Configuration
|
|
186
|
+
|
|
187
|
+
```python
|
|
188
|
+
from cgr import settings
|
|
189
|
+
|
|
190
|
+
settings.set_orchestrator("openai", "gpt-4o", api_key="sk-...")
|
|
191
|
+
settings.set_cypher("google", "gemini-2.5-flash", api_key="your-key")
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
## Environment Variables
|
|
195
|
+
|
|
196
|
+
Configure via `.env` or environment variables:
|
|
197
|
+
|
|
198
|
+
| Variable | Default | Description |
|
|
199
|
+
|----------|---------|-------------|
|
|
200
|
+
| `MEMGRAPH_HOST` | `localhost` | Memgraph hostname |
|
|
201
|
+
| `MEMGRAPH_PORT` | `7687` | Memgraph port |
|
|
202
|
+
| `ORCHESTRATOR_PROVIDER` | | Provider: `google`, `openai`, `ollama` |
|
|
203
|
+
| `ORCHESTRATOR_MODEL` | | Model ID (e.g. `gpt-4o`, `gemini-2.5-pro`) |
|
|
204
|
+
| `ORCHESTRATOR_API_KEY` | | API key for the provider (not needed for `ollama`) |
|
|
205
|
+
| `CYPHER_PROVIDER` | | Provider for Cypher generation |
|
|
206
|
+
| `CYPHER_MODEL` | | Model ID for Cypher generation (e.g. `codellama`, `gpt-4o-mini`) |
|
|
207
|
+
| `CYPHER_API_KEY` | | API key for Cypher provider (not needed for `ollama`) |
|
|
208
|
+
| `TARGET_REPO_PATH` | `.` | Default repository path |
|
|
209
|
+
|
|
210
|
+
## Documentation
|
|
211
|
+
|
|
212
|
+
Full documentation, architecture details, and contribution guide:
|
|
213
|
+
[docs.code-graph-rag.com](https://docs.code-graph-rag.com)
|
|
214
|
+
|
|
215
|
+
## License
|
|
216
|
+
|
|
217
|
+
MIT
|
|
218
|
+
|
|
219
|
+
<!-- mcp-name: io.github.vitali87/code-graph-rag -->
|
|
@@ -145,6 +145,8 @@ class AppConfig(BaseSettings):
|
|
|
145
145
|
MEMGRAPH_HOST: str = "localhost"
|
|
146
146
|
MEMGRAPH_PORT: int = 7687
|
|
147
147
|
MEMGRAPH_HTTP_PORT: int = 7444
|
|
148
|
+
MEMGRAPH_USERNAME: str | None = None
|
|
149
|
+
MEMGRAPH_PASSWORD: str | None = None
|
|
148
150
|
LAB_PORT: int = 3000
|
|
149
151
|
MEMGRAPH_BATCH_SIZE: int = 1000
|
|
150
152
|
AGENT_RETRIES: int = 3
|
|
@@ -420,7 +420,7 @@ CYPHER_DEFAULT_LIMIT = 50
|
|
|
420
420
|
CYPHER_QUERY_EMBEDDINGS = """
|
|
421
421
|
MATCH (m:Module)-[:DEFINES]->(n)
|
|
422
422
|
WHERE (n:Function OR n:Method)
|
|
423
|
-
AND m.qualified_name STARTS WITH $project_name + '.'
|
|
423
|
+
AND m.qualified_name STARTS WITH ($project_name + '.')
|
|
424
424
|
RETURN id(n) AS node_id, n.qualified_name AS qualified_name,
|
|
425
425
|
n.start_line AS start_line, n.end_line AS end_line,
|
|
426
426
|
m.path AS path
|
|
@@ -880,8 +880,11 @@ PYINSTALLER_ARG_CLEAN = "--clean"
|
|
|
880
880
|
PYINSTALLER_ARG_COLLECT_ALL = "--collect-all"
|
|
881
881
|
PYINSTALLER_ARG_COLLECT_DATA = "--collect-data"
|
|
882
882
|
PYINSTALLER_ARG_HIDDEN_IMPORT = "--hidden-import"
|
|
883
|
+
PYINSTALLER_ARG_EXCLUDE_MODULE = "--exclude-module"
|
|
883
884
|
PYINSTALLER_ENTRY_POINT = "main.py"
|
|
884
885
|
|
|
886
|
+
PYINSTALLER_EXCLUDED_MODULES = ["logfire", "logfire_api"]
|
|
887
|
+
|
|
885
888
|
# (H) TOML parsing constants
|
|
886
889
|
TOML_KEY_PROJECT = "project"
|
|
887
890
|
TOML_KEY_OPTIONAL_DEPS = "optional-dependencies"
|
|
@@ -48,6 +48,10 @@ LLM_INIT_ORCHESTRATOR = "Failed to initialize RAG Orchestrator: {error}"
|
|
|
48
48
|
# (H) Graph service errors
|
|
49
49
|
BATCH_SIZE = "batch_size must be a positive integer"
|
|
50
50
|
CONN = "Not connected to Memgraph."
|
|
51
|
+
AUTH_INCOMPLETE = (
|
|
52
|
+
"Both username and password are required for authentication. "
|
|
53
|
+
"Either provide both or neither."
|
|
54
|
+
)
|
|
51
55
|
|
|
52
56
|
# (H) Access control errors (used with raise)
|
|
53
57
|
ACCESS_DENIED = "Access denied: Cannot access files outside the project root."
|
|
@@ -369,7 +369,7 @@ class GraphUpdater:
|
|
|
369
369
|
logger.info(ls.PASS_4_EMBEDDINGS)
|
|
370
370
|
|
|
371
371
|
results = self.ingestor.fetch_all(
|
|
372
|
-
cs.CYPHER_QUERY_EMBEDDINGS, {"project_name": self.project_name
|
|
372
|
+
cs.CYPHER_QUERY_EMBEDDINGS, {"project_name": self.project_name}
|
|
373
373
|
)
|
|
374
374
|
|
|
375
375
|
if not results:
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "code-graph-rag"
|
|
3
|
-
version = "0.0.79"
|
|
3
|
+
version = "0.0.88"
|
|
4
4
|
description = "The ultimate RAG for your monorepo. Query, understand, and edit multi-language codebases with the power of AI and knowledge graphs"
|
|
5
|
-
readme = "README.md"
|
|
5
|
+
readme = "PYPI_README.md"
|
|
6
6
|
requires-python = ">=3.12"
|
|
7
7
|
license = "MIT"
|
|
8
8
|
classifiers = [
|
|
@@ -150,6 +150,11 @@ dev = [
|
|
|
150
150
|
"types-toml>=0.10.8.20240310",
|
|
151
151
|
"vulture>=2.14",
|
|
152
152
|
]
|
|
153
|
+
docs = [
|
|
154
|
+
"mkdocs>=1.6.1,<2",
|
|
155
|
+
"mkdocs-material>=9.7.3",
|
|
156
|
+
"mkdocs-minify-plugin>=0.8.0",
|
|
157
|
+
]
|
|
153
158
|
|
|
154
159
|
[tool.bandit]
|
|
155
160
|
exclude_dirs = ["codebase_rag/tests", "scripts"]
|