code-graph-rag 0.0.79__tar.gz → 0.0.88__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. code_graph_rag-0.0.88/PKG-INFO +219 -0
  2. code_graph_rag-0.0.88/PYPI_README.md +160 -0
  3. code_graph_rag-0.0.88/code_graph_rag.egg-info/PKG-INFO +219 -0
  4. {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/code_graph_rag.egg-info/SOURCES.txt +1 -0
  5. {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/codebase_rag/config.py +2 -0
  6. {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/codebase_rag/constants.py +4 -1
  7. {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/codebase_rag/exceptions.py +4 -0
  8. {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/codebase_rag/graph_updater.py +1 -1
  9. {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/codebase_rag/main.py +2 -0
  10. {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/pyproject.toml +7 -2
  11. code_graph_rag-0.0.79/PKG-INFO +0 -948
  12. code_graph_rag-0.0.79/code_graph_rag.egg-info/PKG-INFO +0 -948
  13. {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/LICENSE +0 -0
  14. {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/README.md +0 -0
  15. {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/cgr/__init__.py +0 -0
  16. {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/code_graph_rag.egg-info/dependency_links.txt +0 -0
  17. {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/code_graph_rag.egg-info/entry_points.txt +0 -0
  18. {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/code_graph_rag.egg-info/requires.txt +0 -0
  19. {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/code_graph_rag.egg-info/top_level.txt +0 -0
  20. {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/codebase_rag/__init__.py +0 -0
  21. {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/codebase_rag/cli.py +0 -0
  22. {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/codebase_rag/cli_help.py +0 -0
  23. {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/codebase_rag/cypher_queries.py +0 -0
  24. {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/codebase_rag/decorators.py +0 -0
  25. {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/codebase_rag/embedder.py +0 -0
  26. {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/codebase_rag/graph_loader.py +0 -0
  27. {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/codebase_rag/language_spec.py +0 -0
  28. {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/codebase_rag/logs.py +0 -0
  29. {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/codebase_rag/models.py +0 -0
  30. {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/codebase_rag/parser_loader.py +0 -0
  31. {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/codebase_rag/prompts.py +0 -0
  32. {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/codebase_rag/readme_sections.py +0 -0
  33. {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/codebase_rag/schema_builder.py +0 -0
  34. {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/codebase_rag/schemas.py +0 -0
  35. {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/codebase_rag/tool_errors.py +0 -0
  36. {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/codebase_rag/types_defs.py +0 -0
  37. {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/codebase_rag/unixcoder.py +0 -0
  38. {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/codebase_rag/vector_store.py +0 -0
  39. {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/codec/__init__.py +0 -0
  40. {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/codec/schema_pb2.py +0 -0
  41. {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/codec/schema_pb2.pyi +0 -0
  42. {code_graph_rag-0.0.79 → code_graph_rag-0.0.88}/setup.cfg +0 -0
@@ -0,0 +1,219 @@
1
+ Metadata-Version: 2.4
2
+ Name: code-graph-rag
3
+ Version: 0.0.88
4
+ Summary: The ultimate RAG for your monorepo. Query, understand, and edit multi-language codebases with the power of AI and knowledge graphs
5
+ License-Expression: MIT
6
+ Keywords: rag,retrieval-augmented-generation,knowledge-graph,code-analysis,tree-sitter,mcp,mcp-server,llm,graph-database,semantic-search,codebase,memgraph,developer-tools,monorepo
7
+ Classifier: Development Status :: 4 - Beta
8
+ Classifier: Environment :: Console
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: Operating System :: OS Independent
11
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
12
+ Classifier: Topic :: Software Development :: Code Generators
13
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
14
+ Classifier: Programming Language :: Python :: 3 :: Only
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Programming Language :: Python :: 3.13
17
+ Requires-Python: >=3.12
18
+ Description-Content-Type: text/markdown
19
+ License-File: LICENSE
20
+ Requires-Dist: loguru>=0.7.3
21
+ Requires-Dist: mcp>=1.21.1
22
+ Requires-Dist: pydantic-ai>=1.27.0
23
+ Requires-Dist: pydantic-settings>=2.0.0
24
+ Requires-Dist: pymgclient>=1.4.0
25
+ Requires-Dist: python-dotenv>=1.1.0
26
+ Requires-Dist: toml>=0.10.2
27
+ Requires-Dist: tree-sitter-python>=0.23.6
28
+ Requires-Dist: tree-sitter==0.25.0
29
+ Requires-Dist: watchdog>=6.0.0
30
+ Requires-Dist: typer>=0.12.5
31
+ Requires-Dist: rich>=13.7.1
32
+ Requires-Dist: prompt-toolkit>=3.0.0
33
+ Requires-Dist: diff-match-patch>=20241021
34
+ Requires-Dist: click>=8.0.0
35
+ Requires-Dist: protobuf>=5.27.0
36
+ Requires-Dist: defusedxml>=0.7.1
37
+ Requires-Dist: huggingface-hub[hf-xet]>=0.36.0
38
+ Provides-Extra: test
39
+ Requires-Dist: pytest>=8.4.1; extra == "test"
40
+ Requires-Dist: pytest-asyncio>=1.0.0; extra == "test"
41
+ Requires-Dist: pytest-cov>=4.0.0; extra == "test"
42
+ Requires-Dist: pytest-xdist>=3.8.0; extra == "test"
43
+ Requires-Dist: testcontainers>=4.9.0; extra == "test"
44
+ Provides-Extra: treesitter-full
45
+ Requires-Dist: tree-sitter-python>=0.23.6; extra == "treesitter-full"
46
+ Requires-Dist: tree-sitter-javascript>=0.23.1; extra == "treesitter-full"
47
+ Requires-Dist: tree-sitter-typescript>=0.23.2; extra == "treesitter-full"
48
+ Requires-Dist: tree-sitter-rust>=0.24.0; extra == "treesitter-full"
49
+ Requires-Dist: tree-sitter-go>=0.23.4; extra == "treesitter-full"
50
+ Requires-Dist: tree-sitter-scala>=0.24.0; extra == "treesitter-full"
51
+ Requires-Dist: tree-sitter-java>=0.23.5; extra == "treesitter-full"
52
+ Requires-Dist: tree-sitter-cpp>=0.23.0; extra == "treesitter-full"
53
+ Requires-Dist: tree-sitter-lua>=0.0.19; extra == "treesitter-full"
54
+ Provides-Extra: semantic
55
+ Requires-Dist: qdrant-client>=1.9.0; extra == "semantic"
56
+ Requires-Dist: torch>=2.6.0; extra == "semantic"
57
+ Requires-Dist: transformers>=4.0.0; extra == "semantic"
58
+ Dynamic: license-file
59
+
60
+ # Code-Graph-RAG
61
+
62
+ A graph-based RAG system that parses multi-language codebases with Tree-sitter, builds knowledge graphs in Memgraph, and enables natural language querying, editing, and optimization.
63
+
64
+ ## Install
65
+
66
+ ```bash
67
+ pip install code-graph-rag
68
+ ```
69
+
70
+ With all Tree-sitter grammars (Python, JS, TS, Rust, Go, Java, Scala, C++, Lua):
71
+
72
+ ```bash
73
+ pip install 'code-graph-rag[treesitter-full]'
74
+ ```
75
+
76
+ With semantic code search (UniXcoder embeddings):
77
+
78
+ ```bash
79
+ pip install 'code-graph-rag[semantic]'
80
+ ```
81
+
82
+ ### Prerequisites
83
+
84
+ - Python 3.12+
85
+ - Docker (for Memgraph)
86
+ - `cmake` (for building pymgclient)
87
+ - `ripgrep` (`rg`) (for shell command text searching)
88
+
89
+ ## CLI Quick Start
90
+
91
+ The package installs a `cgr` command.
92
+
93
+ **Start Memgraph, parse a repo, and query it:**
94
+
95
+ ```bash
96
+ docker compose up -d # start Memgraph
97
+ cgr start --repo-path ./my-project \
98
+ --update-graph --clean # parse & launch interactive chat
99
+ ```
100
+
101
+ **Index to protobuf for offline use:**
102
+
103
+ ```bash
104
+ cgr index -o ./index-output --repo-path ./my-project
105
+ ```
106
+
107
+ **Export knowledge graph to JSON:**
108
+
109
+ ```bash
110
+ cgr export -o graph.json
111
+ ```
112
+
113
+ **AI-guided optimization:**
114
+
115
+ ```bash
116
+ cgr optimize python --repo-path ./my-project
117
+ ```
118
+
119
+ **Run as an MCP server (for Claude Code):**
120
+
121
+ ```bash
122
+ cgr mcp-server
123
+ ```
124
+
125
+ **Check your setup:**
126
+
127
+ ```bash
128
+ cgr doctor
129
+ ```
130
+
131
+ ## Python SDK
132
+
133
+ The `cgr` package provides short imports for programmatic use.
134
+
135
+ ### Load and query an exported graph
136
+
137
+ ```python
138
+ from cgr import load_graph
139
+
140
+ graph = load_graph("graph.json")
141
+ print(graph.summary())
142
+
143
+ functions = graph.find_nodes_by_label("Function")
144
+ for fn in functions[:5]:
145
+     rels = graph.get_relationships_for_node(fn.node_id)
146
+     print(f"{fn.properties['name']}: {len(rels)} relationships")
147
+ ```
148
+
149
+ ### Query Memgraph with Cypher
150
+
151
+ ```python
152
+ from cgr import MemgraphIngestor
153
+
154
+ with MemgraphIngestor(host="localhost", port=7687) as db:
155
+     rows = db.fetch_all("MATCH (f:Function) RETURN f.name LIMIT 10")
156
+     for row in rows:
157
+         print(row)
158
+ ```
159
+
160
+ ### Generate Cypher from natural language
161
+
162
+ ```python
163
+ import asyncio
164
+ from cgr import CypherGenerator
165
+
166
+ async def main():
167
+     gen = CypherGenerator()
168
+     cypher = await gen.generate("Find all classes that inherit from BaseModel")
169
+     print(cypher)
170
+
171
+ asyncio.run(main())
172
+ ```
173
+
174
+ ### Semantic code search
175
+
176
+ Requires the `semantic` extra.
177
+
178
+ ```python
179
+ from cgr import embed_code
180
+
181
+ embedding = embed_code("def authenticate(user, password): ...")
182
+ print(f"Embedding dimension: {len(embedding)}")
183
+ ```
184
+
185
+ ### Configuration
186
+
187
+ ```python
188
+ from cgr import settings
189
+
190
+ settings.set_orchestrator("openai", "gpt-4o", api_key="sk-...")
191
+ settings.set_cypher("google", "gemini-2.5-flash", api_key="your-key")
192
+ ```
193
+
194
+ ## Environment Variables
195
+
196
+ Configure via `.env` or environment variables:
197
+
198
+ | Variable | Default | Description |
199
+ |----------|---------|-------------|
200
+ | `MEMGRAPH_HOST` | `localhost` | Memgraph hostname |
201
+ | `MEMGRAPH_PORT` | `7687` | Memgraph port |
202
+ | `ORCHESTRATOR_PROVIDER` | | Provider: `google`, `openai`, `ollama` |
203
+ | `ORCHESTRATOR_MODEL` | | Model ID (e.g. `gpt-4o`, `gemini-2.5-pro`) |
204
+ | `ORCHESTRATOR_API_KEY` | | API key for the provider (not needed for `ollama`) |
205
+ | `CYPHER_PROVIDER` | | Provider for Cypher generation |
206
+ | `CYPHER_MODEL` | | Model ID for Cypher generation (e.g. `codellama`, `gpt-4o-mini`) |
207
+ | `CYPHER_API_KEY` | | API key for Cypher provider (not needed for `ollama`) |
208
+ | `TARGET_REPO_PATH` | `.` | Default repository path |
209
+
210
+ ## Documentation
211
+
212
+ Full documentation, architecture details, and contribution guide:
213
+ [docs.code-graph-rag.com](https://docs.code-graph-rag.com)
214
+
215
+ ## License
216
+
217
+ MIT
218
+
219
+ <!-- mcp-name: io.github.vitali87/code-graph-rag -->
@@ -0,0 +1,160 @@
1
+ # Code-Graph-RAG
2
+
3
+ A graph-based RAG system that parses multi-language codebases with Tree-sitter, builds knowledge graphs in Memgraph, and enables natural language querying, editing, and optimization.
4
+
5
+ ## Install
6
+
7
+ ```bash
8
+ pip install code-graph-rag
9
+ ```
10
+
11
+ With all Tree-sitter grammars (Python, JS, TS, Rust, Go, Java, Scala, C++, Lua):
12
+
13
+ ```bash
14
+ pip install 'code-graph-rag[treesitter-full]'
15
+ ```
16
+
17
+ With semantic code search (UniXcoder embeddings):
18
+
19
+ ```bash
20
+ pip install 'code-graph-rag[semantic]'
21
+ ```
22
+
23
+ ### Prerequisites
24
+
25
+ - Python 3.12+
26
+ - Docker (for Memgraph)
27
+ - `cmake` (for building pymgclient)
28
+ - `ripgrep` (`rg`) (for shell command text searching)
29
+
30
+ ## CLI Quick Start
31
+
32
+ The package installs a `cgr` command.
33
+
34
+ **Start Memgraph, parse a repo, and query it:**
35
+
36
+ ```bash
37
+ docker compose up -d # start Memgraph
38
+ cgr start --repo-path ./my-project \
39
+ --update-graph --clean # parse & launch interactive chat
40
+ ```
41
+
42
+ **Index to protobuf for offline use:**
43
+
44
+ ```bash
45
+ cgr index -o ./index-output --repo-path ./my-project
46
+ ```
47
+
48
+ **Export knowledge graph to JSON:**
49
+
50
+ ```bash
51
+ cgr export -o graph.json
52
+ ```
53
+
54
+ **AI-guided optimization:**
55
+
56
+ ```bash
57
+ cgr optimize python --repo-path ./my-project
58
+ ```
59
+
60
+ **Run as an MCP server (for Claude Code):**
61
+
62
+ ```bash
63
+ cgr mcp-server
64
+ ```
65
+
66
+ **Check your setup:**
67
+
68
+ ```bash
69
+ cgr doctor
70
+ ```
71
+
72
+ ## Python SDK
73
+
74
+ The `cgr` package provides short imports for programmatic use.
75
+
76
+ ### Load and query an exported graph
77
+
78
+ ```python
79
+ from cgr import load_graph
80
+
81
+ graph = load_graph("graph.json")
82
+ print(graph.summary())
83
+
84
+ functions = graph.find_nodes_by_label("Function")
85
+ for fn in functions[:5]:
86
+     rels = graph.get_relationships_for_node(fn.node_id)
87
+     print(f"{fn.properties['name']}: {len(rels)} relationships")
88
+ ```
89
+
90
+ ### Query Memgraph with Cypher
91
+
92
+ ```python
93
+ from cgr import MemgraphIngestor
94
+
95
+ with MemgraphIngestor(host="localhost", port=7687) as db:
96
+     rows = db.fetch_all("MATCH (f:Function) RETURN f.name LIMIT 10")
97
+     for row in rows:
98
+         print(row)
99
+ ```
100
+
101
+ ### Generate Cypher from natural language
102
+
103
+ ```python
104
+ import asyncio
105
+ from cgr import CypherGenerator
106
+
107
+ async def main():
108
+     gen = CypherGenerator()
109
+     cypher = await gen.generate("Find all classes that inherit from BaseModel")
110
+     print(cypher)
111
+
112
+ asyncio.run(main())
113
+ ```
114
+
115
+ ### Semantic code search
116
+
117
+ Requires the `semantic` extra.
118
+
119
+ ```python
120
+ from cgr import embed_code
121
+
122
+ embedding = embed_code("def authenticate(user, password): ...")
123
+ print(f"Embedding dimension: {len(embedding)}")
124
+ ```
125
+
126
+ ### Configuration
127
+
128
+ ```python
129
+ from cgr import settings
130
+
131
+ settings.set_orchestrator("openai", "gpt-4o", api_key="sk-...")
132
+ settings.set_cypher("google", "gemini-2.5-flash", api_key="your-key")
133
+ ```
134
+
135
+ ## Environment Variables
136
+
137
+ Configure via `.env` or environment variables:
138
+
139
+ | Variable | Default | Description |
140
+ |----------|---------|-------------|
141
+ | `MEMGRAPH_HOST` | `localhost` | Memgraph hostname |
142
+ | `MEMGRAPH_PORT` | `7687` | Memgraph port |
143
+ | `ORCHESTRATOR_PROVIDER` | | Provider: `google`, `openai`, `ollama` |
144
+ | `ORCHESTRATOR_MODEL` | | Model ID (e.g. `gpt-4o`, `gemini-2.5-pro`) |
145
+ | `ORCHESTRATOR_API_KEY` | | API key for the provider (not needed for `ollama`) |
146
+ | `CYPHER_PROVIDER` | | Provider for Cypher generation |
147
+ | `CYPHER_MODEL` | | Model ID for Cypher generation (e.g. `codellama`, `gpt-4o-mini`) |
148
+ | `CYPHER_API_KEY` | | API key for Cypher provider (not needed for `ollama`) |
149
+ | `TARGET_REPO_PATH` | `.` | Default repository path |
150
+
151
+ ## Documentation
152
+
153
+ Full documentation, architecture details, and contribution guide:
154
+ [docs.code-graph-rag.com](https://docs.code-graph-rag.com)
155
+
156
+ ## License
157
+
158
+ MIT
159
+
160
+ <!-- mcp-name: io.github.vitali87/code-graph-rag -->
@@ -0,0 +1,219 @@
1
+ Metadata-Version: 2.4
2
+ Name: code-graph-rag
3
+ Version: 0.0.88
4
+ Summary: The ultimate RAG for your monorepo. Query, understand, and edit multi-language codebases with the power of AI and knowledge graphs
5
+ License-Expression: MIT
6
+ Keywords: rag,retrieval-augmented-generation,knowledge-graph,code-analysis,tree-sitter,mcp,mcp-server,llm,graph-database,semantic-search,codebase,memgraph,developer-tools,monorepo
7
+ Classifier: Development Status :: 4 - Beta
8
+ Classifier: Environment :: Console
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: Operating System :: OS Independent
11
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
12
+ Classifier: Topic :: Software Development :: Code Generators
13
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
14
+ Classifier: Programming Language :: Python :: 3 :: Only
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Programming Language :: Python :: 3.13
17
+ Requires-Python: >=3.12
18
+ Description-Content-Type: text/markdown
19
+ License-File: LICENSE
20
+ Requires-Dist: loguru>=0.7.3
21
+ Requires-Dist: mcp>=1.21.1
22
+ Requires-Dist: pydantic-ai>=1.27.0
23
+ Requires-Dist: pydantic-settings>=2.0.0
24
+ Requires-Dist: pymgclient>=1.4.0
25
+ Requires-Dist: python-dotenv>=1.1.0
26
+ Requires-Dist: toml>=0.10.2
27
+ Requires-Dist: tree-sitter-python>=0.23.6
28
+ Requires-Dist: tree-sitter==0.25.0
29
+ Requires-Dist: watchdog>=6.0.0
30
+ Requires-Dist: typer>=0.12.5
31
+ Requires-Dist: rich>=13.7.1
32
+ Requires-Dist: prompt-toolkit>=3.0.0
33
+ Requires-Dist: diff-match-patch>=20241021
34
+ Requires-Dist: click>=8.0.0
35
+ Requires-Dist: protobuf>=5.27.0
36
+ Requires-Dist: defusedxml>=0.7.1
37
+ Requires-Dist: huggingface-hub[hf-xet]>=0.36.0
38
+ Provides-Extra: test
39
+ Requires-Dist: pytest>=8.4.1; extra == "test"
40
+ Requires-Dist: pytest-asyncio>=1.0.0; extra == "test"
41
+ Requires-Dist: pytest-cov>=4.0.0; extra == "test"
42
+ Requires-Dist: pytest-xdist>=3.8.0; extra == "test"
43
+ Requires-Dist: testcontainers>=4.9.0; extra == "test"
44
+ Provides-Extra: treesitter-full
45
+ Requires-Dist: tree-sitter-python>=0.23.6; extra == "treesitter-full"
46
+ Requires-Dist: tree-sitter-javascript>=0.23.1; extra == "treesitter-full"
47
+ Requires-Dist: tree-sitter-typescript>=0.23.2; extra == "treesitter-full"
48
+ Requires-Dist: tree-sitter-rust>=0.24.0; extra == "treesitter-full"
49
+ Requires-Dist: tree-sitter-go>=0.23.4; extra == "treesitter-full"
50
+ Requires-Dist: tree-sitter-scala>=0.24.0; extra == "treesitter-full"
51
+ Requires-Dist: tree-sitter-java>=0.23.5; extra == "treesitter-full"
52
+ Requires-Dist: tree-sitter-cpp>=0.23.0; extra == "treesitter-full"
53
+ Requires-Dist: tree-sitter-lua>=0.0.19; extra == "treesitter-full"
54
+ Provides-Extra: semantic
55
+ Requires-Dist: qdrant-client>=1.9.0; extra == "semantic"
56
+ Requires-Dist: torch>=2.6.0; extra == "semantic"
57
+ Requires-Dist: transformers>=4.0.0; extra == "semantic"
58
+ Dynamic: license-file
59
+
60
+ # Code-Graph-RAG
61
+
62
+ A graph-based RAG system that parses multi-language codebases with Tree-sitter, builds knowledge graphs in Memgraph, and enables natural language querying, editing, and optimization.
63
+
64
+ ## Install
65
+
66
+ ```bash
67
+ pip install code-graph-rag
68
+ ```
69
+
70
+ With all Tree-sitter grammars (Python, JS, TS, Rust, Go, Java, Scala, C++, Lua):
71
+
72
+ ```bash
73
+ pip install 'code-graph-rag[treesitter-full]'
74
+ ```
75
+
76
+ With semantic code search (UniXcoder embeddings):
77
+
78
+ ```bash
79
+ pip install 'code-graph-rag[semantic]'
80
+ ```
81
+
82
+ ### Prerequisites
83
+
84
+ - Python 3.12+
85
+ - Docker (for Memgraph)
86
+ - `cmake` (for building pymgclient)
87
+ - `ripgrep` (`rg`) (for shell command text searching)
88
+
89
+ ## CLI Quick Start
90
+
91
+ The package installs a `cgr` command.
92
+
93
+ **Start Memgraph, parse a repo, and query it:**
94
+
95
+ ```bash
96
+ docker compose up -d # start Memgraph
97
+ cgr start --repo-path ./my-project \
98
+ --update-graph --clean # parse & launch interactive chat
99
+ ```
100
+
101
+ **Index to protobuf for offline use:**
102
+
103
+ ```bash
104
+ cgr index -o ./index-output --repo-path ./my-project
105
+ ```
106
+
107
+ **Export knowledge graph to JSON:**
108
+
109
+ ```bash
110
+ cgr export -o graph.json
111
+ ```
112
+
113
+ **AI-guided optimization:**
114
+
115
+ ```bash
116
+ cgr optimize python --repo-path ./my-project
117
+ ```
118
+
119
+ **Run as an MCP server (for Claude Code):**
120
+
121
+ ```bash
122
+ cgr mcp-server
123
+ ```
124
+
125
+ **Check your setup:**
126
+
127
+ ```bash
128
+ cgr doctor
129
+ ```
130
+
131
+ ## Python SDK
132
+
133
+ The `cgr` package provides short imports for programmatic use.
134
+
135
+ ### Load and query an exported graph
136
+
137
+ ```python
138
+ from cgr import load_graph
139
+
140
+ graph = load_graph("graph.json")
141
+ print(graph.summary())
142
+
143
+ functions = graph.find_nodes_by_label("Function")
144
+ for fn in functions[:5]:
145
+     rels = graph.get_relationships_for_node(fn.node_id)
146
+     print(f"{fn.properties['name']}: {len(rels)} relationships")
147
+ ```
148
+
149
+ ### Query Memgraph with Cypher
150
+
151
+ ```python
152
+ from cgr import MemgraphIngestor
153
+
154
+ with MemgraphIngestor(host="localhost", port=7687) as db:
155
+     rows = db.fetch_all("MATCH (f:Function) RETURN f.name LIMIT 10")
156
+     for row in rows:
157
+         print(row)
158
+ ```
159
+
160
+ ### Generate Cypher from natural language
161
+
162
+ ```python
163
+ import asyncio
164
+ from cgr import CypherGenerator
165
+
166
+ async def main():
167
+     gen = CypherGenerator()
168
+     cypher = await gen.generate("Find all classes that inherit from BaseModel")
169
+     print(cypher)
170
+
171
+ asyncio.run(main())
172
+ ```
173
+
174
+ ### Semantic code search
175
+
176
+ Requires the `semantic` extra.
177
+
178
+ ```python
179
+ from cgr import embed_code
180
+
181
+ embedding = embed_code("def authenticate(user, password): ...")
182
+ print(f"Embedding dimension: {len(embedding)}")
183
+ ```
184
+
185
+ ### Configuration
186
+
187
+ ```python
188
+ from cgr import settings
189
+
190
+ settings.set_orchestrator("openai", "gpt-4o", api_key="sk-...")
191
+ settings.set_cypher("google", "gemini-2.5-flash", api_key="your-key")
192
+ ```
193
+
194
+ ## Environment Variables
195
+
196
+ Configure via `.env` or environment variables:
197
+
198
+ | Variable | Default | Description |
199
+ |----------|---------|-------------|
200
+ | `MEMGRAPH_HOST` | `localhost` | Memgraph hostname |
201
+ | `MEMGRAPH_PORT` | `7687` | Memgraph port |
202
+ | `ORCHESTRATOR_PROVIDER` | | Provider: `google`, `openai`, `ollama` |
203
+ | `ORCHESTRATOR_MODEL` | | Model ID (e.g. `gpt-4o`, `gemini-2.5-pro`) |
204
+ | `ORCHESTRATOR_API_KEY` | | API key for the provider (not needed for `ollama`) |
205
+ | `CYPHER_PROVIDER` | | Provider for Cypher generation |
206
+ | `CYPHER_MODEL` | | Model ID for Cypher generation (e.g. `codellama`, `gpt-4o-mini`) |
207
+ | `CYPHER_API_KEY` | | API key for Cypher provider (not needed for `ollama`) |
208
+ | `TARGET_REPO_PATH` | `.` | Default repository path |
209
+
210
+ ## Documentation
211
+
212
+ Full documentation, architecture details, and contribution guide:
213
+ [docs.code-graph-rag.com](https://docs.code-graph-rag.com)
214
+
215
+ ## License
216
+
217
+ MIT
218
+
219
+ <!-- mcp-name: io.github.vitali87/code-graph-rag -->
@@ -1,4 +1,5 @@
1
1
  LICENSE
2
+ PYPI_README.md
2
3
  README.md
3
4
  pyproject.toml
4
5
  cgr/__init__.py
@@ -145,6 +145,8 @@ class AppConfig(BaseSettings):
145
145
  MEMGRAPH_HOST: str = "localhost"
146
146
  MEMGRAPH_PORT: int = 7687
147
147
  MEMGRAPH_HTTP_PORT: int = 7444
148
+ MEMGRAPH_USERNAME: str | None = None
149
+ MEMGRAPH_PASSWORD: str | None = None
148
150
  LAB_PORT: int = 3000
149
151
  MEMGRAPH_BATCH_SIZE: int = 1000
150
152
  AGENT_RETRIES: int = 3
@@ -420,7 +420,7 @@ CYPHER_DEFAULT_LIMIT = 50
420
420
  CYPHER_QUERY_EMBEDDINGS = """
421
421
  MATCH (m:Module)-[:DEFINES]->(n)
422
422
  WHERE (n:Function OR n:Method)
423
- AND m.qualified_name STARTS WITH $project_name + '.'
423
+ AND m.qualified_name STARTS WITH ($project_name + '.')
424
424
  RETURN id(n) AS node_id, n.qualified_name AS qualified_name,
425
425
  n.start_line AS start_line, n.end_line AS end_line,
426
426
  m.path AS path
@@ -880,8 +880,11 @@ PYINSTALLER_ARG_CLEAN = "--clean"
880
880
  PYINSTALLER_ARG_COLLECT_ALL = "--collect-all"
881
881
  PYINSTALLER_ARG_COLLECT_DATA = "--collect-data"
882
882
  PYINSTALLER_ARG_HIDDEN_IMPORT = "--hidden-import"
883
+ PYINSTALLER_ARG_EXCLUDE_MODULE = "--exclude-module"
883
884
  PYINSTALLER_ENTRY_POINT = "main.py"
884
885
 
886
+ PYINSTALLER_EXCLUDED_MODULES = ["logfire", "logfire_api"]
887
+
885
888
  # (H) TOML parsing constants
886
889
  TOML_KEY_PROJECT = "project"
887
890
  TOML_KEY_OPTIONAL_DEPS = "optional-dependencies"
@@ -48,6 +48,10 @@ LLM_INIT_ORCHESTRATOR = "Failed to initialize RAG Orchestrator: {error}"
48
48
  # (H) Graph service errors
49
49
  BATCH_SIZE = "batch_size must be a positive integer"
50
50
  CONN = "Not connected to Memgraph."
51
+ AUTH_INCOMPLETE = (
52
+ "Both username and password are required for authentication. "
53
+ "Either provide both or neither."
54
+ )
51
55
 
52
56
  # (H) Access control errors (used with raise)
53
57
  ACCESS_DENIED = "Access denied: Cannot access files outside the project root."
@@ -369,7 +369,7 @@ class GraphUpdater:
369
369
  logger.info(ls.PASS_4_EMBEDDINGS)
370
370
 
371
371
  results = self.ingestor.fetch_all(
372
- cs.CYPHER_QUERY_EMBEDDINGS, {"project_name": self.project_name + "."}
372
+ cs.CYPHER_QUERY_EMBEDDINGS, {"project_name": self.project_name}
373
373
  )
374
374
 
375
375
  if not results:
@@ -752,6 +752,8 @@ def connect_memgraph(batch_size: int) -> MemgraphIngestor:
752
752
  host=settings.MEMGRAPH_HOST,
753
753
  port=settings.MEMGRAPH_PORT,
754
754
  batch_size=batch_size,
755
+ username=settings.MEMGRAPH_USERNAME,
756
+ password=settings.MEMGRAPH_PASSWORD,
755
757
  )
756
758
 
757
759
 
@@ -1,8 +1,8 @@
1
1
  [project]
2
2
  name = "code-graph-rag"
3
- version = "0.0.79"
3
+ version = "0.0.88"
4
4
  description = "The ultimate RAG for your monorepo. Query, understand, and edit multi-language codebases with the power of AI and knowledge graphs"
5
- readme = "README.md"
5
+ readme = "PYPI_README.md"
6
6
  requires-python = ">=3.12"
7
7
  license = "MIT"
8
8
  classifiers = [
@@ -150,6 +150,11 @@ dev = [
150
150
  "types-toml>=0.10.8.20240310",
151
151
  "vulture>=2.14",
152
152
  ]
153
+ docs = [
154
+ "mkdocs>=1.6.1,<2",
155
+ "mkdocs-material>=9.7.3",
156
+ "mkdocs-minify-plugin>=0.8.0",
157
+ ]
153
158
 
154
159
  [tool.bandit]
155
160
  exclude_dirs = ["codebase_rag/tests", "scripts"]