codegraph-cli 2.1.0__tar.gz → 2.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/PKG-INFO +75 -21
- {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/README.md +63 -19
- {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/__init__.py +1 -1
- {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/agents.py +59 -3
- {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/chat_agent.py +58 -11
- codegraph_cli-2.1.2/codegraph_cli/cli.py +851 -0
- {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/cli_chat.py +204 -94
- {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/cli_diagnose.py +13 -2
- codegraph_cli-2.1.2/codegraph_cli/cli_docs.py +207 -0
- codegraph_cli-2.1.2/codegraph_cli/cli_explore.py +1053 -0
- codegraph_cli-2.1.2/codegraph_cli/cli_export.py +941 -0
- codegraph_cli-2.1.2/codegraph_cli/cli_groups.py +33 -0
- codegraph_cli-2.1.2/codegraph_cli/cli_health.py +316 -0
- codegraph_cli-2.1.2/codegraph_cli/cli_history.py +213 -0
- codegraph_cli-2.1.2/codegraph_cli/cli_onboard.py +380 -0
- codegraph_cli-2.1.2/codegraph_cli/cli_quickstart.py +256 -0
- {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/cli_refactor.py +17 -3
- {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/cli_setup.py +12 -12
- codegraph_cli-2.1.2/codegraph_cli/cli_suggestions.py +90 -0
- {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/cli_test.py +17 -3
- codegraph_cli-2.1.2/codegraph_cli/cli_tui.py +210 -0
- {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/cli_v2.py +24 -4
- codegraph_cli-2.1.2/codegraph_cli/cli_watch.py +158 -0
- codegraph_cli-2.1.2/codegraph_cli/cli_workflows.py +255 -0
- {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/codegen_agent.py +15 -1
- {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/config.py +18 -5
- {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/context_manager.py +117 -15
- {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/crew_agents.py +32 -8
- codegraph_cli-2.1.2/codegraph_cli/crew_chat.py +292 -0
- {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/crew_tools.py +30 -2
- {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/embeddings.py +95 -5
- {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/llm.py +42 -55
- {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/project_context.py +64 -1
- codegraph_cli-2.1.2/codegraph_cli/rag.py +463 -0
- {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/storage.py +310 -14
- {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/vector_store.py +110 -8
- {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli.egg-info/PKG-INFO +75 -21
- {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli.egg-info/SOURCES.txt +13 -0
- codegraph_cli-2.1.2/codegraph_cli.egg-info/entry_points.txt +2 -0
- {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli.egg-info/requires.txt +13 -1
- {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/pyproject.toml +15 -3
- {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/tests/test_cli.py +47 -47
- codegraph_cli-2.1.2/tests/test_cli_workflows.py +242 -0
- {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/tests/test_vector_store.py +3 -3
- codegraph_cli-2.1.0/codegraph_cli/cli.py +0 -336
- codegraph_cli-2.1.0/codegraph_cli/crew_chat.py +0 -159
- codegraph_cli-2.1.0/codegraph_cli/rag.py +0 -200
- codegraph_cli-2.1.0/codegraph_cli.egg-info/entry_points.txt +0 -2
- {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/LICENSE +0 -0
- {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/bug_detector.py +0 -0
- {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/chat_session.py +0 -0
- {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/config_manager.py +0 -0
- {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/diff_engine.py +0 -0
- {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/graph_export.py +0 -0
- {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/models.py +0 -0
- {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/models_v2.py +0 -0
- {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/orchestrator.py +0 -0
- {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/parser.py +0 -0
- {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/performance_analyzer.py +0 -0
- {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/refactor_agent.py +0 -0
- {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/security_scanner.py +0 -0
- {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/templates/graph_interactive.html +0 -0
- {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/testgen_agent.py +0 -0
- {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/validation_engine.py +0 -0
- {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli.egg-info/dependency_links.txt +0 -0
- {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli.egg-info/top_level.txt +0 -0
- {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/setup.cfg +0 -0
- {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/tests/test_agents.py +0 -0
- {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/tests/test_bug_detector.py +0 -0
- {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/tests/test_parser.py +0 -0
- {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/tests/test_security_scanner.py +0 -0
- {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/tests/test_storage.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: codegraph-cli
|
|
3
|
-
Version: 2.1.0
|
|
3
|
+
Version: 2.1.2
|
|
4
4
|
Summary: AI-powered code intelligence CLI with multi-agent analysis, impact graphs, and conversational coding.
|
|
5
5
|
Author-email: Ali Nasir <muhammadalinasir00786@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -35,22 +35,32 @@ Requires-Dist: tree-sitter>=0.24.0
|
|
|
35
35
|
Requires-Dist: tree-sitter-python>=0.23.0
|
|
36
36
|
Requires-Dist: tree-sitter-javascript>=0.23.0
|
|
37
37
|
Requires-Dist: tree-sitter-typescript>=0.23.0
|
|
38
|
-
Requires-Dist:
|
|
38
|
+
Requires-Dist: rich>=13.0.0
|
|
39
|
+
Requires-Dist: python-docx>=1.0.0
|
|
40
|
+
Requires-Dist: pydantic>=2.0.0
|
|
39
41
|
Provides-Extra: crew
|
|
40
42
|
Requires-Dist: crewai>=0.80.0; extra == "crew"
|
|
43
|
+
Provides-Extra: explore
|
|
44
|
+
Requires-Dist: starlette>=0.27.0; extra == "explore"
|
|
45
|
+
Requires-Dist: uvicorn>=0.24.0; extra == "explore"
|
|
41
46
|
Provides-Extra: dev
|
|
42
47
|
Requires-Dist: pytest>=7.4.0; extra == "dev"
|
|
43
48
|
Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
|
|
44
49
|
Requires-Dist: pytest-mock>=3.11.0; extra == "dev"
|
|
45
50
|
Requires-Dist: build>=1.0.0; extra == "dev"
|
|
46
51
|
Requires-Dist: twine>=5.0.0; extra == "dev"
|
|
52
|
+
Provides-Extra: watch
|
|
53
|
+
Requires-Dist: watchdog>=3.0.0; extra == "watch"
|
|
47
54
|
Provides-Extra: embeddings
|
|
48
55
|
Requires-Dist: torch>=2.0.0; extra == "embeddings"
|
|
49
56
|
Requires-Dist: transformers<5.0.0,>=4.48.0; extra == "embeddings"
|
|
50
57
|
Provides-Extra: all
|
|
51
58
|
Requires-Dist: crewai>=0.80.0; extra == "all"
|
|
59
|
+
Requires-Dist: starlette>=0.27.0; extra == "all"
|
|
60
|
+
Requires-Dist: uvicorn>=0.24.0; extra == "all"
|
|
52
61
|
Requires-Dist: torch>=2.0.0; extra == "all"
|
|
53
62
|
Requires-Dist: transformers<5.0.0,>=4.48.0; extra == "all"
|
|
63
|
+
Requires-Dist: watchdog>=3.0.0; extra == "all"
|
|
54
64
|
Dynamic: license-file
|
|
55
65
|
|
|
56
66
|
# CodeGraph CLI
|
|
@@ -59,7 +69,8 @@ Dynamic: license-file
|
|
|
59
69
|
|
|
60
70
|
[License](LICENSE)
|
|
61
71
|
[Python](https://www.python.org)
|
|
62
|
-
[](https://github.com/al1-nasir/codegraph-cli)
|
|
73
|
+
[CI](https://github.com/al1-nasir/codegraph-cli/actions/workflows/ci.yml)
|
|
63
74
|
|
|
64
75
|
---
|
|
65
76
|
|
|
@@ -84,12 +95,24 @@ Core capabilities:
|
|
|
84
95
|
pip install codegraph-cli
|
|
85
96
|
```
|
|
86
97
|
|
|
98
|
+
With neural embedding models (semantic code search):
|
|
99
|
+
|
|
100
|
+
```bash
|
|
101
|
+
pip install codegraph-cli[embeddings]
|
|
102
|
+
```
|
|
103
|
+
|
|
87
104
|
With CrewAI multi-agent support:
|
|
88
105
|
|
|
89
106
|
```bash
|
|
90
107
|
pip install codegraph-cli[crew]
|
|
91
108
|
```
|
|
92
109
|
|
|
110
|
+
Everything:
|
|
111
|
+
|
|
112
|
+
```bash
|
|
113
|
+
pip install codegraph-cli[all]
|
|
114
|
+
```
|
|
115
|
+
|
|
93
116
|
For development:
|
|
94
117
|
|
|
95
118
|
```bash
|
|
@@ -105,15 +128,15 @@ pip install -e ".[dev]"
|
|
|
105
128
|
### 1. Configure your LLM provider
|
|
106
129
|
|
|
107
130
|
```bash
|
|
108
|
-
cg setup
|
|
131
|
+
cg config setup
|
|
109
132
|
```
|
|
110
133
|
|
|
111
134
|
This runs an interactive wizard that writes configuration to `~/.codegraph/config.toml`. Alternatively, switch providers directly:
|
|
112
135
|
|
|
113
136
|
```bash
|
|
114
|
-
cg set-llm openrouter
|
|
115
|
-
cg set-llm groq
|
|
116
|
-
cg set-llm ollama
|
|
137
|
+
cg config set-llm openrouter
|
|
138
|
+
cg config set-llm groq
|
|
139
|
+
cg config set-llm ollama
|
|
117
140
|
```
|
|
118
141
|
|
|
119
142
|
### 2. Index a project
|
|
@@ -140,18 +163,46 @@ cg chat start --crew # multi-agent mode
|
|
|
140
163
|
|
|
141
164
|
| Provider | Type | Configuration |
|
|
142
165
|
|----------|------|---------------|
|
|
143
|
-
| Ollama | Local, free | `cg set-llm ollama` |
|
|
144
|
-
| Groq | Cloud, free tier | `cg set-llm groq` |
|
|
145
|
-
| OpenAI | Cloud | `cg set-llm openai` |
|
|
146
|
-
| Anthropic | Cloud | `cg set-llm anthropic` |
|
|
147
|
-
| Gemini | Cloud | `cg set-llm gemini` |
|
|
148
|
-
| OpenRouter | Cloud, multi-model | `cg set-llm openrouter` |
|
|
166
|
+
| Ollama | Local, free | `cg config set-llm ollama` |
|
|
167
|
+
| Groq | Cloud, free tier | `cg config set-llm groq` |
|
|
168
|
+
| OpenAI | Cloud | `cg config set-llm openai` |
|
|
169
|
+
| Anthropic | Cloud | `cg config set-llm anthropic` |
|
|
170
|
+
| Gemini | Cloud | `cg config set-llm gemini` |
|
|
171
|
+
| OpenRouter | Cloud, multi-model | `cg config set-llm openrouter` |
|
|
149
172
|
|
|
150
173
|
All configuration is stored in `~/.codegraph/config.toml`. No environment variables required.
|
|
151
174
|
|
|
152
175
|
```bash
|
|
153
|
-
cg show-llm # view current provider, model, and endpoint
|
|
154
|
-
cg unset-llm # reset to defaults
|
|
176
|
+
cg config show-llm # view current provider, model, and endpoint
|
|
177
|
+
cg config unset-llm # reset to defaults
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
---
|
|
181
|
+
|
|
182
|
+
## Embedding Models
|
|
183
|
+
|
|
184
|
+
CodeGraph supports configurable embedding models for semantic code search. Choose based on your hardware and quality needs:
|
|
185
|
+
|
|
186
|
+
| Model | Download | Dim | Quality | Command |
|
|
187
|
+
|-------|----------|-----|---------|---------|
|
|
188
|
+
| hash | 0 bytes | 256 | Keyword-only | `cg config set-embedding hash` |
|
|
189
|
+
| minilm | ~80 MB | 384 | Decent | `cg config set-embedding minilm` |
|
|
190
|
+
| bge-base | ~440 MB | 768 | Good | `cg config set-embedding bge-base` |
|
|
191
|
+
| jina-code | ~550 MB | 768 | Code-aware | `cg config set-embedding jina-code` |
|
|
192
|
+
| qodo-1.5b | ~6.2 GB | 1536 | Best | `cg config set-embedding qodo-1.5b` |
|
|
193
|
+
|
|
194
|
+
The default is `hash` (zero-dependency, no download). Neural models require the `[embeddings]` extra and are downloaded on first use from HuggingFace.
|
|
195
|
+
|
|
196
|
+
```bash
|
|
197
|
+
cg config set-embedding jina-code # switch to a neural model
|
|
198
|
+
cg config show-embedding # view current model and all options
|
|
199
|
+
cg config unset-embedding # reset to hash default
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
After changing the embedding model, re-index your project:
|
|
203
|
+
|
|
204
|
+
```bash
|
|
205
|
+
cg index /path/to/project
|
|
155
206
|
```
|
|
156
207
|
|
|
157
208
|
---
|
|
@@ -252,8 +303,9 @@ CLI Layer (Typer)
|
|
|
252
303
|
| | |
|
|
253
304
|
| +-- Parser (tree-sitter) +-- VectorStore (LanceDB)
|
|
254
305
|
| +-- RAGRetriever |
|
|
255
|
-
| +-- LLM Adapter +-- Embeddings
|
|
256
|
-
|
|
|
306
|
+
| +-- LLM Adapter +-- Embeddings (configurable)
|
|
307
|
+
| hash | minilm | bge-base
|
|
308
|
+
| jina-code | qodo-1.5b
|
|
257
309
|
+-- ChatAgent (standard mode)
|
|
258
310
|
|
|
|
259
311
|
+-- CrewChatAgent (--crew mode)
|
|
@@ -264,6 +316,8 @@ CLI Layer (Typer)
|
|
|
264
316
|
+-- Code Analysis Agent ---> 3 search/analysis tools
|
|
265
317
|
```
|
|
266
318
|
|
|
319
|
+
**Embeddings**: Five models are available via `cg config set-embedding`, ranging from Hash (default, zero-dependency) to Qodo-Embed-1-1.5B (best quality, ~6.2 GB download). Neural models use raw `transformers` + `torch` — no sentence-transformers overhead. Models are cached in `~/.codegraph/models/`.
|
|
320
|
+
|
|
267
321
|
**Parser**: tree-sitter grammars for Python, JavaScript, and TypeScript. Extracts modules, classes, functions, imports, and call relationships into a directed graph.
|
|
268
322
|
|
|
269
323
|
**Storage**: SQLite for the code graph (nodes + edges), LanceDB for vector embeddings. All data stored under `~/.codegraph/`.
|
|
@@ -278,14 +332,14 @@ CLI Layer (Typer)
|
|
|
278
332
|
codegraph_cli/
|
|
279
333
|
cli.py # main Typer application, all top-level commands
|
|
280
334
|
cli_chat.py # interactive chat REPL with styled output
|
|
281
|
-
cli_setup.py # setup wizard, set-llm, unset-llm,
|
|
335
|
+
cli_setup.py # setup wizard, set-llm, unset-llm, set-embedding
|
|
282
336
|
cli_v2.py # v2 code generation commands
|
|
283
337
|
config.py # loads config from TOML
|
|
284
|
-
config_manager.py # TOML read/write, provider
|
|
338
|
+
config_manager.py # TOML read/write, provider and embedding config
|
|
285
339
|
llm.py # multi-provider LLM adapter
|
|
286
340
|
parser.py # tree-sitter AST parsing
|
|
287
341
|
storage.py # SQLite graph store
|
|
288
|
-
embeddings.py #
|
|
342
|
+
embeddings.py # configurable embedding engine (5 models)
|
|
289
343
|
rag.py # RAG retriever
|
|
290
344
|
vector_store.py # LanceDB vector store
|
|
291
345
|
orchestrator.py # coordinates parsing, search, impact
|
|
@@ -310,7 +364,7 @@ codegraph_cli/
|
|
|
310
364
|
git clone https://github.com/al1-nasir/codegraph-cli.git
|
|
311
365
|
cd codegraph-cli
|
|
312
366
|
python -m venv .venv && source .venv/bin/activate
|
|
313
|
-
pip install -e ".[dev,crew]"
|
|
367
|
+
pip install -e ".[dev,crew,embeddings]"
|
|
314
368
|
pytest
|
|
315
369
|
```
|
|
316
370
|
|
|
@@ -4,7 +4,8 @@
|
|
|
4
4
|
|
|
5
5
|
[License](LICENSE)
|
|
6
6
|
[Python](https://www.python.org)
|
|
7
|
-
[](https://github.com/al1-nasir/codegraph-cli)
|
|
8
|
+
[CI](https://github.com/al1-nasir/codegraph-cli/actions/workflows/ci.yml)
|
|
8
9
|
|
|
9
10
|
---
|
|
10
11
|
|
|
@@ -29,12 +30,24 @@ Core capabilities:
|
|
|
29
30
|
pip install codegraph-cli
|
|
30
31
|
```
|
|
31
32
|
|
|
33
|
+
With neural embedding models (semantic code search):
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
pip install codegraph-cli[embeddings]
|
|
37
|
+
```
|
|
38
|
+
|
|
32
39
|
With CrewAI multi-agent support:
|
|
33
40
|
|
|
34
41
|
```bash
|
|
35
42
|
pip install codegraph-cli[crew]
|
|
36
43
|
```
|
|
37
44
|
|
|
45
|
+
Everything:
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
pip install codegraph-cli[all]
|
|
49
|
+
```
|
|
50
|
+
|
|
38
51
|
For development:
|
|
39
52
|
|
|
40
53
|
```bash
|
|
@@ -50,15 +63,15 @@ pip install -e ".[dev]"
|
|
|
50
63
|
### 1. Configure your LLM provider
|
|
51
64
|
|
|
52
65
|
```bash
|
|
53
|
-
cg setup
|
|
66
|
+
cg config setup
|
|
54
67
|
```
|
|
55
68
|
|
|
56
69
|
This runs an interactive wizard that writes configuration to `~/.codegraph/config.toml`. Alternatively, switch providers directly:
|
|
57
70
|
|
|
58
71
|
```bash
|
|
59
|
-
cg set-llm openrouter
|
|
60
|
-
cg set-llm groq
|
|
61
|
-
cg set-llm ollama
|
|
72
|
+
cg config set-llm openrouter
|
|
73
|
+
cg config set-llm groq
|
|
74
|
+
cg config set-llm ollama
|
|
62
75
|
```
|
|
63
76
|
|
|
64
77
|
### 2. Index a project
|
|
@@ -85,18 +98,46 @@ cg chat start --crew # multi-agent mode
|
|
|
85
98
|
|
|
86
99
|
| Provider | Type | Configuration |
|
|
87
100
|
|----------|------|---------------|
|
|
88
|
-
| Ollama | Local, free | `cg set-llm ollama` |
|
|
89
|
-
| Groq | Cloud, free tier | `cg set-llm groq` |
|
|
90
|
-
| OpenAI | Cloud | `cg set-llm openai` |
|
|
91
|
-
| Anthropic | Cloud | `cg set-llm anthropic` |
|
|
92
|
-
| Gemini | Cloud | `cg set-llm gemini` |
|
|
93
|
-
| OpenRouter | Cloud, multi-model | `cg set-llm openrouter` |
|
|
101
|
+
| Ollama | Local, free | `cg config set-llm ollama` |
|
|
102
|
+
| Groq | Cloud, free tier | `cg config set-llm groq` |
|
|
103
|
+
| OpenAI | Cloud | `cg config set-llm openai` |
|
|
104
|
+
| Anthropic | Cloud | `cg config set-llm anthropic` |
|
|
105
|
+
| Gemini | Cloud | `cg config set-llm gemini` |
|
|
106
|
+
| OpenRouter | Cloud, multi-model | `cg config set-llm openrouter` |
|
|
94
107
|
|
|
95
108
|
All configuration is stored in `~/.codegraph/config.toml`. No environment variables required.
|
|
96
109
|
|
|
97
110
|
```bash
|
|
98
|
-
cg show-llm # view current provider, model, and endpoint
|
|
99
|
-
cg unset-llm # reset to defaults
|
|
111
|
+
cg config show-llm # view current provider, model, and endpoint
|
|
112
|
+
cg config unset-llm # reset to defaults
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
---
|
|
116
|
+
|
|
117
|
+
## Embedding Models
|
|
118
|
+
|
|
119
|
+
CodeGraph supports configurable embedding models for semantic code search. Choose based on your hardware and quality needs:
|
|
120
|
+
|
|
121
|
+
| Model | Download | Dim | Quality | Command |
|
|
122
|
+
|-------|----------|-----|---------|---------|
|
|
123
|
+
| hash | 0 bytes | 256 | Keyword-only | `cg config set-embedding hash` |
|
|
124
|
+
| minilm | ~80 MB | 384 | Decent | `cg config set-embedding minilm` |
|
|
125
|
+
| bge-base | ~440 MB | 768 | Good | `cg config set-embedding bge-base` |
|
|
126
|
+
| jina-code | ~550 MB | 768 | Code-aware | `cg config set-embedding jina-code` |
|
|
127
|
+
| qodo-1.5b | ~6.2 GB | 1536 | Best | `cg config set-embedding qodo-1.5b` |
|
|
128
|
+
|
|
129
|
+
The default is `hash` (zero-dependency, no download). Neural models require the `[embeddings]` extra and are downloaded on first use from HuggingFace.
|
|
130
|
+
|
|
131
|
+
```bash
|
|
132
|
+
cg config set-embedding jina-code # switch to a neural model
|
|
133
|
+
cg config show-embedding # view current model and all options
|
|
134
|
+
cg config unset-embedding # reset to hash default
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
After changing the embedding model, re-index your project:
|
|
138
|
+
|
|
139
|
+
```bash
|
|
140
|
+
cg index /path/to/project
|
|
100
141
|
```
|
|
101
142
|
|
|
102
143
|
---
|
|
@@ -197,8 +238,9 @@ CLI Layer (Typer)
|
|
|
197
238
|
| | |
|
|
198
239
|
| +-- Parser (tree-sitter) +-- VectorStore (LanceDB)
|
|
199
240
|
| +-- RAGRetriever |
|
|
200
|
-
| +-- LLM Adapter +-- Embeddings
|
|
201
|
-
|
|
|
241
|
+
| +-- LLM Adapter +-- Embeddings (configurable)
|
|
242
|
+
| hash | minilm | bge-base
|
|
243
|
+
| jina-code | qodo-1.5b
|
|
202
244
|
+-- ChatAgent (standard mode)
|
|
203
245
|
|
|
|
204
246
|
+-- CrewChatAgent (--crew mode)
|
|
@@ -209,6 +251,8 @@ CLI Layer (Typer)
|
|
|
209
251
|
+-- Code Analysis Agent ---> 3 search/analysis tools
|
|
210
252
|
```
|
|
211
253
|
|
|
254
|
+
**Embeddings**: Five models are available via `cg config set-embedding`, ranging from Hash (default, zero-dependency) to Qodo-Embed-1-1.5B (best quality, ~6.2 GB download). Neural models use raw `transformers` + `torch` — no sentence-transformers overhead. Models are cached in `~/.codegraph/models/`.
|
|
255
|
+
|
|
212
256
|
**Parser**: tree-sitter grammars for Python, JavaScript, and TypeScript. Extracts modules, classes, functions, imports, and call relationships into a directed graph.
|
|
213
257
|
|
|
214
258
|
**Storage**: SQLite for the code graph (nodes + edges), LanceDB for vector embeddings. All data stored under `~/.codegraph/`.
|
|
@@ -223,14 +267,14 @@ CLI Layer (Typer)
|
|
|
223
267
|
codegraph_cli/
|
|
224
268
|
cli.py # main Typer application, all top-level commands
|
|
225
269
|
cli_chat.py # interactive chat REPL with styled output
|
|
226
|
-
cli_setup.py # setup wizard, set-llm, unset-llm,
|
|
270
|
+
cli_setup.py # setup wizard, set-llm, unset-llm, set-embedding
|
|
227
271
|
cli_v2.py # v2 code generation commands
|
|
228
272
|
config.py # loads config from TOML
|
|
229
|
-
config_manager.py # TOML read/write, provider
|
|
273
|
+
config_manager.py # TOML read/write, provider and embedding config
|
|
230
274
|
llm.py # multi-provider LLM adapter
|
|
231
275
|
parser.py # tree-sitter AST parsing
|
|
232
276
|
storage.py # SQLite graph store
|
|
233
|
-
embeddings.py #
|
|
277
|
+
embeddings.py # configurable embedding engine (5 models)
|
|
234
278
|
rag.py # RAG retriever
|
|
235
279
|
vector_store.py # LanceDB vector store
|
|
236
280
|
orchestrator.py # coordinates parsing, search, impact
|
|
@@ -255,7 +299,7 @@ codegraph_cli/
|
|
|
255
299
|
git clone https://github.com/al1-nasir/codegraph-cli.git
|
|
256
300
|
cd codegraph-cli
|
|
257
301
|
python -m venv .venv && source .venv/bin/activate
|
|
258
|
-
pip install -e ".[dev,crew]"
|
|
302
|
+
pip install -e ".[dev,crew,embeddings]"
|
|
259
303
|
pytest
|
|
260
304
|
```
|
|
261
305
|
|
|
@@ -2,17 +2,66 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
+
import re
|
|
5
6
|
from collections import deque
|
|
6
7
|
from pathlib import Path
|
|
7
8
|
from typing import Dict, List, Set
|
|
8
9
|
|
|
9
10
|
from .embeddings import HashEmbeddingModel, TransformerEmbedder
|
|
10
11
|
from .llm import LocalLLM
|
|
11
|
-
from .models import ImpactReport
|
|
12
|
+
from .models import ImpactReport, Node
|
|
12
13
|
from .parser import PythonGraphParser
|
|
13
14
|
from .rag import RAGRetriever
|
|
14
15
|
from .storage import GraphStore
|
|
15
16
|
|
|
17
|
+
# Matches one single-line ``import x`` / ``from x import y`` statement at any
# indentation level, so imports nested inside function or class bodies are
# stripped too.  Only horizontal whitespace is accepted between tokens, which
# keeps a match from spilling over onto the following line.
_IMPORT_RE = re.compile(
    r"^[ \t]*(?:from[ \t]+\S+[ \t]+)?import[ \t]+.+$",
    re.MULTILINE,
)

# Maximum characters to keep for a single chunk's code body.
# Module-level nodes can be very large; truncating keeps embeddings
# focused on the symbol's signature + docstring + first N lines.
_MAX_CHUNK_CODE_CHARS = 1500

# Appended to any code body that was cut at ``_MAX_CHUNK_CODE_CHARS``.
_TRUNCATION_MARKER = "\n# ... (truncated)"


def _build_chunk_text(node: Node) -> str:
    """Build the structured text that is embedded for *node*.

    The text is laid out so that the embedding model captures:
    - **file path** (helps retrieval when users mention filenames)
    - **symbol name + type** (boosts exact-match semantics)
    - **docstring** (captures purpose / intent)
    - **code body** (captures implementation detail)

    Single-line import statements are removed from non-module nodes to
    reduce noise; module nodes keep them.  Any code body longer than
    ``_MAX_CHUNK_CODE_CHARS`` is cut at that length and suffixed with a
    truncation marker, for modules and other node types alike.

    Args:
        node: Object exposing ``file_path``, ``qualname``, ``node_type``,
            ``docstring`` and ``code`` attributes.  ``docstring`` and
            ``code`` may be empty or ``None``.

    Returns:
        The newline-joined chunk text.
    """
    parts: List[str] = [
        f"file: {node.file_path}",
        f"symbol: {node.qualname}",
        f"type: {node.node_type}",
    ]

    # Skip whitespace-only docstrings instead of emitting an empty "doc: ".
    docstring = (node.docstring or "").strip()
    if docstring:
        parts.append(f"doc: {docstring}")

    # Tolerate nodes without source text rather than failing in re.sub().
    code = node.code or ""
    if node.node_type != "module":
        code = _IMPORT_RE.sub("", code).strip()

    # One truncation path for every node type, so module chunks are marked
    # as truncated just like functions and classes.
    if len(code) > _MAX_CHUNK_CODE_CHARS:
        code = code[:_MAX_CHUNK_CODE_CHARS] + _TRUNCATION_MARKER

    if code:
        parts.append(code)

    return "\n".join(parts)
|
|
64
|
+
|
|
16
65
|
|
|
17
66
|
class GraphAgent:
|
|
18
67
|
"""Responsible for parsing projects and maintaining graph memory."""
|
|
@@ -31,7 +80,7 @@ class GraphAgent:
|
|
|
31
80
|
total_nodes = len(nodes)
|
|
32
81
|
|
|
33
82
|
for idx, node in enumerate(nodes, 1):
|
|
34
|
-
text =
|
|
83
|
+
text = _build_chunk_text(node)
|
|
35
84
|
emb = self.embedding_model.embed_text(text)
|
|
36
85
|
node_payload.append((node, emb))
|
|
37
86
|
|
|
@@ -43,13 +92,20 @@ class GraphAgent:
|
|
|
43
92
|
if show_progress:
|
|
44
93
|
print(f"\r📊 Indexing: {total_nodes}/{total_nodes} nodes (100%) ")
|
|
45
94
|
|
|
46
|
-
self.
|
|
95
|
+
emb_model_key = getattr(self.embedding_model, 'model_key', 'hash')
|
|
96
|
+
emb_dim = getattr(self.embedding_model, 'dim', 256)
|
|
97
|
+
|
|
98
|
+
self.store.insert_nodes(node_payload, model_key=emb_model_key)
|
|
47
99
|
self.store.insert_edges(edges)
|
|
100
|
+
|
|
101
|
+
# Record embedding model info in project metadata
|
|
48
102
|
self.store.set_metadata(
|
|
49
103
|
{
|
|
50
104
|
"project_root": str(project_root),
|
|
51
105
|
"node_count": len(nodes),
|
|
52
106
|
"edge_count": len(edges),
|
|
107
|
+
"embedding_model": emb_model_key,
|
|
108
|
+
"embedding_dim": emb_dim,
|
|
53
109
|
}
|
|
54
110
|
)
|
|
55
111
|
return {"nodes": len(nodes), "edges": len(edges)}
|
|
@@ -7,7 +7,7 @@ from typing import Optional
|
|
|
7
7
|
|
|
8
8
|
from .chat_session import SessionManager
|
|
9
9
|
from .codegen_agent import CodeGenAgent
|
|
10
|
-
from .context_manager import assemble_context_for_llm, detect_intent
|
|
10
|
+
from .context_manager import SymbolMemory, assemble_context_for_llm, detect_intent
|
|
11
11
|
from .llm import LocalLLM
|
|
12
12
|
from .models_v2 import ChatSession, CodeProposal
|
|
13
13
|
from .orchestrator import MCPOrchestrator
|
|
@@ -59,11 +59,60 @@ class ChatAgent:
|
|
|
59
59
|
self.rag_retriever = rag_retriever
|
|
60
60
|
self.session_manager = SessionManager()
|
|
61
61
|
|
|
62
|
+
# Symbol memory — tracks recently discussed symbols & files
|
|
63
|
+
# so we can skip redundant RAG queries.
|
|
64
|
+
self.symbol_memory = SymbolMemory()
|
|
65
|
+
|
|
62
66
|
# Initialize specialized agents
|
|
63
67
|
from .codegen_agent import CodeGenAgent
|
|
64
68
|
from .refactor_agent import RefactorAgent
|
|
65
69
|
self.codegen_agent = CodeGenAgent(context.store, llm, project_context=context)
|
|
66
70
|
self.refactor_agent = RefactorAgent(context.store)
|
|
71
|
+
|
|
72
|
+
# Build enhanced system prompt with auto-context
|
|
73
|
+
self.system_prompt = self._build_system_prompt()
|
|
74
|
+
|
|
75
|
+
def _build_system_prompt(self) -> str:
    """Compose the chat system prompt with a project overview appended.

    Starts from ``SYSTEM_PROMPT`` and adds a "Project Context" section
    built from ``self.context.get_project_summary()``: project name,
    source path, indexed file and symbol counts, and a
    function/class/module breakdown when node-type counts are present.
    When ``self.context.has_source_access`` is truthy, the names of up to
    five file entries from ``self.context.list_directory(".")`` are added,
    ordered by their ``modified`` value, highest first.

    Returns:
        The enriched prompt, or ``SYSTEM_PROMPT`` unchanged when building
        the context section raises.
    """
    prompt = SYSTEM_PROMPT

    try:
        info = self.context.get_project_summary()
        lines = [
            "\n\nProject Context:",
            f"- Project: {info.get('project_name', 'unknown')}",
            f"- Source: {info.get('source_path', 'N/A')}",
            f"- Indexed: {info.get('indexed_files', 0)} files, {info.get('total_nodes', 0)} symbols",
        ]

        counts = info.get("node_types", {})
        if counts:
            breakdown = (
                f"- Breakdown: {counts.get('function', 0)} functions, "
                f"{counts.get('class', 0)} classes, "
                f"{counts.get('module', 0)} modules"
            )
            lines.append(breakdown)

        if self.context.has_source_access:
            # Best effort: a failed directory listing must not discard the
            # context lines gathered so far.
            try:
                entries = self.context.list_directory(".")
                newest_first = sorted(
                    (entry for entry in entries if entry["type"] == "file"),
                    key=lambda entry: entry.get("modified", ""),
                    reverse=True,
                )
                recent_names = [entry["name"] for entry in newest_first[:5]]
                if recent_names:
                    lines.append(f"- Recently modified: {', '.join(recent_names)}")
            except Exception:
                pass

        return prompt + "\n".join(lines)
    except Exception:
        return prompt
|
|
67
116
|
|
|
68
117
|
def process_message(
|
|
69
118
|
self,
|
|
@@ -72,6 +121,10 @@ class ChatAgent:
|
|
|
72
121
|
) -> str:
|
|
73
122
|
"""Process user message and generate response.
|
|
74
123
|
|
|
124
|
+
Note: The caller (REPL) is responsible for adding messages to
|
|
125
|
+
the session. This method does NOT add messages itself to avoid
|
|
126
|
+
duplicate entries.
|
|
127
|
+
|
|
75
128
|
Args:
|
|
76
129
|
user_message: User's message
|
|
77
130
|
session: Current chat session
|
|
@@ -79,10 +132,6 @@ class ChatAgent:
|
|
|
79
132
|
Returns:
|
|
80
133
|
Assistant's response
|
|
81
134
|
"""
|
|
82
|
-
# Add user message to session
|
|
83
|
-
timestamp = datetime.now().isoformat()
|
|
84
|
-
session.add_message("user", user_message, timestamp)
|
|
85
|
-
|
|
86
135
|
# Detect intent
|
|
87
136
|
intent = detect_intent(user_message)
|
|
88
137
|
|
|
@@ -103,9 +152,6 @@ class ChatAgent:
|
|
|
103
152
|
# General chat - use LLM with RAG context
|
|
104
153
|
response = self._handle_chat(user_message, session)
|
|
105
154
|
|
|
106
|
-
# Add assistant response to session
|
|
107
|
-
session.add_message("assistant", response, datetime.now().isoformat())
|
|
108
|
-
|
|
109
155
|
# Save session
|
|
110
156
|
self.session_manager.save_session(session)
|
|
111
157
|
|
|
@@ -289,13 +335,14 @@ class ChatAgent:
|
|
|
289
335
|
|
|
290
336
|
def _handle_chat(self, message: str, session: ChatSession) -> str:
|
|
291
337
|
"""Handle general chat with LLM and RAG context."""
|
|
292
|
-
# Assemble context using smart RAG strategy
|
|
338
|
+
# Assemble context using smart RAG strategy + symbol memory
|
|
293
339
|
context_messages = assemble_context_for_llm(
|
|
294
340
|
user_message=message,
|
|
295
341
|
session=session,
|
|
296
342
|
rag_retriever=self.rag_retriever,
|
|
297
|
-
system_prompt=
|
|
298
|
-
max_tokens=8000
|
|
343
|
+
system_prompt=self.system_prompt,
|
|
344
|
+
max_tokens=8000,
|
|
345
|
+
symbol_memory=self.symbol_memory,
|
|
299
346
|
)
|
|
300
347
|
|
|
301
348
|
# Call LLM
|