codegraph-cli 2.0.0__tar.gz → 2.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/PKG-INFO +57 -11
- {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/README.md +50 -7
- {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/__init__.py +1 -1
- {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/agents.py +1 -1
- {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/cli.py +6 -0
- {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/cli_chat.py +9 -4
- {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/cli_setup.py +158 -0
- {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/config.py +6 -1
- {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/config_manager.py +70 -20
- {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/context_manager.py +1 -1
- {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/crew_agents.py +6 -1
- {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/crew_chat.py +5 -1
- {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/crew_tools.py +9 -1
- codegraph_cli-2.1.1/codegraph_cli/embeddings.py +409 -0
- {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/orchestrator.py +2 -2
- {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/rag.py +3 -3
- {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli.egg-info/PKG-INFO +57 -11
- {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli.egg-info/requires.txt +6 -2
- {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/pyproject.toml +9 -4
- codegraph_cli-2.0.0/codegraph_cli/embeddings.py +0 -241
- {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/LICENSE +0 -0
- {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/bug_detector.py +0 -0
- {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/chat_agent.py +0 -0
- {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/chat_session.py +0 -0
- {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/cli_diagnose.py +0 -0
- {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/cli_refactor.py +0 -0
- {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/cli_test.py +0 -0
- {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/cli_v2.py +0 -0
- {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/codegen_agent.py +0 -0
- {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/diff_engine.py +0 -0
- {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/graph_export.py +0 -0
- {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/llm.py +0 -0
- {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/models.py +0 -0
- {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/models_v2.py +0 -0
- {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/parser.py +0 -0
- {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/performance_analyzer.py +0 -0
- {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/project_context.py +0 -0
- {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/refactor_agent.py +0 -0
- {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/security_scanner.py +0 -0
- {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/storage.py +0 -0
- {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/templates/graph_interactive.html +0 -0
- {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/testgen_agent.py +0 -0
- {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/validation_engine.py +0 -0
- {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/vector_store.py +0 -0
- {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli.egg-info/SOURCES.txt +0 -0
- {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli.egg-info/dependency_links.txt +0 -0
- {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli.egg-info/entry_points.txt +0 -0
- {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli.egg-info/top_level.txt +0 -0
- {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/setup.cfg +0 -0
- {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/tests/test_agents.py +0 -0
- {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/tests/test_bug_detector.py +0 -0
- {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/tests/test_cli.py +0 -0
- {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/tests/test_parser.py +0 -0
- {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/tests/test_security_scanner.py +0 -0
- {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/tests/test_storage.py +0 -0
- {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/tests/test_vector_store.py +0 -0
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: codegraph-cli
|
|
3
|
-
Version: 2.0.0
|
|
3
|
+
Version: 2.1.1
|
|
4
4
|
Summary: AI-powered code intelligence CLI with multi-agent analysis, impact graphs, and conversational coding.
|
|
5
|
-
Author-email: Ali Nasir <
|
|
5
|
+
Author-email: Ali Nasir <muhammadalinasir00786@gmail.com>
|
|
6
6
|
License: MIT
|
|
7
7
|
Project-URL: Homepage, https://github.com/al1-nasir/codegraph-cli
|
|
8
8
|
Project-URL: Documentation, https://github.com/al1-nasir/codegraph-cli#readme
|
|
@@ -31,7 +31,6 @@ Requires-Dist: typer<1.0.0,>=0.12.0
|
|
|
31
31
|
Requires-Dist: toml>=0.10.2
|
|
32
32
|
Requires-Dist: lancedb>=0.4.0
|
|
33
33
|
Requires-Dist: pyarrow>=14.0.0
|
|
34
|
-
Requires-Dist: sentence-transformers>=2.2.0
|
|
35
34
|
Requires-Dist: tree-sitter>=0.24.0
|
|
36
35
|
Requires-Dist: tree-sitter-python>=0.23.0
|
|
37
36
|
Requires-Dist: tree-sitter-javascript>=0.23.0
|
|
@@ -45,9 +44,13 @@ Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
|
|
|
45
44
|
Requires-Dist: pytest-mock>=3.11.0; extra == "dev"
|
|
46
45
|
Requires-Dist: build>=1.0.0; extra == "dev"
|
|
47
46
|
Requires-Dist: twine>=5.0.0; extra == "dev"
|
|
47
|
+
Provides-Extra: embeddings
|
|
48
|
+
Requires-Dist: torch>=2.0.0; extra == "embeddings"
|
|
49
|
+
Requires-Dist: transformers<5.0.0,>=4.48.0; extra == "embeddings"
|
|
48
50
|
Provides-Extra: all
|
|
49
51
|
Requires-Dist: crewai>=0.80.0; extra == "all"
|
|
50
|
-
Requires-Dist:
|
|
52
|
+
Requires-Dist: torch>=2.0.0; extra == "all"
|
|
53
|
+
Requires-Dist: transformers<5.0.0,>=4.48.0; extra == "all"
|
|
51
54
|
Dynamic: license-file
|
|
52
55
|
|
|
53
56
|
# CodeGraph CLI
|
|
@@ -56,7 +59,7 @@ Dynamic: license-file
|
|
|
56
59
|
|
|
57
60
|
[](LICENSE)
|
|
58
61
|
[](https://www.python.org)
|
|
59
|
-
[](https://github.com/al1-nasir/codegraph-cli)
|
|
60
63
|
|
|
61
64
|
---
|
|
62
65
|
|
|
@@ -81,12 +84,24 @@ Core capabilities:
|
|
|
81
84
|
pip install codegraph-cli
|
|
82
85
|
```
|
|
83
86
|
|
|
87
|
+
With neural embedding models (semantic code search):
|
|
88
|
+
|
|
89
|
+
```bash
|
|
90
|
+
pip install codegraph-cli[embeddings]
|
|
91
|
+
```
|
|
92
|
+
|
|
84
93
|
With CrewAI multi-agent support:
|
|
85
94
|
|
|
86
95
|
```bash
|
|
87
96
|
pip install codegraph-cli[crew]
|
|
88
97
|
```
|
|
89
98
|
|
|
99
|
+
Everything:
|
|
100
|
+
|
|
101
|
+
```bash
|
|
102
|
+
pip install codegraph-cli[all]
|
|
103
|
+
```
|
|
104
|
+
|
|
90
105
|
For development:
|
|
91
106
|
|
|
92
107
|
```bash
|
|
@@ -153,6 +168,34 @@ cg unset-llm # reset to defaults
|
|
|
153
168
|
|
|
154
169
|
---
|
|
155
170
|
|
|
171
|
+
## Embedding Models
|
|
172
|
+
|
|
173
|
+
CodeGraph supports configurable embedding models for semantic code search. Choose based on your hardware and quality needs:
|
|
174
|
+
|
|
175
|
+
| Model | Download | Dim | Quality | Command |
|
|
176
|
+
|-------|----------|-----|---------|---------|
|
|
177
|
+
| hash | 0 bytes | 256 | Keyword-only | `cg set-embedding hash` |
|
|
178
|
+
| minilm | ~80 MB | 384 | Decent | `cg set-embedding minilm` |
|
|
179
|
+
| bge-base | ~440 MB | 768 | Good | `cg set-embedding bge-base` |
|
|
180
|
+
| jina-code | ~550 MB | 768 | Code-aware | `cg set-embedding jina-code` |
|
|
181
|
+
| qodo-1.5b | ~6.2 GB | 1536 | Best | `cg set-embedding qodo-1.5b` |
|
|
182
|
+
|
|
183
|
+
The default is `hash` (zero-dependency, no download). Neural models require the `[embeddings]` extra and are downloaded on first use from HuggingFace.
|
|
184
|
+
|
|
185
|
+
```bash
|
|
186
|
+
cg set-embedding jina-code # switch to a neural model
|
|
187
|
+
cg show-embedding # view current model and all options
|
|
188
|
+
cg unset-embedding # reset to hash default
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
After changing the embedding model, re-index your project:
|
|
192
|
+
|
|
193
|
+
```bash
|
|
194
|
+
cg index /path/to/project
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
---
|
|
198
|
+
|
|
156
199
|
## Commands
|
|
157
200
|
|
|
158
201
|
### Project Management
|
|
@@ -249,8 +292,9 @@ CLI Layer (Typer)
|
|
|
249
292
|
| | |
|
|
250
293
|
| +-- Parser (tree-sitter) +-- VectorStore (LanceDB)
|
|
251
294
|
| +-- RAGRetriever |
|
|
252
|
-
| +-- LLM Adapter +-- Embeddings
|
|
253
|
-
|
|
|
295
|
+
| +-- LLM Adapter +-- Embeddings (configurable)
|
|
296
|
+
| hash | minilm | bge-base
|
|
297
|
+
| jina-code | qodo-1.5b
|
|
254
298
|
+-- ChatAgent (standard mode)
|
|
255
299
|
|
|
|
256
300
|
+-- CrewChatAgent (--crew mode)
|
|
@@ -261,6 +305,8 @@ CLI Layer (Typer)
|
|
|
261
305
|
+-- Code Analysis Agent ---> 3 search/analysis tools
|
|
262
306
|
```
|
|
263
307
|
|
|
308
|
+
**Embeddings**: Five models are available via `cg set-embedding`, ranging from Hash (default, zero-dependency) to Qodo-Embed-1-1.5B (best quality, ~6.2 GB). Neural models use raw `transformers` + `torch` — no sentence-transformers overhead. Models are cached in `~/.codegraph/models/`.
|
|
309
|
+
|
|
264
310
|
**Parser**: tree-sitter grammars for Python, JavaScript, and TypeScript. Extracts modules, classes, functions, imports, and call relationships into a directed graph.
|
|
265
311
|
|
|
266
312
|
**Storage**: SQLite for the code graph (nodes + edges), LanceDB for vector embeddings. All data stored under `~/.codegraph/`.
|
|
@@ -275,14 +321,14 @@ CLI Layer (Typer)
|
|
|
275
321
|
codegraph_cli/
|
|
276
322
|
cli.py # main Typer application, all top-level commands
|
|
277
323
|
cli_chat.py # interactive chat REPL with styled output
|
|
278
|
-
cli_setup.py # setup wizard, set-llm, unset-llm,
|
|
324
|
+
cli_setup.py # setup wizard, set-llm, unset-llm, set-embedding
|
|
279
325
|
cli_v2.py # v2 code generation commands
|
|
280
326
|
config.py # loads config from TOML
|
|
281
|
-
config_manager.py # TOML read/write, provider
|
|
327
|
+
config_manager.py # TOML read/write, provider and embedding config
|
|
282
328
|
llm.py # multi-provider LLM adapter
|
|
283
329
|
parser.py # tree-sitter AST parsing
|
|
284
330
|
storage.py # SQLite graph store
|
|
285
|
-
embeddings.py #
|
|
331
|
+
embeddings.py # configurable embedding engine (5 models)
|
|
286
332
|
rag.py # RAG retriever
|
|
287
333
|
vector_store.py # LanceDB vector store
|
|
288
334
|
orchestrator.py # coordinates parsing, search, impact
|
|
@@ -307,7 +353,7 @@ codegraph_cli/
|
|
|
307
353
|
git clone https://github.com/al1-nasir/codegraph-cli.git
|
|
308
354
|
cd codegraph-cli
|
|
309
355
|
python -m venv .venv && source .venv/bin/activate
|
|
310
|
-
pip install -e ".[dev,crew]"
|
|
356
|
+
pip install -e ".[dev,crew,embeddings]"
|
|
311
357
|
pytest
|
|
312
358
|
```
|
|
313
359
|
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
|
|
5
5
|
[](LICENSE)
|
|
6
6
|
[](https://www.python.org)
|
|
7
|
-
[](https://github.com/al1-nasir/codegraph-cli)
|
|
8
8
|
|
|
9
9
|
---
|
|
10
10
|
|
|
@@ -29,12 +29,24 @@ Core capabilities:
|
|
|
29
29
|
pip install codegraph-cli
|
|
30
30
|
```
|
|
31
31
|
|
|
32
|
+
With neural embedding models (semantic code search):
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
pip install codegraph-cli[embeddings]
|
|
36
|
+
```
|
|
37
|
+
|
|
32
38
|
With CrewAI multi-agent support:
|
|
33
39
|
|
|
34
40
|
```bash
|
|
35
41
|
pip install codegraph-cli[crew]
|
|
36
42
|
```
|
|
37
43
|
|
|
44
|
+
Everything:
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
pip install codegraph-cli[all]
|
|
48
|
+
```
|
|
49
|
+
|
|
38
50
|
For development:
|
|
39
51
|
|
|
40
52
|
```bash
|
|
@@ -101,6 +113,34 @@ cg unset-llm # reset to defaults
|
|
|
101
113
|
|
|
102
114
|
---
|
|
103
115
|
|
|
116
|
+
## Embedding Models
|
|
117
|
+
|
|
118
|
+
CodeGraph supports configurable embedding models for semantic code search. Choose based on your hardware and quality needs:
|
|
119
|
+
|
|
120
|
+
| Model | Download | Dim | Quality | Command |
|
|
121
|
+
|-------|----------|-----|---------|---------|
|
|
122
|
+
| hash | 0 bytes | 256 | Keyword-only | `cg set-embedding hash` |
|
|
123
|
+
| minilm | ~80 MB | 384 | Decent | `cg set-embedding minilm` |
|
|
124
|
+
| bge-base | ~440 MB | 768 | Good | `cg set-embedding bge-base` |
|
|
125
|
+
| jina-code | ~550 MB | 768 | Code-aware | `cg set-embedding jina-code` |
|
|
126
|
+
| qodo-1.5b | ~6.2 GB | 1536 | Best | `cg set-embedding qodo-1.5b` |
|
|
127
|
+
|
|
128
|
+
The default is `hash` (zero-dependency, no download). Neural models require the `[embeddings]` extra and are downloaded on first use from HuggingFace.
|
|
129
|
+
|
|
130
|
+
```bash
|
|
131
|
+
cg set-embedding jina-code # switch to a neural model
|
|
132
|
+
cg show-embedding # view current model and all options
|
|
133
|
+
cg unset-embedding # reset to hash default
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
After changing the embedding model, re-index your project:
|
|
137
|
+
|
|
138
|
+
```bash
|
|
139
|
+
cg index /path/to/project
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
---
|
|
143
|
+
|
|
104
144
|
## Commands
|
|
105
145
|
|
|
106
146
|
### Project Management
|
|
@@ -197,8 +237,9 @@ CLI Layer (Typer)
|
|
|
197
237
|
| | |
|
|
198
238
|
| +-- Parser (tree-sitter) +-- VectorStore (LanceDB)
|
|
199
239
|
| +-- RAGRetriever |
|
|
200
|
-
| +-- LLM Adapter +-- Embeddings
|
|
201
|
-
|
|
|
240
|
+
| +-- LLM Adapter +-- Embeddings (configurable)
|
|
241
|
+
| hash | minilm | bge-base
|
|
242
|
+
| jina-code | qodo-1.5b
|
|
202
243
|
+-- ChatAgent (standard mode)
|
|
203
244
|
|
|
|
204
245
|
+-- CrewChatAgent (--crew mode)
|
|
@@ -209,6 +250,8 @@ CLI Layer (Typer)
|
|
|
209
250
|
+-- Code Analysis Agent ---> 3 search/analysis tools
|
|
210
251
|
```
|
|
211
252
|
|
|
253
|
+
**Embeddings**: Five models are available via `cg set-embedding`, ranging from Hash (default, zero-dependency) to Qodo-Embed-1-1.5B (best quality, ~6.2 GB). Neural models use raw `transformers` + `torch` — no sentence-transformers overhead. Models are cached in `~/.codegraph/models/`.
|
|
254
|
+
|
|
212
255
|
**Parser**: tree-sitter grammars for Python, JavaScript, and TypeScript. Extracts modules, classes, functions, imports, and call relationships into a directed graph.
|
|
213
256
|
|
|
214
257
|
**Storage**: SQLite for the code graph (nodes + edges), LanceDB for vector embeddings. All data stored under `~/.codegraph/`.
|
|
@@ -223,14 +266,14 @@ CLI Layer (Typer)
|
|
|
223
266
|
codegraph_cli/
|
|
224
267
|
cli.py # main Typer application, all top-level commands
|
|
225
268
|
cli_chat.py # interactive chat REPL with styled output
|
|
226
|
-
cli_setup.py # setup wizard, set-llm, unset-llm,
|
|
269
|
+
cli_setup.py # setup wizard, set-llm, unset-llm, set-embedding
|
|
227
270
|
cli_v2.py # v2 code generation commands
|
|
228
271
|
config.py # loads config from TOML
|
|
229
|
-
config_manager.py # TOML read/write, provider
|
|
272
|
+
config_manager.py # TOML read/write, provider and embedding config
|
|
230
273
|
llm.py # multi-provider LLM adapter
|
|
231
274
|
parser.py # tree-sitter AST parsing
|
|
232
275
|
storage.py # SQLite graph store
|
|
233
|
-
embeddings.py #
|
|
276
|
+
embeddings.py # configurable embedding engine (5 models)
|
|
234
277
|
rag.py # RAG retriever
|
|
235
278
|
vector_store.py # LanceDB vector store
|
|
236
279
|
orchestrator.py # coordinates parsing, search, impact
|
|
@@ -255,7 +298,7 @@ codegraph_cli/
|
|
|
255
298
|
git clone https://github.com/al1-nasir/codegraph-cli.git
|
|
256
299
|
cd codegraph-cli
|
|
257
300
|
python -m venv .venv && source .venv/bin/activate
|
|
258
|
-
pip install -e ".[dev,crew]"
|
|
301
|
+
pip install -e ".[dev,crew,embeddings]"
|
|
259
302
|
pytest
|
|
260
303
|
```
|
|
261
304
|
|
|
@@ -6,7 +6,7 @@ from collections import deque
|
|
|
6
6
|
from pathlib import Path
|
|
7
7
|
from typing import Dict, List, Set
|
|
8
8
|
|
|
9
|
-
from .embeddings import HashEmbeddingModel
|
|
9
|
+
from .embeddings import HashEmbeddingModel, TransformerEmbedder
|
|
10
10
|
from .llm import LocalLLM
|
|
11
11
|
from .models import ImpactReport
|
|
12
12
|
from .parser import PythonGraphParser
|
|
@@ -10,6 +10,7 @@ import typer
|
|
|
10
10
|
from . import __version__, config
|
|
11
11
|
from .cli_chat import chat_app
|
|
12
12
|
from .cli_setup import setup as setup_wizard, set_llm, unset_llm, show_llm
|
|
13
|
+
from .cli_setup import set_embedding, unset_embedding, show_embedding
|
|
13
14
|
from .cli_v2 import v2_app
|
|
14
15
|
from .graph_export import export_dot, export_html
|
|
15
16
|
from .orchestrator import MCPOrchestrator
|
|
@@ -35,6 +36,11 @@ app.command("set-llm")(set_llm)
|
|
|
35
36
|
app.command("unset-llm")(unset_llm)
|
|
36
37
|
app.command("show-llm")(show_llm)
|
|
37
38
|
|
|
39
|
+
# Register embedding management commands
|
|
40
|
+
app.command("set-embedding")(set_embedding)
|
|
41
|
+
app.command("unset-embedding")(unset_embedding)
|
|
42
|
+
app.command("show-embedding")(show_embedding)
|
|
43
|
+
|
|
38
44
|
|
|
39
45
|
def version_callback(value: bool):
|
|
40
46
|
"""Print version and exit."""
|
|
@@ -13,7 +13,6 @@ import typer
|
|
|
13
13
|
from . import config
|
|
14
14
|
from .chat_agent import ChatAgent
|
|
15
15
|
from .chat_session import SessionManager
|
|
16
|
-
from .crew_chat import CrewChatAgent
|
|
17
16
|
from .llm import LocalLLM
|
|
18
17
|
from .orchestrator import MCPOrchestrator
|
|
19
18
|
from .rag import RAGRetriever
|
|
@@ -281,7 +280,7 @@ def start_chat(
|
|
|
281
280
|
new_session: bool = typer.Option(False, "--new", "-n", help="Force start a new session"),
|
|
282
281
|
):
|
|
283
282
|
"""Start interactive chat session."""
|
|
284
|
-
from .embeddings import
|
|
283
|
+
from .embeddings import get_embedder
|
|
285
284
|
from .project_context import ProjectContext
|
|
286
285
|
|
|
287
286
|
pm = ProjectManager()
|
|
@@ -294,12 +293,18 @@ def start_chat(
|
|
|
294
293
|
|
|
295
294
|
# Initialize components
|
|
296
295
|
context = ProjectContext(project, pm)
|
|
297
|
-
embedding_model =
|
|
296
|
+
embedding_model = get_embedder()
|
|
298
297
|
llm = LocalLLM(model=llm_model, provider=llm_provider, api_key=llm_api_key, endpoint=llm_endpoint)
|
|
299
298
|
rag_retriever = RAGRetriever(context.store, embedding_model)
|
|
300
299
|
|
|
301
300
|
if use_crew:
|
|
302
|
-
|
|
301
|
+
try:
|
|
302
|
+
from .crew_chat import CrewChatAgent
|
|
303
|
+
except ImportError:
|
|
304
|
+
print(f"\n {C_RED}CrewAI is not installed.{C_RESET}")
|
|
305
|
+
print(f" {C_DIM}Install with: pip install codegraph-cli[crew]{C_RESET}\n")
|
|
306
|
+
raise typer.Exit(1)
|
|
307
|
+
print(f"\n {C_MAGENTA}Initializing CrewAI multi-agent system...{C_RESET}")
|
|
303
308
|
agent = CrewChatAgent(context, llm, rag_retriever)
|
|
304
309
|
else:
|
|
305
310
|
orchestrator = MCPOrchestrator(
|
|
@@ -8,6 +8,7 @@ from typing import Optional
|
|
|
8
8
|
import typer
|
|
9
9
|
|
|
10
10
|
from . import config_manager
|
|
11
|
+
from .embeddings import EMBEDDING_MODELS
|
|
11
12
|
|
|
12
13
|
app = typer.Typer(help="Setup wizard for LLM provider configuration")
|
|
13
14
|
|
|
@@ -287,6 +288,12 @@ def setup():
|
|
|
287
288
|
print_error("Failed to save configuration!")
|
|
288
289
|
raise typer.Exit(code=1)
|
|
289
290
|
|
|
291
|
+
# Offer embedding setup
|
|
292
|
+
typer.echo("")
|
|
293
|
+
setup_emb = typer.confirm("Configure embedding model for semantic search?", default=True)
|
|
294
|
+
if setup_emb:
|
|
295
|
+
_interactive_embedding_setup()
|
|
296
|
+
|
|
290
297
|
|
|
291
298
|
def set_llm(
|
|
292
299
|
provider: str = typer.Argument(..., help="LLM provider: ollama, groq, openai, anthropic, gemini, openrouter"),
|
|
@@ -466,5 +473,156 @@ def show_llm():
|
|
|
466
473
|
typer.echo("")
|
|
467
474
|
|
|
468
475
|
|
|
476
|
+
# ===================================================================
|
|
477
|
+
# Embedding model commands
|
|
478
|
+
# ===================================================================
|
|
479
|
+
|
|
480
|
+
def _interactive_embedding_setup():
    """Interactive embedding model picker (called from setup wizard).

    Lists every entry of ``EMBEDDING_MODELS``, prompts until the user enters
    either a list number or a model key, prints a short summary of the chosen
    model, and stores the choice via ``config_manager.save_embedding_config``.

    A failed save is reported with ``print_error``; no exception is raised.
    """
    # Pad the title to the border's inner width so the right-hand edge of the
    # box lines up, instead of relying on hand-counted spaces.
    border_top = "╭──────────────────────────────────────────────╮"
    border_bottom = "╰──────────────────────────────────────────────╯"
    title = " Embedding Model Setup ".ljust(len(border_top) - 2)
    side = typer.style("│", fg=typer.colors.CYAN)

    typer.echo("")
    typer.echo(typer.style(border_top, fg=typer.colors.CYAN))
    typer.echo(side + typer.style(title, bold=True) + side)
    typer.echo(typer.style(border_bottom, fg=typer.colors.CYAN))
    typer.echo("")
    typer.echo("Choose an embedding model for semantic code search:")
    typer.echo("Larger models give better results but need more disk/RAM.\n")

    # List models with numbers
    model_keys = list(EMBEDDING_MODELS)
    for i, key in enumerate(model_keys, 1):
        spec = EMBEDDING_MODELS[key]
        name_col = key.ljust(12)
        size_col = f"({spec['size']})".ljust(14)
        desc = spec["description"]
        typer.echo(f" {i}) {name_col} {size_col} {desc}")

    typer.echo("")

    while True:
        choice = typer.prompt(f"Enter choice [1-{len(model_keys)}]", type=str)
        try:
            idx = int(choice)
        except ValueError:
            # Not a number: accept a model key directly. Normalise it the
            # same way ``set_embedding`` does (lower-case, stripped) so that
            # e.g. "MiniLM" is accepted in both places.
            candidate = choice.strip().lower()
            if candidate in model_keys:
                selected = candidate
                break
        else:
            if 1 <= idx <= len(model_keys):
                selected = model_keys[idx - 1]
                break
        # Reached for out-of-range numbers and unknown keys alike.
        print_error(f"Invalid choice. Enter 1-{len(model_keys)} or a model key.")

    spec = EMBEDDING_MODELS[selected]

    if selected != "hash":
        typer.echo(f"\n Model: {typer.style(spec['name'], fg=typer.colors.CYAN)}")
        typer.echo(f" Download: {typer.style(spec['size'], fg=typer.colors.YELLOW)}")
        typer.echo(f" Dim: {spec['dim']}")
        print_info("Requires: pip install codegraph-cli[embeddings]")
    else:
        typer.echo(f"\n Model: {typer.style('Hash Embedding (zero-dependency)', fg=typer.colors.CYAN)}")
        print_info("No download needed, but no semantic understanding.")

    success = config_manager.save_embedding_config(selected)
    if success:
        print_success(f"Embedding model set to: {selected}")
        if selected != "hash":
            # NOTE(review): if the size strings in EMBEDDING_MODELS already
            # start with "~", this prints a doubled tilde — confirm against
            # the model table.
            print_info(f"Model will be downloaded on first use (~{spec['size']}).")
        print_info("Re-index your project after changing embeddings: cg index <path>")
    else:
        print_error("Failed to save embedding config!")
|
|
534
|
+
|
|
535
|
+
|
|
536
|
+
def set_embedding(
    model: str = typer.Argument(
        ...,
        help="Embedding model key: qodo-1.5b, jina-code, bge-base, minilm, hash",
    ),
):
    """Set the embedding model for semantic code search.

    Available models (smallest to largest):

    hash 0 bytes No download, keyword-level only
    minilm ~80 MB Tiny, fast, decent quality
    bge-base ~440 MB Solid general-purpose
    jina-code ~550 MB Code-aware, good quality
    qodo-1.5b ~6.2 GB Best quality, code-optimized

    Examples:
    cg set-embedding minilm
    cg set-embedding jina-code
    cg set-embedding hash
    """
    # Keys are matched case-insensitively and without surrounding whitespace.
    model = model.lower().strip()

    # Guard: reject anything that is not a known model key.
    if model not in EMBEDDING_MODELS:
        choices = ", ".join(EMBEDDING_MODELS.keys())
        print_error(f"Unknown model '{model}'. Choose from: {choices}")
        raise typer.Exit(code=1)

    spec = EMBEDDING_MODELS[model]

    # Guard: persisting the choice failed, so exit with a non-zero status.
    if not config_manager.save_embedding_config(model):
        print_error("Failed to save configuration!")
        raise typer.Exit(code=1)

    print_success(f"Embedding model set to: {model}")
    typer.echo(f" Name: {typer.style(spec['name'], fg=typer.colors.CYAN)}")
    typer.echo(f" Dim: {spec['dim']}")
    if model != "hash":
        # Only non-hash models print a download size.
        typer.echo(f" Size: {spec['size']} (downloaded on first use)")
    print_info("Re-index your project after changing: cg index <path>")
|
|
579
|
+
|
|
580
|
+
|
|
581
|
+
def unset_embedding():
    """Reset embedding model to default (hash — no download)."""
    # Guard: bail out with a non-zero exit code if the config could not be
    # rewritten without its embedding section.
    if not config_manager.clear_embedding_config():
        print_error("Failed to reset embedding config!")
        raise typer.Exit(code=1)

    print_success("Embedding model reset to default (hash).")
    print_info("No neural model will be used. Re-index to apply.")
|
|
590
|
+
|
|
591
|
+
|
|
592
|
+
def show_embedding():
    """Show current embedding model configuration."""
    # Pad the title to the border's inner width so the right-hand edge of the
    # box lines up, instead of relying on hand-counted spaces.
    border_top = "╭──────────────────────────────────────────────╮"
    border_bottom = "╰──────────────────────────────────────────────╯"
    title = " Embedding Configuration ".ljust(len(border_top) - 2)
    side = typer.style("│", fg=typer.colors.CYAN)

    typer.echo("")
    typer.echo(typer.style(border_top, fg=typer.colors.CYAN))
    typer.echo(side + typer.style(title, bold=True) + side)
    typer.echo(typer.style(border_bottom, fg=typer.colors.CYAN))

    # A missing "model" entry means the default "hash" model is active.
    emb_cfg = config_manager.load_embedding_config()
    current_key = emb_cfg.get("model", "hash")
    spec = EMBEDDING_MODELS.get(current_key)

    if spec is None:
        # The configured key is not in EMBEDDING_MODELS; show it flagged as
        # unknown rather than failing.
        typer.echo(f" Model {typer.style(current_key, fg=typer.colors.RED)} (unknown)")
    else:
        typer.echo(f" Model {typer.style(f' {current_key} ', bg=typer.colors.CYAN, fg=typer.colors.WHITE, bold=True)}")
        typer.echo(f" Name {typer.style(spec['name'], bold=True)}")
        typer.echo(f" Dim {spec['dim']}")
        typer.echo(f" Size {spec['size']}")
        typer.echo(f" Desc {spec['description']}")

    typer.echo("")
    typer.echo(typer.style(" Available Models", bold=True))
    typer.echo(typer.style(" ─────────────────────────────────────────", dim=True))
    for key, s in EMBEDDING_MODELS.items():
        # The active model is marked with a green asterisk.
        marker = typer.style(" *", fg=typer.colors.GREEN) if key == current_key else " "
        typer.echo(f" {marker} {key.ljust(12)} {s['size'].ljust(12)} {s['description']}")

    typer.echo("")
    typer.echo(typer.style(" Quick Commands", bold=True))
    typer.echo(typer.style(" ─────────────────────────────────────────", dim=True))
    typer.echo(f" {typer.style('cg set-embedding <model>', fg=typer.colors.YELLOW)} Switch model")
    typer.echo(f" {typer.style('cg unset-embedding', fg=typer.colors.YELLOW)} Reset to hash")
    typer.echo("")
|
|
625
|
+
|
|
626
|
+
|
|
469
627
|
if __name__ == "__main__":
|
|
470
628
|
app()
|
|
@@ -13,10 +13,12 @@ SUPPORTED_EXTENSIONS = {".py"}
|
|
|
13
13
|
|
|
14
14
|
# Load configuration from TOML file (if available)
|
|
15
15
|
try:
|
|
16
|
-
from .config_manager import load_config
|
|
16
|
+
from .config_manager import load_config, load_embedding_config
|
|
17
17
|
_toml_config = load_config()
|
|
18
|
+
_emb_config = load_embedding_config()
|
|
18
19
|
except ImportError:
|
|
19
20
|
_toml_config = {}
|
|
21
|
+
_emb_config = {}
|
|
20
22
|
|
|
21
23
|
# LLM Provider Configuration — loaded from ~/.codegraph/config.toml (set via `cg setup` or `cg set-llm`)
|
|
22
24
|
LLM_PROVIDER = _toml_config.get("provider", "ollama")
|
|
@@ -24,6 +26,9 @@ LLM_API_KEY = _toml_config.get("api_key", "")
|
|
|
24
26
|
LLM_MODEL = _toml_config.get("model", "qwen2.5-coder:7b")
|
|
25
27
|
LLM_ENDPOINT = _toml_config.get("endpoint", "http://127.0.0.1:11434/api/generate")
|
|
26
28
|
|
|
29
|
+
# Embedding model — set via `cg set-embedding` (default: "hash" = no download)
|
|
30
|
+
EMBEDDING_MODEL = _emb_config.get("model", "hash")
|
|
31
|
+
|
|
27
32
|
|
|
28
33
|
def ensure_base_dirs() -> None:
|
|
29
34
|
"""Create base directories for local storage if needed."""
|
|
@@ -78,11 +78,37 @@ def load_config() -> Dict[str, Any]:
|
|
|
78
78
|
return DEFAULT_CONFIGS["ollama"].copy()
|
|
79
79
|
|
|
80
80
|
|
|
81
|
+
def load_full_config() -> Dict[str, Any]:
    """Load the entire TOML config (all sections).

    Returns:
        The parsed contents of ``CONFIG_FILE``, or an empty dict when the
        file does not exist, the ``toml`` module is unavailable, or the file
        cannot be read or parsed.
    """
    if not CONFIG_FILE.exists() or toml is None:
        return {}
    try:
        # TOML documents are UTF-8 by specification; do not depend on the
        # platform's default text encoding.
        with open(CONFIG_FILE, "r", encoding="utf-8") as f:
            return toml.load(f)
    except Exception:
        # Deliberate best-effort: an unreadable or malformed config is
        # treated the same as a missing one.
        return {}
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _save_full_config(config: Dict[str, Any]) -> bool:
    """Write entire config dict to TOML file, preserving all sections.

    Args:
        config: Complete configuration mapping to serialise; it replaces the
            current contents of ``CONFIG_FILE``.

    Returns:
        True if the file was written, False if the ``toml`` module is
        unavailable or the directory/file could not be created or written.
    """
    if toml is None:
        return False
    try:
        # Directory creation lives inside the ``try`` so that a failure here
        # is reported as ``False`` like any other write error, rather than
        # escaping as an exception.
        BASE_DIR.mkdir(parents=True, exist_ok=True)
        # TOML documents are UTF-8 by specification.
        with open(CONFIG_FILE, "w", encoding="utf-8") as f:
            toml.dump(config, f)
    except Exception:
        return False
    return True
|
|
103
|
+
|
|
104
|
+
|
|
81
105
|
def save_config(provider: str, model: str, api_key: str = "", endpoint: str = "") -> bool:
|
|
82
106
|
"""Save LLM configuration to TOML file.
|
|
83
107
|
|
|
108
|
+
Preserves other sections (e.g. ``[embeddings]``) in the file.
|
|
109
|
+
|
|
84
110
|
Args:
|
|
85
|
-
provider: Provider name (ollama, groq, openai, anthropic)
|
|
111
|
+
provider: Provider name (ollama, groq, openai, anthropic, gemini, openrouter)
|
|
86
112
|
model: Model name
|
|
87
113
|
api_key: API key for cloud providers
|
|
88
114
|
endpoint: Custom endpoint (for Ollama)
|
|
@@ -90,32 +116,56 @@ def save_config(provider: str, model: str, api_key: str = "", endpoint: str = ""
|
|
|
90
116
|
Returns:
|
|
91
117
|
True if saved successfully, False otherwise
|
|
92
118
|
"""
|
|
93
|
-
|
|
94
|
-
return False
|
|
95
|
-
|
|
96
|
-
# Ensure directory exists
|
|
97
|
-
BASE_DIR.mkdir(parents=True, exist_ok=True)
|
|
119
|
+
config = load_full_config()
|
|
98
120
|
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
"
|
|
102
|
-
"provider": provider,
|
|
103
|
-
"model": model,
|
|
104
|
-
}
|
|
121
|
+
config["llm"] = {
|
|
122
|
+
"provider": provider,
|
|
123
|
+
"model": model,
|
|
105
124
|
}
|
|
106
|
-
|
|
107
125
|
if api_key:
|
|
108
126
|
config["llm"]["api_key"] = api_key
|
|
109
|
-
|
|
110
127
|
if endpoint:
|
|
111
128
|
config["llm"]["endpoint"] = endpoint
|
|
112
129
|
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
130
|
+
return _save_full_config(config)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
# ------------------------------------------------------------------
|
|
134
|
+
# Embedding configuration
|
|
135
|
+
# ------------------------------------------------------------------
|
|
136
|
+
|
|
137
|
+
def load_embedding_config() -> Dict[str, Any]:
    """Load embedding configuration from ``[embeddings]`` section.

    Returns:
        The ``[embeddings]`` table of the config file, or an empty dict when
        the section is absent or is not a table.
    """
    section = load_full_config().get("embeddings", {})
    # A hand-edited file may define ``embeddings`` as a scalar instead of a
    # table; callers use ``.get`` on the result, so fall back to an empty dict.
    return section if isinstance(section, dict) else {}
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def save_embedding_config(model_key: str) -> bool:
    """Store the chosen embedding model under ``[embeddings]`` in the config.

    Every other section currently in the file (such as ``[llm]``) is carried
    over unchanged; the ``[embeddings]`` table is replaced wholesale.

    Args:
        model_key: One of the keys from ``EMBEDDING_MODELS``
            (e.g. ``"minilm"``, ``"jina-code"``, ``"hash"``).

    Returns:
        True if saved successfully.
    """
    # NOTE(review): load_full_config() yields {} when the file cannot be
    # parsed, in which case this write replaces the whole file.
    updated = {**load_full_config(), "embeddings": {"model": model_key}}
    return _save_full_config(updated)
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def clear_embedding_config() -> bool:
    """Drop the ``[embeddings]`` section so the default model applies again.

    Returns:
        True if the config file was rewritten successfully.
    """
    # Keep every section except ``embeddings``; the file is rewritten even
    # when that section was not present.
    remaining = {
        section: values
        for section, values in load_full_config().items()
        if section != "embeddings"
    }
    return _save_full_config(remaining)
|
|
119
169
|
|
|
120
170
|
|
|
121
171
|
def get_provider_config(provider: str) -> Dict[str, Any]:
|
|
@@ -4,7 +4,12 @@ from __future__ import annotations
|
|
|
4
4
|
|
|
5
5
|
from typing import TYPE_CHECKING, List
|
|
6
6
|
|
|
7
|
-
|
|
7
|
+
try:
|
|
8
|
+
from crewai import Agent
|
|
9
|
+
CREWAI_AVAILABLE = True
|
|
10
|
+
except ImportError:
|
|
11
|
+
Agent = None # type: ignore
|
|
12
|
+
CREWAI_AVAILABLE = False
|
|
8
13
|
|
|
9
14
|
if TYPE_CHECKING:
|
|
10
15
|
from .crew_tools import create_tools
|