codegraph-cli 2.0.0__tar.gz → 2.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/PKG-INFO +57 -11
  2. {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/README.md +50 -7
  3. {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/__init__.py +1 -1
  4. {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/agents.py +1 -1
  5. {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/cli.py +6 -0
  6. {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/cli_chat.py +9 -4
  7. {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/cli_setup.py +158 -0
  8. {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/config.py +6 -1
  9. {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/config_manager.py +70 -20
  10. {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/context_manager.py +1 -1
  11. {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/crew_agents.py +6 -1
  12. {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/crew_chat.py +5 -1
  13. {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/crew_tools.py +9 -1
  14. codegraph_cli-2.1.1/codegraph_cli/embeddings.py +409 -0
  15. {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/orchestrator.py +2 -2
  16. {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/rag.py +3 -3
  17. {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli.egg-info/PKG-INFO +57 -11
  18. {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli.egg-info/requires.txt +6 -2
  19. {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/pyproject.toml +9 -4
  20. codegraph_cli-2.0.0/codegraph_cli/embeddings.py +0 -241
  21. {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/LICENSE +0 -0
  22. {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/bug_detector.py +0 -0
  23. {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/chat_agent.py +0 -0
  24. {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/chat_session.py +0 -0
  25. {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/cli_diagnose.py +0 -0
  26. {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/cli_refactor.py +0 -0
  27. {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/cli_test.py +0 -0
  28. {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/cli_v2.py +0 -0
  29. {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/codegen_agent.py +0 -0
  30. {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/diff_engine.py +0 -0
  31. {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/graph_export.py +0 -0
  32. {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/llm.py +0 -0
  33. {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/models.py +0 -0
  34. {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/models_v2.py +0 -0
  35. {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/parser.py +0 -0
  36. {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/performance_analyzer.py +0 -0
  37. {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/project_context.py +0 -0
  38. {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/refactor_agent.py +0 -0
  39. {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/security_scanner.py +0 -0
  40. {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/storage.py +0 -0
  41. {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/templates/graph_interactive.html +0 -0
  42. {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/testgen_agent.py +0 -0
  43. {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/validation_engine.py +0 -0
  44. {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli/vector_store.py +0 -0
  45. {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli.egg-info/SOURCES.txt +0 -0
  46. {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli.egg-info/dependency_links.txt +0 -0
  47. {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli.egg-info/entry_points.txt +0 -0
  48. {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/codegraph_cli.egg-info/top_level.txt +0 -0
  49. {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/setup.cfg +0 -0
  50. {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/tests/test_agents.py +0 -0
  51. {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/tests/test_bug_detector.py +0 -0
  52. {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/tests/test_cli.py +0 -0
  53. {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/tests/test_parser.py +0 -0
  54. {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/tests/test_security_scanner.py +0 -0
  55. {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/tests/test_storage.py +0 -0
  56. {codegraph_cli-2.0.0 → codegraph_cli-2.1.1}/tests/test_vector_store.py +0 -0
@@ -1,8 +1,8 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codegraph-cli
3
- Version: 2.0.0
3
+ Version: 2.1.1
4
4
  Summary: AI-powered code intelligence CLI with multi-agent analysis, impact graphs, and conversational coding.
5
- Author-email: Ali Nasir <ali@codegraph.dev>
5
+ Author-email: Ali Nasir <muhammadalinasir00786@gmail.com>
6
6
  License: MIT
7
7
  Project-URL: Homepage, https://github.com/al1-nasir/codegraph-cli
8
8
  Project-URL: Documentation, https://github.com/al1-nasir/codegraph-cli#readme
@@ -31,7 +31,6 @@ Requires-Dist: typer<1.0.0,>=0.12.0
31
31
  Requires-Dist: toml>=0.10.2
32
32
  Requires-Dist: lancedb>=0.4.0
33
33
  Requires-Dist: pyarrow>=14.0.0
34
- Requires-Dist: sentence-transformers>=2.2.0
35
34
  Requires-Dist: tree-sitter>=0.24.0
36
35
  Requires-Dist: tree-sitter-python>=0.23.0
37
36
  Requires-Dist: tree-sitter-javascript>=0.23.0
@@ -45,9 +44,13 @@ Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
45
44
  Requires-Dist: pytest-mock>=3.11.0; extra == "dev"
46
45
  Requires-Dist: build>=1.0.0; extra == "dev"
47
46
  Requires-Dist: twine>=5.0.0; extra == "dev"
47
+ Provides-Extra: embeddings
48
+ Requires-Dist: torch>=2.0.0; extra == "embeddings"
49
+ Requires-Dist: transformers<5.0.0,>=4.48.0; extra == "embeddings"
48
50
  Provides-Extra: all
49
51
  Requires-Dist: crewai>=0.80.0; extra == "all"
50
- Requires-Dist: google-generativeai>=0.5.0; extra == "all"
52
+ Requires-Dist: torch>=2.0.0; extra == "all"
53
+ Requires-Dist: transformers<5.0.0,>=4.48.0; extra == "all"
51
54
  Dynamic: license-file
52
55
 
53
56
  # CodeGraph CLI
@@ -56,7 +59,7 @@ Dynamic: license-file
56
59
 
57
60
  [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
58
61
  [![Python 3.9+](https://img.shields.io/badge/python-3.9%2B-blue.svg)](https://www.python.org)
59
- [![Version](https://img.shields.io/badge/version-2.0.0-blue.svg)](https://github.com/al1-nasir/codegraph-cli)
62
+ [![Version](https://img.shields.io/badge/version-2.1.1-blue.svg)](https://github.com/al1-nasir/codegraph-cli)
60
63
 
61
64
  ---
62
65
 
@@ -81,12 +84,24 @@ Core capabilities:
81
84
  pip install codegraph-cli
82
85
  ```
83
86
 
87
+ With neural embedding models (semantic code search):
88
+
89
+ ```bash
90
+ pip install codegraph-cli[embeddings]
91
+ ```
92
+
84
93
  With CrewAI multi-agent support:
85
94
 
86
95
  ```bash
87
96
  pip install codegraph-cli[crew]
88
97
  ```
89
98
 
99
+ Everything:
100
+
101
+ ```bash
102
+ pip install codegraph-cli[all]
103
+ ```
104
+
90
105
  For development:
91
106
 
92
107
  ```bash
@@ -153,6 +168,34 @@ cg unset-llm # reset to defaults
153
168
 
154
169
  ---
155
170
 
171
+ ## Embedding Models
172
+
173
+ CodeGraph supports configurable embedding models for semantic code search. Choose based on your hardware and quality needs:
174
+
175
+ | Model | Download | Dim | Quality | Command |
176
+ |-------|----------|-----|---------|---------|
177
+ | hash | 0 bytes | 256 | Keyword-only | `cg set-embedding hash` |
178
+ | minilm | ~80 MB | 384 | Decent | `cg set-embedding minilm` |
179
+ | bge-base | ~440 MB | 768 | Good | `cg set-embedding bge-base` |
180
+ | jina-code | ~550 MB | 768 | Code-aware | `cg set-embedding jina-code` |
181
+ | qodo-1.5b | ~6.2 GB | 1536 | Best | `cg set-embedding qodo-1.5b` |
182
+
183
+ The default is `hash` (zero-dependency, no download). Neural models require the `[embeddings]` extra and are downloaded on first use from HuggingFace.
184
+
185
+ ```bash
186
+ cg set-embedding jina-code # switch to a neural model
187
+ cg show-embedding # view current model and all options
188
+ cg unset-embedding # reset to hash default
189
+ ```
190
+
191
+ After changing the embedding model, re-index your project:
192
+
193
+ ```bash
194
+ cg index /path/to/project
195
+ ```
196
+
197
+ ---
198
+
156
199
  ## Commands
157
200
 
158
201
  ### Project Management
@@ -249,8 +292,9 @@ CLI Layer (Typer)
249
292
  | | |
250
293
  | +-- Parser (tree-sitter) +-- VectorStore (LanceDB)
251
294
  | +-- RAGRetriever |
252
- | +-- LLM Adapter +-- Embeddings
253
- |
295
+ | +-- LLM Adapter +-- Embeddings (configurable)
296
+ | hash | minilm | bge-base
297
+ | jina-code | qodo-1.5b
254
298
  +-- ChatAgent (standard mode)
255
299
  |
256
300
  +-- CrewChatAgent (--crew mode)
@@ -261,6 +305,8 @@ CLI Layer (Typer)
261
305
  +-- Code Analysis Agent ---> 3 search/analysis tools
262
306
  ```
263
307
 
308
+ **Embeddings**: Five models available via `cg set-embedding`. Hash (default, zero-dependency) through Qodo-Embed-1-1.5B (best quality, 6 GB). Neural models use raw `transformers` + `torch` — no sentence-transformers overhead. Models are cached in `~/.codegraph/models/`.
309
+
264
310
  **Parser**: tree-sitter grammars for Python, JavaScript, and TypeScript. Extracts modules, classes, functions, imports, and call relationships into a directed graph.
265
311
 
266
312
  **Storage**: SQLite for the code graph (nodes + edges), LanceDB for vector embeddings. All data stored under `~/.codegraph/`.
@@ -275,14 +321,14 @@ CLI Layer (Typer)
275
321
  codegraph_cli/
276
322
  cli.py # main Typer application, all top-level commands
277
323
  cli_chat.py # interactive chat REPL with styled output
278
- cli_setup.py # setup wizard, set-llm, unset-llm, show-llm
324
+ cli_setup.py # setup wizard, set-llm, unset-llm, set-embedding
279
325
  cli_v2.py # v2 code generation commands
280
326
  config.py # loads config from TOML
281
- config_manager.py # TOML read/write, provider validation
327
+ config_manager.py # TOML read/write, provider and embedding config
282
328
  llm.py # multi-provider LLM adapter
283
329
  parser.py # tree-sitter AST parsing
284
330
  storage.py # SQLite graph store
285
- embeddings.py # hash-based embedding model
331
+ embeddings.py # configurable embedding engine (5 models)
286
332
  rag.py # RAG retriever
287
333
  vector_store.py # LanceDB vector store
288
334
  orchestrator.py # coordinates parsing, search, impact
@@ -307,7 +353,7 @@ codegraph_cli/
307
353
  git clone https://github.com/al1-nasir/codegraph-cli.git
308
354
  cd codegraph-cli
309
355
  python -m venv .venv && source .venv/bin/activate
310
- pip install -e ".[dev,crew]"
356
+ pip install -e ".[dev,crew,embeddings]"
311
357
  pytest
312
358
  ```
313
359
 
@@ -4,7 +4,7 @@
4
4
 
5
5
  [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
6
6
  [![Python 3.9+](https://img.shields.io/badge/python-3.9%2B-blue.svg)](https://www.python.org)
7
- [![Version](https://img.shields.io/badge/version-2.0.0-blue.svg)](https://github.com/al1-nasir/codegraph-cli)
7
+ [![Version](https://img.shields.io/badge/version-2.1.1-blue.svg)](https://github.com/al1-nasir/codegraph-cli)
8
8
 
9
9
  ---
10
10
 
@@ -29,12 +29,24 @@ Core capabilities:
29
29
  pip install codegraph-cli
30
30
  ```
31
31
 
32
+ With neural embedding models (semantic code search):
33
+
34
+ ```bash
35
+ pip install codegraph-cli[embeddings]
36
+ ```
37
+
32
38
  With CrewAI multi-agent support:
33
39
 
34
40
  ```bash
35
41
  pip install codegraph-cli[crew]
36
42
  ```
37
43
 
44
+ Everything:
45
+
46
+ ```bash
47
+ pip install codegraph-cli[all]
48
+ ```
49
+
38
50
  For development:
39
51
 
40
52
  ```bash
@@ -101,6 +113,34 @@ cg unset-llm # reset to defaults
101
113
 
102
114
  ---
103
115
 
116
+ ## Embedding Models
117
+
118
+ CodeGraph supports configurable embedding models for semantic code search. Choose based on your hardware and quality needs:
119
+
120
+ | Model | Download | Dim | Quality | Command |
121
+ |-------|----------|-----|---------|---------|
122
+ | hash | 0 bytes | 256 | Keyword-only | `cg set-embedding hash` |
123
+ | minilm | ~80 MB | 384 | Decent | `cg set-embedding minilm` |
124
+ | bge-base | ~440 MB | 768 | Good | `cg set-embedding bge-base` |
125
+ | jina-code | ~550 MB | 768 | Code-aware | `cg set-embedding jina-code` |
126
+ | qodo-1.5b | ~6.2 GB | 1536 | Best | `cg set-embedding qodo-1.5b` |
127
+
128
+ The default is `hash` (zero-dependency, no download). Neural models require the `[embeddings]` extra and are downloaded on first use from HuggingFace.
129
+
130
+ ```bash
131
+ cg set-embedding jina-code # switch to a neural model
132
+ cg show-embedding # view current model and all options
133
+ cg unset-embedding # reset to hash default
134
+ ```
135
+
136
+ After changing the embedding model, re-index your project:
137
+
138
+ ```bash
139
+ cg index /path/to/project
140
+ ```
141
+
142
+ ---
143
+
104
144
  ## Commands
105
145
 
106
146
  ### Project Management
@@ -197,8 +237,9 @@ CLI Layer (Typer)
197
237
  | | |
198
238
  | +-- Parser (tree-sitter) +-- VectorStore (LanceDB)
199
239
  | +-- RAGRetriever |
200
- | +-- LLM Adapter +-- Embeddings
201
- |
240
+ | +-- LLM Adapter +-- Embeddings (configurable)
241
+ | hash | minilm | bge-base
242
+ | jina-code | qodo-1.5b
202
243
  +-- ChatAgent (standard mode)
203
244
  |
204
245
  +-- CrewChatAgent (--crew mode)
@@ -209,6 +250,8 @@ CLI Layer (Typer)
209
250
  +-- Code Analysis Agent ---> 3 search/analysis tools
210
251
  ```
211
252
 
253
+ **Embeddings**: Five models available via `cg set-embedding`. Hash (default, zero-dependency) through Qodo-Embed-1-1.5B (best quality, 6 GB). Neural models use raw `transformers` + `torch` — no sentence-transformers overhead. Models are cached in `~/.codegraph/models/`.
254
+
212
255
  **Parser**: tree-sitter grammars for Python, JavaScript, and TypeScript. Extracts modules, classes, functions, imports, and call relationships into a directed graph.
213
256
 
214
257
  **Storage**: SQLite for the code graph (nodes + edges), LanceDB for vector embeddings. All data stored under `~/.codegraph/`.
@@ -223,14 +266,14 @@ CLI Layer (Typer)
223
266
  codegraph_cli/
224
267
  cli.py # main Typer application, all top-level commands
225
268
  cli_chat.py # interactive chat REPL with styled output
226
- cli_setup.py # setup wizard, set-llm, unset-llm, show-llm
269
+ cli_setup.py # setup wizard, set-llm, unset-llm, set-embedding
227
270
  cli_v2.py # v2 code generation commands
228
271
  config.py # loads config from TOML
229
- config_manager.py # TOML read/write, provider validation
272
+ config_manager.py # TOML read/write, provider and embedding config
230
273
  llm.py # multi-provider LLM adapter
231
274
  parser.py # tree-sitter AST parsing
232
275
  storage.py # SQLite graph store
233
- embeddings.py # hash-based embedding model
276
+ embeddings.py # configurable embedding engine (5 models)
234
277
  rag.py # RAG retriever
235
278
  vector_store.py # LanceDB vector store
236
279
  orchestrator.py # coordinates parsing, search, impact
@@ -255,7 +298,7 @@ codegraph_cli/
255
298
  git clone https://github.com/al1-nasir/codegraph-cli.git
256
299
  cd codegraph-cli
257
300
  python -m venv .venv && source .venv/bin/activate
258
- pip install -e ".[dev,crew]"
301
+ pip install -e ".[dev,crew,embeddings]"
259
302
  pytest
260
303
  ```
261
304
 
@@ -1,4 +1,4 @@
1
1
  """CodeGraph CLI package."""
2
2
 
3
3
  __all__ = ["__version__"]
4
- __version__ = "2.0.0"
4
+ __version__ = "2.1.1"
@@ -6,7 +6,7 @@ from collections import deque
6
6
  from pathlib import Path
7
7
  from typing import Dict, List, Set
8
8
 
9
- from .embeddings import HashEmbeddingModel
9
+ from .embeddings import HashEmbeddingModel, TransformerEmbedder
10
10
  from .llm import LocalLLM
11
11
  from .models import ImpactReport
12
12
  from .parser import PythonGraphParser
@@ -10,6 +10,7 @@ import typer
10
10
  from . import __version__, config
11
11
  from .cli_chat import chat_app
12
12
  from .cli_setup import setup as setup_wizard, set_llm, unset_llm, show_llm
13
+ from .cli_setup import set_embedding, unset_embedding, show_embedding
13
14
  from .cli_v2 import v2_app
14
15
  from .graph_export import export_dot, export_html
15
16
  from .orchestrator import MCPOrchestrator
@@ -35,6 +36,11 @@ app.command("set-llm")(set_llm)
35
36
  app.command("unset-llm")(unset_llm)
36
37
  app.command("show-llm")(show_llm)
37
38
 
39
+ # Register embedding management commands
40
+ app.command("set-embedding")(set_embedding)
41
+ app.command("unset-embedding")(unset_embedding)
42
+ app.command("show-embedding")(show_embedding)
43
+
38
44
 
39
45
  def version_callback(value: bool):
40
46
  """Print version and exit."""
@@ -13,7 +13,6 @@ import typer
13
13
  from . import config
14
14
  from .chat_agent import ChatAgent
15
15
  from .chat_session import SessionManager
16
- from .crew_chat import CrewChatAgent
17
16
  from .llm import LocalLLM
18
17
  from .orchestrator import MCPOrchestrator
19
18
  from .rag import RAGRetriever
@@ -281,7 +280,7 @@ def start_chat(
281
280
  new_session: bool = typer.Option(False, "--new", "-n", help="Force start a new session"),
282
281
  ):
283
282
  """Start interactive chat session."""
284
- from .embeddings import HashEmbeddingModel
283
+ from .embeddings import get_embedder
285
284
  from .project_context import ProjectContext
286
285
 
287
286
  pm = ProjectManager()
@@ -294,12 +293,18 @@ def start_chat(
294
293
 
295
294
  # Initialize components
296
295
  context = ProjectContext(project, pm)
297
- embedding_model = HashEmbeddingModel()
296
+ embedding_model = get_embedder()
298
297
  llm = LocalLLM(model=llm_model, provider=llm_provider, api_key=llm_api_key, endpoint=llm_endpoint)
299
298
  rag_retriever = RAGRetriever(context.store, embedding_model)
300
299
 
301
300
  if use_crew:
302
- print(f"\n {C_MAGENTA}🤖 Initializing CrewAI multi-agent system...{C_RESET}")
301
+ try:
302
+ from .crew_chat import CrewChatAgent
303
+ except ImportError:
304
+ print(f"\n {C_RED}CrewAI is not installed.{C_RESET}")
305
+ print(f" {C_DIM}Install with: pip install codegraph-cli[crew]{C_RESET}\n")
306
+ raise typer.Exit(1)
307
+ print(f"\n {C_MAGENTA}Initializing CrewAI multi-agent system...{C_RESET}")
303
308
  agent = CrewChatAgent(context, llm, rag_retriever)
304
309
  else:
305
310
  orchestrator = MCPOrchestrator(
@@ -8,6 +8,7 @@ from typing import Optional
8
8
  import typer
9
9
 
10
10
  from . import config_manager
11
+ from .embeddings import EMBEDDING_MODELS
11
12
 
12
13
  app = typer.Typer(help="Setup wizard for LLM provider configuration")
13
14
 
@@ -287,6 +288,12 @@ def setup():
287
288
  print_error("Failed to save configuration!")
288
289
  raise typer.Exit(code=1)
289
290
 
291
+ # Offer embedding setup
292
+ typer.echo("")
293
+ setup_emb = typer.confirm("Configure embedding model for semantic search?", default=True)
294
+ if setup_emb:
295
+ _interactive_embedding_setup()
296
+
290
297
 
291
298
  def set_llm(
292
299
  provider: str = typer.Argument(..., help="LLM provider: ollama, groq, openai, anthropic, gemini, openrouter"),
@@ -466,5 +473,156 @@ def show_llm():
466
473
  typer.echo("")
467
474
 
468
475
 
476
+ # ===================================================================
477
+ # Embedding model commands
478
+ # ===================================================================
479
+
480
+ def _interactive_embedding_setup():
481
+ """Interactive embedding model picker (called from setup wizard)."""
482
+ typer.echo("")
483
+ typer.echo(typer.style("╭──────────────────────────────────────────────╮", fg=typer.colors.CYAN))
484
+ typer.echo(typer.style("│", fg=typer.colors.CYAN) + typer.style(" Embedding Model Setup ", bold=True) + typer.style("│", fg=typer.colors.CYAN))
485
+ typer.echo(typer.style("╰──────────────────────────────────────────────╯", fg=typer.colors.CYAN))
486
+ typer.echo("")
487
+ typer.echo("Choose an embedding model for semantic code search:")
488
+ typer.echo("Larger models give better results but need more disk/RAM.\n")
489
+
490
+ # List models with numbers
491
+ model_keys = list(EMBEDDING_MODELS.keys())
492
+ for i, key in enumerate(model_keys, 1):
493
+ spec = EMBEDDING_MODELS[key]
494
+ name_col = f"{key}".ljust(12)
495
+ size_col = f"({spec['size']})".ljust(14)
496
+ desc = spec["description"]
497
+ typer.echo(f" {i}) {name_col} {size_col} {desc}")
498
+
499
+ typer.echo("")
500
+
501
+ while True:
502
+ choice = typer.prompt(f"Enter choice [1-{len(model_keys)}]", type=str)
503
+ try:
504
+ idx = int(choice)
505
+ if 1 <= idx <= len(model_keys):
506
+ selected = model_keys[idx - 1]
507
+ break
508
+ except ValueError:
509
+ # Accept model key directly
510
+ if choice.strip() in model_keys:
511
+ selected = choice.strip()
512
+ break
513
+ print_error(f"Invalid choice. Enter 1-{len(model_keys)} or a model key.")
514
+
515
+ spec = EMBEDDING_MODELS[selected]
516
+
517
+ if selected != "hash":
518
+ typer.echo(f"\n Model: {typer.style(spec['name'], fg=typer.colors.CYAN)}")
519
+ typer.echo(f" Download: {typer.style(spec['size'], fg=typer.colors.YELLOW)}")
520
+ typer.echo(f" Dim: {spec['dim']}")
521
+ print_info("Requires: pip install codegraph-cli[embeddings]")
522
+ else:
523
+ typer.echo(f"\n Model: {typer.style('Hash Embedding (zero-dependency)', fg=typer.colors.CYAN)}")
524
+ print_info("No download needed, but no semantic understanding.")
525
+
526
+ success = config_manager.save_embedding_config(selected)
527
+ if success:
528
+ print_success(f"Embedding model set to: {selected}")
529
+ if selected != "hash":
530
+ print_info(f"Model will be downloaded on first use (~{spec['size']}).")
531
+ print_info("Re-index your project after changing embeddings: cg index <path>")
532
+ else:
533
+ print_error("Failed to save embedding config!")
534
+
535
+
536
+ def set_embedding(
537
+ model: str = typer.Argument(
538
+ ...,
539
+ help="Embedding model key: qodo-1.5b, jina-code, bge-base, minilm, hash",
540
+ ),
541
+ ):
542
+ """Set the embedding model for semantic code search.
543
+
544
+ Available models (smallest to largest):
545
+
546
+ hash 0 bytes No download, keyword-level only
547
+ minilm ~80 MB Tiny, fast, decent quality
548
+ bge-base ~440 MB Solid general-purpose
549
+ jina-code ~550 MB Code-aware, good quality
550
+ qodo-1.5b ~6.2 GB Best quality, code-optimized
551
+
552
+ Examples:
553
+ cg set-embedding minilm
554
+ cg set-embedding jina-code
555
+ cg set-embedding hash
556
+ """
557
+ model = model.lower().strip()
558
+
559
+ if model not in EMBEDDING_MODELS:
560
+ print_error(
561
+ f"Unknown model '{model}'. "
562
+ f"Choose from: {', '.join(EMBEDDING_MODELS.keys())}"
563
+ )
564
+ raise typer.Exit(code=1)
565
+
566
+ spec = EMBEDDING_MODELS[model]
567
+ success = config_manager.save_embedding_config(model)
568
+
569
+ if success:
570
+ print_success(f"Embedding model set to: {model}")
571
+ typer.echo(f" Name: {typer.style(spec['name'], fg=typer.colors.CYAN)}")
572
+ typer.echo(f" Dim: {spec['dim']}")
573
+ if model != "hash":
574
+ typer.echo(f" Size: {spec['size']} (downloaded on first use)")
575
+ print_info("Re-index your project after changing: cg index <path>")
576
+ else:
577
+ print_error("Failed to save configuration!")
578
+ raise typer.Exit(code=1)
579
+
580
+
581
+ def unset_embedding():
582
+ """Reset embedding model to default (hash — no download)."""
583
+ success = config_manager.clear_embedding_config()
584
+ if success:
585
+ print_success("Embedding model reset to default (hash).")
586
+ print_info("No neural model will be used. Re-index to apply.")
587
+ else:
588
+ print_error("Failed to reset embedding config!")
589
+ raise typer.Exit(code=1)
590
+
591
+
592
+ def show_embedding():
593
+ """Show current embedding model configuration."""
594
+ typer.echo("")
595
+ typer.echo(typer.style("╭──────────────────────────────────────────────╮", fg=typer.colors.CYAN))
596
+ typer.echo(typer.style("│", fg=typer.colors.CYAN) + typer.style(" Embedding Configuration ", bold=True) + typer.style("│", fg=typer.colors.CYAN))
597
+ typer.echo(typer.style("╰──────────────────────────────────────────────╯", fg=typer.colors.CYAN))
598
+
599
+ emb_cfg = config_manager.load_embedding_config()
600
+ current_key = emb_cfg.get("model", "hash")
601
+ spec = EMBEDDING_MODELS.get(current_key)
602
+
603
+ if spec is None:
604
+ typer.echo(f" Model {typer.style(current_key, fg=typer.colors.RED)} (unknown)")
605
+ else:
606
+ typer.echo(f" Model {typer.style(f' {current_key} ', bg=typer.colors.CYAN, fg=typer.colors.WHITE, bold=True)}")
607
+ typer.echo(f" Name {typer.style(spec['name'], bold=True)}")
608
+ typer.echo(f" Dim {spec['dim']}")
609
+ typer.echo(f" Size {spec['size']}")
610
+ typer.echo(f" Desc {spec['description']}")
611
+
612
+ typer.echo("")
613
+ typer.echo(typer.style(" Available Models", bold=True))
614
+ typer.echo(typer.style(" ─────────────────────────────────────────", dim=True))
615
+ for key, s in EMBEDDING_MODELS.items():
616
+ marker = typer.style(" *", fg=typer.colors.GREEN) if key == current_key else " "
617
+ typer.echo(f" {marker} {key.ljust(12)} {s['size'].ljust(12)} {s['description']}")
618
+
619
+ typer.echo("")
620
+ typer.echo(typer.style(" Quick Commands", bold=True))
621
+ typer.echo(typer.style(" ─────────────────────────────────────────", dim=True))
622
+ typer.echo(f" {typer.style('cg set-embedding <model>', fg=typer.colors.YELLOW)} Switch model")
623
+ typer.echo(f" {typer.style('cg unset-embedding', fg=typer.colors.YELLOW)} Reset to hash")
624
+ typer.echo("")
625
+
626
+
469
627
  if __name__ == "__main__":
470
628
  app()
@@ -13,10 +13,12 @@ SUPPORTED_EXTENSIONS = {".py"}
13
13
 
14
14
  # Load configuration from TOML file (if available)
15
15
  try:
16
- from .config_manager import load_config
16
+ from .config_manager import load_config, load_embedding_config
17
17
  _toml_config = load_config()
18
+ _emb_config = load_embedding_config()
18
19
  except ImportError:
19
20
  _toml_config = {}
21
+ _emb_config = {}
20
22
 
21
23
  # LLM Provider Configuration — loaded from ~/.codegraph/config.toml (set via `cg setup` or `cg set-llm`)
22
24
  LLM_PROVIDER = _toml_config.get("provider", "ollama")
@@ -24,6 +26,9 @@ LLM_API_KEY = _toml_config.get("api_key", "")
24
26
  LLM_MODEL = _toml_config.get("model", "qwen2.5-coder:7b")
25
27
  LLM_ENDPOINT = _toml_config.get("endpoint", "http://127.0.0.1:11434/api/generate")
26
28
 
29
+ # Embedding model — set via `cg set-embedding` (default: "hash" = no download)
30
+ EMBEDDING_MODEL = _emb_config.get("model", "hash")
31
+
27
32
 
28
33
  def ensure_base_dirs() -> None:
29
34
  """Create base directories for local storage if needed."""
@@ -78,11 +78,37 @@ def load_config() -> Dict[str, Any]:
78
78
  return DEFAULT_CONFIGS["ollama"].copy()
79
79
 
80
80
 
81
+ def load_full_config() -> Dict[str, Any]:
82
+ """Load the entire TOML config (all sections)."""
83
+ if not CONFIG_FILE.exists() or toml is None:
84
+ return {}
85
+ try:
86
+ with open(CONFIG_FILE, "r") as f:
87
+ return toml.load(f)
88
+ except Exception:
89
+ return {}
90
+
91
+
92
+ def _save_full_config(config: Dict[str, Any]) -> bool:
93
+ """Write entire config dict to TOML file, preserving all sections."""
94
+ if toml is None:
95
+ return False
96
+ BASE_DIR.mkdir(parents=True, exist_ok=True)
97
+ try:
98
+ with open(CONFIG_FILE, "w") as f:
99
+ toml.dump(config, f)
100
+ return True
101
+ except Exception:
102
+ return False
103
+
104
+
81
105
  def save_config(provider: str, model: str, api_key: str = "", endpoint: str = "") -> bool:
82
106
  """Save LLM configuration to TOML file.
83
107
 
108
+ Preserves other sections (e.g. ``[embeddings]``) in the file.
109
+
84
110
  Args:
85
- provider: Provider name (ollama, groq, openai, anthropic)
111
+ provider: Provider name (ollama, groq, openai, anthropic, gemini, openrouter)
86
112
  model: Model name
87
113
  api_key: API key for cloud providers
88
114
  endpoint: Custom endpoint (for Ollama)
@@ -90,32 +116,56 @@ def save_config(provider: str, model: str, api_key: str = "", endpoint: str = ""
90
116
  Returns:
91
117
  True if saved successfully, False otherwise
92
118
  """
93
- if toml is None:
94
- return False
95
-
96
- # Ensure directory exists
97
- BASE_DIR.mkdir(parents=True, exist_ok=True)
119
+ config = load_full_config()
98
120
 
99
- # Build config
100
- config = {
101
- "llm": {
102
- "provider": provider,
103
- "model": model,
104
- }
121
+ config["llm"] = {
122
+ "provider": provider,
123
+ "model": model,
105
124
  }
106
-
107
125
  if api_key:
108
126
  config["llm"]["api_key"] = api_key
109
-
110
127
  if endpoint:
111
128
  config["llm"]["endpoint"] = endpoint
112
129
 
113
- try:
114
- with open(CONFIG_FILE, "w") as f:
115
- toml.dump(config, f)
116
- return True
117
- except Exception:
118
- return False
130
+ return _save_full_config(config)
131
+
132
+
133
+ # ------------------------------------------------------------------
134
+ # Embedding configuration
135
+ # ------------------------------------------------------------------
136
+
137
+ def load_embedding_config() -> Dict[str, Any]:
138
+ """Load embedding configuration from ``[embeddings]`` section.
139
+
140
+ Returns:
141
+ Dict with at least ``model`` key, or empty dict.
142
+ """
143
+ full = load_full_config()
144
+ return full.get("embeddings", {})
145
+
146
+
147
+ def save_embedding_config(model_key: str) -> bool:
148
+ """Save embedding model choice to config TOML.
149
+
150
+ Preserves ``[llm]`` and other sections.
151
+
152
+ Args:
153
+ model_key: One of the keys from ``EMBEDDING_MODELS``
154
+ (e.g. ``"minilm"``, ``"jina-code"``, ``"hash"``).
155
+
156
+ Returns:
157
+ True if saved successfully.
158
+ """
159
+ config = load_full_config()
160
+ config["embeddings"] = {"model": model_key}
161
+ return _save_full_config(config)
162
+
163
+
164
+ def clear_embedding_config() -> bool:
165
+ """Remove ``[embeddings]`` section from config, resetting to default."""
166
+ config = load_full_config()
167
+ config.pop("embeddings", None)
168
+ return _save_full_config(config)
119
169
 
120
170
 
121
171
  def get_provider_config(provider: str) -> Dict[str, Any]:
@@ -56,7 +56,7 @@ class RepoMap:
56
56
  class: PythonGraphParser
57
57
  function: _resolve_call_edges
58
58
  codegraph_cli/embeddings.py
59
- class: NeuralEmbedder
59
+ class: TransformerEmbedder
60
60
  class: HashEmbeddingModel
61
61
  function: get_embedder
62
62
  function: cosine_similarity
@@ -4,7 +4,12 @@ from __future__ import annotations
4
4
 
5
5
  from typing import TYPE_CHECKING, List
6
6
 
7
- from crewai import Agent
7
+ try:
8
+ from crewai import Agent
9
+ CREWAI_AVAILABLE = True
10
+ except ImportError:
11
+ Agent = None # type: ignore
12
+ CREWAI_AVAILABLE = False
8
13
 
9
14
  if TYPE_CHECKING:
10
15
  from .crew_tools import create_tools