codegraph-cli 2.1.0__tar.gz → 2.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/PKG-INFO +75 -21
  2. {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/README.md +63 -19
  3. {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/__init__.py +1 -1
  4. {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/agents.py +59 -3
  5. {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/chat_agent.py +58 -11
  6. codegraph_cli-2.1.2/codegraph_cli/cli.py +851 -0
  7. {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/cli_chat.py +204 -94
  8. {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/cli_diagnose.py +13 -2
  9. codegraph_cli-2.1.2/codegraph_cli/cli_docs.py +207 -0
  10. codegraph_cli-2.1.2/codegraph_cli/cli_explore.py +1053 -0
  11. codegraph_cli-2.1.2/codegraph_cli/cli_export.py +941 -0
  12. codegraph_cli-2.1.2/codegraph_cli/cli_groups.py +33 -0
  13. codegraph_cli-2.1.2/codegraph_cli/cli_health.py +316 -0
  14. codegraph_cli-2.1.2/codegraph_cli/cli_history.py +213 -0
  15. codegraph_cli-2.1.2/codegraph_cli/cli_onboard.py +380 -0
  16. codegraph_cli-2.1.2/codegraph_cli/cli_quickstart.py +256 -0
  17. {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/cli_refactor.py +17 -3
  18. {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/cli_setup.py +12 -12
  19. codegraph_cli-2.1.2/codegraph_cli/cli_suggestions.py +90 -0
  20. {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/cli_test.py +17 -3
  21. codegraph_cli-2.1.2/codegraph_cli/cli_tui.py +210 -0
  22. {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/cli_v2.py +24 -4
  23. codegraph_cli-2.1.2/codegraph_cli/cli_watch.py +158 -0
  24. codegraph_cli-2.1.2/codegraph_cli/cli_workflows.py +255 -0
  25. {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/codegen_agent.py +15 -1
  26. {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/config.py +18 -5
  27. {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/context_manager.py +117 -15
  28. {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/crew_agents.py +32 -8
  29. codegraph_cli-2.1.2/codegraph_cli/crew_chat.py +292 -0
  30. {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/crew_tools.py +30 -2
  31. {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/embeddings.py +95 -5
  32. {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/llm.py +42 -55
  33. {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/project_context.py +64 -1
  34. codegraph_cli-2.1.2/codegraph_cli/rag.py +463 -0
  35. {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/storage.py +310 -14
  36. {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/vector_store.py +110 -8
  37. {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli.egg-info/PKG-INFO +75 -21
  38. {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli.egg-info/SOURCES.txt +13 -0
  39. codegraph_cli-2.1.2/codegraph_cli.egg-info/entry_points.txt +2 -0
  40. {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli.egg-info/requires.txt +13 -1
  41. {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/pyproject.toml +15 -3
  42. {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/tests/test_cli.py +47 -47
  43. codegraph_cli-2.1.2/tests/test_cli_workflows.py +242 -0
  44. {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/tests/test_vector_store.py +3 -3
  45. codegraph_cli-2.1.0/codegraph_cli/cli.py +0 -336
  46. codegraph_cli-2.1.0/codegraph_cli/crew_chat.py +0 -159
  47. codegraph_cli-2.1.0/codegraph_cli/rag.py +0 -200
  48. codegraph_cli-2.1.0/codegraph_cli.egg-info/entry_points.txt +0 -2
  49. {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/LICENSE +0 -0
  50. {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/bug_detector.py +0 -0
  51. {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/chat_session.py +0 -0
  52. {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/config_manager.py +0 -0
  53. {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/diff_engine.py +0 -0
  54. {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/graph_export.py +0 -0
  55. {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/models.py +0 -0
  56. {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/models_v2.py +0 -0
  57. {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/orchestrator.py +0 -0
  58. {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/parser.py +0 -0
  59. {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/performance_analyzer.py +0 -0
  60. {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/refactor_agent.py +0 -0
  61. {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/security_scanner.py +0 -0
  62. {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/templates/graph_interactive.html +0 -0
  63. {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/testgen_agent.py +0 -0
  64. {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli/validation_engine.py +0 -0
  65. {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli.egg-info/dependency_links.txt +0 -0
  66. {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/codegraph_cli.egg-info/top_level.txt +0 -0
  67. {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/setup.cfg +0 -0
  68. {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/tests/test_agents.py +0 -0
  69. {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/tests/test_bug_detector.py +0 -0
  70. {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/tests/test_parser.py +0 -0
  71. {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/tests/test_security_scanner.py +0 -0
  72. {codegraph_cli-2.1.0 → codegraph_cli-2.1.2}/tests/test_storage.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codegraph-cli
3
- Version: 2.1.0
3
+ Version: 2.1.2
4
4
  Summary: AI-powered code intelligence CLI with multi-agent analysis, impact graphs, and conversational coding.
5
5
  Author-email: Ali Nasir <muhammadalinasir00786@gmail.com>
6
6
  License: MIT
@@ -35,22 +35,32 @@ Requires-Dist: tree-sitter>=0.24.0
35
35
  Requires-Dist: tree-sitter-python>=0.23.0
36
36
  Requires-Dist: tree-sitter-javascript>=0.23.0
37
37
  Requires-Dist: tree-sitter-typescript>=0.23.0
38
- Requires-Dist: litellm>=1.30.0
38
+ Requires-Dist: rich>=13.0.0
39
+ Requires-Dist: python-docx>=1.0.0
40
+ Requires-Dist: pydantic>=2.0.0
39
41
  Provides-Extra: crew
40
42
  Requires-Dist: crewai>=0.80.0; extra == "crew"
43
+ Provides-Extra: explore
44
+ Requires-Dist: starlette>=0.27.0; extra == "explore"
45
+ Requires-Dist: uvicorn>=0.24.0; extra == "explore"
41
46
  Provides-Extra: dev
42
47
  Requires-Dist: pytest>=7.4.0; extra == "dev"
43
48
  Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
44
49
  Requires-Dist: pytest-mock>=3.11.0; extra == "dev"
45
50
  Requires-Dist: build>=1.0.0; extra == "dev"
46
51
  Requires-Dist: twine>=5.0.0; extra == "dev"
52
+ Provides-Extra: watch
53
+ Requires-Dist: watchdog>=3.0.0; extra == "watch"
47
54
  Provides-Extra: embeddings
48
55
  Requires-Dist: torch>=2.0.0; extra == "embeddings"
49
56
  Requires-Dist: transformers<5.0.0,>=4.48.0; extra == "embeddings"
50
57
  Provides-Extra: all
51
58
  Requires-Dist: crewai>=0.80.0; extra == "all"
59
+ Requires-Dist: starlette>=0.27.0; extra == "all"
60
+ Requires-Dist: uvicorn>=0.24.0; extra == "all"
52
61
  Requires-Dist: torch>=2.0.0; extra == "all"
53
62
  Requires-Dist: transformers<5.0.0,>=4.48.0; extra == "all"
63
+ Requires-Dist: watchdog>=3.0.0; extra == "all"
54
64
  Dynamic: license-file
55
65
 
56
66
  # CodeGraph CLI
@@ -59,7 +69,8 @@ Dynamic: license-file
59
69
 
60
70
  [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
61
71
  [![Python 3.9+](https://img.shields.io/badge/python-3.9%2B-blue.svg)](https://www.python.org)
62
- [![Version](https://img.shields.io/badge/version-2.0.0-blue.svg)](https://github.com/al1-nasir/codegraph-cli)
72
+ [![Version](https://img.shields.io/badge/version-2.1.2-blue.svg)](https://github.com/al1-nasir/codegraph-cli)
73
+ [![CI](https://github.com/al1-nasir/codegraph-cli/actions/workflows/ci.yml/badge.svg)](https://github.com/al1-nasir/codegraph-cli/actions/workflows/ci.yml)
63
74
 
64
75
  ---
65
76
 
@@ -84,12 +95,24 @@ Core capabilities:
84
95
  pip install codegraph-cli
85
96
  ```
86
97
 
98
+ With neural embedding models (semantic code search):
99
+
100
+ ```bash
101
+ pip install codegraph-cli[embeddings]
102
+ ```
103
+
87
104
  With CrewAI multi-agent support:
88
105
 
89
106
  ```bash
90
107
  pip install codegraph-cli[crew]
91
108
  ```
92
109
 
110
+ Everything:
111
+
112
+ ```bash
113
+ pip install codegraph-cli[all]
114
+ ```
115
+
93
116
  For development:
94
117
 
95
118
  ```bash
@@ -105,15 +128,15 @@ pip install -e ".[dev]"
105
128
  ### 1. Configure your LLM provider
106
129
 
107
130
  ```bash
108
- cg setup
131
+ cg config setup
109
132
  ```
110
133
 
111
134
  This runs an interactive wizard that writes configuration to `~/.codegraph/config.toml`. Alternatively, switch providers directly:
112
135
 
113
136
  ```bash
114
- cg set-llm openrouter
115
- cg set-llm groq
116
- cg set-llm ollama
137
+ cg config set-llm openrouter
138
+ cg config set-llm groq
139
+ cg config set-llm ollama
117
140
  ```
118
141
 
119
142
  ### 2. Index a project
@@ -140,18 +163,46 @@ cg chat start --crew # multi-agent mode
140
163
 
141
164
  | Provider | Type | Configuration |
142
165
  |----------|------|---------------|
143
- | Ollama | Local, free | `cg set-llm ollama` |
144
- | Groq | Cloud, free tier | `cg set-llm groq` |
145
- | OpenAI | Cloud | `cg set-llm openai` |
146
- | Anthropic | Cloud | `cg set-llm anthropic` |
147
- | Gemini | Cloud | `cg set-llm gemini` |
148
- | OpenRouter | Cloud, multi-model | `cg set-llm openrouter` |
166
+ | Ollama | Local, free | `cg config set-llm ollama` |
167
+ | Groq | Cloud, free tier | `cg config set-llm groq` |
168
+ | OpenAI | Cloud | `cg config set-llm openai` |
169
+ | Anthropic | Cloud | `cg config set-llm anthropic` |
170
+ | Gemini | Cloud | `cg config set-llm gemini` |
171
+ | OpenRouter | Cloud, multi-model | `cg config set-llm openrouter` |
149
172
 
150
173
  All configuration is stored in `~/.codegraph/config.toml`. No environment variables required.
151
174
 
152
175
  ```bash
153
- cg show-llm # view current provider, model, and endpoint
154
- cg unset-llm # reset to defaults
176
+ cg config show-llm # view current provider, model, and endpoint
177
+ cg config unset-llm # reset to defaults
178
+ ```
179
+
180
+ ---
181
+
182
+ ## Embedding Models
183
+
184
+ CodeGraph supports configurable embedding models for semantic code search. Choose based on your hardware and quality needs:
185
+
186
+ | Model | Download | Dim | Quality | Command |
187
+ |-------|----------|-----|---------|---------|
188
+ | hash | 0 bytes | 256 | Keyword-only | `cg config set-embedding hash` |
189
+ | minilm | ~80 MB | 384 | Decent | `cg config set-embedding minilm` |
190
+ | bge-base | ~440 MB | 768 | Good | `cg config set-embedding bge-base` |
191
+ | jina-code | ~550 MB | 768 | Code-aware | `cg config set-embedding jina-code` |
192
+ | qodo-1.5b | ~6.2 GB | 1536 | Best | `cg config set-embedding qodo-1.5b` |
193
+
194
+ The default is `hash` (zero-dependency, no download). Neural models require the `[embeddings]` extra and are downloaded on first use from HuggingFace.
195
+
196
+ ```bash
197
+ cg config set-embedding jina-code # switch to a neural model
198
+ cg config show-embedding # view current model and all options
199
+ cg config unset-embedding # reset to hash default
200
+ ```
201
+
202
+ After changing the embedding model, re-index your project:
203
+
204
+ ```bash
205
+ cg index /path/to/project
155
206
  ```
156
207
 
157
208
  ---
@@ -252,8 +303,9 @@ CLI Layer (Typer)
252
303
  | | |
253
304
  | +-- Parser (tree-sitter) +-- VectorStore (LanceDB)
254
305
  | +-- RAGRetriever |
255
- | +-- LLM Adapter +-- Embeddings
256
- |
306
+ | +-- LLM Adapter +-- Embeddings (configurable)
307
+ | hash | minilm | bge-base
308
+ | jina-code | qodo-1.5b
257
309
  +-- ChatAgent (standard mode)
258
310
  |
259
311
  +-- CrewChatAgent (--crew mode)
@@ -264,6 +316,8 @@ CLI Layer (Typer)
264
316
  +-- Code Analysis Agent ---> 3 search/analysis tools
265
317
  ```
266
318
 
319
+ **Embeddings**: Five models are available via `cg config set-embedding`, ranging from Hash (default, zero-dependency) to Qodo-Embed-1-1.5B (best quality, ~6.2 GB download). Neural models use raw `transformers` + `torch` — no sentence-transformers overhead. Models are cached in `~/.codegraph/models/`.
320
+
267
321
  **Parser**: tree-sitter grammars for Python, JavaScript, and TypeScript. Extracts modules, classes, functions, imports, and call relationships into a directed graph.
268
322
 
269
323
  **Storage**: SQLite for the code graph (nodes + edges), LanceDB for vector embeddings. All data stored under `~/.codegraph/`.
@@ -278,14 +332,14 @@ CLI Layer (Typer)
278
332
  codegraph_cli/
279
333
  cli.py # main Typer application, all top-level commands
280
334
  cli_chat.py # interactive chat REPL with styled output
281
- cli_setup.py # setup wizard, set-llm, unset-llm, show-llm
335
+ cli_setup.py # setup wizard, set-llm, unset-llm, set-embedding
282
336
  cli_v2.py # v2 code generation commands
283
337
  config.py # loads config from TOML
284
- config_manager.py # TOML read/write, provider validation
338
+ config_manager.py # TOML read/write, provider and embedding config
285
339
  llm.py # multi-provider LLM adapter
286
340
  parser.py # tree-sitter AST parsing
287
341
  storage.py # SQLite graph store
288
- embeddings.py # hash-based embedding model
342
+ embeddings.py # configurable embedding engine (5 models)
289
343
  rag.py # RAG retriever
290
344
  vector_store.py # LanceDB vector store
291
345
  orchestrator.py # coordinates parsing, search, impact
@@ -310,7 +364,7 @@ codegraph_cli/
310
364
  git clone https://github.com/al1-nasir/codegraph-cli.git
311
365
  cd codegraph-cli
312
366
  python -m venv .venv && source .venv/bin/activate
313
- pip install -e ".[dev,crew]"
367
+ pip install -e ".[dev,crew,embeddings]"
314
368
  pytest
315
369
  ```
316
370
 
@@ -4,7 +4,8 @@
4
4
 
5
5
  [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
6
6
  [![Python 3.9+](https://img.shields.io/badge/python-3.9%2B-blue.svg)](https://www.python.org)
7
- [![Version](https://img.shields.io/badge/version-2.0.0-blue.svg)](https://github.com/al1-nasir/codegraph-cli)
7
+ [![Version](https://img.shields.io/badge/version-2.1.2-blue.svg)](https://github.com/al1-nasir/codegraph-cli)
8
+ [![CI](https://github.com/al1-nasir/codegraph-cli/actions/workflows/ci.yml/badge.svg)](https://github.com/al1-nasir/codegraph-cli/actions/workflows/ci.yml)
8
9
 
9
10
  ---
10
11
 
@@ -29,12 +30,24 @@ Core capabilities:
29
30
  pip install codegraph-cli
30
31
  ```
31
32
 
33
+ With neural embedding models (semantic code search):
34
+
35
+ ```bash
36
+ pip install codegraph-cli[embeddings]
37
+ ```
38
+
32
39
  With CrewAI multi-agent support:
33
40
 
34
41
  ```bash
35
42
  pip install codegraph-cli[crew]
36
43
  ```
37
44
 
45
+ Everything:
46
+
47
+ ```bash
48
+ pip install codegraph-cli[all]
49
+ ```
50
+
38
51
  For development:
39
52
 
40
53
  ```bash
@@ -50,15 +63,15 @@ pip install -e ".[dev]"
50
63
  ### 1. Configure your LLM provider
51
64
 
52
65
  ```bash
53
- cg setup
66
+ cg config setup
54
67
  ```
55
68
 
56
69
  This runs an interactive wizard that writes configuration to `~/.codegraph/config.toml`. Alternatively, switch providers directly:
57
70
 
58
71
  ```bash
59
- cg set-llm openrouter
60
- cg set-llm groq
61
- cg set-llm ollama
72
+ cg config set-llm openrouter
73
+ cg config set-llm groq
74
+ cg config set-llm ollama
62
75
  ```
63
76
 
64
77
  ### 2. Index a project
@@ -85,18 +98,46 @@ cg chat start --crew # multi-agent mode
85
98
 
86
99
  | Provider | Type | Configuration |
87
100
  |----------|------|---------------|
88
- | Ollama | Local, free | `cg set-llm ollama` |
89
- | Groq | Cloud, free tier | `cg set-llm groq` |
90
- | OpenAI | Cloud | `cg set-llm openai` |
91
- | Anthropic | Cloud | `cg set-llm anthropic` |
92
- | Gemini | Cloud | `cg set-llm gemini` |
93
- | OpenRouter | Cloud, multi-model | `cg set-llm openrouter` |
101
+ | Ollama | Local, free | `cg config set-llm ollama` |
102
+ | Groq | Cloud, free tier | `cg config set-llm groq` |
103
+ | OpenAI | Cloud | `cg config set-llm openai` |
104
+ | Anthropic | Cloud | `cg config set-llm anthropic` |
105
+ | Gemini | Cloud | `cg config set-llm gemini` |
106
+ | OpenRouter | Cloud, multi-model | `cg config set-llm openrouter` |
94
107
 
95
108
  All configuration is stored in `~/.codegraph/config.toml`. No environment variables required.
96
109
 
97
110
  ```bash
98
- cg show-llm # view current provider, model, and endpoint
99
- cg unset-llm # reset to defaults
111
+ cg config show-llm # view current provider, model, and endpoint
112
+ cg config unset-llm # reset to defaults
113
+ ```
114
+
115
+ ---
116
+
117
+ ## Embedding Models
118
+
119
+ CodeGraph supports configurable embedding models for semantic code search. Choose based on your hardware and quality needs:
120
+
121
+ | Model | Download | Dim | Quality | Command |
122
+ |-------|----------|-----|---------|---------|
123
+ | hash | 0 bytes | 256 | Keyword-only | `cg config set-embedding hash` |
124
+ | minilm | ~80 MB | 384 | Decent | `cg config set-embedding minilm` |
125
+ | bge-base | ~440 MB | 768 | Good | `cg config set-embedding bge-base` |
126
+ | jina-code | ~550 MB | 768 | Code-aware | `cg config set-embedding jina-code` |
127
+ | qodo-1.5b | ~6.2 GB | 1536 | Best | `cg config set-embedding qodo-1.5b` |
128
+
129
+ The default is `hash` (zero-dependency, no download). Neural models require the `[embeddings]` extra and are downloaded on first use from HuggingFace.
130
+
131
+ ```bash
132
+ cg config set-embedding jina-code # switch to a neural model
133
+ cg config show-embedding # view current model and all options
134
+ cg config unset-embedding # reset to hash default
135
+ ```
136
+
137
+ After changing the embedding model, re-index your project:
138
+
139
+ ```bash
140
+ cg index /path/to/project
100
141
  ```
101
142
 
102
143
  ---
@@ -197,8 +238,9 @@ CLI Layer (Typer)
197
238
  | | |
198
239
  | +-- Parser (tree-sitter) +-- VectorStore (LanceDB)
199
240
  | +-- RAGRetriever |
200
- | +-- LLM Adapter +-- Embeddings
201
- |
241
+ | +-- LLM Adapter +-- Embeddings (configurable)
242
+ | hash | minilm | bge-base
243
+ | jina-code | qodo-1.5b
202
244
  +-- ChatAgent (standard mode)
203
245
  |
204
246
  +-- CrewChatAgent (--crew mode)
@@ -209,6 +251,8 @@ CLI Layer (Typer)
209
251
  +-- Code Analysis Agent ---> 3 search/analysis tools
210
252
  ```
211
253
 
254
+ **Embeddings**: Five models are available via `cg config set-embedding`, ranging from Hash (default, zero-dependency) to Qodo-Embed-1-1.5B (best quality, ~6.2 GB download). Neural models use raw `transformers` + `torch` — no sentence-transformers overhead. Models are cached in `~/.codegraph/models/`.
255
+
212
256
  **Parser**: tree-sitter grammars for Python, JavaScript, and TypeScript. Extracts modules, classes, functions, imports, and call relationships into a directed graph.
213
257
 
214
258
  **Storage**: SQLite for the code graph (nodes + edges), LanceDB for vector embeddings. All data stored under `~/.codegraph/`.
@@ -223,14 +267,14 @@ CLI Layer (Typer)
223
267
  codegraph_cli/
224
268
  cli.py # main Typer application, all top-level commands
225
269
  cli_chat.py # interactive chat REPL with styled output
226
- cli_setup.py # setup wizard, set-llm, unset-llm, show-llm
270
+ cli_setup.py # setup wizard, set-llm, unset-llm, set-embedding
227
271
  cli_v2.py # v2 code generation commands
228
272
  config.py # loads config from TOML
229
- config_manager.py # TOML read/write, provider validation
273
+ config_manager.py # TOML read/write, provider and embedding config
230
274
  llm.py # multi-provider LLM adapter
231
275
  parser.py # tree-sitter AST parsing
232
276
  storage.py # SQLite graph store
233
- embeddings.py # hash-based embedding model
277
+ embeddings.py # configurable embedding engine (5 models)
234
278
  rag.py # RAG retriever
235
279
  vector_store.py # LanceDB vector store
236
280
  orchestrator.py # coordinates parsing, search, impact
@@ -255,7 +299,7 @@ codegraph_cli/
255
299
  git clone https://github.com/al1-nasir/codegraph-cli.git
256
300
  cd codegraph-cli
257
301
  python -m venv .venv && source .venv/bin/activate
258
- pip install -e ".[dev,crew]"
302
+ pip install -e ".[dev,crew,embeddings]"
259
303
  pytest
260
304
  ```
261
305
 
@@ -1,4 +1,4 @@
1
1
  """CodeGraph CLI package."""
2
2
 
3
3
  __all__ = ["__version__"]
4
- __version__ = "2.0.1"
4
+ __version__ = "2.1.2"
@@ -2,17 +2,66 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
+ import re
5
6
  from collections import deque
6
7
  from pathlib import Path
7
8
  from typing import Dict, List, Set
8
9
 
9
10
  from .embeddings import HashEmbeddingModel, TransformerEmbedder
10
11
  from .llm import LocalLLM
11
- from .models import ImpactReport
12
+ from .models import ImpactReport, Node
12
13
  from .parser import PythonGraphParser
13
14
  from .rag import RAGRetriever
14
15
  from .storage import GraphStore
15
16
 
17
+ # Regex to strip bare import lines from chunk text
18
+ _IMPORT_RE = re.compile(r"^(?:from\s+\S+\s+)?import\s+.+$", re.MULTILINE)
19
+
20
+ # Maximum characters to keep for a single chunk's code body.
21
+ # Module-level nodes can be very large; truncating keeps embeddings
22
+ # focused on the symbol's signature + docstring + first N lines.
23
+ _MAX_CHUNK_CODE_CHARS = 1500
24
+
25
+
26
+ def _build_chunk_text(node: Node) -> str:
27
+ """Build structured chunk text for embedding.
28
+
29
+ The text is formatted so that the embedding model captures:
30
+ - **file path** (helps retrieval when users mention filenames)
31
+ - **symbol name + type** (boosts exact-match semantics)
32
+ - **docstring** (captures purpose / intent)
33
+ - **code body** (captures implementation detail)
34
+
35
+ Import lines are stripped from non-module nodes to reduce
36
+ noise. Code bodies are capped in length so oversized chunks
37
+ do not dilute the embedding.
38
+ """
39
+ parts: List[str] = [
40
+ f"file: {node.file_path}",
41
+ f"symbol: {node.qualname}",
42
+ f"type: {node.node_type}",
43
+ ]
44
+
45
+ if node.docstring:
46
+ parts.append(f"doc: {node.docstring.strip()}")
47
+
48
+ # Clean code: strip import lines for non-module nodes
49
+ code = node.code
50
+ if node.node_type != "module":
51
+ code = _IMPORT_RE.sub("", code).strip()
52
+ else:
53
+ # For modules keep only the first N chars to avoid huge chunks
54
+ code = code[:_MAX_CHUNK_CODE_CHARS]
55
+
56
+ # Truncate overly long code
57
+ if len(code) > _MAX_CHUNK_CODE_CHARS:
58
+ code = code[:_MAX_CHUNK_CODE_CHARS] + "\n# ... (truncated)"
59
+
60
+ if code:
61
+ parts.append(code)
62
+
63
+ return "\n".join(parts)
64
+
16
65
 
17
66
  class GraphAgent:
18
67
  """Responsible for parsing projects and maintaining graph memory."""
@@ -31,7 +80,7 @@ class GraphAgent:
31
80
  total_nodes = len(nodes)
32
81
 
33
82
  for idx, node in enumerate(nodes, 1):
34
- text = "\n".join([node.qualname, node.docstring, node.code])
83
+ text = _build_chunk_text(node)
35
84
  emb = self.embedding_model.embed_text(text)
36
85
  node_payload.append((node, emb))
37
86
 
@@ -43,13 +92,20 @@ class GraphAgent:
43
92
  if show_progress:
44
93
  print(f"\r📊 Indexing: {total_nodes}/{total_nodes} nodes (100%) ")
45
94
 
46
- self.store.insert_nodes(node_payload)
95
+ emb_model_key = getattr(self.embedding_model, 'model_key', 'hash')
96
+ emb_dim = getattr(self.embedding_model, 'dim', 256)
97
+
98
+ self.store.insert_nodes(node_payload, model_key=emb_model_key)
47
99
  self.store.insert_edges(edges)
100
+
101
+ # Record embedding model info in project metadata
48
102
  self.store.set_metadata(
49
103
  {
50
104
  "project_root": str(project_root),
51
105
  "node_count": len(nodes),
52
106
  "edge_count": len(edges),
107
+ "embedding_model": emb_model_key,
108
+ "embedding_dim": emb_dim,
53
109
  }
54
110
  )
55
111
  return {"nodes": len(nodes), "edges": len(edges)}
@@ -7,7 +7,7 @@ from typing import Optional
7
7
 
8
8
  from .chat_session import SessionManager
9
9
  from .codegen_agent import CodeGenAgent
10
- from .context_manager import assemble_context_for_llm, detect_intent
10
+ from .context_manager import SymbolMemory, assemble_context_for_llm, detect_intent
11
11
  from .llm import LocalLLM
12
12
  from .models_v2 import ChatSession, CodeProposal
13
13
  from .orchestrator import MCPOrchestrator
@@ -59,11 +59,60 @@ class ChatAgent:
59
59
  self.rag_retriever = rag_retriever
60
60
  self.session_manager = SessionManager()
61
61
 
62
+ # Symbol memory — tracks recently discussed symbols & files
63
+ # so we can skip redundant RAG queries.
64
+ self.symbol_memory = SymbolMemory()
65
+
62
66
  # Initialize specialized agents
63
67
  from .codegen_agent import CodeGenAgent
64
68
  from .refactor_agent import RefactorAgent
65
69
  self.codegen_agent = CodeGenAgent(context.store, llm, project_context=context)
66
70
  self.refactor_agent = RefactorAgent(context.store)
71
+
72
+ # Build enhanced system prompt with auto-context
73
+ self.system_prompt = self._build_system_prompt()
74
+
75
+ def _build_system_prompt(self) -> str:
76
+ """Build system prompt enriched with project context.
77
+
78
+ Includes project name, source path, indexed file/symbol counts,
79
+ node-type breakdown, and recently modified files so the LLM has
80
+ immediate awareness of the codebase.
81
+ """
82
+ base = SYSTEM_PROMPT
83
+
84
+ try:
85
+ summary = self.context.get_project_summary()
86
+ parts = [
87
+ "\n\nProject Context:",
88
+ f"- Project: {summary.get('project_name', 'unknown')}",
89
+ f"- Source: {summary.get('source_path', 'N/A')}",
90
+ f"- Indexed: {summary.get('indexed_files', 0)} files, {summary.get('total_nodes', 0)} symbols",
91
+ ]
92
+
93
+ node_types = summary.get("node_types", {})
94
+ if node_types:
95
+ parts.append(
96
+ f"- Breakdown: {node_types.get('function', 0)} functions, "
97
+ f"{node_types.get('class', 0)} classes, "
98
+ f"{node_types.get('module', 0)} modules"
99
+ )
100
+
101
+ # Recently modified files
102
+ if self.context.has_source_access:
103
+ try:
104
+ items = self.context.list_directory(".")
105
+ files = [f for f in items if f["type"] == "file"]
106
+ files.sort(key=lambda f: f.get("modified", ""), reverse=True)
107
+ recent = [f["name"] for f in files[:5]]
108
+ if recent:
109
+ parts.append(f"- Recently modified: {', '.join(recent)}")
110
+ except Exception:
111
+ pass
112
+
113
+ return base + "\n".join(parts)
114
+ except Exception:
115
+ return base
67
116
 
68
117
  def process_message(
69
118
  self,
@@ -72,6 +121,10 @@ class ChatAgent:
72
121
  ) -> str:
73
122
  """Process user message and generate response.
74
123
 
124
+ Note: The caller (REPL) is responsible for adding messages to
125
+ the session. This method does NOT add messages itself to avoid
126
+ duplicate entries.
127
+
75
128
  Args:
76
129
  user_message: User's message
77
130
  session: Current chat session
@@ -79,10 +132,6 @@ class ChatAgent:
79
132
  Returns:
80
133
  Assistant's response
81
134
  """
82
- # Add user message to session
83
- timestamp = datetime.now().isoformat()
84
- session.add_message("user", user_message, timestamp)
85
-
86
135
  # Detect intent
87
136
  intent = detect_intent(user_message)
88
137
 
@@ -103,9 +152,6 @@ class ChatAgent:
103
152
  # General chat - use LLM with RAG context
104
153
  response = self._handle_chat(user_message, session)
105
154
 
106
- # Add assistant response to session
107
- session.add_message("assistant", response, datetime.now().isoformat())
108
-
109
155
  # Save session
110
156
  self.session_manager.save_session(session)
111
157
 
@@ -289,13 +335,14 @@ class ChatAgent:
289
335
 
290
336
  def _handle_chat(self, message: str, session: ChatSession) -> str:
291
337
  """Handle general chat with LLM and RAG context."""
292
- # Assemble context using smart RAG strategy
338
+ # Assemble context using smart RAG strategy + symbol memory
293
339
  context_messages = assemble_context_for_llm(
294
340
  user_message=message,
295
341
  session=session,
296
342
  rag_retriever=self.rag_retriever,
297
- system_prompt=SYSTEM_PROMPT,
298
- max_tokens=8000
343
+ system_prompt=self.system_prompt,
344
+ max_tokens=8000,
345
+ symbol_memory=self.symbol_memory,
299
346
  )
300
347
 
301
348
  # Call LLM