codegraph-cli 2.1.1__tar.gz → 2.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/PKG-INFO +35 -24
  2. {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/README.md +23 -22
  3. {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/__init__.py +1 -1
  4. {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/agents.py +59 -3
  5. {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/chat_agent.py +58 -11
  6. codegraph_cli-2.1.2/codegraph_cli/cli.py +851 -0
  7. {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/cli_chat.py +200 -95
  8. {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/cli_diagnose.py +13 -2
  9. codegraph_cli-2.1.2/codegraph_cli/cli_docs.py +207 -0
  10. codegraph_cli-2.1.2/codegraph_cli/cli_explore.py +1053 -0
  11. codegraph_cli-2.1.2/codegraph_cli/cli_export.py +941 -0
  12. codegraph_cli-2.1.2/codegraph_cli/cli_groups.py +33 -0
  13. codegraph_cli-2.1.2/codegraph_cli/cli_health.py +316 -0
  14. codegraph_cli-2.1.2/codegraph_cli/cli_history.py +213 -0
  15. codegraph_cli-2.1.2/codegraph_cli/cli_onboard.py +380 -0
  16. codegraph_cli-2.1.2/codegraph_cli/cli_quickstart.py +256 -0
  17. {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/cli_refactor.py +17 -3
  18. {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/cli_setup.py +12 -12
  19. codegraph_cli-2.1.2/codegraph_cli/cli_suggestions.py +90 -0
  20. {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/cli_test.py +17 -3
  21. codegraph_cli-2.1.2/codegraph_cli/cli_tui.py +210 -0
  22. {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/cli_v2.py +24 -4
  23. codegraph_cli-2.1.2/codegraph_cli/cli_watch.py +158 -0
  24. codegraph_cli-2.1.2/codegraph_cli/cli_workflows.py +255 -0
  25. {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/codegen_agent.py +15 -1
  26. {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/config.py +18 -5
  27. {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/context_manager.py +117 -15
  28. {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/crew_agents.py +26 -7
  29. codegraph_cli-2.1.2/codegraph_cli/crew_chat.py +292 -0
  30. {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/crew_tools.py +21 -1
  31. {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/embeddings.py +95 -5
  32. {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/llm.py +42 -55
  33. {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/project_context.py +64 -1
  34. codegraph_cli-2.1.2/codegraph_cli/rag.py +463 -0
  35. {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/storage.py +310 -14
  36. {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/vector_store.py +110 -8
  37. {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli.egg-info/PKG-INFO +35 -24
  38. {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli.egg-info/SOURCES.txt +13 -0
  39. codegraph_cli-2.1.2/codegraph_cli.egg-info/entry_points.txt +2 -0
  40. {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli.egg-info/requires.txt +13 -1
  41. {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/pyproject.toml +15 -3
  42. {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/tests/test_cli.py +47 -47
  43. codegraph_cli-2.1.2/tests/test_cli_workflows.py +242 -0
  44. {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/tests/test_vector_store.py +3 -3
  45. codegraph_cli-2.1.1/codegraph_cli/cli.py +0 -336
  46. codegraph_cli-2.1.1/codegraph_cli/crew_chat.py +0 -163
  47. codegraph_cli-2.1.1/codegraph_cli/rag.py +0 -200
  48. codegraph_cli-2.1.1/codegraph_cli.egg-info/entry_points.txt +0 -2
  49. {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/LICENSE +0 -0
  50. {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/bug_detector.py +0 -0
  51. {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/chat_session.py +0 -0
  52. {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/config_manager.py +0 -0
  53. {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/diff_engine.py +0 -0
  54. {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/graph_export.py +0 -0
  55. {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/models.py +0 -0
  56. {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/models_v2.py +0 -0
  57. {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/orchestrator.py +0 -0
  58. {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/parser.py +0 -0
  59. {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/performance_analyzer.py +0 -0
  60. {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/refactor_agent.py +0 -0
  61. {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/security_scanner.py +0 -0
  62. {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/templates/graph_interactive.html +0 -0
  63. {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/testgen_agent.py +0 -0
  64. {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/validation_engine.py +0 -0
  65. {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli.egg-info/dependency_links.txt +0 -0
  66. {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli.egg-info/top_level.txt +0 -0
  67. {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/setup.cfg +0 -0
  68. {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/tests/test_agents.py +0 -0
  69. {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/tests/test_bug_detector.py +0 -0
  70. {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/tests/test_parser.py +0 -0
  71. {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/tests/test_security_scanner.py +0 -0
  72. {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/tests/test_storage.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codegraph-cli
3
- Version: 2.1.1
3
+ Version: 2.1.2
4
4
  Summary: AI-powered code intelligence CLI with multi-agent analysis, impact graphs, and conversational coding.
5
5
  Author-email: Ali Nasir <muhammadalinasir00786@gmail.com>
6
6
  License: MIT
@@ -35,22 +35,32 @@ Requires-Dist: tree-sitter>=0.24.0
35
35
  Requires-Dist: tree-sitter-python>=0.23.0
36
36
  Requires-Dist: tree-sitter-javascript>=0.23.0
37
37
  Requires-Dist: tree-sitter-typescript>=0.23.0
38
- Requires-Dist: litellm>=1.30.0
38
+ Requires-Dist: rich>=13.0.0
39
+ Requires-Dist: python-docx>=1.0.0
40
+ Requires-Dist: pydantic>=2.0.0
39
41
  Provides-Extra: crew
40
42
  Requires-Dist: crewai>=0.80.0; extra == "crew"
43
+ Provides-Extra: explore
44
+ Requires-Dist: starlette>=0.27.0; extra == "explore"
45
+ Requires-Dist: uvicorn>=0.24.0; extra == "explore"
41
46
  Provides-Extra: dev
42
47
  Requires-Dist: pytest>=7.4.0; extra == "dev"
43
48
  Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
44
49
  Requires-Dist: pytest-mock>=3.11.0; extra == "dev"
45
50
  Requires-Dist: build>=1.0.0; extra == "dev"
46
51
  Requires-Dist: twine>=5.0.0; extra == "dev"
52
+ Provides-Extra: watch
53
+ Requires-Dist: watchdog>=3.0.0; extra == "watch"
47
54
  Provides-Extra: embeddings
48
55
  Requires-Dist: torch>=2.0.0; extra == "embeddings"
49
56
  Requires-Dist: transformers<5.0.0,>=4.48.0; extra == "embeddings"
50
57
  Provides-Extra: all
51
58
  Requires-Dist: crewai>=0.80.0; extra == "all"
59
+ Requires-Dist: starlette>=0.27.0; extra == "all"
60
+ Requires-Dist: uvicorn>=0.24.0; extra == "all"
52
61
  Requires-Dist: torch>=2.0.0; extra == "all"
53
62
  Requires-Dist: transformers<5.0.0,>=4.48.0; extra == "all"
63
+ Requires-Dist: watchdog>=3.0.0; extra == "all"
54
64
  Dynamic: license-file
55
65
 
56
66
  # CodeGraph CLI
@@ -59,7 +69,8 @@ Dynamic: license-file
59
69
 
60
70
  [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
61
71
  [![Python 3.9+](https://img.shields.io/badge/python-3.9%2B-blue.svg)](https://www.python.org)
62
- [![Version](https://img.shields.io/badge/version-2.1.0-blue.svg)](https://github.com/al1-nasir/codegraph-cli)
72
+ [![Version](https://img.shields.io/badge/version-2.1.1-blue.svg)](https://github.com/al1-nasir/codegraph-cli)
73
+ [![CI](https://github.com/al1-nasir/codegraph-cli/actions/workflows/ci.yml/badge.svg)](https://github.com/al1-nasir/codegraph-cli/actions/workflows/ci.yml)
63
74
 
64
75
  ---
65
76
 
@@ -117,15 +128,15 @@ pip install -e ".[dev]"
117
128
  ### 1. Configure your LLM provider
118
129
 
119
130
  ```bash
120
- cg setup
131
+ cg config setup
121
132
  ```
122
133
 
123
134
  This runs an interactive wizard that writes configuration to `~/.codegraph/config.toml`. Alternatively, switch providers directly:
124
135
 
125
136
  ```bash
126
- cg set-llm openrouter
127
- cg set-llm groq
128
- cg set-llm ollama
137
+ cg config set-llm openrouter
138
+ cg config set-llm groq
139
+ cg config set-llm ollama
129
140
  ```
130
141
 
131
142
  ### 2. Index a project
@@ -152,18 +163,18 @@ cg chat start --crew # multi-agent mode
152
163
 
153
164
  | Provider | Type | Configuration |
154
165
  |----------|------|---------------|
155
- | Ollama | Local, free | `cg set-llm ollama` |
156
- | Groq | Cloud, free tier | `cg set-llm groq` |
157
- | OpenAI | Cloud | `cg set-llm openai` |
158
- | Anthropic | Cloud | `cg set-llm anthropic` |
159
- | Gemini | Cloud | `cg set-llm gemini` |
160
- | OpenRouter | Cloud, multi-model | `cg set-llm openrouter` |
166
+ | Ollama | Local, free | `cg config set-llm ollama` |
167
+ | Groq | Cloud, free tier | `cg config set-llm groq` |
168
+ | OpenAI | Cloud | `cg config set-llm openai` |
169
+ | Anthropic | Cloud | `cg config set-llm anthropic` |
170
+ | Gemini | Cloud | `cg config set-llm gemini` |
171
+ | OpenRouter | Cloud, multi-model | `cg config set-llm openrouter` |
161
172
 
162
173
  All configuration is stored in `~/.codegraph/config.toml`. No environment variables required.
163
174
 
164
175
  ```bash
165
- cg show-llm # view current provider, model, and endpoint
166
- cg unset-llm # reset to defaults
176
+ cg config show-llm # view current provider, model, and endpoint
177
+ cg config unset-llm # reset to defaults
167
178
  ```
168
179
 
169
180
  ---
@@ -174,18 +185,18 @@ CodeGraph supports configurable embedding models for semantic code search. Choos
174
185
 
175
186
  | Model | Download | Dim | Quality | Command |
176
187
  |-------|----------|-----|---------|---------|
177
- | hash | 0 bytes | 256 | Keyword-only | `cg set-embedding hash` |
178
- | minilm | ~80 MB | 384 | Decent | `cg set-embedding minilm` |
179
- | bge-base | ~440 MB | 768 | Good | `cg set-embedding bge-base` |
180
- | jina-code | ~550 MB | 768 | Code-aware | `cg set-embedding jina-code` |
181
- | qodo-1.5b | ~6.2 GB | 1536 | Best | `cg set-embedding qodo-1.5b` |
188
+ | hash | 0 bytes | 256 | Keyword-only | `cg config set-embedding hash` |
189
+ | minilm | ~80 MB | 384 | Decent | `cg config set-embedding minilm` |
190
+ | bge-base | ~440 MB | 768 | Good | `cg config set-embedding bge-base` |
191
+ | jina-code | ~550 MB | 768 | Code-aware | `cg config set-embedding jina-code` |
192
+ | qodo-1.5b | ~6.2 GB | 1536 | Best | `cg config set-embedding qodo-1.5b` |
182
193
 
183
194
  The default is `hash` (zero-dependency, no download). Neural models require the `[embeddings]` extra and are downloaded on first use from HuggingFace.
184
195
 
185
196
  ```bash
186
- cg set-embedding jina-code # switch to a neural model
187
- cg show-embedding # view current model and all options
188
- cg unset-embedding # reset to hash default
197
+ cg config set-embedding jina-code # switch to a neural model
198
+ cg config show-embedding # view current model and all options
199
+ cg config unset-embedding # reset to hash default
189
200
  ```
190
201
 
191
202
  After changing the embedding model, re-index your project:
@@ -305,7 +316,7 @@ CLI Layer (Typer)
305
316
  +-- Code Analysis Agent ---> 3 search/analysis tools
306
317
  ```
307
318
 
308
- **Embeddings**: Five models available via `cg set-embedding`. Hash (default, zero-dependency) through Qodo-Embed-1-1.5B (best quality, 6 GB). Neural models use raw `transformers` + `torch` — no sentence-transformers overhead. Models are cached in `~/.codegraph/models/`.
319
+ **Embeddings**: Five models available via `cg config set-embedding`. Hash (default, zero-dependency) through Qodo-Embed-1-1.5B (best quality, 6 GB). Neural models use raw `transformers` + `torch` — no sentence-transformers overhead. Models are cached in `~/.codegraph/models/`.
309
320
 
310
321
  **Parser**: tree-sitter grammars for Python, JavaScript, and TypeScript. Extracts modules, classes, functions, imports, and call relationships into a directed graph.
311
322
 
@@ -4,7 +4,8 @@
4
4
 
5
5
  [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
6
6
  [![Python 3.9+](https://img.shields.io/badge/python-3.9%2B-blue.svg)](https://www.python.org)
7
- [![Version](https://img.shields.io/badge/version-2.1.0-blue.svg)](https://github.com/al1-nasir/codegraph-cli)
7
+ [![Version](https://img.shields.io/badge/version-2.1.1-blue.svg)](https://github.com/al1-nasir/codegraph-cli)
8
+ [![CI](https://github.com/al1-nasir/codegraph-cli/actions/workflows/ci.yml/badge.svg)](https://github.com/al1-nasir/codegraph-cli/actions/workflows/ci.yml)
8
9
 
9
10
  ---
10
11
 
@@ -62,15 +63,15 @@ pip install -e ".[dev]"
62
63
  ### 1. Configure your LLM provider
63
64
 
64
65
  ```bash
65
- cg setup
66
+ cg config setup
66
67
  ```
67
68
 
68
69
  This runs an interactive wizard that writes configuration to `~/.codegraph/config.toml`. Alternatively, switch providers directly:
69
70
 
70
71
  ```bash
71
- cg set-llm openrouter
72
- cg set-llm groq
73
- cg set-llm ollama
72
+ cg config set-llm openrouter
73
+ cg config set-llm groq
74
+ cg config set-llm ollama
74
75
  ```
75
76
 
76
77
  ### 2. Index a project
@@ -97,18 +98,18 @@ cg chat start --crew # multi-agent mode
97
98
 
98
99
  | Provider | Type | Configuration |
99
100
  |----------|------|---------------|
100
- | Ollama | Local, free | `cg set-llm ollama` |
101
- | Groq | Cloud, free tier | `cg set-llm groq` |
102
- | OpenAI | Cloud | `cg set-llm openai` |
103
- | Anthropic | Cloud | `cg set-llm anthropic` |
104
- | Gemini | Cloud | `cg set-llm gemini` |
105
- | OpenRouter | Cloud, multi-model | `cg set-llm openrouter` |
101
+ | Ollama | Local, free | `cg config set-llm ollama` |
102
+ | Groq | Cloud, free tier | `cg config set-llm groq` |
103
+ | OpenAI | Cloud | `cg config set-llm openai` |
104
+ | Anthropic | Cloud | `cg config set-llm anthropic` |
105
+ | Gemini | Cloud | `cg config set-llm gemini` |
106
+ | OpenRouter | Cloud, multi-model | `cg config set-llm openrouter` |
106
107
 
107
108
  All configuration is stored in `~/.codegraph/config.toml`. No environment variables required.
108
109
 
109
110
  ```bash
110
- cg show-llm # view current provider, model, and endpoint
111
- cg unset-llm # reset to defaults
111
+ cg config show-llm # view current provider, model, and endpoint
112
+ cg config unset-llm # reset to defaults
112
113
  ```
113
114
 
114
115
  ---
@@ -119,18 +120,18 @@ CodeGraph supports configurable embedding models for semantic code search. Choos
119
120
 
120
121
  | Model | Download | Dim | Quality | Command |
121
122
  |-------|----------|-----|---------|---------|
122
- | hash | 0 bytes | 256 | Keyword-only | `cg set-embedding hash` |
123
- | minilm | ~80 MB | 384 | Decent | `cg set-embedding minilm` |
124
- | bge-base | ~440 MB | 768 | Good | `cg set-embedding bge-base` |
125
- | jina-code | ~550 MB | 768 | Code-aware | `cg set-embedding jina-code` |
126
- | qodo-1.5b | ~6.2 GB | 1536 | Best | `cg set-embedding qodo-1.5b` |
123
+ | hash | 0 bytes | 256 | Keyword-only | `cg config set-embedding hash` |
124
+ | minilm | ~80 MB | 384 | Decent | `cg config set-embedding minilm` |
125
+ | bge-base | ~440 MB | 768 | Good | `cg config set-embedding bge-base` |
126
+ | jina-code | ~550 MB | 768 | Code-aware | `cg config set-embedding jina-code` |
127
+ | qodo-1.5b | ~6.2 GB | 1536 | Best | `cg config set-embedding qodo-1.5b` |
127
128
 
128
129
  The default is `hash` (zero-dependency, no download). Neural models require the `[embeddings]` extra and are downloaded on first use from HuggingFace.
129
130
 
130
131
  ```bash
131
- cg set-embedding jina-code # switch to a neural model
132
- cg show-embedding # view current model and all options
133
- cg unset-embedding # reset to hash default
132
+ cg config set-embedding jina-code # switch to a neural model
133
+ cg config show-embedding # view current model and all options
134
+ cg config unset-embedding # reset to hash default
134
135
  ```
135
136
 
136
137
  After changing the embedding model, re-index your project:
@@ -250,7 +251,7 @@ CLI Layer (Typer)
250
251
  +-- Code Analysis Agent ---> 3 search/analysis tools
251
252
  ```
252
253
 
253
- **Embeddings**: Five models available via `cg set-embedding`. Hash (default, zero-dependency) through Qodo-Embed-1-1.5B (best quality, 6 GB). Neural models use raw `transformers` + `torch` — no sentence-transformers overhead. Models are cached in `~/.codegraph/models/`.
254
+ **Embeddings**: Five models available via `cg config set-embedding`. Hash (default, zero-dependency) through Qodo-Embed-1-1.5B (best quality, 6 GB). Neural models use raw `transformers` + `torch` — no sentence-transformers overhead. Models are cached in `~/.codegraph/models/`.
254
255
 
255
256
  **Parser**: tree-sitter grammars for Python, JavaScript, and TypeScript. Extracts modules, classes, functions, imports, and call relationships into a directed graph.
256
257
 
@@ -1,4 +1,4 @@
1
1
  """CodeGraph CLI package."""
2
2
 
3
3
  __all__ = ["__version__"]
4
- __version__ = "2.0.1"
4
+ __version__ = "2.1.2"
@@ -2,17 +2,66 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
+ import re
5
6
  from collections import deque
6
7
  from pathlib import Path
7
8
  from typing import Dict, List, Set
8
9
 
9
10
  from .embeddings import HashEmbeddingModel, TransformerEmbedder
10
11
  from .llm import LocalLLM
11
- from .models import ImpactReport
12
+ from .models import ImpactReport, Node
12
13
  from .parser import PythonGraphParser
13
14
  from .rag import RAGRetriever
14
15
  from .storage import GraphStore
15
16
 
17
# Matches one whole ``import x`` / ``from x import y`` line.  Leading spaces
# and tabs are allowed because the pattern is applied to function and class
# bodies, where any import statement is indented.
_IMPORT_RE = re.compile(r"^[ \t]*(?:from\s+\S+\s+)?import\s+.+$", re.MULTILINE)

# Maximum characters to keep for a single chunk's code body.
# Module-level nodes can be very large; truncating keeps embeddings
# focused on the symbol's signature + docstring + first N lines.
_MAX_CHUNK_CODE_CHARS = 1500

# Appended to the code body whenever it had to be cut at the limit above.
_TRUNCATION_MARKER = "\n# ... (truncated)"


def _build_chunk_text(node: Node) -> str:
    """Build the structured text that is embedded for *node*.

    The result is a newline-joined sequence of:

    - ``file: <node.file_path>``
    - ``symbol: <node.qualname>``
    - ``type: <node.node_type>``
    - ``doc: <docstring>`` (only when the node has a docstring)
    - the node's code body (only when non-empty)

    For every node type except ``"module"``, single-line import statements
    are removed from the code body and the remainder is stripped.  Only the
    first line of a parenthesised multi-line import is removed.  Any code
    body longer than ``_MAX_CHUNK_CODE_CHARS`` is cut at that length and
    suffixed with a truncation marker, modules included.

    Args:
        node: Object exposing ``file_path``, ``qualname``, ``node_type``,
            ``docstring`` and ``code`` attributes.

    Returns:
        The chunk text to pass to the embedding model.
    """
    parts: List[str] = [
        f"file: {node.file_path}",
        f"symbol: {node.qualname}",
        f"type: {node.node_type}",
    ]

    if node.docstring:
        parts.append(f"doc: {node.docstring.strip()}")

    # Tolerate a missing code body instead of failing on None.
    code = node.code or ""
    if node.node_type != "module":
        code = _IMPORT_RE.sub("", code).strip()

    # One truncation path for all node types, so a cut module is marked as
    # truncated just like a cut function or class.
    if len(code) > _MAX_CHUNK_CODE_CHARS:
        code = code[:_MAX_CHUNK_CODE_CHARS] + _TRUNCATION_MARKER

    if code:
        parts.append(code)

    return "\n".join(parts)
64
+
16
65
 
17
66
  class GraphAgent:
18
67
  """Responsible for parsing projects and maintaining graph memory."""
@@ -31,7 +80,7 @@ class GraphAgent:
31
80
  total_nodes = len(nodes)
32
81
 
33
82
  for idx, node in enumerate(nodes, 1):
34
- text = "\n".join([node.qualname, node.docstring, node.code])
83
+ text = _build_chunk_text(node)
35
84
  emb = self.embedding_model.embed_text(text)
36
85
  node_payload.append((node, emb))
37
86
 
@@ -43,13 +92,20 @@ class GraphAgent:
43
92
  if show_progress:
44
93
  print(f"\r📊 Indexing: {total_nodes}/{total_nodes} nodes (100%) ")
45
94
 
46
- self.store.insert_nodes(node_payload)
95
+ emb_model_key = getattr(self.embedding_model, 'model_key', 'hash')
96
+ emb_dim = getattr(self.embedding_model, 'dim', 256)
97
+
98
+ self.store.insert_nodes(node_payload, model_key=emb_model_key)
47
99
  self.store.insert_edges(edges)
100
+
101
+ # Record embedding model info in project metadata
48
102
  self.store.set_metadata(
49
103
  {
50
104
  "project_root": str(project_root),
51
105
  "node_count": len(nodes),
52
106
  "edge_count": len(edges),
107
+ "embedding_model": emb_model_key,
108
+ "embedding_dim": emb_dim,
53
109
  }
54
110
  )
55
111
  return {"nodes": len(nodes), "edges": len(edges)}
@@ -7,7 +7,7 @@ from typing import Optional
7
7
 
8
8
  from .chat_session import SessionManager
9
9
  from .codegen_agent import CodeGenAgent
10
- from .context_manager import assemble_context_for_llm, detect_intent
10
+ from .context_manager import SymbolMemory, assemble_context_for_llm, detect_intent
11
11
  from .llm import LocalLLM
12
12
  from .models_v2 import ChatSession, CodeProposal
13
13
  from .orchestrator import MCPOrchestrator
@@ -59,11 +59,60 @@ class ChatAgent:
59
59
  self.rag_retriever = rag_retriever
60
60
  self.session_manager = SessionManager()
61
61
 
62
+ # Symbol memory — tracks recently discussed symbols & files
63
+ # so we can skip redundant RAG queries.
64
+ self.symbol_memory = SymbolMemory()
65
+
62
66
  # Initialize specialized agents
63
67
  from .codegen_agent import CodeGenAgent
64
68
  from .refactor_agent import RefactorAgent
65
69
  self.codegen_agent = CodeGenAgent(context.store, llm, project_context=context)
66
70
  self.refactor_agent = RefactorAgent(context.store)
71
+
72
+ # Build enhanced system prompt with auto-context
73
+ self.system_prompt = self._build_system_prompt()
74
+
75
def _build_system_prompt(self) -> str:
    """Build the system prompt, enriched with project context when available.

    Starts from ``SYSTEM_PROMPT`` and appends a "Project Context" section
    built from ``self.context.get_project_summary()``: project name, source
    path, indexed file and symbol counts, and a function/class/module
    breakdown when ``node_types`` is present.  When
    ``self.context.has_source_access`` is true, it also lists up to five
    file names from ``self.context.list_directory(".")``, ordered by their
    ``"modified"`` value, newest first.

    Enrichment is best effort: any failure is logged at debug level and the
    prompt falls back to what could be built, never raising to the caller.

    Returns:
        The base prompt, with the project-context section appended when the
        summary could be read.
    """
    import logging  # local import: the module's import block is not in view

    log = logging.getLogger(__name__)
    base = SYSTEM_PROMPT

    try:
        summary = self.context.get_project_summary()
        parts = [
            "\n\nProject Context:",
            f"- Project: {summary.get('project_name', 'unknown')}",
            f"- Source: {summary.get('source_path', 'N/A')}",
            f"- Indexed: {summary.get('indexed_files', 0)} files, {summary.get('total_nodes', 0)} symbols",
        ]

        node_types = summary.get("node_types", {})
        if node_types:
            parts.append(
                f"- Breakdown: {node_types.get('function', 0)} functions, "
                f"{node_types.get('class', 0)} classes, "
                f"{node_types.get('module', 0)} modules"
            )

        # Recently modified files
        if self.context.has_source_access:
            try:
                items = self.context.list_directory(".")
                # .get() so one entry without a "type" key is skipped rather
                # than discarding the whole listing.
                files = [f for f in items if f.get("type") == "file"]
                files.sort(key=lambda f: f.get("modified", ""), reverse=True)
                recent = [f["name"] for f in files[:5]]
                if recent:
                    parts.append(f"- Recently modified: {', '.join(recent)}")
            except Exception:
                # The listing is optional; keep the rest of the context.
                log.debug("Could not list recently modified files", exc_info=True)

        return base + "\n".join(parts)
    except Exception:
        # Never let prompt enrichment break agent construction.
        log.debug("Could not build project context for system prompt", exc_info=True)
        return base
67
116
 
68
117
  def process_message(
69
118
  self,
@@ -72,6 +121,10 @@ class ChatAgent:
72
121
  ) -> str:
73
122
  """Process user message and generate response.
74
123
 
124
+ Note: The caller (REPL) is responsible for adding messages to
125
+ the session. This method does NOT add messages itself to avoid
126
+ duplicate entries.
127
+
75
128
  Args:
76
129
  user_message: User's message
77
130
  session: Current chat session
@@ -79,10 +132,6 @@ class ChatAgent:
79
132
  Returns:
80
133
  Assistant's response
81
134
  """
82
- # Add user message to session
83
- timestamp = datetime.now().isoformat()
84
- session.add_message("user", user_message, timestamp)
85
-
86
135
  # Detect intent
87
136
  intent = detect_intent(user_message)
88
137
 
@@ -103,9 +152,6 @@ class ChatAgent:
103
152
  # General chat - use LLM with RAG context
104
153
  response = self._handle_chat(user_message, session)
105
154
 
106
- # Add assistant response to session
107
- session.add_message("assistant", response, datetime.now().isoformat())
108
-
109
155
  # Save session
110
156
  self.session_manager.save_session(session)
111
157
 
@@ -289,13 +335,14 @@ class ChatAgent:
289
335
 
290
336
  def _handle_chat(self, message: str, session: ChatSession) -> str:
291
337
  """Handle general chat with LLM and RAG context."""
292
- # Assemble context using smart RAG strategy
338
+ # Assemble context using smart RAG strategy + symbol memory
293
339
  context_messages = assemble_context_for_llm(
294
340
  user_message=message,
295
341
  session=session,
296
342
  rag_retriever=self.rag_retriever,
297
- system_prompt=SYSTEM_PROMPT,
298
- max_tokens=8000
343
+ system_prompt=self.system_prompt,
344
+ max_tokens=8000,
345
+ symbol_memory=self.symbol_memory,
299
346
  )
300
347
 
301
348
  # Call LLM