codegraph-cli 2.1.1__tar.gz → 2.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/PKG-INFO +35 -24
- {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/README.md +23 -22
- {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/__init__.py +1 -1
- {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/agents.py +59 -3
- {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/chat_agent.py +58 -11
- codegraph_cli-2.1.2/codegraph_cli/cli.py +851 -0
- {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/cli_chat.py +200 -95
- {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/cli_diagnose.py +13 -2
- codegraph_cli-2.1.2/codegraph_cli/cli_docs.py +207 -0
- codegraph_cli-2.1.2/codegraph_cli/cli_explore.py +1053 -0
- codegraph_cli-2.1.2/codegraph_cli/cli_export.py +941 -0
- codegraph_cli-2.1.2/codegraph_cli/cli_groups.py +33 -0
- codegraph_cli-2.1.2/codegraph_cli/cli_health.py +316 -0
- codegraph_cli-2.1.2/codegraph_cli/cli_history.py +213 -0
- codegraph_cli-2.1.2/codegraph_cli/cli_onboard.py +380 -0
- codegraph_cli-2.1.2/codegraph_cli/cli_quickstart.py +256 -0
- {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/cli_refactor.py +17 -3
- {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/cli_setup.py +12 -12
- codegraph_cli-2.1.2/codegraph_cli/cli_suggestions.py +90 -0
- {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/cli_test.py +17 -3
- codegraph_cli-2.1.2/codegraph_cli/cli_tui.py +210 -0
- {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/cli_v2.py +24 -4
- codegraph_cli-2.1.2/codegraph_cli/cli_watch.py +158 -0
- codegraph_cli-2.1.2/codegraph_cli/cli_workflows.py +255 -0
- {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/codegen_agent.py +15 -1
- {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/config.py +18 -5
- {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/context_manager.py +117 -15
- {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/crew_agents.py +26 -7
- codegraph_cli-2.1.2/codegraph_cli/crew_chat.py +292 -0
- {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/crew_tools.py +21 -1
- {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/embeddings.py +95 -5
- {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/llm.py +42 -55
- {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/project_context.py +64 -1
- codegraph_cli-2.1.2/codegraph_cli/rag.py +463 -0
- {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/storage.py +310 -14
- {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/vector_store.py +110 -8
- {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli.egg-info/PKG-INFO +35 -24
- {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli.egg-info/SOURCES.txt +13 -0
- codegraph_cli-2.1.2/codegraph_cli.egg-info/entry_points.txt +2 -0
- {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli.egg-info/requires.txt +13 -1
- {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/pyproject.toml +15 -3
- {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/tests/test_cli.py +47 -47
- codegraph_cli-2.1.2/tests/test_cli_workflows.py +242 -0
- {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/tests/test_vector_store.py +3 -3
- codegraph_cli-2.1.1/codegraph_cli/cli.py +0 -336
- codegraph_cli-2.1.1/codegraph_cli/crew_chat.py +0 -163
- codegraph_cli-2.1.1/codegraph_cli/rag.py +0 -200
- codegraph_cli-2.1.1/codegraph_cli.egg-info/entry_points.txt +0 -2
- {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/LICENSE +0 -0
- {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/bug_detector.py +0 -0
- {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/chat_session.py +0 -0
- {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/config_manager.py +0 -0
- {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/diff_engine.py +0 -0
- {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/graph_export.py +0 -0
- {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/models.py +0 -0
- {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/models_v2.py +0 -0
- {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/orchestrator.py +0 -0
- {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/parser.py +0 -0
- {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/performance_analyzer.py +0 -0
- {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/refactor_agent.py +0 -0
- {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/security_scanner.py +0 -0
- {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/templates/graph_interactive.html +0 -0
- {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/testgen_agent.py +0 -0
- {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli/validation_engine.py +0 -0
- {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli.egg-info/dependency_links.txt +0 -0
- {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/codegraph_cli.egg-info/top_level.txt +0 -0
- {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/setup.cfg +0 -0
- {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/tests/test_agents.py +0 -0
- {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/tests/test_bug_detector.py +0 -0
- {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/tests/test_parser.py +0 -0
- {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/tests/test_security_scanner.py +0 -0
- {codegraph_cli-2.1.1 → codegraph_cli-2.1.2}/tests/test_storage.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: codegraph-cli
|
|
3
|
-
Version: 2.1.1
|
|
3
|
+
Version: 2.1.2
|
|
4
4
|
Summary: AI-powered code intelligence CLI with multi-agent analysis, impact graphs, and conversational coding.
|
|
5
5
|
Author-email: Ali Nasir <muhammadalinasir00786@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -35,22 +35,32 @@ Requires-Dist: tree-sitter>=0.24.0
|
|
|
35
35
|
Requires-Dist: tree-sitter-python>=0.23.0
|
|
36
36
|
Requires-Dist: tree-sitter-javascript>=0.23.0
|
|
37
37
|
Requires-Dist: tree-sitter-typescript>=0.23.0
|
|
38
|
-
Requires-Dist:
|
|
38
|
+
Requires-Dist: rich>=13.0.0
|
|
39
|
+
Requires-Dist: python-docx>=1.0.0
|
|
40
|
+
Requires-Dist: pydantic>=2.0.0
|
|
39
41
|
Provides-Extra: crew
|
|
40
42
|
Requires-Dist: crewai>=0.80.0; extra == "crew"
|
|
43
|
+
Provides-Extra: explore
|
|
44
|
+
Requires-Dist: starlette>=0.27.0; extra == "explore"
|
|
45
|
+
Requires-Dist: uvicorn>=0.24.0; extra == "explore"
|
|
41
46
|
Provides-Extra: dev
|
|
42
47
|
Requires-Dist: pytest>=7.4.0; extra == "dev"
|
|
43
48
|
Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
|
|
44
49
|
Requires-Dist: pytest-mock>=3.11.0; extra == "dev"
|
|
45
50
|
Requires-Dist: build>=1.0.0; extra == "dev"
|
|
46
51
|
Requires-Dist: twine>=5.0.0; extra == "dev"
|
|
52
|
+
Provides-Extra: watch
|
|
53
|
+
Requires-Dist: watchdog>=3.0.0; extra == "watch"
|
|
47
54
|
Provides-Extra: embeddings
|
|
48
55
|
Requires-Dist: torch>=2.0.0; extra == "embeddings"
|
|
49
56
|
Requires-Dist: transformers<5.0.0,>=4.48.0; extra == "embeddings"
|
|
50
57
|
Provides-Extra: all
|
|
51
58
|
Requires-Dist: crewai>=0.80.0; extra == "all"
|
|
59
|
+
Requires-Dist: starlette>=0.27.0; extra == "all"
|
|
60
|
+
Requires-Dist: uvicorn>=0.24.0; extra == "all"
|
|
52
61
|
Requires-Dist: torch>=2.0.0; extra == "all"
|
|
53
62
|
Requires-Dist: transformers<5.0.0,>=4.48.0; extra == "all"
|
|
63
|
+
Requires-Dist: watchdog>=3.0.0; extra == "all"
|
|
54
64
|
Dynamic: license-file
|
|
55
65
|
|
|
56
66
|
# CodeGraph CLI
|
|
@@ -59,7 +69,8 @@ Dynamic: license-file
|
|
|
59
69
|
|
|
60
70
|
[](LICENSE)
|
|
61
71
|
[](https://www.python.org)
|
|
62
|
-
[](https://github.com/al1-nasir/codegraph-cli)
|
|
73
|
+
[](https://github.com/al1-nasir/codegraph-cli/actions/workflows/ci.yml)
|
|
63
74
|
|
|
64
75
|
---
|
|
65
76
|
|
|
@@ -117,15 +128,15 @@ pip install -e ".[dev]"
|
|
|
117
128
|
### 1. Configure your LLM provider
|
|
118
129
|
|
|
119
130
|
```bash
|
|
120
|
-
cg setup
|
|
131
|
+
cg config setup
|
|
121
132
|
```
|
|
122
133
|
|
|
123
134
|
This runs an interactive wizard that writes configuration to `~/.codegraph/config.toml`. Alternatively, switch providers directly:
|
|
124
135
|
|
|
125
136
|
```bash
|
|
126
|
-
cg set-llm openrouter
|
|
127
|
-
cg set-llm groq
|
|
128
|
-
cg set-llm ollama
|
|
137
|
+
cg config set-llm openrouter
|
|
138
|
+
cg config set-llm groq
|
|
139
|
+
cg config set-llm ollama
|
|
129
140
|
```
|
|
130
141
|
|
|
131
142
|
### 2. Index a project
|
|
@@ -152,18 +163,18 @@ cg chat start --crew # multi-agent mode
|
|
|
152
163
|
|
|
153
164
|
| Provider | Type | Configuration |
|
|
154
165
|
|----------|------|---------------|
|
|
155
|
-
| Ollama | Local, free | `cg set-llm ollama` |
|
|
156
|
-
| Groq | Cloud, free tier | `cg set-llm groq` |
|
|
157
|
-
| OpenAI | Cloud | `cg set-llm openai` |
|
|
158
|
-
| Anthropic | Cloud | `cg set-llm anthropic` |
|
|
159
|
-
| Gemini | Cloud | `cg set-llm gemini` |
|
|
160
|
-
| OpenRouter | Cloud, multi-model | `cg set-llm openrouter` |
|
|
166
|
+
| Ollama | Local, free | `cg config set-llm ollama` |
|
|
167
|
+
| Groq | Cloud, free tier | `cg config set-llm groq` |
|
|
168
|
+
| OpenAI | Cloud | `cg config set-llm openai` |
|
|
169
|
+
| Anthropic | Cloud | `cg config set-llm anthropic` |
|
|
170
|
+
| Gemini | Cloud | `cg config set-llm gemini` |
|
|
171
|
+
| OpenRouter | Cloud, multi-model | `cg config set-llm openrouter` |
|
|
161
172
|
|
|
162
173
|
All configuration is stored in `~/.codegraph/config.toml`. No environment variables required.
|
|
163
174
|
|
|
164
175
|
```bash
|
|
165
|
-
cg show-llm # view current provider, model, and endpoint
|
|
166
|
-
cg unset-llm # reset to defaults
|
|
176
|
+
cg config show-llm # view current provider, model, and endpoint
|
|
177
|
+
cg config unset-llm # reset to defaults
|
|
167
178
|
```
|
|
168
179
|
|
|
169
180
|
---
|
|
@@ -174,18 +185,18 @@ CodeGraph supports configurable embedding models for semantic code search. Choos
|
|
|
174
185
|
|
|
175
186
|
| Model | Download | Dim | Quality | Command |
|
|
176
187
|
|-------|----------|-----|---------|---------|
|
|
177
|
-
| hash | 0 bytes | 256 | Keyword-only | `cg set-embedding hash` |
|
|
178
|
-
| minilm | ~80 MB | 384 | Decent | `cg set-embedding minilm` |
|
|
179
|
-
| bge-base | ~440 MB | 768 | Good | `cg set-embedding bge-base` |
|
|
180
|
-
| jina-code | ~550 MB | 768 | Code-aware | `cg set-embedding jina-code` |
|
|
181
|
-
| qodo-1.5b | ~6.2 GB | 1536 | Best | `cg set-embedding qodo-1.5b` |
|
|
188
|
+
| hash | 0 bytes | 256 | Keyword-only | `cg config set-embedding hash` |
|
|
189
|
+
| minilm | ~80 MB | 384 | Decent | `cg config set-embedding minilm` |
|
|
190
|
+
| bge-base | ~440 MB | 768 | Good | `cg config set-embedding bge-base` |
|
|
191
|
+
| jina-code | ~550 MB | 768 | Code-aware | `cg config set-embedding jina-code` |
|
|
192
|
+
| qodo-1.5b | ~6.2 GB | 1536 | Best | `cg config set-embedding qodo-1.5b` |
|
|
182
193
|
|
|
183
194
|
The default is `hash` (zero-dependency, no download). Neural models require the `[embeddings]` extra and are downloaded on first use from HuggingFace.
|
|
184
195
|
|
|
185
196
|
```bash
|
|
186
|
-
cg set-embedding jina-code # switch to a neural model
|
|
187
|
-
cg show-embedding # view current model and all options
|
|
188
|
-
cg unset-embedding # reset to hash default
|
|
197
|
+
cg config set-embedding jina-code # switch to a neural model
|
|
198
|
+
cg config show-embedding # view current model and all options
|
|
199
|
+
cg config unset-embedding # reset to hash default
|
|
189
200
|
```
|
|
190
201
|
|
|
191
202
|
After changing the embedding model, re-index your project:
|
|
@@ -305,7 +316,7 @@ CLI Layer (Typer)
|
|
|
305
316
|
+-- Code Analysis Agent ---> 3 search/analysis tools
|
|
306
317
|
```
|
|
307
318
|
|
|
308
|
-
**Embeddings**: Five models available via `cg set-embedding`. Hash (default, zero-dependency) through Qodo-Embed-1-1.5B (best quality, 6 GB). Neural models use raw `transformers` + `torch` — no sentence-transformers overhead. Models are cached in `~/.codegraph/models/`.
|
|
319
|
+
**Embeddings**: Five models available via `cg config set-embedding`. Hash (default, zero-dependency) through Qodo-Embed-1-1.5B (best quality, 6 GB). Neural models use raw `transformers` + `torch` — no sentence-transformers overhead. Models are cached in `~/.codegraph/models/`.
|
|
309
320
|
|
|
310
321
|
**Parser**: tree-sitter grammars for Python, JavaScript, and TypeScript. Extracts modules, classes, functions, imports, and call relationships into a directed graph.
|
|
311
322
|
|
|
@@ -4,7 +4,8 @@
|
|
|
4
4
|
|
|
5
5
|
[](LICENSE)
|
|
6
6
|
[](https://www.python.org)
|
|
7
|
-
[](https://github.com/al1-nasir/codegraph-cli)
|
|
8
|
+
[](https://github.com/al1-nasir/codegraph-cli/actions/workflows/ci.yml)
|
|
8
9
|
|
|
9
10
|
---
|
|
10
11
|
|
|
@@ -62,15 +63,15 @@ pip install -e ".[dev]"
|
|
|
62
63
|
### 1. Configure your LLM provider
|
|
63
64
|
|
|
64
65
|
```bash
|
|
65
|
-
cg setup
|
|
66
|
+
cg config setup
|
|
66
67
|
```
|
|
67
68
|
|
|
68
69
|
This runs an interactive wizard that writes configuration to `~/.codegraph/config.toml`. Alternatively, switch providers directly:
|
|
69
70
|
|
|
70
71
|
```bash
|
|
71
|
-
cg set-llm openrouter
|
|
72
|
-
cg set-llm groq
|
|
73
|
-
cg set-llm ollama
|
|
72
|
+
cg config set-llm openrouter
|
|
73
|
+
cg config set-llm groq
|
|
74
|
+
cg config set-llm ollama
|
|
74
75
|
```
|
|
75
76
|
|
|
76
77
|
### 2. Index a project
|
|
@@ -97,18 +98,18 @@ cg chat start --crew # multi-agent mode
|
|
|
97
98
|
|
|
98
99
|
| Provider | Type | Configuration |
|
|
99
100
|
|----------|------|---------------|
|
|
100
|
-
| Ollama | Local, free | `cg set-llm ollama` |
|
|
101
|
-
| Groq | Cloud, free tier | `cg set-llm groq` |
|
|
102
|
-
| OpenAI | Cloud | `cg set-llm openai` |
|
|
103
|
-
| Anthropic | Cloud | `cg set-llm anthropic` |
|
|
104
|
-
| Gemini | Cloud | `cg set-llm gemini` |
|
|
105
|
-
| OpenRouter | Cloud, multi-model | `cg set-llm openrouter` |
|
|
101
|
+
| Ollama | Local, free | `cg config set-llm ollama` |
|
|
102
|
+
| Groq | Cloud, free tier | `cg config set-llm groq` |
|
|
103
|
+
| OpenAI | Cloud | `cg config set-llm openai` |
|
|
104
|
+
| Anthropic | Cloud | `cg config set-llm anthropic` |
|
|
105
|
+
| Gemini | Cloud | `cg config set-llm gemini` |
|
|
106
|
+
| OpenRouter | Cloud, multi-model | `cg config set-llm openrouter` |
|
|
106
107
|
|
|
107
108
|
All configuration is stored in `~/.codegraph/config.toml`. No environment variables required.
|
|
108
109
|
|
|
109
110
|
```bash
|
|
110
|
-
cg show-llm # view current provider, model, and endpoint
|
|
111
|
-
cg unset-llm # reset to defaults
|
|
111
|
+
cg config show-llm # view current provider, model, and endpoint
|
|
112
|
+
cg config unset-llm # reset to defaults
|
|
112
113
|
```
|
|
113
114
|
|
|
114
115
|
---
|
|
@@ -119,18 +120,18 @@ CodeGraph supports configurable embedding models for semantic code search. Choos
|
|
|
119
120
|
|
|
120
121
|
| Model | Download | Dim | Quality | Command |
|
|
121
122
|
|-------|----------|-----|---------|---------|
|
|
122
|
-
| hash | 0 bytes | 256 | Keyword-only | `cg set-embedding hash` |
|
|
123
|
-
| minilm | ~80 MB | 384 | Decent | `cg set-embedding minilm` |
|
|
124
|
-
| bge-base | ~440 MB | 768 | Good | `cg set-embedding bge-base` |
|
|
125
|
-
| jina-code | ~550 MB | 768 | Code-aware | `cg set-embedding jina-code` |
|
|
126
|
-
| qodo-1.5b | ~6.2 GB | 1536 | Best | `cg set-embedding qodo-1.5b` |
|
|
123
|
+
| hash | 0 bytes | 256 | Keyword-only | `cg config set-embedding hash` |
|
|
124
|
+
| minilm | ~80 MB | 384 | Decent | `cg config set-embedding minilm` |
|
|
125
|
+
| bge-base | ~440 MB | 768 | Good | `cg config set-embedding bge-base` |
|
|
126
|
+
| jina-code | ~550 MB | 768 | Code-aware | `cg config set-embedding jina-code` |
|
|
127
|
+
| qodo-1.5b | ~6.2 GB | 1536 | Best | `cg config set-embedding qodo-1.5b` |
|
|
127
128
|
|
|
128
129
|
The default is `hash` (zero-dependency, no download). Neural models require the `[embeddings]` extra and are downloaded on first use from HuggingFace.
|
|
129
130
|
|
|
130
131
|
```bash
|
|
131
|
-
cg set-embedding jina-code # switch to a neural model
|
|
132
|
-
cg show-embedding # view current model and all options
|
|
133
|
-
cg unset-embedding # reset to hash default
|
|
132
|
+
cg config set-embedding jina-code # switch to a neural model
|
|
133
|
+
cg config show-embedding # view current model and all options
|
|
134
|
+
cg config unset-embedding # reset to hash default
|
|
134
135
|
```
|
|
135
136
|
|
|
136
137
|
After changing the embedding model, re-index your project:
|
|
@@ -250,7 +251,7 @@ CLI Layer (Typer)
|
|
|
250
251
|
+-- Code Analysis Agent ---> 3 search/analysis tools
|
|
251
252
|
```
|
|
252
253
|
|
|
253
|
-
**Embeddings**: Five models available via `cg set-embedding`. Hash (default, zero-dependency) through Qodo-Embed-1-1.5B (best quality, 6 GB). Neural models use raw `transformers` + `torch` — no sentence-transformers overhead. Models are cached in `~/.codegraph/models/`.
|
|
254
|
+
**Embeddings**: Five models available via `cg config set-embedding`. Hash (default, zero-dependency) through Qodo-Embed-1-1.5B (best quality, 6 GB). Neural models use raw `transformers` + `torch` — no sentence-transformers overhead. Models are cached in `~/.codegraph/models/`.
|
|
254
255
|
|
|
255
256
|
**Parser**: tree-sitter grammars for Python, JavaScript, and TypeScript. Extracts modules, classes, functions, imports, and call relationships into a directed graph.
|
|
256
257
|
|
|
@@ -2,17 +2,66 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
+
import re
|
|
5
6
|
from collections import deque
|
|
6
7
|
from pathlib import Path
|
|
7
8
|
from typing import Dict, List, Set
|
|
8
9
|
|
|
9
10
|
from .embeddings import HashEmbeddingModel, TransformerEmbedder
|
|
10
11
|
from .llm import LocalLLM
|
|
11
|
-
from .models import ImpactReport
|
|
12
|
+
from .models import ImpactReport, Node
|
|
12
13
|
from .parser import PythonGraphParser
|
|
13
14
|
from .rag import RAGRetriever
|
|
14
15
|
from .storage import GraphStore
|
|
15
16
|
|
|
17
|
+
# Matches one whole ``import x`` / ``from x import y`` line at any
# indentation, so imports nested inside function or class bodies are
# stripped too.  Only the first physical line of a parenthesised
# multi-line import is matched.
_IMPORT_RE = re.compile(
    r"^[ \t]*(?:from\s+\S+\s+)?import\s+.+$", re.MULTILINE
)

# Maximum characters to keep for a single chunk's code body.
# Module-level nodes can be very large; truncating keeps embeddings
# focused on the symbol's signature + docstring + first N lines.
_MAX_CHUNK_CODE_CHARS = 1500


def _build_chunk_text(node: Node) -> str:
    """Build the structured text that is embedded for ``node``.

    The result is a newline-joined sequence of:

    - ``file: <node.file_path>``
    - ``symbol: <node.qualname>``
    - ``type: <node.node_type>``
    - ``doc: <docstring>`` (only when the docstring is non-blank)
    - the code body (only when non-empty)

    For every node type except ``"module"`` the import lines are removed
    from the code body and the remainder is whitespace-stripped.  Any code
    body longer than ``_MAX_CHUNK_CODE_CHARS`` is cut to that length and
    suffixed with a ``# ... (truncated)`` marker, for modules as well as
    for other node types.

    Args:
        node: Object exposing ``file_path``, ``qualname``, ``node_type``,
            ``docstring`` and ``code``.  ``docstring`` and ``code`` may be
            ``None`` or empty.

    Returns:
        The chunk text to pass to the embedding model.
    """
    parts: List[str] = [
        f"file: {node.file_path}",
        f"symbol: {node.qualname}",
        f"type: {node.node_type}",
    ]

    docstring = (node.docstring or "").strip()
    if docstring:
        parts.append(f"doc: {docstring}")

    # Tolerate nodes that carry no source text at all.
    code = node.code or ""
    if node.node_type != "module":
        # Imports add noise to a symbol-level chunk; modules keep theirs.
        code = _IMPORT_RE.sub("", code).strip()

    # Single truncation path so every shortened chunk carries the marker.
    if len(code) > _MAX_CHUNK_CODE_CHARS:
        code = code[:_MAX_CHUNK_CODE_CHARS] + "\n# ... (truncated)"

    if code:
        parts.append(code)

    return "\n".join(parts)
|
|
64
|
+
|
|
16
65
|
|
|
17
66
|
class GraphAgent:
|
|
18
67
|
"""Responsible for parsing projects and maintaining graph memory."""
|
|
@@ -31,7 +80,7 @@ class GraphAgent:
|
|
|
31
80
|
total_nodes = len(nodes)
|
|
32
81
|
|
|
33
82
|
for idx, node in enumerate(nodes, 1):
|
|
34
|
-
text =
|
|
83
|
+
text = _build_chunk_text(node)
|
|
35
84
|
emb = self.embedding_model.embed_text(text)
|
|
36
85
|
node_payload.append((node, emb))
|
|
37
86
|
|
|
@@ -43,13 +92,20 @@ class GraphAgent:
|
|
|
43
92
|
if show_progress:
|
|
44
93
|
print(f"\r📊 Indexing: {total_nodes}/{total_nodes} nodes (100%) ")
|
|
45
94
|
|
|
46
|
-
self.
|
|
95
|
+
emb_model_key = getattr(self.embedding_model, 'model_key', 'hash')
|
|
96
|
+
emb_dim = getattr(self.embedding_model, 'dim', 256)
|
|
97
|
+
|
|
98
|
+
self.store.insert_nodes(node_payload, model_key=emb_model_key)
|
|
47
99
|
self.store.insert_edges(edges)
|
|
100
|
+
|
|
101
|
+
# Record embedding model info in project metadata
|
|
48
102
|
self.store.set_metadata(
|
|
49
103
|
{
|
|
50
104
|
"project_root": str(project_root),
|
|
51
105
|
"node_count": len(nodes),
|
|
52
106
|
"edge_count": len(edges),
|
|
107
|
+
"embedding_model": emb_model_key,
|
|
108
|
+
"embedding_dim": emb_dim,
|
|
53
109
|
}
|
|
54
110
|
)
|
|
55
111
|
return {"nodes": len(nodes), "edges": len(edges)}
|
|
@@ -7,7 +7,7 @@ from typing import Optional
|
|
|
7
7
|
|
|
8
8
|
from .chat_session import SessionManager
|
|
9
9
|
from .codegen_agent import CodeGenAgent
|
|
10
|
-
from .context_manager import assemble_context_for_llm, detect_intent
|
|
10
|
+
from .context_manager import SymbolMemory, assemble_context_for_llm, detect_intent
|
|
11
11
|
from .llm import LocalLLM
|
|
12
12
|
from .models_v2 import ChatSession, CodeProposal
|
|
13
13
|
from .orchestrator import MCPOrchestrator
|
|
@@ -59,11 +59,60 @@ class ChatAgent:
|
|
|
59
59
|
self.rag_retriever = rag_retriever
|
|
60
60
|
self.session_manager = SessionManager()
|
|
61
61
|
|
|
62
|
+
# Symbol memory — tracks recently discussed symbols & files
|
|
63
|
+
# so we can skip redundant RAG queries.
|
|
64
|
+
self.symbol_memory = SymbolMemory()
|
|
65
|
+
|
|
62
66
|
# Initialize specialized agents
|
|
63
67
|
from .codegen_agent import CodeGenAgent
|
|
64
68
|
from .refactor_agent import RefactorAgent
|
|
65
69
|
self.codegen_agent = CodeGenAgent(context.store, llm, project_context=context)
|
|
66
70
|
self.refactor_agent = RefactorAgent(context.store)
|
|
71
|
+
|
|
72
|
+
# Build enhanced system prompt with auto-context
|
|
73
|
+
self.system_prompt = self._build_system_prompt()
|
|
74
|
+
|
|
75
|
+
def _build_system_prompt(self) -> str:
    """Return ``SYSTEM_PROMPT`` extended with a short project overview.

    The overview is assembled from ``self.context.get_project_summary()``
    (project name, source path, indexed file and symbol counts, and an
    optional function/class/module breakdown).  When the context reports
    source access, the names of up to five files from
    ``self.context.list_directory(".")`` are added, ordered by their
    ``"modified"`` value, newest first.

    This is best-effort: a failure while listing files only omits that
    line, and any other failure returns the plain ``SYSTEM_PROMPT``.
    """
    prompt = SYSTEM_PROMPT

    try:
        info = self.context.get_project_summary()

        lines = ["\n\nProject Context:"]
        lines.append(f"- Project: {info.get('project_name', 'unknown')}")
        lines.append(f"- Source: {info.get('source_path', 'N/A')}")
        lines.append(
            f"- Indexed: {info.get('indexed_files', 0)} files, "
            f"{info.get('total_nodes', 0)} symbols"
        )

        counts = info.get("node_types", {})
        if counts:
            functions = counts.get('function', 0)
            classes = counts.get('class', 0)
            modules = counts.get('module', 0)
            lines.append(
                f"- Breakdown: {functions} functions, "
                f"{classes} classes, "
                f"{modules} modules"
            )

        # Recently modified files (skipped silently if listing fails)
        if self.context.has_source_access:
            try:
                entries = self.context.list_directory(".")
                only_files = [e for e in entries if e["type"] == "file"]
                newest_first = sorted(
                    only_files,
                    key=lambda e: e.get("modified", ""),
                    reverse=True,
                )
                recent = [e["name"] for e in newest_first[:5]]
                if recent:
                    lines.append(f"- Recently modified: {', '.join(recent)}")
            except Exception:
                pass

        return prompt + "\n".join(lines)
    except Exception:
        return prompt
|
|
67
116
|
|
|
68
117
|
def process_message(
|
|
69
118
|
self,
|
|
@@ -72,6 +121,10 @@ class ChatAgent:
|
|
|
72
121
|
) -> str:
|
|
73
122
|
"""Process user message and generate response.
|
|
74
123
|
|
|
124
|
+
Note: The caller (REPL) is responsible for adding messages to
|
|
125
|
+
the session. This method does NOT add messages itself to avoid
|
|
126
|
+
duplicate entries.
|
|
127
|
+
|
|
75
128
|
Args:
|
|
76
129
|
user_message: User's message
|
|
77
130
|
session: Current chat session
|
|
@@ -79,10 +132,6 @@ class ChatAgent:
|
|
|
79
132
|
Returns:
|
|
80
133
|
Assistant's response
|
|
81
134
|
"""
|
|
82
|
-
# Add user message to session
|
|
83
|
-
timestamp = datetime.now().isoformat()
|
|
84
|
-
session.add_message("user", user_message, timestamp)
|
|
85
|
-
|
|
86
135
|
# Detect intent
|
|
87
136
|
intent = detect_intent(user_message)
|
|
88
137
|
|
|
@@ -103,9 +152,6 @@ class ChatAgent:
|
|
|
103
152
|
# General chat - use LLM with RAG context
|
|
104
153
|
response = self._handle_chat(user_message, session)
|
|
105
154
|
|
|
106
|
-
# Add assistant response to session
|
|
107
|
-
session.add_message("assistant", response, datetime.now().isoformat())
|
|
108
|
-
|
|
109
155
|
# Save session
|
|
110
156
|
self.session_manager.save_session(session)
|
|
111
157
|
|
|
@@ -289,13 +335,14 @@ class ChatAgent:
|
|
|
289
335
|
|
|
290
336
|
def _handle_chat(self, message: str, session: ChatSession) -> str:
|
|
291
337
|
"""Handle general chat with LLM and RAG context."""
|
|
292
|
-
# Assemble context using smart RAG strategy
|
|
338
|
+
# Assemble context using smart RAG strategy + symbol memory
|
|
293
339
|
context_messages = assemble_context_for_llm(
|
|
294
340
|
user_message=message,
|
|
295
341
|
session=session,
|
|
296
342
|
rag_retriever=self.rag_retriever,
|
|
297
|
-
system_prompt=
|
|
298
|
-
max_tokens=8000
|
|
343
|
+
system_prompt=self.system_prompt,
|
|
344
|
+
max_tokens=8000,
|
|
345
|
+
symbol_memory=self.symbol_memory,
|
|
299
346
|
)
|
|
300
347
|
|
|
301
348
|
# Call LLM
|