codegraph-cli 2.1.0__py3-none-any.whl → 2.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codegraph_cli/__init__.py +1 -1
- codegraph_cli/agents.py +59 -3
- codegraph_cli/chat_agent.py +58 -11
- codegraph_cli/cli.py +569 -54
- codegraph_cli/cli_chat.py +204 -94
- codegraph_cli/cli_diagnose.py +13 -2
- codegraph_cli/cli_docs.py +207 -0
- codegraph_cli/cli_explore.py +1053 -0
- codegraph_cli/cli_export.py +941 -0
- codegraph_cli/cli_groups.py +33 -0
- codegraph_cli/cli_health.py +316 -0
- codegraph_cli/cli_history.py +213 -0
- codegraph_cli/cli_onboard.py +380 -0
- codegraph_cli/cli_quickstart.py +256 -0
- codegraph_cli/cli_refactor.py +17 -3
- codegraph_cli/cli_setup.py +12 -12
- codegraph_cli/cli_suggestions.py +90 -0
- codegraph_cli/cli_test.py +17 -3
- codegraph_cli/cli_tui.py +210 -0
- codegraph_cli/cli_v2.py +24 -4
- codegraph_cli/cli_watch.py +158 -0
- codegraph_cli/cli_workflows.py +255 -0
- codegraph_cli/codegen_agent.py +15 -1
- codegraph_cli/config.py +18 -5
- codegraph_cli/context_manager.py +117 -15
- codegraph_cli/crew_agents.py +32 -8
- codegraph_cli/crew_chat.py +146 -13
- codegraph_cli/crew_tools.py +30 -2
- codegraph_cli/embeddings.py +95 -5
- codegraph_cli/llm.py +42 -55
- codegraph_cli/project_context.py +64 -1
- codegraph_cli/rag.py +282 -19
- codegraph_cli/storage.py +310 -14
- codegraph_cli/vector_store.py +110 -8
- {codegraph_cli-2.1.0.dist-info → codegraph_cli-2.1.2.dist-info}/METADATA +75 -21
- codegraph_cli-2.1.2.dist-info/RECORD +55 -0
- codegraph_cli-2.1.2.dist-info/entry_points.txt +2 -0
- codegraph_cli-2.1.0.dist-info/RECORD +0 -43
- codegraph_cli-2.1.0.dist-info/entry_points.txt +0 -2
- {codegraph_cli-2.1.0.dist-info → codegraph_cli-2.1.2.dist-info}/WHEEL +0 -0
- {codegraph_cli-2.1.0.dist-info → codegraph_cli-2.1.2.dist-info}/licenses/LICENSE +0 -0
- {codegraph_cli-2.1.0.dist-info → codegraph_cli-2.1.2.dist-info}/top_level.txt +0 -0
codegraph_cli/__init__.py
CHANGED
codegraph_cli/agents.py
CHANGED
|
@@ -2,17 +2,66 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
+
import re
|
|
5
6
|
from collections import deque
|
|
6
7
|
from pathlib import Path
|
|
7
8
|
from typing import Dict, List, Set
|
|
8
9
|
|
|
9
10
|
from .embeddings import HashEmbeddingModel, TransformerEmbedder
|
|
10
11
|
from .llm import LocalLLM
|
|
11
|
-
from .models import ImpactReport
|
|
12
|
+
from .models import ImpactReport, Node
|
|
12
13
|
from .parser import PythonGraphParser
|
|
13
14
|
from .rag import RAGRetriever
|
|
14
15
|
from .storage import GraphStore
|
|
15
16
|
|
|
17
|
+
# Regex matching a single-line ``import ...`` / ``from ... import ...``
# statement.  Leading whitespace is allowed because imports that live inside
# a function, method or class body are always indented; a pattern anchored
# at column 0 would never match them.  Only spaces/tabs are accepted between
# tokens so a match can never spill over onto the following line.
_IMPORT_RE = re.compile(
    r"^[ \t]*(?:from[ \t]+\S+[ \t]+)?import[ \t]+.+$", re.MULTILINE
)

# Maximum characters to keep for a single chunk's code body.
# Module-level nodes can be very large; truncating keeps embeddings
# focused on the symbol's signature + docstring + first N lines.
_MAX_CHUNK_CODE_CHARS = 1500

# Suffix appended to any code body that was cut at _MAX_CHUNK_CODE_CHARS.
_TRUNCATION_MARKER = "\n# ... (truncated)"


def _build_chunk_text(node: Node) -> str:
    """Build the structured text that is embedded for *node*.

    The result is a newline-joined sequence of:

    - ``file: <node.file_path>``
    - ``symbol: <node.qualname>``
    - ``type: <node.node_type>``
    - ``doc: <docstring>`` (only when the node has a non-blank docstring)
    - the node's code body (only when non-empty)

    For every node type except ``"module"``, import statements are removed
    from the code body and the body is stripped of surrounding whitespace.
    Module code is kept verbatim.  Any code body longer than
    ``_MAX_CHUNK_CODE_CHARS`` is cut at that length and suffixed with a
    ``# ... (truncated)`` marker.

    Args:
        node: Graph node to describe.  Must expose ``file_path``,
            ``qualname``, ``node_type``, ``docstring`` and ``code``;
            ``docstring`` and ``code`` may be ``None`` or empty.

    Returns:
        The chunk text to pass to the embedding model.
    """
    parts: List[str] = [
        f"file: {node.file_path}",
        f"symbol: {node.qualname}",
        f"type: {node.node_type}",
    ]

    # Skip blank docstrings so we never emit a dangling "doc: " line.
    docstring = (node.docstring or "").strip()
    if docstring:
        parts.append(f"doc: {docstring}")

    # A node without source text contributes only its header lines.
    code = node.code or ""
    if node.node_type != "module":
        # Imports add noise to a symbol-level chunk; modules keep theirs
        # because the import list characterises the module itself.
        code = _IMPORT_RE.sub("", code).strip()

    # One truncation path for all node types, so a cut module body is
    # marked as truncated just like any other oversized chunk.
    if len(code) > _MAX_CHUNK_CODE_CHARS:
        code = code[:_MAX_CHUNK_CODE_CHARS] + _TRUNCATION_MARKER

    if code:
        parts.append(code)

    return "\n".join(parts)
|
|
64
|
+
|
|
16
65
|
|
|
17
66
|
class GraphAgent:
|
|
18
67
|
"""Responsible for parsing projects and maintaining graph memory."""
|
|
@@ -31,7 +80,7 @@ class GraphAgent:
|
|
|
31
80
|
total_nodes = len(nodes)
|
|
32
81
|
|
|
33
82
|
for idx, node in enumerate(nodes, 1):
|
|
34
|
-
text =
|
|
83
|
+
text = _build_chunk_text(node)
|
|
35
84
|
emb = self.embedding_model.embed_text(text)
|
|
36
85
|
node_payload.append((node, emb))
|
|
37
86
|
|
|
@@ -43,13 +92,20 @@ class GraphAgent:
|
|
|
43
92
|
if show_progress:
|
|
44
93
|
print(f"\r📊 Indexing: {total_nodes}/{total_nodes} nodes (100%) ")
|
|
45
94
|
|
|
46
|
-
self.
|
|
95
|
+
emb_model_key = getattr(self.embedding_model, 'model_key', 'hash')
|
|
96
|
+
emb_dim = getattr(self.embedding_model, 'dim', 256)
|
|
97
|
+
|
|
98
|
+
self.store.insert_nodes(node_payload, model_key=emb_model_key)
|
|
47
99
|
self.store.insert_edges(edges)
|
|
100
|
+
|
|
101
|
+
# Record embedding model info in project metadata
|
|
48
102
|
self.store.set_metadata(
|
|
49
103
|
{
|
|
50
104
|
"project_root": str(project_root),
|
|
51
105
|
"node_count": len(nodes),
|
|
52
106
|
"edge_count": len(edges),
|
|
107
|
+
"embedding_model": emb_model_key,
|
|
108
|
+
"embedding_dim": emb_dim,
|
|
53
109
|
}
|
|
54
110
|
)
|
|
55
111
|
return {"nodes": len(nodes), "edges": len(edges)}
|
codegraph_cli/chat_agent.py
CHANGED
|
@@ -7,7 +7,7 @@ from typing import Optional
|
|
|
7
7
|
|
|
8
8
|
from .chat_session import SessionManager
|
|
9
9
|
from .codegen_agent import CodeGenAgent
|
|
10
|
-
from .context_manager import assemble_context_for_llm, detect_intent
|
|
10
|
+
from .context_manager import SymbolMemory, assemble_context_for_llm, detect_intent
|
|
11
11
|
from .llm import LocalLLM
|
|
12
12
|
from .models_v2 import ChatSession, CodeProposal
|
|
13
13
|
from .orchestrator import MCPOrchestrator
|
|
@@ -59,11 +59,60 @@ class ChatAgent:
|
|
|
59
59
|
self.rag_retriever = rag_retriever
|
|
60
60
|
self.session_manager = SessionManager()
|
|
61
61
|
|
|
62
|
+
# Symbol memory — tracks recently discussed symbols & files
|
|
63
|
+
# so we can skip redundant RAG queries.
|
|
64
|
+
self.symbol_memory = SymbolMemory()
|
|
65
|
+
|
|
62
66
|
# Initialize specialized agents
|
|
63
67
|
from .codegen_agent import CodeGenAgent
|
|
64
68
|
from .refactor_agent import RefactorAgent
|
|
65
69
|
self.codegen_agent = CodeGenAgent(context.store, llm, project_context=context)
|
|
66
70
|
self.refactor_agent = RefactorAgent(context.store)
|
|
71
|
+
|
|
72
|
+
# Build enhanced system prompt with auto-context
|
|
73
|
+
self.system_prompt = self._build_system_prompt()
|
|
74
|
+
|
|
75
|
+
def _build_system_prompt(self) -> str:
    """Build the system prompt, enriched with project context when available.

    Starts from ``SYSTEM_PROMPT`` and appends a "Project Context" section
    built from ``self.context.get_project_summary()``: project name, source
    path, indexed file and symbol counts and, when present, a breakdown of
    function / class / module counts.  If ``self.context.has_source_access``
    is true, up to five file names from ``self.context.list_directory(".")``
    are appended, ordered by their ``"modified"`` value, newest first.

    Enrichment is best-effort: a failure while listing files only omits the
    "Recently modified" line, and any other failure falls back to the plain
    ``SYSTEM_PROMPT``.  Failures are logged at debug level, not raised.

    Returns:
        The system prompt string to send to the LLM.
    """
    import logging

    log = logging.getLogger(__name__)
    base = SYSTEM_PROMPT

    try:
        summary = self.context.get_project_summary()
        parts = [
            "\n\nProject Context:",
            f"- Project: {summary.get('project_name', 'unknown')}",
            f"- Source: {summary.get('source_path', 'N/A')}",
            f"- Indexed: {summary.get('indexed_files', 0)} files, {summary.get('total_nodes', 0)} symbols",
        ]

        node_types = summary.get("node_types", {})
        if node_types:
            parts.append(
                f"- Breakdown: {node_types.get('function', 0)} functions, "
                f"{node_types.get('class', 0)} classes, "
                f"{node_types.get('module', 0)} modules"
            )

        # Recently modified files
        if self.context.has_source_access:
            try:
                items = self.context.list_directory(".")
                # Tolerate entries lacking "type"/"name" so one malformed
                # entry does not discard the whole line.
                files = [
                    f for f in items
                    if f.get("type") == "file" and "name" in f
                ]
                # NOTE(review): assumes "modified" values are mutually
                # comparable (e.g. ISO strings) - confirm against
                # list_directory; a mismatch lands in the handler below.
                files.sort(key=lambda f: f.get("modified", ""), reverse=True)
                recent = [f["name"] for f in files[:5]]
                if recent:
                    parts.append(f"- Recently modified: {', '.join(recent)}")
            except Exception:
                # Optional detail: keep the rest of the context, but leave
                # a trace instead of failing silently.
                log.debug("Could not list recently modified files", exc_info=True)

        return base + "\n".join(parts)
    except Exception:
        # Never let prompt enrichment prevent the agent from starting.
        log.debug("Could not build project context for system prompt", exc_info=True)
        return base
|
|
67
116
|
|
|
68
117
|
def process_message(
|
|
69
118
|
self,
|
|
@@ -72,6 +121,10 @@ class ChatAgent:
|
|
|
72
121
|
) -> str:
|
|
73
122
|
"""Process user message and generate response.
|
|
74
123
|
|
|
124
|
+
Note: The caller (REPL) is responsible for adding messages to
|
|
125
|
+
the session. This method does NOT add messages itself to avoid
|
|
126
|
+
duplicate entries.
|
|
127
|
+
|
|
75
128
|
Args:
|
|
76
129
|
user_message: User's message
|
|
77
130
|
session: Current chat session
|
|
@@ -79,10 +132,6 @@ class ChatAgent:
|
|
|
79
132
|
Returns:
|
|
80
133
|
Assistant's response
|
|
81
134
|
"""
|
|
82
|
-
# Add user message to session
|
|
83
|
-
timestamp = datetime.now().isoformat()
|
|
84
|
-
session.add_message("user", user_message, timestamp)
|
|
85
|
-
|
|
86
135
|
# Detect intent
|
|
87
136
|
intent = detect_intent(user_message)
|
|
88
137
|
|
|
@@ -103,9 +152,6 @@ class ChatAgent:
|
|
|
103
152
|
# General chat - use LLM with RAG context
|
|
104
153
|
response = self._handle_chat(user_message, session)
|
|
105
154
|
|
|
106
|
-
# Add assistant response to session
|
|
107
|
-
session.add_message("assistant", response, datetime.now().isoformat())
|
|
108
|
-
|
|
109
155
|
# Save session
|
|
110
156
|
self.session_manager.save_session(session)
|
|
111
157
|
|
|
@@ -289,13 +335,14 @@ class ChatAgent:
|
|
|
289
335
|
|
|
290
336
|
def _handle_chat(self, message: str, session: ChatSession) -> str:
|
|
291
337
|
"""Handle general chat with LLM and RAG context."""
|
|
292
|
-
# Assemble context using smart RAG strategy
|
|
338
|
+
# Assemble context using smart RAG strategy + symbol memory
|
|
293
339
|
context_messages = assemble_context_for_llm(
|
|
294
340
|
user_message=message,
|
|
295
341
|
session=session,
|
|
296
342
|
rag_retriever=self.rag_retriever,
|
|
297
|
-
system_prompt=
|
|
298
|
-
max_tokens=8000
|
|
343
|
+
system_prompt=self.system_prompt,
|
|
344
|
+
max_tokens=8000,
|
|
345
|
+
symbol_memory=self.symbol_memory,
|
|
299
346
|
)
|
|
300
347
|
|
|
301
348
|
# Call LLM
|