codegraph-cli 2.1.1__py3-none-any.whl → 2.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. codegraph_cli/__init__.py +1 -1
  2. codegraph_cli/agents.py +59 -3
  3. codegraph_cli/chat_agent.py +58 -11
  4. codegraph_cli/cli.py +569 -54
  5. codegraph_cli/cli_chat.py +200 -95
  6. codegraph_cli/cli_diagnose.py +13 -2
  7. codegraph_cli/cli_docs.py +207 -0
  8. codegraph_cli/cli_explore.py +1053 -0
  9. codegraph_cli/cli_export.py +941 -0
  10. codegraph_cli/cli_groups.py +33 -0
  11. codegraph_cli/cli_health.py +316 -0
  12. codegraph_cli/cli_history.py +213 -0
  13. codegraph_cli/cli_onboard.py +380 -0
  14. codegraph_cli/cli_quickstart.py +256 -0
  15. codegraph_cli/cli_refactor.py +17 -3
  16. codegraph_cli/cli_setup.py +12 -12
  17. codegraph_cli/cli_suggestions.py +90 -0
  18. codegraph_cli/cli_test.py +17 -3
  19. codegraph_cli/cli_tui.py +210 -0
  20. codegraph_cli/cli_v2.py +24 -4
  21. codegraph_cli/cli_watch.py +158 -0
  22. codegraph_cli/cli_workflows.py +255 -0
  23. codegraph_cli/codegen_agent.py +15 -1
  24. codegraph_cli/config.py +18 -5
  25. codegraph_cli/context_manager.py +117 -15
  26. codegraph_cli/crew_agents.py +26 -7
  27. codegraph_cli/crew_chat.py +141 -12
  28. codegraph_cli/crew_tools.py +21 -1
  29. codegraph_cli/embeddings.py +95 -5
  30. codegraph_cli/llm.py +42 -55
  31. codegraph_cli/project_context.py +64 -1
  32. codegraph_cli/rag.py +282 -19
  33. codegraph_cli/storage.py +310 -14
  34. codegraph_cli/vector_store.py +110 -8
  35. {codegraph_cli-2.1.1.dist-info → codegraph_cli-2.1.2.dist-info}/METADATA +35 -24
  36. codegraph_cli-2.1.2.dist-info/RECORD +55 -0
  37. codegraph_cli-2.1.2.dist-info/entry_points.txt +2 -0
  38. codegraph_cli-2.1.1.dist-info/RECORD +0 -43
  39. codegraph_cli-2.1.1.dist-info/entry_points.txt +0 -2
  40. {codegraph_cli-2.1.1.dist-info → codegraph_cli-2.1.2.dist-info}/WHEEL +0 -0
  41. {codegraph_cli-2.1.1.dist-info → codegraph_cli-2.1.2.dist-info}/licenses/LICENSE +0 -0
  42. {codegraph_cli-2.1.1.dist-info → codegraph_cli-2.1.2.dist-info}/top_level.txt +0 -0
codegraph_cli/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
1
  """CodeGraph CLI package."""
2
2
 
3
3
  __all__ = ["__version__"]
4
- __version__ = "2.0.1"
4
+ __version__ = "2.1.2"
codegraph_cli/agents.py CHANGED
@@ -2,17 +2,66 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
+ import re
5
6
  from collections import deque
6
7
  from pathlib import Path
7
8
  from typing import Dict, List, Set
8
9
 
9
10
  from .embeddings import HashEmbeddingModel, TransformerEmbedder
10
11
  from .llm import LocalLLM
11
- from .models import ImpactReport
12
+ from .models import ImpactReport, Node
12
13
  from .parser import PythonGraphParser
13
14
  from .rag import RAGRetriever
14
15
  from .storage import GraphStore
15
16
 
17
+ # Regex to strip bare import lines from chunk text
18
+ _IMPORT_RE = re.compile(r"^(?:from\s+\S+\s+)?import\s+.+$", re.MULTILINE)
19
+
20
+ # Maximum characters to keep for a single chunk's code body.
21
+ # Module-level nodes can be very large; truncating keeps embeddings
22
+ # focused on the symbol's signature + docstring + first N lines.
23
+ _MAX_CHUNK_CODE_CHARS = 1500
24
+
25
+
26
+ def _build_chunk_text(node: Node) -> str:
27
+ """Build structured chunk text for embedding.
28
+
29
+ The text is formatted so that the embedding model captures:
30
+ - **file path** (helps retrieval when users mention filenames)
31
+ - **symbol name + type** (boosts exact-match semantics)
32
+ - **docstring** (captures purpose / intent)
33
+ - **code body** (captures implementation detail)
34
+
35
+ Import lines and decorators-only boilerplate are stripped to
36
+ reduce noise. Module-level nodes are truncated to avoid huge
37
+ embeddings that dilute meaning.
38
+ """
39
+ parts: List[str] = [
40
+ f"file: {node.file_path}",
41
+ f"symbol: {node.qualname}",
42
+ f"type: {node.node_type}",
43
+ ]
44
+
45
+ if node.docstring:
46
+ parts.append(f"doc: {node.docstring.strip()}")
47
+
48
+ # Clean code: strip import lines for non-module nodes
49
+ code = node.code
50
+ if node.node_type != "module":
51
+ code = _IMPORT_RE.sub("", code).strip()
52
+ else:
53
+ # For modules keep only the first N chars to avoid huge chunks
54
+ code = code[:_MAX_CHUNK_CODE_CHARS]
55
+
56
+ # Truncate overly long code
57
+ if len(code) > _MAX_CHUNK_CODE_CHARS:
58
+ code = code[:_MAX_CHUNK_CODE_CHARS] + "\n# ... (truncated)"
59
+
60
+ if code:
61
+ parts.append(code)
62
+
63
+ return "\n".join(parts)
64
+
16
65
 
17
66
  class GraphAgent:
18
67
  """Responsible for parsing projects and maintaining graph memory."""
@@ -31,7 +80,7 @@ class GraphAgent:
31
80
  total_nodes = len(nodes)
32
81
 
33
82
  for idx, node in enumerate(nodes, 1):
34
- text = "\n".join([node.qualname, node.docstring, node.code])
83
+ text = _build_chunk_text(node)
35
84
  emb = self.embedding_model.embed_text(text)
36
85
  node_payload.append((node, emb))
37
86
 
@@ -43,13 +92,20 @@ class GraphAgent:
43
92
  if show_progress:
44
93
  print(f"\r📊 Indexing: {total_nodes}/{total_nodes} nodes (100%) ")
45
94
 
46
- self.store.insert_nodes(node_payload)
95
+ emb_model_key = getattr(self.embedding_model, 'model_key', 'hash')
96
+ emb_dim = getattr(self.embedding_model, 'dim', 256)
97
+
98
+ self.store.insert_nodes(node_payload, model_key=emb_model_key)
47
99
  self.store.insert_edges(edges)
100
+
101
+ # Record embedding model info in project metadata
48
102
  self.store.set_metadata(
49
103
  {
50
104
  "project_root": str(project_root),
51
105
  "node_count": len(nodes),
52
106
  "edge_count": len(edges),
107
+ "embedding_model": emb_model_key,
108
+ "embedding_dim": emb_dim,
53
109
  }
54
110
  )
55
111
  return {"nodes": len(nodes), "edges": len(edges)}
codegraph_cli/chat_agent.py CHANGED
@@ -7,7 +7,7 @@ from typing import Optional
7
7
 
8
8
  from .chat_session import SessionManager
9
9
  from .codegen_agent import CodeGenAgent
10
- from .context_manager import assemble_context_for_llm, detect_intent
10
+ from .context_manager import SymbolMemory, assemble_context_for_llm, detect_intent
11
11
  from .llm import LocalLLM
12
12
  from .models_v2 import ChatSession, CodeProposal
13
13
  from .orchestrator import MCPOrchestrator
@@ -59,11 +59,60 @@ class ChatAgent:
59
59
  self.rag_retriever = rag_retriever
60
60
  self.session_manager = SessionManager()
61
61
 
62
+ # Symbol memory — tracks recently discussed symbols & files
63
+ # so we can skip redundant RAG queries.
64
+ self.symbol_memory = SymbolMemory()
65
+
62
66
  # Initialize specialized agents
63
67
  from .codegen_agent import CodeGenAgent
64
68
  from .refactor_agent import RefactorAgent
65
69
  self.codegen_agent = CodeGenAgent(context.store, llm, project_context=context)
66
70
  self.refactor_agent = RefactorAgent(context.store)
71
+
72
+ # Build enhanced system prompt with auto-context
73
+ self.system_prompt = self._build_system_prompt()
74
+
75
+ def _build_system_prompt(self) -> str:
76
+ """Build system prompt enriched with project context.
77
+
78
+ Includes project name, source path, indexed file/symbol counts,
79
+ node-type breakdown, and recently modified files so the LLM has
80
+ immediate awareness of the codebase.
81
+ """
82
+ base = SYSTEM_PROMPT
83
+
84
+ try:
85
+ summary = self.context.get_project_summary()
86
+ parts = [
87
+ "\n\nProject Context:",
88
+ f"- Project: {summary.get('project_name', 'unknown')}",
89
+ f"- Source: {summary.get('source_path', 'N/A')}",
90
+ f"- Indexed: {summary.get('indexed_files', 0)} files, {summary.get('total_nodes', 0)} symbols",
91
+ ]
92
+
93
+ node_types = summary.get("node_types", {})
94
+ if node_types:
95
+ parts.append(
96
+ f"- Breakdown: {node_types.get('function', 0)} functions, "
97
+ f"{node_types.get('class', 0)} classes, "
98
+ f"{node_types.get('module', 0)} modules"
99
+ )
100
+
101
+ # Recently modified files
102
+ if self.context.has_source_access:
103
+ try:
104
+ items = self.context.list_directory(".")
105
+ files = [f for f in items if f["type"] == "file"]
106
+ files.sort(key=lambda f: f.get("modified", ""), reverse=True)
107
+ recent = [f["name"] for f in files[:5]]
108
+ if recent:
109
+ parts.append(f"- Recently modified: {', '.join(recent)}")
110
+ except Exception:
111
+ pass
112
+
113
+ return base + "\n".join(parts)
114
+ except Exception:
115
+ return base
67
116
 
68
117
  def process_message(
69
118
  self,
@@ -72,6 +121,10 @@ class ChatAgent:
72
121
  ) -> str:
73
122
  """Process user message and generate response.
74
123
 
124
+ Note: The caller (REPL) is responsible for adding messages to
125
+ the session. This method does NOT add messages itself to avoid
126
+ duplicate entries.
127
+
75
128
  Args:
76
129
  user_message: User's message
77
130
  session: Current chat session
@@ -79,10 +132,6 @@ class ChatAgent:
79
132
  Returns:
80
133
  Assistant's response
81
134
  """
82
- # Add user message to session
83
- timestamp = datetime.now().isoformat()
84
- session.add_message("user", user_message, timestamp)
85
-
86
135
  # Detect intent
87
136
  intent = detect_intent(user_message)
88
137
 
@@ -103,9 +152,6 @@ class ChatAgent:
103
152
  # General chat - use LLM with RAG context
104
153
  response = self._handle_chat(user_message, session)
105
154
 
106
- # Add assistant response to session
107
- session.add_message("assistant", response, datetime.now().isoformat())
108
-
109
155
  # Save session
110
156
  self.session_manager.save_session(session)
111
157
 
@@ -289,13 +335,14 @@ class ChatAgent:
289
335
 
290
336
  def _handle_chat(self, message: str, session: ChatSession) -> str:
291
337
  """Handle general chat with LLM and RAG context."""
292
- # Assemble context using smart RAG strategy
338
+ # Assemble context using smart RAG strategy + symbol memory
293
339
  context_messages = assemble_context_for_llm(
294
340
  user_message=message,
295
341
  session=session,
296
342
  rag_retriever=self.rag_retriever,
297
- system_prompt=SYSTEM_PROMPT,
298
- max_tokens=8000
343
+ system_prompt=self.system_prompt,
344
+ max_tokens=8000,
345
+ symbol_memory=self.symbol_memory,
299
346
  )
300
347
 
301
348
  # Call LLM