PyPI - codegraph-cli - Versions diffs - 2.1.0__py3-none-any.whl → 2.1.2__py3-none-any.whl - Mend

codegraph-cli 2.1.0__py3-none-any.whl → 2.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42) hide show

codegraph_cli/__init__.py +1 -1
codegraph_cli/agents.py +59 -3
codegraph_cli/chat_agent.py +58 -11
codegraph_cli/cli.py +569 -54
codegraph_cli/cli_chat.py +204 -94
codegraph_cli/cli_diagnose.py +13 -2
codegraph_cli/cli_docs.py +207 -0
codegraph_cli/cli_explore.py +1053 -0
codegraph_cli/cli_export.py +941 -0
codegraph_cli/cli_groups.py +33 -0
codegraph_cli/cli_health.py +316 -0
codegraph_cli/cli_history.py +213 -0
codegraph_cli/cli_onboard.py +380 -0
codegraph_cli/cli_quickstart.py +256 -0
codegraph_cli/cli_refactor.py +17 -3
codegraph_cli/cli_setup.py +12 -12
codegraph_cli/cli_suggestions.py +90 -0
codegraph_cli/cli_test.py +17 -3
codegraph_cli/cli_tui.py +210 -0
codegraph_cli/cli_v2.py +24 -4
codegraph_cli/cli_watch.py +158 -0
codegraph_cli/cli_workflows.py +255 -0
codegraph_cli/codegen_agent.py +15 -1
codegraph_cli/config.py +18 -5
codegraph_cli/context_manager.py +117 -15
codegraph_cli/crew_agents.py +32 -8
codegraph_cli/crew_chat.py +146 -13
codegraph_cli/crew_tools.py +30 -2
codegraph_cli/embeddings.py +95 -5
codegraph_cli/llm.py +42 -55
codegraph_cli/project_context.py +64 -1
codegraph_cli/rag.py +282 -19
codegraph_cli/storage.py +310 -14
codegraph_cli/vector_store.py +110 -8
{codegraph_cli-2.1.0.dist-info → codegraph_cli-2.1.2.dist-info}/METADATA +75 -21
codegraph_cli-2.1.2.dist-info/RECORD +55 -0
codegraph_cli-2.1.2.dist-info/entry_points.txt +2 -0
codegraph_cli-2.1.0.dist-info/RECORD +0 -43
codegraph_cli-2.1.0.dist-info/entry_points.txt +0 -2
{codegraph_cli-2.1.0.dist-info → codegraph_cli-2.1.2.dist-info}/WHEEL +0 -0
{codegraph_cli-2.1.0.dist-info → codegraph_cli-2.1.2.dist-info}/licenses/LICENSE +0 -0
{codegraph_cli-2.1.0.dist-info → codegraph_cli-2.1.2.dist-info}/top_level.txt +0 -0

codegraph_cli/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
 """CodeGraph CLI package."""
 __all__ = ["__version__"]
-__version__ = "2.0.1"
+__version__ = "2.1.2"

codegraph_cli/agents.py CHANGED Viewed

@@ -2,17 +2,66 @@
 from __future__ import annotations
+import re
 from collections import deque
 from pathlib import Path
 from typing import Dict, List, Set
 from .embeddings import HashEmbeddingModel, TransformerEmbedder
 from .llm import LocalLLM
-from .models import ImpactReport
+from .models import ImpactReport, Node
 from .parser import PythonGraphParser
 from .rag import RAGRetriever
 from .storage import GraphStore
+# Regex to strip bare import lines from chunk text
+_IMPORT_RE = re.compile(r"^(?:from\s+\S+\s+)?import\s+.+$", re.MULTILINE)
+# Maximum characters to keep for a single chunk's code body.
+# Module-level nodes can be very large; truncating keeps embeddings
+# focused on the symbol's signature + docstring + first N lines.
+_MAX_CHUNK_CODE_CHARS = 1500
+def _build_chunk_text(node: Node) -> str:
+    """Build structured chunk text for embedding.
+    The text is formatted so that the embedding model captures:
+    - **file path** (helps retrieval when users mention filenames)
+    - **symbol name + type** (boosts exact-match semantics)
+    - **docstring** (captures purpose / intent)
+    - **code body** (captures implementation detail)
+    Import lines and decorators-only boilerplate are stripped to
+    reduce noise.  Module-level nodes are truncated to avoid huge
+    embeddings that dilute meaning.
+    """
+    parts: List[str] = [
+        f"file: {node.file_path}",
+        f"symbol: {node.qualname}",
+        f"type: {node.node_type}",
+    ]
+    if node.docstring:
+        parts.append(f"doc: {node.docstring.strip()}")
+    # Clean code: strip import lines for non-module nodes
+    code = node.code
+    if node.node_type != "module":
+        code = _IMPORT_RE.sub("", code).strip()
+    else:
+        # For modules keep only the first N chars to avoid huge chunks
+        code = code[:_MAX_CHUNK_CODE_CHARS]
+    # Truncate overly long code
+    if len(code) > _MAX_CHUNK_CODE_CHARS:
+        code = code[:_MAX_CHUNK_CODE_CHARS] + "\n# ... (truncated)"
+    if code:
+        parts.append(code)
+    return "\n".join(parts)
 class GraphAgent:
     """Responsible for parsing projects and maintaining graph memory."""
@@ -31,7 +80,7 @@ class GraphAgent:
         total_nodes = len(nodes)
         for idx, node in enumerate(nodes, 1):
-            text = "\n".join([node.qualname, node.docstring, node.code])
+            text = _build_chunk_text(node)
             emb = self.embedding_model.embed_text(text)
             node_payload.append((node, emb))
@@ -43,13 +92,20 @@ class GraphAgent:
         if show_progress:
             print(f"\r📊 Indexing: {total_nodes}/{total_nodes} nodes (100%)  ")
-        self.store.insert_nodes(node_payload)
+        emb_model_key = getattr(self.embedding_model, 'model_key', 'hash')
+        emb_dim = getattr(self.embedding_model, 'dim', 256)
+        self.store.insert_nodes(node_payload, model_key=emb_model_key)
         self.store.insert_edges(edges)
+        # Record embedding model info in project metadata
         self.store.set_metadata(
             {
                 "project_root": str(project_root),
                 "node_count": len(nodes),
                 "edge_count": len(edges),
+                "embedding_model": emb_model_key,
+                "embedding_dim": emb_dim,
             }
         )
         return {"nodes": len(nodes), "edges": len(edges)}

codegraph_cli/chat_agent.py CHANGED Viewed

@@ -7,7 +7,7 @@ from typing import Optional
 from .chat_session import SessionManager
 from .codegen_agent import CodeGenAgent
-from .context_manager import assemble_context_for_llm, detect_intent
+from .context_manager import SymbolMemory, assemble_context_for_llm, detect_intent
 from .llm import LocalLLM
 from .models_v2 import ChatSession, CodeProposal
 from .orchestrator import MCPOrchestrator
@@ -59,11 +59,60 @@ class ChatAgent:
         self.rag_retriever = rag_retriever
         self.session_manager = SessionManager()
+        # Symbol memory — tracks recently discussed symbols & files
+        # so we can skip redundant RAG queries.
+        self.symbol_memory = SymbolMemory()
         # Initialize specialized agents
         from .codegen_agent import CodeGenAgent
         from .refactor_agent import RefactorAgent
         self.codegen_agent = CodeGenAgent(context.store, llm, project_context=context)
         self.refactor_agent = RefactorAgent(context.store)
+        # Build enhanced system prompt with auto-context
+        self.system_prompt = self._build_system_prompt()
+    def _build_system_prompt(self) -> str:
+        """Build system prompt enriched with project context.
+        Includes project name, source path, indexed file/symbol counts,
+        node-type breakdown, and recently modified files so the LLM has
+        immediate awareness of the codebase.
+        """
+        base = SYSTEM_PROMPT
+        try:
+            summary = self.context.get_project_summary()
+            parts = [
+                "\n\nProject Context:",
+                f"- Project: {summary.get('project_name', 'unknown')}",
+                f"- Source: {summary.get('source_path', 'N/A')}",
+                f"- Indexed: {summary.get('indexed_files', 0)} files, {summary.get('total_nodes', 0)} symbols",
+            ]
+            node_types = summary.get("node_types", {})
+            if node_types:
+                parts.append(
+                    f"- Breakdown: {node_types.get('function', 0)} functions, "
+                    f"{node_types.get('class', 0)} classes, "
+                    f"{node_types.get('module', 0)} modules"
+                )
+            # Recently modified files
+            if self.context.has_source_access:
+                try:
+                    items = self.context.list_directory(".")
+                    files = [f for f in items if f["type"] == "file"]
+                    files.sort(key=lambda f: f.get("modified", ""), reverse=True)
+                    recent = [f["name"] for f in files[:5]]
+                    if recent:
+                        parts.append(f"- Recently modified: {', '.join(recent)}")
+                except Exception:
+                    pass
+            return base + "\n".join(parts)
+        except Exception:
+            return base
     def process_message(
         self,
@@ -72,6 +121,10 @@ class ChatAgent:
     ) -> str:
         """Process user message and generate response.
+        Note: The caller (REPL) is responsible for adding messages to
+        the session.  This method does NOT add messages itself to avoid
+        duplicate entries.
         Args:
             user_message: User's message
             session: Current chat session
@@ -79,10 +132,6 @@ class ChatAgent:
         Returns:
             Assistant's response
         """
-        # Add user message to session
-        timestamp = datetime.now().isoformat()
-        session.add_message("user", user_message, timestamp)
         # Detect intent
         intent = detect_intent(user_message)
@@ -103,9 +152,6 @@ class ChatAgent:
             # General chat - use LLM with RAG context
             response = self._handle_chat(user_message, session)
-        # Add assistant response to session
-        session.add_message("assistant", response, datetime.now().isoformat())
         # Save session
         self.session_manager.save_session(session)
@@ -289,13 +335,14 @@ class ChatAgent:
     def _handle_chat(self, message: str, session: ChatSession) -> str:
         """Handle general chat with LLM and RAG context."""
-        # Assemble context using smart RAG strategy
+        # Assemble context using smart RAG strategy + symbol memory
         context_messages = assemble_context_for_llm(
             user_message=message,
             session=session,
             rag_retriever=self.rag_retriever,
-            system_prompt=SYSTEM_PROMPT,
-            max_tokens=8000
+            system_prompt=self.system_prompt,
+            max_tokens=8000,
+            symbol_memory=self.symbol_memory,
         )
         # Call LLM

codegraph-cli 2.1.0__py3-none-any.whl → 2.1.2__py3-none-any.whl

codegraph-cli 2.1.0__py3-none-any.whl → 2.1.2__py3-none-any.whl