PyPI - hdsp-jupyter-extension - Versions diffs - 2.0.26__py3-none-any.whl → 2.0.28__py3-none-any.whl - Mend

hdsp-jupyter-extension 2.0.26__py3-none-any.whl → 2.0.28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (71) hide show

agent_server/langchain/llm_factory.py CHANGED Viewed

@@ -93,7 +93,6 @@ def _create_vllm_llm(llm_config: Dict[str, Any], callbacks):
     from langchain_openai import ChatOpenAI
     vllm_config = llm_config.get("vllm", {})
-    # User provides full base URL (e.g., https://openrouter.ai/api/v1)
     endpoint = vllm_config.get("endpoint", "http://localhost:8000/v1")
     model = vllm_config.get("model", "default")
     api_key = vllm_config.get("apiKey", "dummy")
@@ -140,9 +139,11 @@ def _create_vllm_llm(llm_config: Dict[str, Any], callbacks):
 def create_summarization_llm(llm_config: Dict[str, Any]):
-    """Create LLM for summarization middleware.
+    """Create LLM for summarization middleware and /compact feature.
-    Uses the same provider as the main LLM but with simpler configuration.
+    Priority:
+    1. If llm_config["summarization"]["enabled"] is True, use that config
+    2. Otherwise, fall back to main provider with default summarization model
     Args:
         llm_config: Configuration dictionary
@@ -150,60 +151,107 @@ def create_summarization_llm(llm_config: Dict[str, Any]):
     Returns:
         LLM instance suitable for summarization, or None if unavailable
     """
-    provider = llm_config.get("provider", "gemini")
     try:
-        if provider == "gemini":
-            from langchain_google_genai import ChatGoogleGenerativeAI
-            gemini_config = llm_config.get("gemini", {})
-            api_key = gemini_config.get("apiKey")
-            if api_key:
-                return ChatGoogleGenerativeAI(
-                    model="gemini-2.5-flash",
-                    google_api_key=api_key,
-                    temperature=0.0,
-                )
-        elif provider == "openai":
-            from langchain_openai import ChatOpenAI
-            openai_config = llm_config.get("openai", {})
-            api_key = openai_config.get("apiKey")
-            if api_key:
-                return ChatOpenAI(
-                    model="gpt-4o-mini",
-                    api_key=api_key,
-                    temperature=0.0,
-                )
-        elif provider == "vllm":
-            vllm_config = llm_config.get("vllm", {})
-            # User provides full base URL (e.g., https://openrouter.ai/api/v1)
-            endpoint = vllm_config.get("endpoint", "http://localhost:8000/v1")
-            model = vllm_config.get("model", "default")
-            api_key = vllm_config.get("apiKey", "dummy")
-            # Use ChatGPTOSS for gpt-oss models (but not via OpenRouter)
-            is_openrouter = "openrouter" in endpoint.lower()
-            if "gpt-oss" in model.lower() and not is_openrouter:
-                from agent_server.langchain.models import ChatGPTOSS
-                return ChatGPTOSS(
-                    model=model,
-                    base_url=endpoint,
-                    api_key=api_key,
-                    temperature=0.0,
-                )
-            from langchain_openai import ChatOpenAI
-            return ChatOpenAI(
-                model=model,
-                api_key=api_key,
-                base_url=endpoint,  # Use endpoint as-is
-                temperature=0.0,
+        # 1. Check for dedicated summarization config
+        summarization_config = llm_config.get("summarization", {})
+        if summarization_config.get("enabled"):
+            sum_provider = summarization_config.get("provider", "gemini")
+            sum_model = summarization_config.get("model")
+            logger.info(
+                f"Using dedicated summarization LLM: provider={sum_provider}, model={sum_model or 'default'}"
+            )
+            return _create_llm_for_provider(
+                llm_config, sum_provider, sum_model, for_summarization=True
             )
+        # 2. Fall back to main provider with default summarization model
+        provider = llm_config.get("provider", "gemini")
+        logger.info(f"Using main provider for summarization: {provider}")
+        return _create_llm_for_provider(
+            llm_config, provider, None, for_summarization=True
+        )
     except Exception as e:
         logger.warning(f"Failed to create summarization LLM: {e}")
         return None
-    return None
+def _create_llm_for_provider(
+    llm_config: Dict[str, Any],
+    provider: str,
+    model_override: str = None,
+    for_summarization: bool = False,
+):
+    """Create LLM instance for a specific provider.
+    Args:
+        llm_config: Full configuration dictionary (for credentials)
+        provider: Provider to use ('gemini', 'openai', 'vllm')
+        model_override: Optional model name override
+        for_summarization: If True, use lightweight default models
+    Returns:
+        LLM instance or None
+    """
+    if provider == "gemini":
+        from langchain_google_genai import ChatGoogleGenerativeAI
+        gemini_config = llm_config.get("gemini", {})
+        api_key = gemini_config.get("apiKey")
+        if not api_key:
+            logger.warning("No Gemini API key found")
+            return None
+        model = model_override or ("gemini-2.5-flash" if for_summarization else gemini_config.get("model", "gemini-2.5-flash"))
+        return ChatGoogleGenerativeAI(
+            model=model,
+            google_api_key=api_key,
+            temperature=0.0,
+        )
+    elif provider == "openai":
+        from langchain_openai import ChatOpenAI
+        openai_config = llm_config.get("openai", {})
+        api_key = openai_config.get("apiKey")
+        if not api_key:
+            logger.warning("No OpenAI API key found")
+            return None
+        model = model_override or ("gpt-4o-mini" if for_summarization else openai_config.get("model", "gpt-4"))
+        return ChatOpenAI(
+            model=model,
+            api_key=api_key,
+            temperature=0.0,
+        )
+    elif provider == "vllm":
+        vllm_config = llm_config.get("vllm", {})
+        endpoint = vllm_config.get("endpoint", "http://localhost:8000/v1")
+        api_key = vllm_config.get("apiKey", "dummy")
+        model = model_override or vllm_config.get("model", "default")
+        # Use ChatGPTOSS for gpt-oss models (but not via OpenRouter)
+        is_openrouter = "openrouter" in endpoint.lower()
+        if "gpt-oss" in model.lower() and not is_openrouter:
+            from agent_server.langchain.models import ChatGPTOSS
+            return ChatGPTOSS(
+                model=model,
+                base_url=endpoint,
+                api_key=api_key,
+                temperature=0.0,
+            )
+        from langchain_openai import ChatOpenAI
+        return ChatOpenAI(
+            model=model,
+            api_key=api_key,
+            base_url=endpoint,
+            temperature=0.0,
+        )
+    else:
+        logger.warning(f"Unknown provider: {provider}")
+        return None

agent_server/langchain/logging_utils.py CHANGED Viewed

@@ -37,7 +37,7 @@ def disable_langchain_logging():
 # Auto-disable on import (comment this line to re-enable all logs)
-disable_langchain_logging()
+# disable_langchain_logging()  # TEMPORARILY ENABLED FOR DEBUGGING
 LOG_SEPARATOR = "=" * 96
 LOG_SUBSECTION = "-" * 96

agent_server/langchain/middleware/__init__.py CHANGED Viewed

@@ -3,10 +3,14 @@ Middleware Module
 Custom middleware for the multi-agent architecture:
 - SubAgentMiddleware: Handles subagent delegation via task tool
+- ContentInjectionMiddleware: Injects generated code/SQL into tool args
 - SkillMiddleware: Progressive skill loading for code generation agents
 - Existing middleware from custom_middleware.py is also available
 """
+from agent_server.langchain.middleware.content_injection_middleware import (
+    ContentInjectionMiddleware,
+)
 from agent_server.langchain.middleware.skill_middleware import (
     SkillMiddleware,
     get_skill_middleware,
@@ -18,6 +22,7 @@ from agent_server.langchain.middleware.subagent_middleware import (
 __all__ = [
     "SubAgentMiddleware",
+    "ContentInjectionMiddleware",
     "create_task_tool",
     "SkillMiddleware",
     "get_skill_middleware",

agent_server/langchain/middleware/code_history_middleware.py CHANGED Viewed

@@ -12,11 +12,12 @@ Features:
 import logging
 import threading
-import tiktoken
 from dataclasses import dataclass, field
 from datetime import datetime
 from typing import Any, Dict, List, Optional
+import tiktoken
 logger = logging.getLogger(__name__)
 # Token limit for context (including system prompt)
@@ -31,7 +32,9 @@ PYTHON_DEV_SYSTEM_PROMPT_TOKENS = 2000
 class CodeHistoryEntry:
     """Represents a single code execution or file operation."""
-    tool_name: str  # jupyter_cell_tool, write_file_tool, edit_file_tool, multiedit_file_tool
+    tool_name: (
+        str  # jupyter_cell_tool, write_file_tool, edit_file_tool, multiedit_file_tool
+    )
     timestamp: datetime = field(default_factory=datetime.now)
     # For jupyter_cell_tool
@@ -50,7 +53,9 @@ class CodeHistoryEntry:
         timestamp_str = self.timestamp.strftime("%H:%M:%S")
         if self.tool_name == "jupyter_cell_tool":
-            output_preview = self._truncate(self.output, 500) if self.output else "(no output)"
+            output_preview = (
+                self._truncate(self.output, 500) if self.output else "(no output)"
+            )
             return f"""## Cell ({timestamp_str})
 ```python
 {self.code}
@@ -86,7 +91,7 @@ Changes: {edit_count} edits applied"""
         if self.tool_name == "jupyter_cell_tool":
             # Extract first meaningful line of code
             if self.code:
-                first_line = self.code.strip().split('\n')[0][:60]
+                first_line = self.code.strip().split("\n")[0][:60]
                 return f"- Cell: {first_line}..."
             return "- Cell: (empty)"
@@ -156,7 +161,9 @@ class CodeHistoryTracker:
                 output=output,
             )
             self._history.append(entry)
-            logger.info(f"CodeHistory: Added jupyter_cell (total: {len(self._history)})")
+            logger.info(
+                f"CodeHistory: Added jupyter_cell (total: {len(self._history)})"
+            )
     def add_write_file(self, file_path: str, content: str) -> None:
         """Track a write_file_tool execution."""
@@ -167,7 +174,9 @@ class CodeHistoryTracker:
                 content=content,
             )
             self._history.append(entry)
-            logger.info(f"CodeHistory: Added write_file {file_path} (total: {len(self._history)})")
+            logger.info(
+                f"CodeHistory: Added write_file {file_path} (total: {len(self._history)})"
+            )
     def add_edit_file(self, file_path: str, old_content: str, new_content: str) -> None:
         """Track an edit_file_tool execution."""
@@ -179,7 +188,9 @@ class CodeHistoryTracker:
                 new_content=new_content,
             )
             self._history.append(entry)
-            logger.info(f"CodeHistory: Added edit_file {file_path} (total: {len(self._history)})")
+            logger.info(
+                f"CodeHistory: Added edit_file {file_path} (total: {len(self._history)})"
+            )
     def add_multiedit_file(self, file_path: str, edits: List[Dict[str, str]]) -> None:
         """Track a multiedit_file_tool execution."""
@@ -190,7 +201,9 @@ class CodeHistoryTracker:
                 edits=edits,
             )
             self._history.append(entry)
-            logger.info(f"CodeHistory: Added multiedit_file {file_path} (total: {len(self._history)})")
+            logger.info(
+                f"CodeHistory: Added multiedit_file {file_path} (total: {len(self._history)})"
+            )
     def get_context_for_subagent(
         self,
@@ -216,8 +229,12 @@ class CodeHistoryTracker:
                 return existing_context or ""
             # Calculate available tokens for history
-            existing_tokens = self._count_tokens(existing_context) if existing_context else 0
-            available_tokens = max_tokens - system_prompt_tokens - existing_tokens - 500  # 500 buffer
+            existing_tokens = (
+                self._count_tokens(existing_context) if existing_context else 0
+            )
+            available_tokens = (
+                max_tokens - system_prompt_tokens - existing_tokens - 500
+            )  # 500 buffer
             # Build full history string
             full_history = self._build_full_history()
@@ -311,49 +328,110 @@ class CodeHistoryTracker:
             return len(self._history)
-# Global tracker instance (per-thread tracking could be added if needed)
-_code_history_tracker: Optional[CodeHistoryTracker] = None
+# Global tracker instances per threadId
+_code_history_trackers: Dict[str, CodeHistoryTracker] = {}
+_trackers_lock = threading.Lock()
+def get_code_history_tracker(thread_id: Optional[str] = None) -> CodeHistoryTracker:
+    """
+    Get the CodeHistoryTracker instance for the given thread_id.
+    Args:
+        thread_id: Thread ID for session isolation. If None, returns a temporary tracker.
+    Returns:
+        CodeHistoryTracker instance for the thread
+    """
+    if thread_id is None:
+        logger.warning(
+            "get_code_history_tracker called without thread_id - using temporary tracker"
+        )
+        return CodeHistoryTracker()
-def get_code_history_tracker() -> CodeHistoryTracker:
-    """Get the global CodeHistoryTracker instance."""
-    global _code_history_tracker
-    if _code_history_tracker is None:
-        _code_history_tracker = CodeHistoryTracker()
-    return _code_history_tracker
+    with _trackers_lock:
+        if thread_id not in _code_history_trackers:
+            _code_history_trackers[thread_id] = CodeHistoryTracker()
+            logger.info(f"CodeHistory: Created new tracker for thread_id={thread_id}")
+        return _code_history_trackers[thread_id]
-def track_jupyter_cell(code: str, output: str) -> None:
+def track_jupyter_cell(code: str, output: str, thread_id: Optional[str] = None) -> None:
     """Convenience function to track jupyter_cell_tool execution."""
-    get_code_history_tracker().add_jupyter_cell(code, output)
+    get_code_history_tracker(thread_id).add_jupyter_cell(code, output)
-def track_write_file(file_path: str, content: str) -> None:
+def track_write_file(
+    file_path: str, content: str, thread_id: Optional[str] = None
+) -> None:
     """Convenience function to track write_file_tool execution."""
-    get_code_history_tracker().add_write_file(file_path, content)
+    get_code_history_tracker(thread_id).add_write_file(file_path, content)
-def track_edit_file(file_path: str, old_content: str, new_content: str) -> None:
+def track_edit_file(
+    file_path: str,
+    old_content: str,
+    new_content: str,
+    thread_id: Optional[str] = None,
+) -> None:
     """Convenience function to track edit_file_tool execution."""
-    get_code_history_tracker().add_edit_file(file_path, old_content, new_content)
+    get_code_history_tracker(thread_id).add_edit_file(
+        file_path, old_content, new_content
+    )
-def track_multiedit_file(file_path: str, edits: List[Dict[str, str]]) -> None:
+def track_multiedit_file(
+    file_path: str,
+    edits: List[Dict[str, str]],
+    thread_id: Optional[str] = None,
+) -> None:
     """Convenience function to track multiedit_file_tool execution."""
-    get_code_history_tracker().add_multiedit_file(file_path, edits)
+    get_code_history_tracker(thread_id).add_multiedit_file(file_path, edits)
-def get_context_with_history(existing_context: Optional[str] = None) -> str:
+def get_context_with_history(
+    existing_context: Optional[str] = None,
+    thread_id: Optional[str] = None,
+) -> str:
     """Get context string with code history injected."""
-    return get_code_history_tracker().get_context_for_subagent(existing_context)
+    return get_code_history_tracker(thread_id).get_context_for_subagent(
+        existing_context
+    )
+def clear_code_history(thread_id: Optional[str] = None) -> None:
+    """
+    Clear code history for a specific thread or all threads.
-def clear_code_history() -> None:
-    """Clear all code history."""
-    get_code_history_tracker().clear()
+    Args:
+        thread_id: Thread ID to clear. If None, clears all threads.
+    """
+    if thread_id is None:
+        # Clear all trackers
+        with _trackers_lock:
+            for tid, tracker in _code_history_trackers.items():
+                tracker.clear()
+                logger.info(f"CodeHistory: Cleared history for thread_id={tid}")
+            _code_history_trackers.clear()
+            logger.info("CodeHistory: Cleared all thread trackers")
+    else:
+        # Clear specific thread
+        with _trackers_lock:
+            if thread_id in _code_history_trackers:
+                _code_history_trackers[thread_id].clear()
+                del _code_history_trackers[thread_id]
+                logger.info(
+                    f"CodeHistory: Cleared and removed tracker for thread_id={thread_id}"
+                )
+            else:
+                logger.info(f"CodeHistory: No tracker found for thread_id={thread_id}")
-def track_tool_execution(tool_name: str, args: Dict[str, Any]) -> None:
+def track_tool_execution(
+    tool_name: str,
+    args: Dict[str, Any],
+    thread_id: Optional[str] = None,
+) -> None:
     """
     Track a tool execution from HITL decision processing.
@@ -363,6 +441,7 @@ def track_tool_execution(tool_name: str, args: Dict[str, Any]) -> None:
     Args:
         tool_name: Name of the tool (jupyter_cell_tool, write_file_tool, etc.)
         args: Tool arguments including execution_result
+        thread_id: Thread ID for session isolation
     """
     if not args:
         return
@@ -371,21 +450,26 @@ def track_tool_execution(tool_name: str, args: Dict[str, Any]) -> None:
     if not execution_result:
         return
-    tracker = get_code_history_tracker()
+    tracker = get_code_history_tracker(thread_id)
     if tool_name == "jupyter_cell_tool":
         code = args.get("code", "")
         output = execution_result.get("output", "")
         if code:
             tracker.add_jupyter_cell(code, output)
-            logger.info(f"CodeHistory: Tracked jupyter_cell execution (code len={len(code)})")
+            logger.info(
+                f"CodeHistory: Tracked jupyter_cell execution "
+                f"(code len={len(code)}, thread_id={thread_id})"
+            )
     elif tool_name == "write_file_tool":
         file_path = args.get("path", "")
         content = args.get("content", "")
         if file_path:
             tracker.add_write_file(file_path, content)
-            logger.info(f"CodeHistory: Tracked write_file to {file_path}")
+            logger.info(
+                f"CodeHistory: Tracked write_file to {file_path} (thread_id={thread_id})"
+            )
     elif tool_name == "edit_file_tool":
         file_path = args.get("path", "")
@@ -393,7 +477,9 @@ def track_tool_execution(tool_name: str, args: Dict[str, Any]) -> None:
         new_string = args.get("new_string", "")
         if file_path:
             tracker.add_edit_file(file_path, old_string, new_string)
-            logger.info(f"CodeHistory: Tracked edit_file to {file_path}")
+            logger.info(
+                f"CodeHistory: Tracked edit_file to {file_path} (thread_id={thread_id})"
+            )
     elif tool_name == "multiedit_file_tool":
         file_path = args.get("path", "")
@@ -409,4 +495,7 @@ def track_tool_execution(tool_name: str, args: Dict[str, Any]) -> None:
                 elif isinstance(edit, dict):
                     edits_as_dicts.append(edit)
             tracker.add_multiedit_file(file_path, edits_as_dicts)
-            logger.info(f"CodeHistory: Tracked multiedit_file to {file_path} ({len(edits)} edits)")
+            logger.info(
+                f"CodeHistory: Tracked multiedit_file to {file_path} "
+                f"({len(edits)} edits, thread_id={thread_id})"
+            )

agent_server/langchain/middleware/content_injection_middleware.py ADDED Viewed

@@ -0,0 +1,110 @@
+"""
+ContentInjectionMiddleware
+Injects generated_content from LangGraph state into target tool args.
+This eliminates JSON escaping issues when passing code/SQL between agents.
+Runs BEFORE HumanInTheLoopMiddleware so HITL shows the full injected content.
+Flow:
+1. Subagent generates code/SQL → stored in state via Command
+2. Main Agent calls target tool (e.g., jupyter_cell_tool) without args
+3. This middleware reads state and injects content into tool args
+4. HITL middleware sees full content for user approval
+content_type → tool injection mapping:
+- "python" → jupyter_cell_tool(code=...), write_file_tool(content=...)
+- "sql"    → markdown_tool(content="```sql\\n...\\n```")
+"""
+import logging
+from typing import Any, Callable, Union
+from langchain.agents.middleware import AgentMiddleware
+from langchain_core.messages import ToolMessage
+logger = logging.getLogger(__name__)
+class ContentInjectionMiddleware(AgentMiddleware):
+    """Inject state's generated_content into target tool call args.
+    When a subagent generates code/SQL via task_tool, it's stored in
+    LangGraph state (generated_content, generated_content_type, content_description).
+    This middleware reads the state and injects the content into the
+    appropriate tool's arguments before execution.
+    This ensures:
+    1. Code/SQL bypasses LLM JSON serialization (no escaping issues)
+    2. HITL middleware sees the full injected content for approval
+    3. Main Agent doesn't need to copy code into tool args
+    Usage in agent_factory.py:
+        middleware = [
+            ContentInjectionMiddleware(),  # BEFORE HITL
+            ...,
+            hitl_middleware,               # Sees injected content
+        ]
+    """
+    def wrap_tool_call(self, request, handler):
+        """Intercept tool calls and inject generated content from state.
+        Args:
+            request: ToolCallRequest with tool_call, state, runtime
+            handler: Next handler in middleware chain
+        Returns:
+            ToolMessage or Command from handler
+        """
+        state = request.state
+        if not state:
+            return handler(request)
+        content = state.get("generated_content") if isinstance(state, dict) else getattr(state, "generated_content", None)
+        content_type = state.get("generated_content_type") if isinstance(state, dict) else getattr(state, "generated_content_type", None)
+        desc = state.get("content_description") if isinstance(state, dict) else getattr(state, "content_description", None)
+        if not content or not content_type:
+            return handler(request)
+        tool_call = request.tool_call
+        tool_name = tool_call["name"]
+        args = tool_call.get("args", {})
+        new_args = None
+        # Python code injection
+        if content_type == "python":
+            if tool_name == "jupyter_cell_tool" and not args.get("code"):
+                new_args = {**args, "code": content}
+                if desc and not args.get("description"):
+                    new_args["description"] = desc
+                logger.info(
+                    "[ContentInjection] Injected python code (%d chars) into jupyter_cell_tool",
+                    len(content),
+                )
+            elif tool_name == "write_file_tool" and not args.get("content"):
+                new_args = {**args, "content": content}
+                logger.info(
+                    "[ContentInjection] Injected python code (%d chars) into write_file_tool",
+                    len(content),
+                )
+        # SQL query injection
+        elif content_type == "sql":
+            if tool_name == "markdown_tool" and not args.get("content"):
+                sql_markdown = f"```sql\n{content}\n```"
+                if desc:
+                    sql_markdown = f"{desc}\n\n{sql_markdown}"
+                new_args = {**args, "content": sql_markdown}
+                logger.info(
+                    "[ContentInjection] Injected SQL (%d chars) into markdown_tool",
+                    len(content),
+                )
+        if new_args is not None:
+            modified_call = {**tool_call, "args": new_args}
+            request = request.override(tool_call=modified_call)
+        return handler(request)

hdsp-jupyter-extension 2.0.26__py3-none-any.whl → 2.0.28__py3-none-any.whl

hdsp-jupyter-extension 2.0.26__py3-none-any.whl → 2.0.28__py3-none-any.whl