hdsp-jupyter-extension 2.0.20__py3-none-any.whl → 2.0.22__py3-none-any.whl
This diff shows the contents of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
- agent_server/langchain/llm_factory.py +12 -4
- agent_server/langchain/logging_utils.py +75 -22
- agent_server/langchain/middleware/subagent_middleware.py +80 -11
- agent_server/routers/langchain_agent.py +100 -341
- {hdsp_jupyter_extension-2.0.20.data → hdsp_jupyter_extension-2.0.22.data}/data/share/jupyter/labextensions/hdsp-agent/build_log.json +1 -1
- {hdsp_jupyter_extension-2.0.20.data → hdsp_jupyter_extension-2.0.22.data}/data/share/jupyter/labextensions/hdsp-agent/package.json +2 -2
- hdsp_jupyter_extension-2.0.20.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.586bf5521d043cdd37b8.js → hdsp_jupyter_extension-2.0.22.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.8496e8475f1bd164669b.js +2 -2
- jupyter_ext/labextension/static/remoteEntry.586bf5521d043cdd37b8.js.map → hdsp_jupyter_extension-2.0.22.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.8496e8475f1bd164669b.js.map +1 -1
- {hdsp_jupyter_extension-2.0.20.dist-info → hdsp_jupyter_extension-2.0.22.dist-info}/METADATA +1 -1
- {hdsp_jupyter_extension-2.0.20.dist-info → hdsp_jupyter_extension-2.0.22.dist-info}/RECORD +40 -40
- jupyter_ext/_version.py +1 -1
- jupyter_ext/labextension/build_log.json +1 -1
- jupyter_ext/labextension/package.json +2 -2
- jupyter_ext/labextension/static/{remoteEntry.586bf5521d043cdd37b8.js → remoteEntry.8496e8475f1bd164669b.js} +2 -2
- hdsp_jupyter_extension-2.0.20.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.586bf5521d043cdd37b8.js.map → jupyter_ext/labextension/static/remoteEntry.8496e8475f1bd164669b.js.map +1 -1
- {hdsp_jupyter_extension-2.0.20.data → hdsp_jupyter_extension-2.0.22.data}/data/etc/jupyter/jupyter_server_config.d/hdsp_jupyter_extension.json +0 -0
- {hdsp_jupyter_extension-2.0.20.data → hdsp_jupyter_extension-2.0.22.data}/data/share/jupyter/labextensions/hdsp-agent/install.json +0 -0
- {hdsp_jupyter_extension-2.0.20.data → hdsp_jupyter_extension-2.0.22.data}/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.96745acc14125453fba8.js +0 -0
- {hdsp_jupyter_extension-2.0.20.data → hdsp_jupyter_extension-2.0.22.data}/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.96745acc14125453fba8.js.map +0 -0
- {hdsp_jupyter_extension-2.0.20.data → hdsp_jupyter_extension-2.0.22.data}/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.90f80cb80187de8c5ae5.js +0 -0
- {hdsp_jupyter_extension-2.0.20.data → hdsp_jupyter_extension-2.0.22.data}/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.90f80cb80187de8c5ae5.js.map +0 -0
- {hdsp_jupyter_extension-2.0.20.data → hdsp_jupyter_extension-2.0.22.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js +0 -0
- {hdsp_jupyter_extension-2.0.20.data → hdsp_jupyter_extension-2.0.22.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js.map +0 -0
- {hdsp_jupyter_extension-2.0.20.data → hdsp_jupyter_extension-2.0.22.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js +0 -0
- {hdsp_jupyter_extension-2.0.20.data → hdsp_jupyter_extension-2.0.22.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js.map +0 -0
- {hdsp_jupyter_extension-2.0.20.data → hdsp_jupyter_extension-2.0.22.data}/data/share/jupyter/labextensions/hdsp-agent/static/style.js +0 -0
- {hdsp_jupyter_extension-2.0.20.data → hdsp_jupyter_extension-2.0.22.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js +0 -0
- {hdsp_jupyter_extension-2.0.20.data → hdsp_jupyter_extension-2.0.22.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js.map +0 -0
- {hdsp_jupyter_extension-2.0.20.data → hdsp_jupyter_extension-2.0.22.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js +0 -0
- {hdsp_jupyter_extension-2.0.20.data → hdsp_jupyter_extension-2.0.22.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js.map +0 -0
- {hdsp_jupyter_extension-2.0.20.data → hdsp_jupyter_extension-2.0.22.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js +0 -0
- {hdsp_jupyter_extension-2.0.20.data → hdsp_jupyter_extension-2.0.22.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js.map +0 -0
- {hdsp_jupyter_extension-2.0.20.data → hdsp_jupyter_extension-2.0.22.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js +0 -0
- {hdsp_jupyter_extension-2.0.20.data → hdsp_jupyter_extension-2.0.22.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js.map +0 -0
- {hdsp_jupyter_extension-2.0.20.data → hdsp_jupyter_extension-2.0.22.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js +0 -0
- {hdsp_jupyter_extension-2.0.20.data → hdsp_jupyter_extension-2.0.22.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js.map +0 -0
- {hdsp_jupyter_extension-2.0.20.data → hdsp_jupyter_extension-2.0.22.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js +0 -0
- {hdsp_jupyter_extension-2.0.20.data → hdsp_jupyter_extension-2.0.22.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js.map +0 -0
- {hdsp_jupyter_extension-2.0.20.dist-info → hdsp_jupyter_extension-2.0.22.dist-info}/WHEEL +0 -0
- {hdsp_jupyter_extension-2.0.20.dist-info → hdsp_jupyter_extension-2.0.22.dist-info}/licenses/LICENSE +0 -0
agent_server/langchain/llm_factory.py

```diff
@@ -106,10 +106,12 @@ def _create_vllm_llm(llm_config: Dict[str, Any], callbacks):
         )
 
     # Use ChatGPTOSS for gpt-oss models (Harmony format with developer role)
-    if "gpt-oss" in model.lower():
+    # NOTE: OpenRouter doesn't support 'developer' role - only use for direct gpt-oss endpoints
+    is_openrouter = "openrouter" in endpoint.lower()
+    if "gpt-oss" in model.lower() and not is_openrouter:
         from agent_server.langchain.models import ChatGPTOSS
 
-        logger.info(
+        logger.info("Using ChatGPTOSS for gpt-oss model (developer role support)")
         return ChatGPTOSS(
             model=model,
             base_url=endpoint,
@@ -119,6 +121,11 @@ def _create_vllm_llm(llm_config: Dict[str, Any], callbacks):
             streaming=False,
             callbacks=callbacks,
         )
+    elif "gpt-oss" in model.lower() and is_openrouter:
+        logger.warning(
+            "gpt-oss model via OpenRouter - using standard ChatOpenAI "
+            "(developer role not supported by OpenRouter)"
+        )
 
     return ChatOpenAI(
         model=model,
@@ -175,8 +182,9 @@ def create_summarization_llm(llm_config: Dict[str, Any]):
    model = vllm_config.get("model", "default")
    api_key = vllm_config.get("apiKey", "dummy")
 
-    # Use ChatGPTOSS for gpt-oss models
-    if "gpt-oss" in model.lower():
+    # Use ChatGPTOSS for gpt-oss models (but not via OpenRouter)
+    is_openrouter = "openrouter" in endpoint.lower()
+    if "gpt-oss" in model.lower() and not is_openrouter:
        from agent_server.langchain.models import ChatGPTOSS
 
        return ChatGPTOSS(
```
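The change gates the Harmony-format `ChatGPTOSS` wrapper on the serving endpoint: gpt-oss models keep it only when served directly, because OpenRouter rejects the `developer` role that the wrapper emits. A minimal standalone sketch of the same selection rule (the helper name and endpoint URLs below are illustrative, not part of the package):

```python
def pick_chat_class(model: str, endpoint: str) -> str:
    """Hypothetical helper mirroring the factory's selection logic."""
    is_openrouter = "openrouter" in endpoint.lower()
    if "gpt-oss" in model.lower() and not is_openrouter:
        return "ChatGPTOSS"  # direct endpoint: Harmony format, 'developer' role OK
    return "ChatOpenAI"      # OpenRouter or non gpt-oss: standard OpenAI-style chat

# Illustrative endpoints only:
assert pick_chat_class("gpt-oss-120b", "http://localhost:8000/v1") == "ChatGPTOSS"
assert pick_chat_class("gpt-oss-120b", "https://openrouter.ai/api/v1") == "ChatOpenAI"
assert pick_chat_class("llama-3-70b", "http://localhost:8000/v1") == "ChatOpenAI"
```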
agent_server/langchain/logging_utils.py

```diff
@@ -22,7 +22,7 @@ llm_response_logger.propagate = True # Propagate to root logger
 # Ensure it has a handler if running standalone
 if not llm_response_logger.handlers and not logging.getLogger().handlers:
     _handler = logging.StreamHandler()
-    _handler.setFormatter(logging.Formatter(
+    _handler.setFormatter(logging.Formatter("%(message)s"))
     llm_response_logger.addHandler(_handler)
 
 
@@ -42,7 +42,11 @@ disable_langchain_logging()
 LOG_SEPARATOR = "=" * 96
 LOG_SUBSECTION = "-" * 96
 LOG_EMOJI_LINE = "🔵" * 48
-LOG_RESPONSE_START = f"\n\n{LOG_EMOJI_LINE}\n{'=' * 96}\n ✨ LLM RESPONSE START\n{'=' * 96}"
+LOG_REQUEST_START = f"\n\n{'🟢' * 48}\n{'=' * 96}\n 📤 LLM REQUEST START\n{'=' * 96}"
+LOG_REQUEST_END = f"{'=' * 96}\n 📤 LLM REQUEST END\n{'=' * 96}\n{'🟢' * 48}\n"
+LOG_RESPONSE_START = (
+    f"\n\n{LOG_EMOJI_LINE}\n{'=' * 96}\n ✨ LLM RESPONSE START\n{'=' * 96}"
+)
 LOG_RESPONSE_END = f"{'=' * 96}\n ✅ LLM RESPONSE END\n{'=' * 96}\n{LOG_EMOJI_LINE}\n"
 
 
@@ -207,42 +211,91 @@ class LLMTraceLogger(BaseCallbackHandler):
         logger.info("%s", "\n".join(lines))
 
     def on_chat_model_start(self, serialized, messages, **kwargs) -> None:
-
-
+        """Log LLM request messages as raw structured JSON."""
+        print(LOG_REQUEST_START, flush=True)
+
+        # Build raw structured request data
+        request_data = {
+            "model": serialized.get("name", "unknown") if serialized else "unknown",
+            "kwargs": {k: str(v)[:200] for k, v in kwargs.items() if k != "messages"},
+            "messages": [],
+        }
+
+        for batch in self._normalize_batches(messages):
+            batch_messages = []
+            for msg in batch:
+                batch_messages.append(_serialize_message(msg))
+            request_data["messages"].append(batch_messages)
+
+        # Output beautified JSON
+        print(_pretty_json(request_data), flush=True)
+
+        print(LOG_REQUEST_END, flush=True)
+
+        # --- OLD TEXT-PARSED LOGGING (commented out) ---
+        # for batch_idx, batch in enumerate(self._normalize_batches(messages)):
+        #     msg_types = {}
+        #     for msg in batch:
+        #         msg_type = msg.__class__.__name__
+        #         msg_types[msg_type] = msg_types.get(msg_type, 0) + 1
+        #     print(f"\nBatch {batch_idx}: {len(batch)} messages - {msg_types}", flush=True)
+        #     recent_count = min(5, len(batch))
+        #     if len(batch) > recent_count:
+        #         print(f"... ({len(batch) - recent_count} earlier messages omitted)", flush=True)
+        #     for idx, message in enumerate(batch[-recent_count:], start=len(batch) - recent_count):
+        #         lines = [LOG_SUBSECTION]
+        #         lines.append(f"[{idx}] {message.__class__.__name__}")
+        #         lines.append(_pretty_json(_serialize_message(message)))
+        #         print("\n".join(lines), flush=True)
 
     def on_chat_model_end(self, response, **kwargs) -> None:
-
-        print("[DEBUG] on_chat_model_end CALLED!", flush=True)
-        # Use print for guaranteed visibility
+        """Log LLM response as raw structured JSON."""
         print(LOG_RESPONSE_START, flush=True)
 
+        # Build raw structured response data
+        response_data = {
+            "llm_output": getattr(response, "llm_output", None),
+            "generations": [],
+        }
+
         generations = getattr(response, "generations", None) or []
         if generations and isinstance(generations[0], list):
             batches = generations
         else:
             batches = [generations]
 
-        for
-
+        for batch in batches:
+            batch_data = []
+            for generation in batch:
+                gen_data = {}
                 message = getattr(generation, "message", None)
-                if
-
-
-
-
+                if message:
+                    gen_data["message"] = _serialize_message(message)
+                gen_data["text"] = getattr(generation, "text", None)
+                gen_data["generation_info"] = getattr(
+                    generation, "generation_info", None
                 )
-
+                batch_data.append(gen_data)
+            response_data["generations"].append(batch_data)
 
-
-
-                    tool_title = (
-                        "LLM -> AGENT TOOL CALLS "
-                        f"(batch={batch_idx}, generation={gen_idx})"
-                    )
-                    print(_format_json_block(tool_title, tool_calls), flush=True)
+        # Output beautified JSON
+        print(_pretty_json(response_data), flush=True)
 
         print(LOG_RESPONSE_END, flush=True)
 
+        # --- OLD TEXT-PARSED LOGGING (commented out) ---
+        # for batch_idx, batch in enumerate(batches):
+        #     for gen_idx, generation in enumerate(batch):
+        #         message = getattr(generation, "message", None)
+        #         if not message:
+        #             continue
+        #         title = f"LLM -> AGENT RESPONSE (batch={batch_idx}, generation={gen_idx})"
+        #         print(_format_messages_block(title, [message]), flush=True)
+        #         tool_calls = getattr(message, "tool_calls", None)
+        #         if tool_calls:
+        #             tool_title = f"LLM -> AGENT TOOL CALLS (batch={batch_idx}, generation={gen_idx})"
+        #             print(_format_json_block(tool_title, tool_calls), flush=True)
+
     def on_llm_start(self, serialized, prompts, **kwargs) -> None:
         # Request logging disabled - only log responses
         pass
```
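Both rewritten callbacks lean on two module helpers that this diff does not show, `_serialize_message` and `_pretty_json`. A plausible minimal sketch of what such helpers could look like (an assumption for illustration, not the module's actual code):

```python
import json

def _serialize_message(message) -> dict:
    """Sketch: reduce a LangChain message object to JSON-safe fields."""
    return {
        "type": message.__class__.__name__,
        "content": getattr(message, "content", None),
        "tool_calls": getattr(message, "tool_calls", None),
        "additional_kwargs": getattr(message, "additional_kwargs", None),
    }

def _pretty_json(data) -> str:
    """Sketch: beautified JSON; non-serializable values fall back to str()."""
    return json.dumps(data, indent=2, ensure_ascii=False, default=str)
```

The `default=str` fallback is the usual way to keep `json.dumps` from raising on provider-specific objects that can show up in fields like `llm_output` and `generation_info`.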
agent_server/langchain/middleware/subagent_middleware.py

```diff
@@ -9,8 +9,11 @@ Key features:
 - Context isolation: subagents run in clean context
 - Synchronous execution: subagent returns result directly to caller
 - Nested subagent support: python_developer can call athena_query
+- Subagent caching: compiled agents are cached to avoid recompilation overhead
 """
 
+import hashlib
+import json
 import logging
 from typing import TYPE_CHECKING, Any, Dict, List, Optional
 
@@ -25,6 +28,8 @@ logger = logging.getLogger(__name__)
 # Global registry for subagent factories (set by AgentFactory)
 _subagent_factory = None
 _current_llm_config = None
+# Subagent cache: key = "{agent_name}_{config_hash}" -> compiled agent
+_subagent_cache: Dict[str, Any] = {}
 
 
 def set_subagent_factory(factory_func, llm_config: Dict[str, Any]):
@@ -32,10 +37,12 @@ def set_subagent_factory(factory_func, llm_config: Dict[str, Any]):
     Set the subagent factory function.
     Called by AgentFactory during initialization.
     """
-    global _subagent_factory, _current_llm_config
+    global _subagent_factory, _current_llm_config, _subagent_cache
     _subagent_factory = factory_func
     _current_llm_config = llm_config
-
+    # Clear cache when factory changes (new LLM config)
+    _subagent_cache.clear()
+    logger.info("SubAgentMiddleware factory initialized (cache cleared)")
 
 
 def get_subagent_factory():
@@ -43,6 +50,48 @@ def get_subagent_factory():
     return _subagent_factory, _current_llm_config
 
 
+def _get_config_hash(llm_config: Dict[str, Any]) -> str:
+    """Generate a hash of llm_config for caching."""
+    config_str = json.dumps(llm_config, sort_keys=True, default=str)
+    return hashlib.md5(config_str.encode()).hexdigest()[:12]
+
+
+def get_or_create_subagent(
+    agent_name: str, factory_func, llm_config: Dict[str, Any]
+) -> Any:
+    """
+    Get cached subagent or create new one.
+
+    Caching avoids expensive recompilation of LangGraph agents.
+    Cache key = "{agent_name}_{config_hash}" to handle different LLM configs.
+    """
+    global _subagent_cache
+
+    config_hash = _get_config_hash(llm_config)
+    cache_key = f"{agent_name}_{config_hash}"
+
+    if cache_key in _subagent_cache:
+        logger.info(f"Using cached subagent '{agent_name}' (key={cache_key})")
+        return _subagent_cache[cache_key]
+
+    logger.info(f"Creating new subagent '{agent_name}' (key={cache_key})...")
+    subagent = factory_func(agent_name, llm_config)
+    _subagent_cache[cache_key] = subagent
+    logger.info(
+        f"Cached subagent '{agent_name}' (total cached: {len(_subagent_cache)})"
+    )
+
+    return subagent
+
+
+def clear_subagent_cache():
+    """Clear the subagent cache. Useful for testing or config changes."""
+    global _subagent_cache
+    count = len(_subagent_cache)
+    _subagent_cache.clear()
+    logger.info(f"Subagent cache cleared ({count} entries removed)")
+
+
 def create_task_tool(
     caller_name: str,
     allowed_subagents: Optional[List[str]] = None,
```
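The cache key combines the agent name with a 12-character MD5 digest of the JSON-serialized config, so the same agent compiled under two different LLM configs occupies two separate slots. A self-contained illustration of the hit/miss behavior (the stub factory and configs below stand in for the real LangGraph compilation):

```python
import hashlib
import json

_cache: dict = {}
compile_count = 0

def _config_hash(cfg: dict) -> str:
    return hashlib.md5(json.dumps(cfg, sort_keys=True, default=str).encode()).hexdigest()[:12]

def get_or_create(name: str, factory, cfg: dict):
    key = f"{name}_{_config_hash(cfg)}"
    if key not in _cache:
        _cache[key] = factory(name, cfg)  # expensive compile runs once per key
    return _cache[key]

def stub_factory(name: str, cfg: dict) -> object:
    global compile_count
    compile_count += 1
    return object()

cfg_a = {"model": "gpt-oss-120b", "endpoint": "http://localhost:8000/v1"}
cfg_b = {"model": "gpt-oss-120b", "endpoint": "https://openrouter.ai/api/v1"}

first = get_or_create("python_developer", stub_factory, cfg_a)
second = get_or_create("python_developer", stub_factory, cfg_a)  # cache hit
other = get_or_create("python_developer", stub_factory, cfg_b)   # new config -> miss

assert first is second and first is not other
assert compile_count == 2  # one compile per (agent, config) pair
```

Note that `set_subagent_factory` clears the cache whenever a new factory/config pair is installed, so stale agents cannot outlive a config change.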
agent_server/langchain/middleware/subagent_middleware.py

```diff
@@ -96,11 +145,13 @@ def create_task_tool(
         )
         context: Optional[str] = Field(
             default=None,
-            description="Additional context for the subagent: resource info (file sizes, memory), previous code, variable state, etc."
+            description="Additional context for the subagent: resource info (file sizes, memory), previous code, variable state, etc.",
         )
 
     @tool(args_schema=TaskInput)
-    def task_tool(
+    def task_tool(
+        agent_name: str, description: str, context: Optional[str] = None
+    ) -> str:
         """
         Delegate a task to a specialized subagent.
 
@@ -133,10 +184,10 @@ def create_task_tool(
 
         # Import subagent event emitters
         from agent_server.langchain.middleware.subagent_events import (
-            emit_subagent_start,
+            clear_current_subagent,
             emit_subagent_complete,
+            emit_subagent_start,
             set_current_subagent,
-            clear_current_subagent,
         )
 
         # Emit subagent start event for UI
@@ -148,11 +199,17 @@ def create_task_tool(
             return "Error: SubAgentMiddleware not initialized. Call set_subagent_factory first."
 
         try:
+            import time
+
             # Set current subagent context for tool call tracking
             set_current_subagent(agent_name)
 
-            #
-            subagent = factory_func(agent_name, llm_config)
+            # Get or create the subagent (cached for performance)
+            # Avoids expensive LangGraph recompilation on each call
+            t0 = time.time()
+            subagent = get_or_create_subagent(agent_name, factory_func, llm_config)
+            t1 = time.time()
+            logger.info(f"[TIMING] get_or_create_subagent took {t1-t0:.2f}s")
 
             # Execute subagent synchronously with clean context
             # The subagent runs in isolation, receiving task description + optional context
```
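The `t0`/`t1` pairs around `get_or_create_subagent`, the code-history injection, and `subagent.invoke()` all follow the same pattern; the repeated bookkeeping could equally be expressed as a small context manager. A sketch of that alternative (not in the package):

```python
import logging
import time
from contextlib import contextmanager

logger = logging.getLogger(__name__)

@contextmanager
def timed(label: str):
    """Log '[TIMING] <label> took N.NNs' around a block."""
    t0 = time.time()
    try:
        yield
    finally:
        logger.info(f"[TIMING] {label} took {time.time() - t0:.2f}s")

# Usage (hypothetical, mirroring the instrumented call sites):
# with timed("get_or_create_subagent"):
#     subagent = get_or_create_subagent(agent_name, factory_func, llm_config)
```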
agent_server/langchain/middleware/subagent_middleware.py

```diff
@@ -169,15 +226,18 @@ def create_task_tool(
             enhanced_context = context
             if agent_name == "python_developer":
                 try:
+                    t2 = time.time()
                     from agent_server.langchain.middleware.code_history_middleware import (
-                        get_context_with_history,
                         get_code_history_tracker,
+                        get_context_with_history,
                     )
+
                     tracker = get_code_history_tracker()
                     if tracker.get_entry_count() > 0:
                         enhanced_context = get_context_with_history(context)
+                        t3 = time.time()
                         logger.info(
-                            f"[
+                            f"[TIMING] code history injection took {t3-t2:.2f}s "
                             f"(entries={tracker.get_entry_count()}, "
                             f"context_len={len(enhanced_context) if enhanced_context else 0})"
                         )
@@ -194,13 +254,21 @@ def create_task_tool(
             else:
                 message_content = description
 
-            logger.info(
+            logger.info(
+                f"[{caller_name}] Subagent message length: {len(message_content)}"
+            )
 
             # Execute the subagent
+            t_invoke_start = time.time()
+            logger.info(f"[TIMING] About to invoke subagent '{agent_name}'...")
             result = subagent.invoke(
                 {"messages": [{"role": "user", "content": message_content}]},
                 config=subagent_config,
             )
+            t_invoke_end = time.time()
+            logger.info(
+                f"[TIMING] subagent.invoke() took {t_invoke_end-t_invoke_start:.2f}s"
+            )
 
             # Extract the final message from the result
             messages = result.get("messages", [])
@@ -223,6 +291,7 @@
                 from agent_server.langchain.middleware.description_injector import (
                     process_task_tool_response,
                 )
+
                 process_task_tool_response(agent_name, str(response))
             except Exception as e:
                 logger.warning(f"Failed to extract description: {e}")
```
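The invoke/extract sequence above returns a LangGraph state dict whose `messages` list ends with the subagent's final answer; `str(response)` is then handed to the description injector. A minimal sketch of that extraction step (the message class and sample data are illustrative, not from the package):

```python
class _Msg:
    def __init__(self, content: str):
        self.content = content

def extract_final_response(result: dict) -> str:
    """Sketch: pull the last message's content from a LangGraph-style result."""
    messages = result.get("messages", [])
    if not messages:
        return "Error: subagent returned no messages."
    last = messages[-1]
    return getattr(last, "content", None) or str(last)

result = {"messages": [_Msg("intermediate plan"), _Msg("final answer")]}
assert extract_final_response(result) == "final answer"
```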