alita-sdk 0.3.379__py3-none-any.whl → 0.3.627__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (278)
  1. alita_sdk/cli/__init__.py +10 -0
  2. alita_sdk/cli/__main__.py +17 -0
  3. alita_sdk/cli/agent/__init__.py +5 -0
  4. alita_sdk/cli/agent/default.py +258 -0
  5. alita_sdk/cli/agent_executor.py +156 -0
  6. alita_sdk/cli/agent_loader.py +245 -0
  7. alita_sdk/cli/agent_ui.py +228 -0
  8. alita_sdk/cli/agents.py +3113 -0
  9. alita_sdk/cli/callbacks.py +647 -0
  10. alita_sdk/cli/cli.py +168 -0
  11. alita_sdk/cli/config.py +306 -0
  12. alita_sdk/cli/context/__init__.py +30 -0
  13. alita_sdk/cli/context/cleanup.py +198 -0
  14. alita_sdk/cli/context/manager.py +731 -0
  15. alita_sdk/cli/context/message.py +285 -0
  16. alita_sdk/cli/context/strategies.py +289 -0
  17. alita_sdk/cli/context/token_estimation.py +127 -0
  18. alita_sdk/cli/formatting.py +182 -0
  19. alita_sdk/cli/input_handler.py +419 -0
  20. alita_sdk/cli/inventory.py +1073 -0
  21. alita_sdk/cli/mcp_loader.py +315 -0
  22. alita_sdk/cli/testcases/__init__.py +94 -0
  23. alita_sdk/cli/testcases/data_generation.py +119 -0
  24. alita_sdk/cli/testcases/discovery.py +96 -0
  25. alita_sdk/cli/testcases/executor.py +84 -0
  26. alita_sdk/cli/testcases/logger.py +85 -0
  27. alita_sdk/cli/testcases/parser.py +172 -0
  28. alita_sdk/cli/testcases/prompts.py +91 -0
  29. alita_sdk/cli/testcases/reporting.py +125 -0
  30. alita_sdk/cli/testcases/setup.py +108 -0
  31. alita_sdk/cli/testcases/test_runner.py +282 -0
  32. alita_sdk/cli/testcases/utils.py +39 -0
  33. alita_sdk/cli/testcases/validation.py +90 -0
  34. alita_sdk/cli/testcases/workflow.py +196 -0
  35. alita_sdk/cli/toolkit.py +327 -0
  36. alita_sdk/cli/toolkit_loader.py +85 -0
  37. alita_sdk/cli/tools/__init__.py +43 -0
  38. alita_sdk/cli/tools/approval.py +224 -0
  39. alita_sdk/cli/tools/filesystem.py +1751 -0
  40. alita_sdk/cli/tools/planning.py +389 -0
  41. alita_sdk/cli/tools/terminal.py +414 -0
  42. alita_sdk/community/__init__.py +72 -12
  43. alita_sdk/community/inventory/__init__.py +236 -0
  44. alita_sdk/community/inventory/config.py +257 -0
  45. alita_sdk/community/inventory/enrichment.py +2137 -0
  46. alita_sdk/community/inventory/extractors.py +1469 -0
  47. alita_sdk/community/inventory/ingestion.py +3172 -0
  48. alita_sdk/community/inventory/knowledge_graph.py +1457 -0
  49. alita_sdk/community/inventory/parsers/__init__.py +218 -0
  50. alita_sdk/community/inventory/parsers/base.py +295 -0
  51. alita_sdk/community/inventory/parsers/csharp_parser.py +907 -0
  52. alita_sdk/community/inventory/parsers/go_parser.py +851 -0
  53. alita_sdk/community/inventory/parsers/html_parser.py +389 -0
  54. alita_sdk/community/inventory/parsers/java_parser.py +593 -0
  55. alita_sdk/community/inventory/parsers/javascript_parser.py +629 -0
  56. alita_sdk/community/inventory/parsers/kotlin_parser.py +768 -0
  57. alita_sdk/community/inventory/parsers/markdown_parser.py +362 -0
  58. alita_sdk/community/inventory/parsers/python_parser.py +604 -0
  59. alita_sdk/community/inventory/parsers/rust_parser.py +858 -0
  60. alita_sdk/community/inventory/parsers/swift_parser.py +832 -0
  61. alita_sdk/community/inventory/parsers/text_parser.py +322 -0
  62. alita_sdk/community/inventory/parsers/yaml_parser.py +370 -0
  63. alita_sdk/community/inventory/patterns/__init__.py +61 -0
  64. alita_sdk/community/inventory/patterns/ast_adapter.py +380 -0
  65. alita_sdk/community/inventory/patterns/loader.py +348 -0
  66. alita_sdk/community/inventory/patterns/registry.py +198 -0
  67. alita_sdk/community/inventory/presets.py +535 -0
  68. alita_sdk/community/inventory/retrieval.py +1403 -0
  69. alita_sdk/community/inventory/toolkit.py +173 -0
  70. alita_sdk/community/inventory/toolkit_utils.py +176 -0
  71. alita_sdk/community/inventory/visualize.py +1370 -0
  72. alita_sdk/configurations/__init__.py +1 -1
  73. alita_sdk/configurations/ado.py +141 -20
  74. alita_sdk/configurations/bitbucket.py +94 -2
  75. alita_sdk/configurations/confluence.py +130 -1
  76. alita_sdk/configurations/figma.py +76 -0
  77. alita_sdk/configurations/gitlab.py +91 -0
  78. alita_sdk/configurations/jira.py +103 -0
  79. alita_sdk/configurations/openapi.py +329 -0
  80. alita_sdk/configurations/qtest.py +72 -1
  81. alita_sdk/configurations/report_portal.py +96 -0
  82. alita_sdk/configurations/sharepoint.py +148 -0
  83. alita_sdk/configurations/testio.py +83 -0
  84. alita_sdk/configurations/testrail.py +88 -0
  85. alita_sdk/configurations/xray.py +93 -0
  86. alita_sdk/configurations/zephyr_enterprise.py +93 -0
  87. alita_sdk/configurations/zephyr_essential.py +75 -0
  88. alita_sdk/runtime/clients/artifact.py +3 -3
  89. alita_sdk/runtime/clients/client.py +388 -46
  90. alita_sdk/runtime/clients/mcp_discovery.py +342 -0
  91. alita_sdk/runtime/clients/mcp_manager.py +262 -0
  92. alita_sdk/runtime/clients/sandbox_client.py +8 -21
  93. alita_sdk/runtime/langchain/_constants_bkup.py +1318 -0
  94. alita_sdk/runtime/langchain/assistant.py +157 -39
  95. alita_sdk/runtime/langchain/constants.py +647 -1
  96. alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
  97. alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +103 -60
  98. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLinesLoader.py +77 -0
  99. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +10 -4
  100. alita_sdk/runtime/langchain/document_loaders/AlitaPowerPointLoader.py +226 -7
  101. alita_sdk/runtime/langchain/document_loaders/AlitaTextLoader.py +5 -2
  102. alita_sdk/runtime/langchain/document_loaders/constants.py +40 -19
  103. alita_sdk/runtime/langchain/langraph_agent.py +405 -84
  104. alita_sdk/runtime/langchain/utils.py +106 -7
  105. alita_sdk/runtime/llms/preloaded.py +2 -6
  106. alita_sdk/runtime/models/mcp_models.py +61 -0
  107. alita_sdk/runtime/skills/__init__.py +91 -0
  108. alita_sdk/runtime/skills/callbacks.py +498 -0
  109. alita_sdk/runtime/skills/discovery.py +540 -0
  110. alita_sdk/runtime/skills/executor.py +610 -0
  111. alita_sdk/runtime/skills/input_builder.py +371 -0
  112. alita_sdk/runtime/skills/models.py +330 -0
  113. alita_sdk/runtime/skills/registry.py +355 -0
  114. alita_sdk/runtime/skills/skill_runner.py +330 -0
  115. alita_sdk/runtime/toolkits/__init__.py +31 -0
  116. alita_sdk/runtime/toolkits/application.py +29 -10
  117. alita_sdk/runtime/toolkits/artifact.py +20 -11
  118. alita_sdk/runtime/toolkits/datasource.py +13 -6
  119. alita_sdk/runtime/toolkits/mcp.py +783 -0
  120. alita_sdk/runtime/toolkits/mcp_config.py +1048 -0
  121. alita_sdk/runtime/toolkits/planning.py +178 -0
  122. alita_sdk/runtime/toolkits/skill_router.py +238 -0
  123. alita_sdk/runtime/toolkits/subgraph.py +251 -6
  124. alita_sdk/runtime/toolkits/tools.py +356 -69
  125. alita_sdk/runtime/toolkits/vectorstore.py +11 -5
  126. alita_sdk/runtime/tools/__init__.py +10 -3
  127. alita_sdk/runtime/tools/application.py +27 -6
  128. alita_sdk/runtime/tools/artifact.py +511 -28
  129. alita_sdk/runtime/tools/data_analysis.py +183 -0
  130. alita_sdk/runtime/tools/function.py +67 -35
  131. alita_sdk/runtime/tools/graph.py +10 -4
  132. alita_sdk/runtime/tools/image_generation.py +148 -46
  133. alita_sdk/runtime/tools/llm.py +1003 -128
  134. alita_sdk/runtime/tools/loop.py +3 -1
  135. alita_sdk/runtime/tools/loop_output.py +3 -1
  136. alita_sdk/runtime/tools/mcp_inspect_tool.py +284 -0
  137. alita_sdk/runtime/tools/mcp_remote_tool.py +181 -0
  138. alita_sdk/runtime/tools/mcp_server_tool.py +8 -5
  139. alita_sdk/runtime/tools/planning/__init__.py +36 -0
  140. alita_sdk/runtime/tools/planning/models.py +246 -0
  141. alita_sdk/runtime/tools/planning/wrapper.py +607 -0
  142. alita_sdk/runtime/tools/router.py +2 -4
  143. alita_sdk/runtime/tools/sandbox.py +65 -48
  144. alita_sdk/runtime/tools/skill_router.py +776 -0
  145. alita_sdk/runtime/tools/tool.py +3 -1
  146. alita_sdk/runtime/tools/vectorstore.py +9 -3
  147. alita_sdk/runtime/tools/vectorstore_base.py +70 -14
  148. alita_sdk/runtime/utils/AlitaCallback.py +137 -21
  149. alita_sdk/runtime/utils/constants.py +5 -1
  150. alita_sdk/runtime/utils/mcp_client.py +492 -0
  151. alita_sdk/runtime/utils/mcp_oauth.py +361 -0
  152. alita_sdk/runtime/utils/mcp_sse_client.py +434 -0
  153. alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
  154. alita_sdk/runtime/utils/serialization.py +155 -0
  155. alita_sdk/runtime/utils/streamlit.py +40 -13
  156. alita_sdk/runtime/utils/toolkit_utils.py +30 -9
  157. alita_sdk/runtime/utils/utils.py +36 -0
  158. alita_sdk/tools/__init__.py +134 -35
  159. alita_sdk/tools/ado/repos/__init__.py +51 -32
  160. alita_sdk/tools/ado/repos/repos_wrapper.py +148 -89
  161. alita_sdk/tools/ado/test_plan/__init__.py +25 -9
  162. alita_sdk/tools/ado/test_plan/test_plan_wrapper.py +23 -1
  163. alita_sdk/tools/ado/utils.py +1 -18
  164. alita_sdk/tools/ado/wiki/__init__.py +25 -12
  165. alita_sdk/tools/ado/wiki/ado_wrapper.py +291 -22
  166. alita_sdk/tools/ado/work_item/__init__.py +26 -13
  167. alita_sdk/tools/ado/work_item/ado_wrapper.py +73 -11
  168. alita_sdk/tools/advanced_jira_mining/__init__.py +11 -8
  169. alita_sdk/tools/aws/delta_lake/__init__.py +13 -9
  170. alita_sdk/tools/aws/delta_lake/tool.py +5 -1
  171. alita_sdk/tools/azure_ai/search/__init__.py +11 -8
  172. alita_sdk/tools/azure_ai/search/api_wrapper.py +1 -1
  173. alita_sdk/tools/base/tool.py +5 -1
  174. alita_sdk/tools/base_indexer_toolkit.py +271 -84
  175. alita_sdk/tools/bitbucket/__init__.py +17 -11
  176. alita_sdk/tools/bitbucket/api_wrapper.py +59 -11
  177. alita_sdk/tools/bitbucket/cloud_api_wrapper.py +49 -35
  178. alita_sdk/tools/browser/__init__.py +5 -4
  179. alita_sdk/tools/carrier/__init__.py +5 -6
  180. alita_sdk/tools/carrier/backend_reports_tool.py +6 -6
  181. alita_sdk/tools/carrier/run_ui_test_tool.py +6 -6
  182. alita_sdk/tools/carrier/ui_reports_tool.py +5 -5
  183. alita_sdk/tools/chunkers/__init__.py +3 -1
  184. alita_sdk/tools/chunkers/code/treesitter/treesitter.py +37 -13
  185. alita_sdk/tools/chunkers/sematic/json_chunker.py +1 -0
  186. alita_sdk/tools/chunkers/sematic/markdown_chunker.py +97 -6
  187. alita_sdk/tools/chunkers/sematic/proposal_chunker.py +1 -1
  188. alita_sdk/tools/chunkers/universal_chunker.py +270 -0
  189. alita_sdk/tools/cloud/aws/__init__.py +10 -7
  190. alita_sdk/tools/cloud/azure/__init__.py +10 -7
  191. alita_sdk/tools/cloud/gcp/__init__.py +10 -7
  192. alita_sdk/tools/cloud/k8s/__init__.py +10 -7
  193. alita_sdk/tools/code/linter/__init__.py +10 -8
  194. alita_sdk/tools/code/loaders/codesearcher.py +3 -2
  195. alita_sdk/tools/code/sonar/__init__.py +11 -8
  196. alita_sdk/tools/code_indexer_toolkit.py +82 -22
  197. alita_sdk/tools/confluence/__init__.py +22 -16
  198. alita_sdk/tools/confluence/api_wrapper.py +107 -30
  199. alita_sdk/tools/confluence/loader.py +14 -2
  200. alita_sdk/tools/custom_open_api/__init__.py +12 -5
  201. alita_sdk/tools/elastic/__init__.py +11 -8
  202. alita_sdk/tools/elitea_base.py +493 -30
  203. alita_sdk/tools/figma/__init__.py +58 -11
  204. alita_sdk/tools/figma/api_wrapper.py +1235 -143
  205. alita_sdk/tools/figma/figma_client.py +73 -0
  206. alita_sdk/tools/figma/toon_tools.py +2748 -0
  207. alita_sdk/tools/github/__init__.py +14 -15
  208. alita_sdk/tools/github/github_client.py +224 -100
  209. alita_sdk/tools/github/graphql_client_wrapper.py +119 -33
  210. alita_sdk/tools/github/schemas.py +14 -5
  211. alita_sdk/tools/github/tool.py +5 -1
  212. alita_sdk/tools/github/tool_prompts.py +9 -22
  213. alita_sdk/tools/gitlab/__init__.py +16 -11
  214. alita_sdk/tools/gitlab/api_wrapper.py +218 -48
  215. alita_sdk/tools/gitlab_org/__init__.py +10 -9
  216. alita_sdk/tools/gitlab_org/api_wrapper.py +63 -64
  217. alita_sdk/tools/google/bigquery/__init__.py +13 -12
  218. alita_sdk/tools/google/bigquery/tool.py +5 -1
  219. alita_sdk/tools/google_places/__init__.py +11 -8
  220. alita_sdk/tools/google_places/api_wrapper.py +1 -1
  221. alita_sdk/tools/jira/__init__.py +17 -10
  222. alita_sdk/tools/jira/api_wrapper.py +92 -41
  223. alita_sdk/tools/keycloak/__init__.py +11 -8
  224. alita_sdk/tools/localgit/__init__.py +9 -3
  225. alita_sdk/tools/localgit/local_git.py +62 -54
  226. alita_sdk/tools/localgit/tool.py +5 -1
  227. alita_sdk/tools/memory/__init__.py +12 -4
  228. alita_sdk/tools/non_code_indexer_toolkit.py +1 -0
  229. alita_sdk/tools/ocr/__init__.py +11 -8
  230. alita_sdk/tools/openapi/__init__.py +491 -106
  231. alita_sdk/tools/openapi/api_wrapper.py +1368 -0
  232. alita_sdk/tools/openapi/tool.py +20 -0
  233. alita_sdk/tools/pandas/__init__.py +20 -12
  234. alita_sdk/tools/pandas/api_wrapper.py +38 -25
  235. alita_sdk/tools/pandas/dataframe/generator/base.py +3 -1
  236. alita_sdk/tools/postman/__init__.py +10 -9
  237. alita_sdk/tools/pptx/__init__.py +11 -10
  238. alita_sdk/tools/pptx/pptx_wrapper.py +1 -1
  239. alita_sdk/tools/qtest/__init__.py +31 -11
  240. alita_sdk/tools/qtest/api_wrapper.py +2135 -86
  241. alita_sdk/tools/rally/__init__.py +10 -9
  242. alita_sdk/tools/rally/api_wrapper.py +1 -1
  243. alita_sdk/tools/report_portal/__init__.py +12 -8
  244. alita_sdk/tools/salesforce/__init__.py +10 -8
  245. alita_sdk/tools/servicenow/__init__.py +17 -15
  246. alita_sdk/tools/servicenow/api_wrapper.py +1 -1
  247. alita_sdk/tools/sharepoint/__init__.py +10 -7
  248. alita_sdk/tools/sharepoint/api_wrapper.py +129 -38
  249. alita_sdk/tools/sharepoint/authorization_helper.py +191 -1
  250. alita_sdk/tools/sharepoint/utils.py +8 -2
  251. alita_sdk/tools/slack/__init__.py +10 -7
  252. alita_sdk/tools/slack/api_wrapper.py +2 -2
  253. alita_sdk/tools/sql/__init__.py +12 -9
  254. alita_sdk/tools/testio/__init__.py +10 -7
  255. alita_sdk/tools/testrail/__init__.py +11 -10
  256. alita_sdk/tools/testrail/api_wrapper.py +1 -1
  257. alita_sdk/tools/utils/__init__.py +9 -4
  258. alita_sdk/tools/utils/content_parser.py +103 -18
  259. alita_sdk/tools/utils/text_operations.py +410 -0
  260. alita_sdk/tools/utils/tool_prompts.py +79 -0
  261. alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +30 -13
  262. alita_sdk/tools/xray/__init__.py +13 -9
  263. alita_sdk/tools/yagmail/__init__.py +9 -3
  264. alita_sdk/tools/zephyr/__init__.py +10 -7
  265. alita_sdk/tools/zephyr_enterprise/__init__.py +11 -7
  266. alita_sdk/tools/zephyr_essential/__init__.py +10 -7
  267. alita_sdk/tools/zephyr_essential/api_wrapper.py +30 -13
  268. alita_sdk/tools/zephyr_essential/client.py +2 -2
  269. alita_sdk/tools/zephyr_scale/__init__.py +11 -8
  270. alita_sdk/tools/zephyr_scale/api_wrapper.py +2 -2
  271. alita_sdk/tools/zephyr_squad/__init__.py +10 -7
  272. {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.627.dist-info}/METADATA +154 -8
  273. alita_sdk-0.3.627.dist-info/RECORD +468 -0
  274. alita_sdk-0.3.627.dist-info/entry_points.txt +2 -0
  275. alita_sdk-0.3.379.dist-info/RECORD +0 -360
  276. {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.627.dist-info}/WHEEL +0 -0
  277. {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.627.dist-info}/licenses/LICENSE +0 -0
  278. {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.627.dist-info}/top_level.txt +0 -0
alita_sdk/runtime/tools/llm.py

@@ -1,16 +1,60 @@
+import asyncio
 import logging
 from traceback import format_exc
-from typing import Any, Optional, List, Union
+from typing import Any, Optional, List, Union, Literal, Dict
 
 from langchain_core.messages import HumanMessage, SystemMessage, AIMessage
 from langchain_core.runnables import RunnableConfig
 from langchain_core.tools import BaseTool, ToolException
+from langchain_core.callbacks import dispatch_custom_event
 from pydantic import Field
 
+from ..langchain.constants import ELITEA_RS
 from ..langchain.utils import create_pydantic_model, propagate_the_input_mapping
 
 logger = logging.getLogger(__name__)
 
+
+# def _is_thinking_model(llm_client: Any) -> bool:
+#     """
+#     Check if a model uses extended thinking capability by reading cached metadata.
+
+#     Thinking models require special message formatting where assistant messages
+#     must start with thinking blocks before tool_use blocks.
+
+#     This function reads the `_supports_reasoning` attribute that should be set
+#     when the LLM client is created (by checking the model's supports_reasoning field).
+
+#     Args:
+#         llm_client: LLM client instance with optional _supports_reasoning attribute
+
+#     Returns:
+#         True if the model is a thinking model, False otherwise
+#     """
+#     if not llm_client:
+#         return False
+
+#     # Check if supports_reasoning was cached on the client
+#     supports_reasoning = getattr(llm_client, '_supports_reasoning', False)
+
+#     if supports_reasoning:
+#         model_name = getattr(llm_client, 'model_name', None) or getattr(llm_client, 'model', 'unknown')
+#         logger.debug(f"Model '{model_name}' is a thinking/reasoning model (cached from API metadata)")
+
+#     return supports_reasoning
+
+JSON_INSTRUCTION_TEMPLATE = (
+    "\n\n**IMPORTANT: You MUST respond with ONLY a valid JSON object.**\n\n"
+    "Required JSON fields:\n{field_descriptions}\n\n"
+    "Example format:\n"
+    "{{\n{example_fields}\n}}\n\n"
+    "Rules:\n"
+    "1. Output ONLY the JSON object - no markdown, no explanations, no extra text\n"
+    "2. Ensure all required fields are present\n"
+    "3. Use proper JSON syntax with double quotes for strings\n"
+    "4. Do not wrap the JSON in code blocks or backticks"
+)
+
 class LLMNode(BaseTool):
     """Enhanced LLM node with chat history and tool binding support"""
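For reference, the new `JSON_INSTRUCTION_TEMPLATE` is a plain `str.format` template; the doubled braces escape the literal JSON braces. A minimal sketch of how it gets filled in (the import path assumes this file is `alita_sdk/runtime/tools/llm.py`, as the file list above suggests, and the field names are made up; the real values come from `_build_json_instruction` further down in this diff):

```python
# Hypothetical usage; real values come from _build_json_instruction() below.
from alita_sdk.runtime.tools.llm import JSON_INSTRUCTION_TEMPLATE  # assumed module path

instruction = JSON_INSTRUCTION_TEMPLATE.format(
    field_descriptions="  - summary (str): one-line summary\n  - tags (list): extracted tags",
    example_fields='  "summary": <str>,\n  "tags": <list>',
)
# The result is appended to the last HumanMessage before retrying in json_mode.
print(instruction)
```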
 
@@ -23,13 +67,250 @@ class LLMNode(BaseTool):
     client: Any = Field(default=None, description='LLM client instance')
     return_type: str = Field(default="str", description='Return type')
     response_key: str = Field(default="messages", description='Response key')
-    structured_output_dict: Optional[dict[str, str]] = Field(default=None, description='Structured output dictionary')
+    structured_output_dict: Optional[Dict[str, Any]] = Field(default=None, description='Structured output dictionary')
     output_variables: Optional[List[str]] = Field(default=None, description='Output variables')
     input_mapping: Optional[dict[str, dict]] = Field(default=None, description='Input mapping')
     input_variables: Optional[List[str]] = Field(default=None, description='Input variables')
     structured_output: Optional[bool] = Field(default=False, description='Whether to use structured output')
     available_tools: Optional[List[BaseTool]] = Field(default=None, description='Available tools for binding')
     tool_names: Optional[List[str]] = Field(default=None, description='Specific tool names to filter')
+    steps_limit: Optional[int] = Field(default=25, description='Maximum steps for tool execution')
+    tool_execution_timeout: Optional[int] = Field(default=900, description='Timeout (seconds) for tool execution. Default is 15 minutes.')
+
+    def _prepare_structured_output_params(self) -> dict:
+        """
+        Prepare structured output parameters from structured_output_dict.
+
+        Expected self.structured_output_dict formats:
+        - {"field": "str"} / {"field": "list"} / {"field": "list[dict]"} / {"field": "any"} ...
+        - OR {"field": {"type": "...", "description": "...", "default": ...}} (optional)
+
+        Returns:
+            Dict[str, Dict] suitable for create_pydantic_model(...)
+        """
+        struct_params: dict[str, dict] = {}
+
+        for key, value in (self.structured_output_dict or {}).items():
+            # Allow either a plain type string or a dict with details
+            if isinstance(value, dict):
+                type_str = str(value.get("type") or "any")
+                desc = value.get("description", "") or ""
+                entry: dict = {"type": type_str, "description": desc}
+                if "default" in value:
+                    entry["default"] = value["default"]
+            else:
+                # Ensure we always have a string type
+                if isinstance(value, str):
+                    type_str = value
+                else:
+                    # If it's already a type object, convert to string representation
+                    type_str = getattr(value, '__name__', 'any')
+
+                entry = {"type": type_str, "description": ""}
+
+            struct_params[key] = entry
+
+        # Add default output field for proper response to user
+        struct_params[ELITEA_RS] = {
+            "description": "final output to user (summarized output from LLM)",
+            "type": "str",
+            "default": None,
+        }
+
+        return struct_params
+
+    def _invoke_with_structured_output(self, llm_client: Any, messages: List, struct_model: Any, config: RunnableConfig):
+        """
+        Invoke LLM with structured output, handling tool calls if present.
+
+        Args:
+            llm_client: LLM client instance
+            messages: List of conversation messages
+            struct_model: Pydantic model for structured output
+            config: Runnable configuration
+
+        Returns:
+            Tuple of (completion, initial_completion, final_messages)
+        """
+        initial_completion = llm_client.invoke(messages, config=config)
+
+        if hasattr(initial_completion, 'tool_calls') and initial_completion.tool_calls:
+            # Handle tool calls first, then apply structured output
+            new_messages, _ = self._run_async_in_sync_context(
+                self.__perform_tool_calling(initial_completion, messages, llm_client, config)
+            )
+            llm = self.__get_struct_output_model(llm_client, struct_model)
+            completion = llm.invoke(new_messages, config=config)
+            return completion, initial_completion, new_messages
+        else:
+            # Direct structured output without tool calls
+            llm = self.__get_struct_output_model(llm_client, struct_model)
+            completion = llm.invoke(messages, config=config)
+            return completion, initial_completion, messages
+
+    def _build_json_instruction(self, struct_model: Any) -> str:
+        """
+        Build JSON instruction message for fallback handling.
+
+        Args:
+            struct_model: Pydantic model with field definitions
+
+        Returns:
+            Formatted JSON instruction string
+        """
+        field_descriptions = []
+        for name, field in struct_model.model_fields.items():
+            field_type = field.annotation.__name__ if hasattr(field.annotation, '__name__') else str(field.annotation)
+            field_desc = field.description or field_type
+            field_descriptions.append(f"  - {name} ({field_type}): {field_desc}")
+
+        example_fields = ",\n".join([
+            f'  "{k}": <{field.annotation.__name__ if hasattr(field.annotation, "__name__") else "value"}>'
+            for k, field in struct_model.model_fields.items()
+        ])
+
+        return JSON_INSTRUCTION_TEMPLATE.format(
+            field_descriptions="\n".join(field_descriptions),
+            example_fields=example_fields
+        )
+
+    def _create_fallback_completion(self, content: str, struct_model: Any) -> Any:
+        """
+        Create a fallback completion object when JSON parsing fails.
+
+        Args:
+            content: Plain text content from LLM
+            struct_model: Pydantic model to construct
+
+        Returns:
+            Pydantic model instance with fallback values
+        """
+        result_dict = {}
+        for k, field in struct_model.model_fields.items():
+            if k == ELITEA_RS:
+                result_dict[k] = content
+            elif field.is_required():
+                # Set default values for required fields based on type
+                result_dict[k] = field.default if field.default is not None else None
+            else:
+                result_dict[k] = field.default
+        return struct_model.model_construct(**result_dict)
+
+ def _handle_structured_output_fallback(self, llm_client: Any, messages: List, struct_model: Any,
200
+ config: RunnableConfig, original_error: Exception) -> Any:
201
+ """
202
+ Handle structured output fallback through multiple strategies.
203
+
204
+ Tries fallback methods in order:
205
+ 1. json_mode with explicit instructions
206
+ 2. function_calling method
207
+ 3. Plain text with JSON extraction
208
+
209
+ Args:
210
+ llm_client: LLM client instance
211
+ messages: Original conversation messages
212
+ struct_model: Pydantic model for structured output
213
+ config: Runnable configuration
214
+ original_error: The original ValueError that triggered fallback
215
+
216
+ Returns:
217
+ Completion with structured output (best effort)
218
+
219
+ Raises:
220
+ Propagates exceptions from LLM invocation
221
+ """
222
+ logger.error(f"Error invoking structured output model: {format_exc()}")
223
+ logger.info("Attempting to fall back to json mode")
224
+
225
+ # Build JSON instruction once
226
+ json_instruction = self._build_json_instruction(struct_model)
227
+
228
+ # Add instruction to messages
229
+ modified_messages = messages.copy()
230
+ if modified_messages and isinstance(modified_messages[-1], HumanMessage):
231
+ modified_messages[-1] = HumanMessage(
232
+ content=modified_messages[-1].content + json_instruction
233
+ )
234
+ else:
235
+ modified_messages.append(HumanMessage(content=json_instruction))
236
+
237
+ # Try json_mode with explicit instructions
238
+ try:
239
+ completion = self.__get_struct_output_model(
240
+ llm_client, struct_model, method="json_mode"
241
+ ).invoke(modified_messages, config=config)
242
+ return completion
243
+ except Exception as json_mode_error:
244
+ logger.warning(f"json_mode also failed: {json_mode_error}")
245
+ logger.info("Falling back to function_calling method")
246
+
247
+ # Try function_calling as a third fallback
248
+ try:
249
+ completion = self.__get_struct_output_model(
250
+ llm_client, struct_model, method="function_calling"
251
+ ).invoke(modified_messages, config=config)
252
+ return completion
253
+ except Exception as function_calling_error:
254
+ logger.error(f"function_calling also failed: {function_calling_error}")
255
+ logger.info("Final fallback: using plain LLM response")
256
+
257
+ # Last resort: get plain text response and wrap in structure
258
+ plain_completion = llm_client.invoke(modified_messages, config=config)
259
+ content = plain_completion.content.strip() if hasattr(plain_completion, 'content') else str(plain_completion)
260
+
261
+ # Try to extract JSON from the response
262
+ import json
263
+ import re
264
+
265
+ json_match = re.search(r'\{.*\}', content, re.DOTALL)
266
+ if json_match:
267
+ try:
268
+ parsed_json = json.loads(json_match.group(0))
269
+ # Validate it has expected fields and wrap in pydantic model
270
+ completion = struct_model(**parsed_json)
271
+ return completion
272
+ except (json.JSONDecodeError, Exception) as parse_error:
273
+ logger.warning(f"Could not parse extracted JSON: {parse_error}")
274
+ return self._create_fallback_completion(content, struct_model)
275
+ else:
276
+ # No JSON found, create response with content in elitea_response
277
+ return self._create_fallback_completion(content, struct_model)
278
+
279
+ def _format_structured_output_result(self, result: dict, messages: List, initial_completion: Any) -> dict:
280
+ """
281
+ Format structured output result with properly formatted messages.
282
+
283
+ Args:
284
+ result: Result dictionary from model_dump()
285
+ messages: Original conversation messages
286
+ initial_completion: Initial completion before tool calls
287
+
288
+ Returns:
289
+ Formatted result dictionary with messages
290
+ """
291
+ # Ensure messages are properly formatted
292
+ if result.get('messages') and isinstance(result['messages'], list):
293
+ result['messages'] = [{'role': 'assistant', 'content': '\n'.join(result['messages'])}]
294
+ else:
295
+ # Extract content from initial_completion, handling thinking blocks
296
+ fallback_content = result.get(ELITEA_RS, '')
297
+ if not fallback_content and initial_completion:
298
+ content_parts = self._extract_content_from_completion(initial_completion)
299
+ fallback_content = content_parts.get('text') or ''
300
+ thinking = content_parts.get('thinking')
301
+
302
+ # Log thinking if present
303
+ if thinking:
304
+ logger.debug(f"Thinking content present in structured output: {thinking[:100]}...")
305
+
306
+ if not fallback_content:
307
+ # Final fallback to raw content
308
+ content = initial_completion.content
309
+ fallback_content = content if isinstance(content, str) else str(content)
310
+
311
+ result['messages'] = messages + [AIMessage(content=fallback_content)]
312
+
313
+ return result
33
314
 
34
315
  def get_filtered_tools(self) -> List[BaseTool]:
35
316
  """
@@ -58,6 +339,47 @@ class LLMNode(BaseTool):
 
         return filtered_tools
 
+    def _get_tool_truncation_suggestions(self, tool_name: Optional[str]) -> str:
+        """
+        Get context-specific suggestions for how to reduce output from a tool.
+
+        First checks if the tool itself provides truncation suggestions via
+        `truncation_suggestions` attribute or `get_truncation_suggestions()` method.
+        Falls back to generic suggestions if the tool doesn't provide any.
+
+        Args:
+            tool_name: Name of the tool that caused the context overflow
+
+        Returns:
+            Formatted string with numbered suggestions for the specific tool
+        """
+        suggestions = None
+
+        # Try to get suggestions from the tool itself
+        if tool_name:
+            filtered_tools = self.get_filtered_tools()
+            for tool in filtered_tools:
+                if tool.name == tool_name:
+                    # Check for truncation_suggestions attribute
+                    if hasattr(tool, 'truncation_suggestions') and tool.truncation_suggestions:
+                        suggestions = tool.truncation_suggestions
+                        break
+                    # Check for get_truncation_suggestions method
+                    elif hasattr(tool, 'get_truncation_suggestions') and callable(tool.get_truncation_suggestions):
+                        suggestions = tool.get_truncation_suggestions()
+                        break
+
+        # Fall back to generic suggestions if tool doesn't provide any
+        if not suggestions:
+            suggestions = [
+                "Check if the tool has parameters to limit output size (e.g., max_items, max_results, max_depth)",
+                "Target a more specific path or query instead of broad searches",
+                "Break the operation into smaller, focused requests",
+            ]
+
+        # Format as numbered list
+        return "\n".join(f"{i+1}. {s}" for i, s in enumerate(suggestions))
+
     def invoke(
         self,
         state: Union[str, dict],
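The lookup in `_get_tool_truncation_suggestions` is duck-typed, so any tool can opt in by exposing either hook. A minimal sketch of a tool advertising its own hints (the class and hints are hypothetical):

```python
from langchain_core.tools import BaseTool

class RepoTreeTool(BaseTool):
    name: str = "repo_tree"
    description: str = "List repository files"
    # Picked up via the truncation_suggestions attribute check above
    truncation_suggestions: list = [
        "Pass max_depth to limit directory recursion",
        "Scope the listing to a subdirectory instead of the repo root",
    ]

    def _run(self, path: str = ".") -> str:
        return "...file listing..."
```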
@@ -84,12 +406,15 @@ class LLMNode(BaseTool):
             # or standalone LLM node for chat (with messages only)
             if 'system' in func_args.keys():
                 # Flow for LLM node with prompt/task from pipeline
-                if not func_args.get('system') or not func_args.get('task'):
+                if func_args.get('system') is None or func_args.get('task') is None:
                     raise ToolException(f"LLMNode requires 'system' and 'task' parameters in input mapping. "
                                         f"Actual params: {func_args}")
                 # cast to str in case user passes variable different from str
-                messages = [SystemMessage(content=str(func_args.get('system'))), HumanMessage(content=str(func_args.get('task')))]
-                messages.extend(func_args.get('chat_history', []))
+                messages = [SystemMessage(content=str(func_args.get('system'))), *func_args.get('chat_history', []), HumanMessage(content=str(func_args.get('task')))]
+                # Remove pre-last item if last two messages are same type and content
+                if len(messages) >= 2 and type(messages[-1]) == type(messages[-2]) and messages[-1].content == messages[-2].content:
+                    messages.pop(-2)
             else:
                 # Flow for chat-based LLM node w/o prompt/task from pipeline but with messages in state
                 # verify messages structure
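The reordering here matters: chat history is now spliced between the system prompt and the current task (previously it was appended after the task), and a duplicated trailing human message, common when the task was already recorded in history upstream, is dropped. A small self-contained sketch of the dedupe:

```python
from langchain_core.messages import HumanMessage, SystemMessage

history = [HumanMessage(content="summarize the repo")]  # upstream already logged the task
messages = [SystemMessage(content="You are a helper"),
            *history,
            HumanMessage(content="summarize the repo")]

# Last two entries have the same type and content -> drop the pre-last one
if len(messages) >= 2 and type(messages[-1]) is type(messages[-2]) \
        and messages[-1].content == messages[-2].content:
    messages.pop(-2)

assert [m.content for m in messages] == ["You are a helper", "summarize the repo"]
```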
@@ -115,21 +440,23 @@ class LLMNode(BaseTool):
         try:
             if self.structured_output and self.output_variables:
                 # Handle structured output
-                struct_params = {
-                    key: {
-                        "type": 'list[str]' if 'list' in value else value,
-                        "description": ""
-                    }
-                    for key, value in (self.structured_output_dict or {}).items()
-                }
+                struct_params = self._prepare_structured_output_params()
                 struct_model = create_pydantic_model(f"LLMOutput", struct_params)
-                llm = llm_client.with_structured_output(struct_model)
-                completion = llm.invoke(messages, config=config)
-                result = completion.model_dump()
 
-                # Ensure messages are properly formatted
-                if result.get('messages') and isinstance(result['messages'], list):
-                    result['messages'] = [{'role': 'assistant', 'content': '\n'.join(result['messages'])}]
+                try:
+                    completion, initial_completion, final_messages = self._invoke_with_structured_output(
+                        llm_client, messages, struct_model, config
+                    )
+                except ValueError as e:
+                    # Handle fallback for structured output failures
+                    completion = self._handle_structured_output_fallback(
+                        llm_client, messages, struct_model, config, e
+                    )
+                    initial_completion = None
+                    final_messages = messages
+
+                result = completion.model_dump()
+                result = self._format_structured_output_result(result, final_messages, initial_completion or completion)
 
                 return result
             else:
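The replaced dict comprehension only special-cased type strings containing 'list'; `_prepare_structured_output_params` (added earlier in this diff) accepts both shorthand and detailed specs. A sketch of the normalization, with shapes taken from its docstring (`ELITEA_RS` appears to be the reserved 'elitea_response' key, judging by comments elsewhere in this diff):

```python
# Both shapes may be mixed in structured_output_dict:
spec = {
    "title": "str",
    "tags": {"type": "list[str]", "description": "extracted tags", "default": []},
}

# Normalized form handed to create_pydantic_model(), with ELITEA_RS appended:
expected = {
    "title": {"type": "str", "description": ""},
    "tags": {"type": "list[str]", "description": "extracted tags", "default": []},
    "elitea_response": {  # assumed value of ELITEA_RS
        "type": "str",
        "description": "final output to user (summarized output from LLM)",
        "default": None,
    },
}
```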
@@ -139,138 +466,686 @@ class LLMNode(BaseTool):
                 # Handle both tool-calling and regular responses
                 if hasattr(completion, 'tool_calls') and completion.tool_calls:
                     # Handle iterative tool-calling and execution
-                    new_messages = messages + [completion]
-                    max_iterations = 15
-                    iteration = 0
-
-                    # Continue executing tools until no more tool calls or max iterations reached
-                    current_completion = completion
-                    while (hasattr(current_completion, 'tool_calls') and
-                           current_completion.tool_calls and
-                           iteration < max_iterations):
-
-                        iteration += 1
-                        logger.info(f"Tool execution iteration {iteration}/{max_iterations}")
-
-                        # Execute each tool call in the current completion
-                        tool_calls = current_completion.tool_calls if hasattr(current_completion.tool_calls, '__iter__') else []
-
-                        for tool_call in tool_calls:
-                            tool_name = tool_call.get('name', '') if isinstance(tool_call, dict) else getattr(tool_call, 'name', '')
-                            tool_args = tool_call.get('args', {}) if isinstance(tool_call, dict) else getattr(tool_call, 'args', {})
-                            tool_call_id = tool_call.get('id', '') if isinstance(tool_call, dict) else getattr(tool_call, 'id', '')
-
-                            # Find the tool in filtered tools
-                            filtered_tools = self.get_filtered_tools()
-                            tool_to_execute = None
-                            for tool in filtered_tools:
-                                if tool.name == tool_name:
-                                    tool_to_execute = tool
-                                    break
+                    new_messages, current_completion = self._run_async_in_sync_context(
+                        self.__perform_tool_calling(completion, messages, llm_client, config)
+                    )
 
-                            if tool_to_execute:
+                    output_msgs = {"messages": new_messages}
+                    if self.output_variables:
+                        if self.output_variables[0] == 'messages':
+                            return output_msgs
+                        # Extract content properly from thinking-enabled responses
+                        if current_completion:
+                            content_parts = self._extract_content_from_completion(current_completion)
+                            text_content = content_parts.get('text')
+                            thinking = content_parts.get('thinking')
+
+                            # Dispatch thinking event if present
+                            if thinking:
                                 try:
-                                    logger.info(f"Executing tool '{tool_name}' with args: {tool_args}")
-                                    tool_result = tool_to_execute.invoke(tool_args)
-
-                                    # Create tool message with result - preserve structured content
-                                    from langchain_core.messages import ToolMessage
-
-                                    # Check if tool_result is structured content (list of dicts)
-                                    # TODO: need solid check for being compatible with ToolMessage content format
-                                    if isinstance(tool_result, list) and all(
-                                        isinstance(item, dict) and 'type' in item for item in tool_result
-                                    ):
-                                        # Use structured content directly for multimodal support
-                                        tool_message = ToolMessage(
-                                            content=tool_result,
-                                            tool_call_id=tool_call_id
-                                        )
-                                    else:
-                                        # Fallback to string conversion for other tool results
-                                        tool_message = ToolMessage(
-                                            content=str(tool_result),
-                                            tool_call_id=tool_call_id
-                                        )
-                                    new_messages.append(tool_message)
-
-                                except Exception as e:
-                                    logger.error(f"Error executing tool '{tool_name}': {e}")
-                                    # Create error tool message
-                                    from langchain_core.messages import ToolMessage
-                                    tool_message = ToolMessage(
-                                        content=f"Error executing {tool_name}: {str(e)}",
-                                        tool_call_id=tool_call_id
+                                    model_name = getattr(llm_client, 'model_name', None) or getattr(llm_client, 'model', 'LLM')
+                                    dispatch_custom_event(
+                                        name="thinking_step",
+                                        data={
+                                            "message": thinking,
+                                            "tool_name": f"LLM ({model_name})",
+                                            "toolkit": "reasoning",
+                                        },
+                                        config=config,
                                     )
-                                    new_messages.append(tool_message)
+                                except Exception as e:
+                                    logger.warning(f"Failed to dispatch thinking event: {e}")
+
+                            if text_content:
+                                output_msgs[self.output_variables[0]] = text_content
                             else:
-                                logger.warning(f"Tool '{tool_name}' not found in available tools")
-                                # Create error tool message for missing tool
-                                from langchain_core.messages import ToolMessage
-                                tool_message = ToolMessage(
-                                    content=f"Tool '{tool_name}' not available",
-                                    tool_call_id=tool_call_id
-                                )
-                                new_messages.append(tool_message)
-
-                        # Call LLM again with tool results to get next response
-                        try:
-                            current_completion = llm_client.invoke(new_messages, config=config)
-                            new_messages.append(current_completion)
+                                # Fallback to raw content
+                                content = current_completion.content
+                                output_msgs[self.output_variables[0]] = content if isinstance(content, str) else str(content)
+                        else:
+                            output_msgs[self.output_variables[0]] = None
 
-                            # Check if we still have tool calls
-                            if hasattr(current_completion, 'tool_calls') and current_completion.tool_calls:
-                                logger.info(f"LLM requested {len(current_completion.tool_calls)} more tool calls")
-                            else:
-                                logger.info("LLM completed without requesting more tools")
-                                break
-
-                        except Exception as e:
-                            logger.error(f"Error in LLM call during iteration {iteration}: {e}")
-                            # Add error message and break the loop
-                            error_msg = f"Error processing tool results in iteration {iteration}: {str(e)}"
-                            new_messages.append(AIMessage(content=error_msg))
-                            break
-
-                    # Log completion status
-                    if iteration >= max_iterations:
-                        logger.warning(f"Reached maximum iterations ({max_iterations}) for tool execution")
-                        # Add a warning message to the chat
-                        warning_msg = f"Maximum tool execution iterations ({max_iterations}) reached. Stopping tool execution."
-                        new_messages.append(AIMessage(content=warning_msg))
-                    else:
-                        logger.info(f"Tool execution completed after {iteration} iterations")
-
-                    return {"messages": new_messages}
+                    return output_msgs
                 else:
-                    # Regular text response
-                    content = completion.content.strip() if hasattr(completion, 'content') else str(completion)
+                    # Regular text response - handle both simple strings and thinking-enabled responses
+                    content_parts = self._extract_content_from_completion(completion)
+                    thinking = content_parts.get('thinking')
+                    text_content = content_parts.get('text') or ''
+
+                    # Fallback to string representation if no content extracted
+                    if not text_content:
+                        if hasattr(completion, 'content'):
+                            content = completion.content
+                            text_content = content.strip() if isinstance(content, str) else str(content)
+                        else:
+                            text_content = str(completion)
+
+                    # Dispatch thinking step event to chat if present
+                    if thinking:
+                        logger.info(f"Model thinking: {thinking[:200]}..." if len(thinking) > 200 else f"Model thinking: {thinking}")
+
+                        # Dispatch custom event for thinking step to be displayed in chat
+                        try:
+                            model_name = getattr(llm_client, 'model_name', None) or getattr(llm_client, 'model', 'LLM')
+                            dispatch_custom_event(
+                                name="thinking_step",
+                                data={
+                                    "message": thinking,
+                                    "tool_name": f"LLM ({model_name})",
+                                    "toolkit": "reasoning",
+                                },
+                                config=config,
+                            )
+                        except Exception as e:
+                            logger.warning(f"Failed to dispatch thinking event: {e}")
+
+                    # Build the AI message with both thinking and text
+                    # Store thinking in additional_kwargs for potential future use
+                    ai_message_kwargs = {'content': text_content}
+                    if thinking:
+                        ai_message_kwargs['additional_kwargs'] = {'thinking': thinking}
+                    ai_message = AIMessage(**ai_message_kwargs)
 
                     # Try to extract JSON if output variables are specified (but exclude 'messages' which is handled separately)
                     json_output_vars = [var for var in (self.output_variables or []) if var != 'messages']
                     if json_output_vars:
                         # set response to be the first output variable for non-structured output
-                        response_data = {json_output_vars[0]: content}
-                        new_messages = messages + [AIMessage(content=content)]
+                        response_data = {json_output_vars[0]: text_content}
+                        new_messages = messages + [ai_message]
                         response_data['messages'] = new_messages
                         return response_data
 
                     # Simple text response (either no output variables or JSON parsing failed)
-                    new_messages = messages + [AIMessage(content=content)]
+                    new_messages = messages + [ai_message]
                     return {"messages": new_messages}
 
         except Exception as e:
+            # Enhanced error logging with model diagnostics
+            model_info = getattr(llm_client, 'model_name', None) or getattr(llm_client, 'model', 'unknown')
             logger.error(f"Error in LLM Node: {format_exc()}")
+            logger.error(f"Model being used: {model_info}")
+            logger.error(f"Error type: {type(e).__name__}")
+
             error_msg = f"Error: {e}"
             new_messages = messages + [AIMessage(content=error_msg)]
             return {"messages": new_messages}
 
     def _run(self, *args, **kwargs):
         # Legacy support for old interface
-        return self.invoke(kwargs, **kwargs)
+        return self.invoke(kwargs, **kwargs)
+
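`dispatch_custom_event` (langchain-core >= 0.2.15) routes ad-hoc events to whatever callback handlers are attached to the run, which is how the "thinking_step" events above reach the chat UI. A minimal standalone sketch with a hypothetical handler:

```python
from langchain_core.callbacks import dispatch_custom_event
from langchain_core.callbacks.base import BaseCallbackHandler
from langchain_core.runnables import RunnableLambda

class ThinkingPrinter(BaseCallbackHandler):
    def on_custom_event(self, name, data, *, run_id, **kwargs):
        if name == "thinking_step":
            print(f"[{data['tool_name']}] {data['message']}")

def node(_input, config):
    # Same event name and payload shape as the LLMNode code above
    dispatch_custom_event(
        "thinking_step",
        {"message": "weighing options...", "tool_name": "LLM (demo)", "toolkit": "reasoning"},
        config=config,
    )
    return "done"

RunnableLambda(node).invoke({}, config={"callbacks": [ThinkingPrinter()]})
```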
+    @staticmethod
+    def _extract_content_from_completion(completion) -> dict:
+        """Extract thinking and text content from LLM completion.
+
+        Handles Anthropic's extended thinking format where content is a list
+        of blocks with types: 'thinking' and 'text'.
+
+        Args:
+            completion: LLM completion object with content attribute
+
+        Returns:
+            dict with 'thinking' and 'text' keys
+        """
+        result = {'thinking': None, 'text': None}
+
+        if not hasattr(completion, 'content'):
+            return result
+
+        content = completion.content
+
+        # Handle list of content blocks (Anthropic extended thinking format)
+        if isinstance(content, list):
+            thinking_blocks = []
+            text_blocks = []
+
+            for block in content:
+                if isinstance(block, dict):
+                    block_type = block.get('type', '')
+                    if block_type == 'thinking':
+                        thinking_blocks.append(block.get('thinking', ''))
+                    elif block_type == 'text':
+                        text_blocks.append(block.get('text', ''))
+                elif hasattr(block, 'type'):
+                    # Handle object format
+                    if block.type == 'thinking':
+                        thinking_blocks.append(getattr(block, 'thinking', ''))
+                    elif block.type == 'text':
+                        text_blocks.append(getattr(block, 'text', ''))
+
+            if thinking_blocks:
+                result['thinking'] = '\n\n'.join(thinking_blocks)
+            if text_blocks:
+                result['text'] = '\n\n'.join(text_blocks)
+
+        # Handle simple string content
+        elif isinstance(content, str):
+            result['text'] = content
+
+        return result
+
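For context, Anthropic's extended-thinking responses arrive as a list of typed content blocks rather than a plain string; this is the shape the method above normalizes (values are illustrative):

```python
# Illustrative AIMessage.content for an extended-thinking response:
content = [
    {"type": "thinking", "thinking": "The user wants X, so first check Y..."},
    {"type": "text", "text": "Here is the answer."},
]

# _extract_content_from_completion() reduces this to:
# {'thinking': 'The user wants X, so first check Y...',
#  'text': 'Here is the answer.'}
```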
+    def _run_async_in_sync_context(self, coro):
+        """Run async coroutine from sync context.
+
+        For MCP tools with persistent sessions, we reuse the same event loop
+        that was used to create the MCP client and sessions (set by CLI).
+
+        When called from within a running event loop (e.g., nested LLM nodes),
+        we need to handle this carefully to avoid "event loop already running" errors.
+
+        This method handles three scenarios:
+        1. Called from async context (event loop running) - creates new thread with new loop
+        2. Called from sync context with persistent loop - reuses persistent loop
+        3. Called from sync context without loop - creates new persistent loop
+        """
+        import threading
+
+        # Check if there's a running loop
+        try:
+            running_loop = asyncio.get_running_loop()
+            loop_is_running = True
+            logger.debug(f"Detected running event loop (id: {id(running_loop)}), executing tool calls in separate thread")
+        except RuntimeError:
+            loop_is_running = False
+
+        # Scenario 1: Loop is currently running - MUST use thread
+        if loop_is_running:
+            result_container = []
+            exception_container = []
+
+            # Try to capture Streamlit context from current thread for propagation
+            streamlit_ctx = None
+            try:
+                from streamlit.runtime.scriptrunner import get_script_run_ctx, add_script_run_ctx
+                streamlit_ctx = get_script_run_ctx()
+                if streamlit_ctx:
+                    logger.debug("Captured Streamlit context for propagation to worker thread")
+            except (ImportError, Exception) as e:
+                logger.debug(f"Streamlit context not available or failed to capture: {e}")
+
+            def run_in_thread():
+                """Run coroutine in a new thread with its own event loop."""
+                new_loop = asyncio.new_event_loop()
+                asyncio.set_event_loop(new_loop)
+                try:
+                    result = new_loop.run_until_complete(coro)
+                    result_container.append(result)
+                except Exception as e:
+                    logger.debug(f"Exception in async thread: {e}")
+                    exception_container.append(e)
+                finally:
+                    new_loop.close()
+                    asyncio.set_event_loop(None)
+
+            thread = threading.Thread(target=run_in_thread, daemon=False)
+
+            # Propagate Streamlit context to the worker thread if available
+            if streamlit_ctx is not None:
+                try:
+                    add_script_run_ctx(thread, streamlit_ctx)
+                    logger.debug("Successfully propagated Streamlit context to worker thread")
+                except Exception as e:
+                    logger.warning(f"Failed to propagate Streamlit context to worker thread: {e}")
+
+            thread.start()
+            thread.join(timeout=self.tool_execution_timeout)
+
+            if thread.is_alive():
+                logger.error(f"Async operation timed out after {self.tool_execution_timeout} seconds")
+                raise TimeoutError("Async operation in thread timed out")
+
+            # Re-raise exception if one occurred
+            if exception_container:
+                raise exception_container[0]
+
+            return result_container[0] if result_container else None
+
+        # Scenario 2 & 3: No loop running - use or create persistent loop
+        else:
+            # Get or create persistent loop
+            if not hasattr(self.__class__, '_persistent_loop') or \
+                    self.__class__._persistent_loop is None or \
+                    self.__class__._persistent_loop.is_closed():
+                self.__class__._persistent_loop = asyncio.new_event_loop()
+                logger.debug("Created persistent event loop for async tools")
+
+            loop = self.__class__._persistent_loop
+
+            # Double-check the loop is not running (safety check)
+            if loop.is_running():
+                logger.debug("Persistent loop is unexpectedly running, using thread execution")
+
+                result_container = []
+                exception_container = []
+
+                # Try to capture Streamlit context from current thread for propagation
+                streamlit_ctx = None
+                try:
+                    from streamlit.runtime.scriptrunner import get_script_run_ctx, add_script_run_ctx
+                    streamlit_ctx = get_script_run_ctx()
+                    if streamlit_ctx:
+                        logger.debug("Captured Streamlit context for propagation to worker thread")
+                except (ImportError, Exception) as e:
+                    logger.debug(f"Streamlit context not available or failed to capture: {e}")
+
+                def run_in_thread():
+                    """Run coroutine in a new thread with its own event loop."""
+                    new_loop = asyncio.new_event_loop()
+                    asyncio.set_event_loop(new_loop)
+                    try:
+                        result = new_loop.run_until_complete(coro)
+                        result_container.append(result)
+                    except Exception as ex:
+                        logger.debug(f"Exception in async thread: {ex}")
+                        exception_container.append(ex)
+                    finally:
+                        new_loop.close()
+                        asyncio.set_event_loop(None)
+
+                thread = threading.Thread(target=run_in_thread, daemon=False)
+
+                # Propagate Streamlit context to the worker thread if available
+                if streamlit_ctx is not None:
+                    try:
+                        add_script_run_ctx(thread, streamlit_ctx)
+                        logger.debug("Successfully propagated Streamlit context to worker thread")
+                    except Exception as e:
+                        logger.warning(f"Failed to propagate Streamlit context to worker thread: {e}")
+
+                thread.start()
+                thread.join(timeout=self.tool_execution_timeout)
+
+                if thread.is_alive():
+                    logger.error(f"Async operation timed out after {self.tool_execution_timeout} seconds")
+                    raise TimeoutError("Async operation in thread timed out")
+
+                if exception_container:
+                    raise exception_container[0]
+
+                return result_container[0] if result_container else None
+            else:
+                # Loop exists but not running - safe to use run_until_complete
+                logger.debug(f"Using persistent loop (id: {id(loop)}) with run_until_complete")
+                asyncio.set_event_loop(loop)
+                return loop.run_until_complete(coro)
+
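The thread-plus-fresh-loop pattern above is the standard way to drive a coroutine to completion from sync code when the calling thread may already have a running event loop (where `run_until_complete` would raise). A stripped-down sketch of just that mechanism:

```python
import asyncio
import threading

def run_coro_blocking(coro, timeout=900):
    """Run a coroutine from sync code via a worker thread with its own loop."""
    results, errors = [], []

    def worker():
        loop = asyncio.new_event_loop()
        try:
            results.append(loop.run_until_complete(coro))
        except Exception as e:
            errors.append(e)
        finally:
            loop.close()

    t = threading.Thread(target=worker)
    t.start()
    t.join(timeout=timeout)
    if t.is_alive():
        raise TimeoutError("coroutine did not finish in time")
    if errors:
        raise errors[0]
    return results[0] if results else None
```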
772
+ async def _arun(self, *args, **kwargs):
773
+ # Legacy async support
774
+ return self.invoke(kwargs, **kwargs)
775
+
776
+ async def __perform_tool_calling(self, completion, messages, llm_client, config):
777
+ # Handle iterative tool-calling and execution
778
+ logger.info(f"__perform_tool_calling called with {len(completion.tool_calls) if hasattr(completion, 'tool_calls') else 0} tool calls")
779
+
780
+ # Check if this is a thinking model - they require special message handling
781
+ # model_name = getattr(llm_client, 'model_name', None) or getattr(llm_client, 'model', '')
782
+ # if _is_thinking_model(llm_client):
783
+ # logger.warning(
784
+ # f"⚠️ THINKING/REASONING MODEL DETECTED: '{model_name}'\n"
785
+ # f"Tool execution with thinking models may fail due to message format requirements.\n"
786
+ # f"Thinking models require 'thinking_blocks' to be preserved between turns, which this "
787
+ # f"framework cannot do.\n"
788
+ # f"Recommendation: Use standard model variants (e.g., claude-3-5-sonnet-20241022-v2:0) "
789
+ # f"instead of thinking/reasoning variants for tool calling.\n"
790
+ # f"See: https://docs.litellm.ai/docs/reasoning_content"
791
+ # )
792
+
793
+ new_messages = messages + [completion]
794
+ iteration = 0
795
+
796
+ # Continue executing tools until no more tool calls or max iterations reached
797
+ current_completion = completion
798
+ while (hasattr(current_completion, 'tool_calls') and
799
+ current_completion.tool_calls and
800
+ iteration < self.steps_limit):
801
+
802
+ iteration += 1
803
+ logger.info(f"Tool execution iteration {iteration}/{self.steps_limit}")
804
+
805
+ # Execute each tool call in the current completion
806
+ tool_calls = current_completion.tool_calls if hasattr(current_completion.tool_calls,
807
+ '__iter__') else []
808
+
809
+ for tool_call in tool_calls:
810
+ tool_name = tool_call.get('name', '') if isinstance(tool_call, dict) else getattr(tool_call,
811
+ 'name',
812
+ '')
813
+ tool_args = tool_call.get('args', {}) if isinstance(tool_call, dict) else getattr(tool_call,
814
+ 'args',
815
+ {})
816
+ tool_call_id = tool_call.get('id', '') if isinstance(tool_call, dict) else getattr(
817
+ tool_call, 'id', '')
818
+
819
+ # Find the tool in filtered tools
820
+ filtered_tools = self.get_filtered_tools()
821
+ tool_to_execute = None
822
+ for tool in filtered_tools:
823
+ if tool.name == tool_name:
824
+ tool_to_execute = tool
825
+ break
826
+
827
+ if tool_to_execute:
828
+ try:
829
+ logger.info(f"Executing tool '{tool_name}' with args: {tool_args}")
830
+
831
+ # Try async invoke first (for MCP tools), fallback to sync
832
+ tool_result = None
833
+ if hasattr(tool_to_execute, 'ainvoke'):
834
+ try:
835
+ tool_result = await tool_to_execute.ainvoke(tool_args, config=config)
836
+ except (NotImplementedError, AttributeError):
837
+ logger.debug(f"Tool '{tool_name}' ainvoke failed, falling back to sync invoke")
838
+ tool_result = tool_to_execute.invoke(tool_args, config=config)
839
+ else:
840
+ # Sync-only tool
841
+ tool_result = tool_to_execute.invoke(tool_args, config=config)
842
+
843
+ # Create tool message with result - preserve structured content
844
+ from langchain_core.messages import ToolMessage
845
+
846
+ # Check if tool_result is structured content (list of dicts)
847
+ # TODO: need solid check for being compatible with ToolMessage content format
848
+ if isinstance(tool_result, list) and all(
849
+ isinstance(item, dict) and 'type' in item for item in tool_result
850
+ ):
851
+ # Use structured content directly for multimodal support
852
+ tool_message = ToolMessage(
853
+ content=tool_result,
854
+ tool_call_id=tool_call_id
855
+ )
856
+ else:
857
+ # Fallback to string conversion for other tool results
858
+ tool_message = ToolMessage(
859
+ content=str(tool_result),
860
+ tool_call_id=tool_call_id
861
+ )
862
+ new_messages.append(tool_message)
863
+
864
+                    except Exception as e:
+                        import traceback
+                        error_details = traceback.format_exc()
+                        # Use debug level to avoid duplicate output when CLI callbacks are active
+                        logger.debug(f"Error executing tool '{tool_name}': {e}\n{error_details}")
+                        # Create error tool message
+                        from langchain_core.messages import ToolMessage
+                        tool_message = ToolMessage(
+                            content=f"Error executing {tool_name}: {str(e)}",
+                            tool_call_id=tool_call_id
+                        )
+                        new_messages.append(tool_message)
+                else:
+                    logger.warning(f"Tool '{tool_name}' not found in available tools")
+                    # Create error tool message for missing tool
+                    from langchain_core.messages import ToolMessage
+                    tool_message = ToolMessage(
+                        content=f"Tool '{tool_name}' not available",
+                        tool_call_id=tool_call_id
+                    )
+                    new_messages.append(tool_message)
+
+            # Call LLM again with tool results to get next response
+            try:
+                current_completion = llm_client.invoke(new_messages, config=config)
+                new_messages.append(current_completion)
+
+                # Check if we still have tool calls
+                if hasattr(current_completion, 'tool_calls') and current_completion.tool_calls:
+                    logger.info(f"LLM requested {len(current_completion.tool_calls)} more tool calls")
+                else:
+                    logger.info("LLM completed without requesting more tools")
+                    break
+
+            except Exception as e:
+                error_str = str(e).lower()
+
+                # Check for thinking model message format errors
+                is_thinking_format_error = any(indicator in error_str for indicator in [
+                    'expected `thinking`',
+                    'expected `redacted_thinking`',
+                    'thinking block',
+                    'must start with a thinking block',
+                    'when `thinking` is enabled'
+                ])
+
+                # Check for non-recoverable errors that should fail immediately
+                # These indicate configuration or permission issues, not content size issues
+                is_non_recoverable = any(indicator in error_str for indicator in [
+                    'model identifier is invalid',
+                    'authentication',
+                    'unauthorized',
+                    'access denied',
+                    'permission denied',
+                    'invalid credentials',
+                    'api key',
+                    'quota exceeded',
+                    'rate limit'
+                ])
+
+                # Check for context window / token limit errors
+                is_context_error = any(indicator in error_str for indicator in [
+                    'context window', 'context_window', 'token limit', 'too long',
+                    'maximum context length', 'input is too long', 'exceeds the limit',
+                    'contextwindowexceedederror', 'max_tokens', 'content too large'
+                ])
+
+                # Check for Bedrock/Claude output limit errors (recoverable by truncation)
+                is_output_limit_error = any(indicator in error_str for indicator in [
+                    'output token',
+                    'response too large',
+                    'max_tokens_to_sample',
+                    'output_token_limit',
+                    'output exceeds'
+                ])
+
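Taken together, the four substring scans above partition LLM failures into a fixed handling order: thinking-format errors raise with guidance, non-recoverable errors break immediately, context/output-limit errors trigger truncation and retry, and anything else falls through to the generic handler. A condensed, hypothetical restatement of that classification (the helper name and category labels are this sketch's own, and the keyword lists are abbreviated):

```python
def classify_llm_error(exc: Exception) -> str:
    """Hypothetical condensation of the substring checks above; order matters."""
    s = str(exc).lower()
    if any(k in s for k in ("expected `thinking`", "thinking block", "when `thinking` is enabled")):
        return "thinking_format"   # raise with remediation guidance
    if any(k in s for k in ("authentication", "unauthorized", "api key", "rate limit")):
        return "non_recoverable"   # break immediately, nothing to retry
    if any(k in s for k in ("context window", "token limit", "maximum context length")):
        return "context"           # truncate the last tool result and retry
    if any(k in s for k in ("output token", "response too large", "output exceeds")):
        return "output_limit"      # ask the model for a smaller response and retry
    return "unknown"               # generic error message, break

assert classify_llm_error(ValueError("maximum context length exceeded")) == "context"
```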
+                # Handle thinking model format errors
+                if is_thinking_format_error:
+                    model_info = getattr(llm_client, 'model_name', None) or getattr(llm_client, 'model', 'unknown')
+                    logger.error(f"Thinking model message format error during tool execution iteration {iteration}")
+                    logger.error(f"Model: {model_info}")
+                    logger.error(f"Error details: {e}")
+
+                    error_msg = (
+                        f"⚠️ THINKING MODEL FORMAT ERROR\n\n"
+                        f"The model '{model_info}' uses extended thinking and requires specific message formatting.\n\n"
+                        f"**Issue**: When 'thinking' is enabled, assistant messages must start with thinking blocks "
+                        f"before any tool_use blocks. This framework cannot preserve thinking_blocks during iterative "
+                        f"tool execution.\n\n"
+                        f"**Root Cause**: Anthropic's Messages API is stateless - clients must manually preserve and "
+                        f"resend thinking_blocks with every tool response. LangChain's message abstraction doesn't "
+                        f"include thinking_blocks, so they are lost between turns.\n\n"
+                        f"**Solutions**:\n"
+                        f"1. **Recommended**: Use non-thinking model variants:\n"
+                        f"   - claude-3-5-sonnet-20241022-v2:0 (instead of thinking variants)\n"
+                        f"   - anthropic.claude-3-5-sonnet-20241022-v2:0 (Bedrock)\n"
+                        f"2. Disable extended thinking: Set reasoning_effort=None or remove thinking config\n"
+                        f"3. Use LiteLLM directly with modify_params=True (handles thinking_blocks automatically)\n"
+                        f"4. Avoid tool calling with thinking models (use for reasoning tasks only)\n\n"
+                        f"**Technical Context**: {str(e)}\n\n"
+                        f"References:\n"
+                        f"- https://docs.claude.com/en/docs/build-with-claude/extended-thinking\n"
+                        f"- https://docs.litellm.ai/docs/reasoning_content (See 'Tool Calling with thinking' section)"
+                    )
+                    new_messages.append(AIMessage(content=error_msg))
+                    raise ValueError(error_msg)
+
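For context, the message shape the error text refers to: with extended thinking enabled, Anthropic expects each assistant turn to begin with its thinking block(s) and expects those blocks to be resent verbatim alongside tool results on the next request. An illustrative (not exhaustive) example of that ordering; all field values are placeholders:

```python
# Illustrative assistant turn under extended thinking (values are placeholders):
# the thinking block must precede tool_use and be preserved across turns.
assistant_turn = {
    "role": "assistant",
    "content": [
        {"type": "thinking", "thinking": "Plan: call the search tool first...", "signature": "<opaque>"},
        {"type": "tool_use", "id": "toolu_01", "name": "search", "input": {"query": "..."}},
    ],
}
# LangChain's AIMessage/ToolMessage round-trip drops the thinking block,
# which is why the next invoke() fails with the format error handled above.
```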
+                # Handle non-recoverable errors immediately
+                if is_non_recoverable:
+                    # Enhanced error logging with model information for better diagnostics
+                    model_info = getattr(llm_client, 'model_name', None) or getattr(llm_client, 'model', 'unknown')
+                    logger.error(f"Non-recoverable error during tool execution iteration {iteration}")
+                    logger.error(f"Model: {model_info}")
+                    logger.error(f"Error details: {e}")
+                    logger.error(f"Error type: {type(e).__name__}")
+
+                    # Provide detailed error message for debugging
+                    error_details = []
+                    error_details.append(f"Model configuration error: {str(e)}")
+                    error_details.append(f"Model identifier: {model_info}")
+
+                    # Check for common Bedrock model ID issues
+                    if 'model identifier is invalid' in error_str:
+                        error_details.append("\nPossible causes:")
+                        error_details.append("1. Model not available in the configured AWS region")
+                        error_details.append("2. Model not enabled in your AWS Bedrock account")
+                        error_details.append("3. LiteLLM model group prefix not stripped (check for prefixes like '1_')")
+                        error_details.append("4. Incorrect model version or typo in model name")
+                        error_details.append("\nPlease verify:")
+                        error_details.append("- AWS Bedrock console shows this model as available")
+                        error_details.append("- LiteLLM router configuration is correct")
+                        error_details.append("- Model ID doesn't contain unexpected prefixes")
+
+                    error_msg = "\n".join(error_details)
+                    new_messages.append(AIMessage(content=error_msg))
+                    break
+
+                if is_context_error or is_output_limit_error:
+                    error_type = "output limit" if is_output_limit_error else "context window"
+                    logger.warning(f"{error_type.title()} exceeded during tool execution iteration {iteration}")
+
+                    # Find the last tool message and its associated tool name
+                    last_tool_msg_idx = None
+                    last_tool_name = None
+                    last_tool_call_id = None
+
+                    # First, find the last tool message
+                    for i in range(len(new_messages) - 1, -1, -1):
+                        msg = new_messages[i]
+                        if hasattr(msg, 'tool_call_id') or (hasattr(msg, 'type') and getattr(msg, 'type', None) == 'tool'):
+                            last_tool_msg_idx = i
+                            last_tool_call_id = getattr(msg, 'tool_call_id', None)
+                            break
+
+                    # Find the tool name from the AIMessage that requested this tool call
+                    if last_tool_call_id:
+                        for i in range(last_tool_msg_idx - 1, -1, -1):
+                            msg = new_messages[i]
+                            if hasattr(msg, 'tool_calls') and msg.tool_calls:
+                                for tc in msg.tool_calls:
+                                    tc_id = tc.get('id', '') if isinstance(tc, dict) else getattr(tc, 'id', '')
+                                    if tc_id == last_tool_call_id:
+                                        last_tool_name = tc.get('name', '') if isinstance(tc, dict) else getattr(tc, 'name', '')
+                                        break
+                                if last_tool_name:
+                                    break
+
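The backward scan above relies on the tool_call_id contract: each ToolMessage carries the id of the AIMessage tool-call entry that requested it. A small sketch of that pairing, with hypothetical names and ids; recovering the tool name is what drives the _get_tool_truncation_suggestions call that follows:

```python
from langchain_core.messages import AIMessage, ToolMessage

request = AIMessage(
    content="",
    tool_calls=[{"name": "read_file", "args": {"path": "big.log"}, "id": "call_1"}],
)
result = ToolMessage(content="...very large output...", tool_call_id="call_1")

# Matching result.tool_call_id ("call_1") against request.tool_calls recovers
# the originating tool name ("read_file") for tool-specific truncation tips.
assert request.tool_calls[0]["id"] == result.tool_call_id
```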
+                    # Build dynamic suggestion based on the tool that caused the overflow
+                    tool_suggestions = self._get_tool_truncation_suggestions(last_tool_name)
+
+                    # Truncate the problematic tool result if found
+                    if last_tool_msg_idx is not None:
+                        from langchain_core.messages import ToolMessage
+                        original_msg = new_messages[last_tool_msg_idx]
+                        tool_call_id = getattr(original_msg, 'tool_call_id', 'unknown')
+
+                        # Build error-specific guidance
+                        if is_output_limit_error:
+                            truncated_content = (
+                                f"⚠️ MODEL OUTPUT LIMIT EXCEEDED\n\n"
+                                f"The tool '{last_tool_name or 'unknown'}' returned data, but the model's response was too large.\n\n"
+                                f"IMPORTANT: You must provide a SMALLER, more focused response.\n"
+                                f"- Break down your response into smaller chunks\n"
+                                f"- Summarize instead of listing everything\n"
+                                f"- Focus on the most relevant information first\n"
+                                f"- If listing items, show only top 5-10 most important\n\n"
+                                f"Tool-specific tips:\n{tool_suggestions}\n\n"
+                                f"Please retry with a more concise response."
+                            )
+                        else:
+                            truncated_content = (
+                                f"⚠️ TOOL OUTPUT TRUNCATED - Context window exceeded\n\n"
+                                f"The tool '{last_tool_name or 'unknown'}' returned too much data for the model's context window.\n\n"
+                                f"To fix this:\n{tool_suggestions}\n\n"
+                                f"Please retry with more restrictive parameters."
+                            )
+
+                        truncated_msg = ToolMessage(
+                            content=truncated_content,
+                            tool_call_id=tool_call_id
+                        )
+                        new_messages[last_tool_msg_idx] = truncated_msg
+
+                        logger.info(f"Truncated large tool result from '{last_tool_name}' and retrying LLM call")
+
+                        # CRITICAL FIX: Call LLM again with truncated message to get fresh completion
+                        # This prevents duplicate tool_call_ids that occur when we continue with
+                        # the same current_completion that still has the original tool_calls
+                        try:
+                            current_completion = llm_client.invoke(new_messages, config=config)
+                            new_messages.append(current_completion)
+
+                            # Continue to process any new tool calls in the fresh completion
+                            if hasattr(current_completion, 'tool_calls') and current_completion.tool_calls:
+                                logger.info(f"LLM requested {len(current_completion.tool_calls)} more tool calls after truncation")
+                                continue
+                            else:
+                                logger.info("LLM completed after truncation without requesting more tools")
+                                break
+                        except Exception as retry_error:
+                            logger.error(f"Error retrying LLM after truncation: {retry_error}")
+                            error_msg = f"Failed to retry after truncation: {str(retry_error)}"
+                            new_messages.append(AIMessage(content=error_msg))
+                            break
+                    else:
+                        # Couldn't find the tool message; add an error and break
+                        if is_output_limit_error:
+                            error_msg = (
+                                "Model output limit exceeded. Please provide a more concise response. "
+                                "Break down your answer into smaller parts and summarize where possible."
+                            )
+                        else:
+                            error_msg = (
+                                "Context window exceeded. The conversation or tool results are too large. "
+                                "Try using tools with smaller output limits (e.g., max_items, max_depth parameters)."
+                            )
+                        new_messages.append(AIMessage(content=error_msg))
+                        break
+                else:
+                    logger.error(f"Error in LLM call during iteration {iteration}: {e}")
+                    # Add error message and break the loop
+                    error_msg = f"Error processing tool results in iteration {iteration}: {str(e)}"
+                    new_messages.append(AIMessage(content=error_msg))
+                    break
+
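For orientation, a condensed sketch of the loop all of the error handling above protects: invoke, execute requested tools, append ToolMessages, re-invoke until no tool calls remain. The function name, `tools_map` shape, and the default limit are this sketch's assumptions, not this diff's identifiers, and every guardrail shown above is omitted:

```python
from langchain_core.messages import ToolMessage

def run_tool_loop(llm_client, tools_map, messages, config=None, steps_limit=25):
    """Condensed sketch only; `tools_map` maps tool name -> LangChain tool."""
    iteration = 0
    completion = llm_client.invoke(messages, config=config)
    messages.append(completion)
    while getattr(completion, "tool_calls", None) and iteration < steps_limit:
        iteration += 1
        for call in completion.tool_calls:
            tool = tools_map.get(call["name"])
            result = tool.invoke(call["args"]) if tool else f"Tool '{call['name']}' not available"
            messages.append(ToolMessage(content=str(result), tool_call_id=call["id"]))
        completion = llm_client.invoke(messages, config=config)
        messages.append(completion)
    return messages, completion
```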
+        # Handle max iterations
+        if iteration >= self.steps_limit:
+            logger.warning(f"Reached maximum iterations ({self.steps_limit}) for tool execution")
+
+            # CRITICAL: Check if the last message is an AIMessage with pending tool_calls
+            # that were not processed. If so, we need to add placeholder ToolMessages to prevent
+            # the "assistant message with 'tool_calls' must be followed by tool messages" error
+            # when the conversation continues.
+            if new_messages:
+                last_msg = new_messages[-1]
+                if hasattr(last_msg, 'tool_calls') and last_msg.tool_calls:
+                    from langchain_core.messages import ToolMessage
+                    pending_tool_calls = last_msg.tool_calls if hasattr(last_msg.tool_calls, '__iter__') else []
+
+                    # Check which tool_call_ids already have responses
+                    existing_tool_call_ids = set()
+                    for msg in new_messages:
+                        if hasattr(msg, 'tool_call_id'):
+                            existing_tool_call_ids.add(msg.tool_call_id)
+
+                    # Add placeholder responses for any tool calls without responses
+                    for tool_call in pending_tool_calls:
+                        tool_call_id = tool_call.get('id', '') if isinstance(tool_call, dict) else getattr(tool_call, 'id', '')
+                        tool_name = tool_call.get('name', '') if isinstance(tool_call, dict) else getattr(tool_call, 'name', '')
+
+                        if tool_call_id and tool_call_id not in existing_tool_call_ids:
+                            logger.info(f"Adding placeholder ToolMessage for interrupted tool call: {tool_name} ({tool_call_id})")
+                            placeholder_msg = ToolMessage(
+                                content=f"[Tool execution interrupted - step limit ({self.steps_limit}) reached before {tool_name} could be executed]",
+                                tool_call_id=tool_call_id
+                            )
+                            new_messages.append(placeholder_msg)
+
+            # Add warning message - CLI or calling code can detect this and prompt user
+            warning_msg = f"Maximum tool execution iterations ({self.steps_limit}) reached. Stopping tool execution."
+            new_messages.append(AIMessage(content=warning_msg))
+        else:
+            logger.info(f"Tool execution completed after {iteration} iterations")
+
+        return new_messages, current_completion
+
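The placeholder logic above exists because chat providers validate transcript shape: an assistant message carrying tool_calls must be immediately followed by one tool message per call id. A minimal sketch of the repaired transcript after the step limit interrupts a pending call; the tool name and id are hypothetical:

```python
from langchain_core.messages import AIMessage, ToolMessage

history = [
    AIMessage(content="", tool_calls=[{"name": "grep", "args": {"pattern": "TODO"}, "id": "call_9"}]),
    # The placeholder keeps the transcript valid; without it the next invoke()
    # fails with "assistant message with 'tool_calls' must be followed by tool messages".
    ToolMessage(
        content="[Tool execution interrupted - step limit reached before grep could be executed]",
        tool_call_id="call_9",
    ),
    AIMessage(content="Maximum tool execution iterations reached. Stopping tool execution."),
]
```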
+    def __get_struct_output_model(self, llm_client, pydantic_model, method: Literal["function_calling", "json_mode", "json_schema"] = "function_calling"):
+        return llm_client.with_structured_output(pydantic_model, method=method)
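__get_struct_output_model is a thin wrapper over LangChain's with_structured_output. A hedged usage sketch, assuming llm_client is an initialized chat model that supports tool/function calling; the Pydantic model and function name are illustrative:

```python
from pydantic import BaseModel

class Verdict(BaseModel):
    passed: bool
    reason: str

def get_verdict(llm_client, question: str) -> Verdict:
    # "function_calling" enforces the schema via a tool call; "json_mode" and
    # "json_schema" are alternative methods where the provider supports them.
    structured_llm = llm_client.with_structured_output(Verdict, method="function_calling")
    return structured_llm.invoke(question)
```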