aiecs 1.7.6__py3-none-any.whl → 1.8.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of aiecs might be problematic.

Files changed (35)
  1. aiecs/__init__.py +1 -1
  2. aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +5 -1
  3. aiecs/application/knowledge_graph/retrieval/query_intent_classifier.py +7 -5
  4. aiecs/config/config.py +3 -0
  5. aiecs/config/tool_config.py +55 -19
  6. aiecs/domain/agent/base_agent.py +79 -0
  7. aiecs/domain/agent/hybrid_agent.py +552 -175
  8. aiecs/domain/agent/knowledge_aware_agent.py +3 -2
  9. aiecs/domain/agent/llm_agent.py +2 -0
  10. aiecs/domain/agent/models.py +10 -0
  11. aiecs/domain/agent/tools/schema_generator.py +17 -4
  12. aiecs/llm/callbacks/custom_callbacks.py +9 -4
  13. aiecs/llm/client_factory.py +20 -7
  14. aiecs/llm/clients/base_client.py +50 -5
  15. aiecs/llm/clients/google_function_calling_mixin.py +46 -88
  16. aiecs/llm/clients/googleai_client.py +183 -9
  17. aiecs/llm/clients/openai_client.py +12 -0
  18. aiecs/llm/clients/openai_compatible_mixin.py +42 -2
  19. aiecs/llm/clients/openrouter_client.py +272 -0
  20. aiecs/llm/clients/vertex_client.py +385 -22
  21. aiecs/llm/clients/xai_client.py +41 -3
  22. aiecs/llm/protocols.py +19 -1
  23. aiecs/llm/utils/image_utils.py +179 -0
  24. aiecs/main.py +2 -2
  25. aiecs/tools/docs/document_creator_tool.py +143 -2
  26. aiecs/tools/docs/document_parser_tool.py +9 -4
  27. aiecs/tools/docs/document_writer_tool.py +179 -0
  28. aiecs/tools/task_tools/image_tool.py +49 -14
  29. aiecs/tools/task_tools/scraper_tool.py +39 -2
  30. {aiecs-1.7.6.dist-info → aiecs-1.8.4.dist-info}/METADATA +4 -2
  31. {aiecs-1.7.6.dist-info → aiecs-1.8.4.dist-info}/RECORD +35 -33
  32. {aiecs-1.7.6.dist-info → aiecs-1.8.4.dist-info}/WHEEL +0 -0
  33. {aiecs-1.7.6.dist-info → aiecs-1.8.4.dist-info}/entry_points.txt +0 -0
  34. {aiecs-1.7.6.dist-info → aiecs-1.8.4.dist-info}/licenses/LICENSE +0 -0
  35. {aiecs-1.7.6.dist-info → aiecs-1.8.4.dist-info}/top_level.txt +0 -0
aiecs/llm/clients/vertex_client.py

@@ -1,8 +1,11 @@
  import asyncio
+ import json
  import logging
  import os
  import warnings
- from typing import Dict, Any, Optional, List, AsyncGenerator
+ import hashlib
+ import base64
+ from typing import Dict, Any, Optional, List, AsyncGenerator, Union
  import vertexai
  from vertexai.generative_models import (
      GenerativeModel,
@@ -14,6 +17,45 @@ from vertexai.generative_models import (
      Part,
  )

+ from aiecs.llm.utils.image_utils import parse_image_source, ImageContent
+
+ logger = logging.getLogger(__name__)
+
+ # Try to import CachedContent for prompt caching support
+ # CachedContent API requires google-cloud-aiplatform >= 1.38.0
+ # Reference: https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/cached-content
+ CACHED_CONTENT_AVAILABLE = False
+ CACHED_CONTENT_IMPORT_PATH = None
+ CACHED_CONTENT_SDK_VERSION = None
+
+ # Check SDK version
+ try:
+     import google.cloud.aiplatform as aiplatform
+     CACHED_CONTENT_SDK_VERSION = getattr(aiplatform, '__version__', None)
+ except ImportError:
+     pass
+
+ # Try to import CachedContent for prompt caching support
+ try:
+     from vertexai.preview import caching
+     if hasattr(caching, 'CachedContent'):
+         CACHED_CONTENT_AVAILABLE = True
+         CACHED_CONTENT_IMPORT_PATH = 'vertexai.preview.caching'
+     else:
+         # Module exists but CachedContent class not found
+         CACHED_CONTENT_AVAILABLE = False
+ except ImportError:
+     try:
+         # Alternative import path for different SDK versions
+         from vertexai import caching
+         if hasattr(caching, 'CachedContent'):
+             CACHED_CONTENT_AVAILABLE = True
+             CACHED_CONTENT_IMPORT_PATH = 'vertexai.caching'
+         else:
+             CACHED_CONTENT_AVAILABLE = False
+     except ImportError:
+         CACHED_CONTENT_AVAILABLE = False
+
  from aiecs.llm.clients.base_client import (
      BaseLLMClient,
      LLMMessage,
@@ -147,17 +189,20 @@ def _build_safety_block_error(
      error_parts = [default_message]
      if block_reason:
          error_parts.append(f"Block reason: {block_reason}")
-
-     blocked_categories = [
-         r.get("category", "UNKNOWN")
-         for r in safety_ratings
-         if r.get("blocked", False)
-     ]
+
+     # Safely extract blocked categories, handling potential non-dict elements
+     blocked_categories = []
+     for r in safety_ratings:
+         if isinstance(r, dict) and r.get("blocked", False):
+             blocked_categories.append(r.get("category", "UNKNOWN"))
      if blocked_categories:
          error_parts.append(f"Blocked categories: {', '.join(blocked_categories)}")
-
+
      # Add severity/probability information
      for rating in safety_ratings:
+         # Skip non-dict elements
+         if not isinstance(rating, dict):
+             continue
          if rating.get("blocked"):
              if "severity" in rating:
                  error_parts.append(
@@ -193,6 +238,8 @@ class VertexAIClient(BaseLLMClient, GoogleFunctionCallingMixin):
              "part_counts": {}, # {part_count: frequency}
              "last_part_count": None,
          }
+         # Cache for CachedContent objects (key: content hash, value: cached_content_id)
+         self._cached_content_cache: Dict[str, str] = {}

      def _init_vertex_ai(self):
          """Lazy initialization of Vertex AI with proper authentication"""
@@ -230,14 +277,140 @@ class VertexAIClient(BaseLLMClient, GoogleFunctionCallingMixin):
          except Exception as e:
              raise ProviderNotAvailableError(f"Failed to initialize Vertex AI: {str(e)}")

+     def _generate_content_hash(self, content: str) -> str:
+         """Generate a hash for content to use as cache key."""
+         return hashlib.md5(content.encode('utf-8')).hexdigest()
+
+     async def _create_or_get_cached_content(
+         self,
+         content: str,
+         model_name: str,
+         ttl_seconds: Optional[int] = None,
+     ) -> Optional[str]:
+         """
+         Create or get a CachedContent for the given content.
+
+         This method implements Gemini's CachedContent API for prompt caching.
+         It preserves the existing cache_control mechanism for developer convenience.
+
+         The method supports multiple Vertex AI SDK versions and gracefully falls back
+         to regular system_instruction if CachedContent API is unavailable.
+
+         Args:
+             content: Content to cache (typically system instruction)
+             model_name: Model name to use for caching
+             ttl_seconds: Time to live in seconds (optional, defaults to 3600)
+
+         Returns:
+             CachedContent resource name (e.g., "projects/.../cachedContents/...") or None if caching unavailable
+         """
+         if not CACHED_CONTENT_AVAILABLE:
+             # Provide version info if available
+             version_info = ""
+             if CACHED_CONTENT_SDK_VERSION:
+                 version_info = f" (SDK version: {CACHED_CONTENT_SDK_VERSION})"
+             elif CACHED_CONTENT_IMPORT_PATH:
+                 version_info = f" (import path '{CACHED_CONTENT_IMPORT_PATH}' available but CachedContent class not found)"
+
+             self.logger.debug(
+                 f"CachedContent API not available{version_info}, skipping cache creation. "
+                 f"Requires google-cloud-aiplatform >=1.38.0"
+             )
+             return None
+
+         if not content or not content.strip():
+             return None
+
+         # Generate cache key
+         cache_key = self._generate_content_hash(content)
+
+         # Check if we already have this cached
+         if cache_key in self._cached_content_cache:
+             cached_content_id = self._cached_content_cache[cache_key]
+             self.logger.debug(f"Using existing CachedContent: {cached_content_id}")
+             return cached_content_id
+
+         try:
+             self._init_vertex_ai()
+
+             # Build the content to cache (system instruction as Content)
+             # For CachedContent, we typically cache the system instruction
+             cached_content_obj = Content(
+                 role="user",
+                 parts=[Part.from_text(content)]
+             )
+
+             # Try different API patterns based on SDK version
+             cached_content_id = None
+
+             # Pattern 1: caching.CachedContent.create() (most common)
+             if hasattr(caching, 'CachedContent'):
+                 try:
+                     cached_content = await asyncio.get_event_loop().run_in_executor(
+                         None,
+                         lambda: caching.CachedContent.create(
+                             model=model_name,
+                             contents=[cached_content_obj],
+                             ttl_seconds=ttl_seconds or 3600, # Default 1 hour
+                         )
+                     )
+
+                     # Extract the resource name
+                     if hasattr(cached_content, 'name'):
+                         cached_content_id = cached_content.name
+                     elif hasattr(cached_content, 'resource_name'):
+                         cached_content_id = cached_content.resource_name
+                     else:
+                         cached_content_id = str(cached_content)
+
+                     if cached_content_id:
+                         # Store in cache
+                         self._cached_content_cache[cache_key] = cached_content_id
+                         self.logger.info(f"Created CachedContent for prompt caching: {cached_content_id}")
+                         return cached_content_id
+
+                 except AttributeError as e:
+                     self.logger.debug(f"CachedContent.create() signature may differ: {str(e)}")
+                 except Exception as e:
+                     self.logger.debug(f"Failed to create CachedContent using pattern 1: {str(e)}")
+
+             # Pattern 2: Try alternative API patterns if Pattern 1 fails
+             # Note: Different SDK versions may have different APIs
+             # This is a fallback that allows graceful degradation
+
+             # Build informative warning message with version info
+             version_info = ""
+             if CACHED_CONTENT_SDK_VERSION:
+                 version_info = f" Current SDK version: {CACHED_CONTENT_SDK_VERSION}."
+             else:
+                 version_info = " Unable to detect SDK version."
+
+             required_version = ">=1.38.0"
+             upgrade_command = "pip install --upgrade 'google-cloud-aiplatform>=1.38.0'"
+
+             self.logger.warning(
+                 f"CachedContent API not available or incompatible with current SDK version.{version_info} "
+                 f"Falling back to system_instruction (prompt caching disabled). "
+                 f"To enable prompt caching, upgrade to google-cloud-aiplatform {required_version} or later: "
+                 f"{upgrade_command}"
+             )
+             return None
+
+         except Exception as e:
+             self.logger.warning(
+                 f"Failed to create CachedContent (prompt caching disabled, using system_instruction): {str(e)}"
+             )
+             # Don't raise - allow fallback to regular generation without caching
+             return None
+
      def _convert_messages_to_contents(
          self, messages: List[LLMMessage]
      ) -> List[Content]:
          """
          Convert LLMMessage list to Vertex AI Content objects.

-         This properly handles multi-turn conversations instead of
-         string concatenation.
+         This properly handles multi-turn conversations including
+         function/tool responses for Vertex AI Function Calling.

          Args:
              messages: List of LLMMessage objects (system messages should be filtered out)
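Note on using the new caching path: the helper above only runs when a system message carries a cache_control marker (wired up in the generate_text and stream_text hunks further down). A minimal usage sketch, assuming LLMMessage accepts these fields as keyword arguments and that cache_control can be any object exposing ttl_seconds (the diff only reads it via getattr); SYSTEM_PROMPT and the SimpleNamespace stand-in are illustrative, not part of the package:

    from types import SimpleNamespace
    from aiecs.llm.clients.base_client import LLMMessage
    from aiecs.llm.clients.vertex_client import VertexAIClient

    client = VertexAIClient()  # constructor details are not shown in this diff
    messages = [
        # cache_control on the system message routes the instruction through
        # _create_or_get_cached_content(); ttl_seconds falls back to 3600 when absent.
        LLMMessage(
            role="system",
            content=SYSTEM_PROMPT,  # placeholder for a long, reusable instruction
            cache_control=SimpleNamespace(ttl_seconds=7200),
        ),
        LLMMessage(role="user", content="Summarize the attached report."),
    ]
    response = await client.generate_text(messages=messages)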
@@ -246,13 +419,118 @@ class VertexAIClient(BaseLLMClient, GoogleFunctionCallingMixin):
              List of Content objects for Vertex AI API
          """
          contents = []
+
          for msg in messages:
-             # Map role: Vertex AI uses "model" for assistant responses
-             role = "model" if msg.role == "assistant" else msg.role
-             contents.append(Content(
-                 role=role,
-                 parts=[Part.from_text(msg.content)]
-             ))
+             # Handle tool/function responses (role="tool")
+             if msg.role == "tool":
+                 # Vertex AI expects function responses as user messages with FunctionResponse parts
+                 # The tool_call_id maps to the function name
+                 func_name = msg.tool_call_id or "unknown_function"
+
+                 # Parse content as the function response
+                 try:
+                     # Try to parse as JSON if it looks like JSON
+                     if msg.content and msg.content.strip().startswith('{'):
+                         response_data = json.loads(msg.content)
+                     else:
+                         response_data = {"result": msg.content}
+                 except json.JSONDecodeError:
+                     response_data = {"result": msg.content}
+
+                 # Create FunctionResponse part using Part.from_function_response
+                 func_response_part = Part.from_function_response(
+                     name=func_name,
+                     response=response_data
+                 )
+
+                 contents.append(Content(
+                     role="user", # Function responses are sent as "user" role in Vertex AI
+                     parts=[func_response_part]
+                 ))
+
+             # Handle assistant messages with tool calls
+             elif msg.role == "assistant" and msg.tool_calls:
+                 parts = []
+                 if msg.content:
+                     parts.append(Part.from_text(msg.content))
+
+                 # Add images if present
+                 if msg.images:
+                     for image_source in msg.images:
+                         image_content = parse_image_source(image_source)
+
+                         if image_content.is_url():
+                             parts.append(Part.from_uri(
+                                 uri=image_content.get_url(),
+                                 mime_type=image_content.mime_type
+                             ))
+                         else:
+                             base64_data = image_content.get_base64_data()
+                             image_bytes = base64.b64decode(base64_data)
+                             parts.append(Part.from_bytes(
+                                 data=image_bytes,
+                                 mime_type=image_content.mime_type
+                             ))
+
+                 for tool_call in msg.tool_calls:
+                     func = tool_call.get("function", {})
+                     func_name = func.get("name", "")
+                     func_args = func.get("arguments", "{}")
+
+                     # Parse arguments
+                     try:
+                         args_dict = json.loads(func_args) if isinstance(func_args, str) else func_args
+                     except json.JSONDecodeError:
+                         args_dict = {}
+
+                     # Create FunctionCall part using Part.from_dict
+                     # Note: Part.from_function_call() does NOT exist in Vertex AI SDK
+                     # Must use from_dict with function_call structure
+                     function_call_part = Part.from_dict({
+                         "function_call": {
+                             "name": func_name,
+                             "args": args_dict
+                         }
+                     })
+                     parts.append(function_call_part)
+
+                 contents.append(Content(
+                     role="model",
+                     parts=parts
+                 ))
+
+             # Handle regular messages (user, assistant without tool_calls)
+             else:
+                 role = "model" if msg.role == "assistant" else msg.role
+                 parts = []
+
+                 # Add text content if present
+                 if msg.content:
+                     parts.append(Part.from_text(msg.content))
+
+                 # Add images if present
+                 if msg.images:
+                     for image_source in msg.images:
+                         image_content = parse_image_source(image_source)
+
+                         if image_content.is_url():
+                             # Use Part.from_uri for URLs
+                             parts.append(Part.from_uri(
+                                 uri=image_content.get_url(),
+                                 mime_type=image_content.mime_type
+                             ))
+                         else:
+                             # Convert to bytes for inline_data
+                             base64_data = image_content.get_base64_data()
+                             image_bytes = base64.b64decode(base64_data)
+                             parts.append(Part.from_bytes(
+                                 data=image_bytes,
+                                 mime_type=image_content.mime_type
+                             ))
+
+                 if parts:
+                     contents.append(Content(role=role, parts=parts))
+
          return contents

      async def generate_text(
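For reference, a hedged sketch of the message sequence the rewritten _convert_messages_to_contents() is built to handle; the field names (content, tool_calls, tool_call_id) follow the code above, while the exact LLMMessage constructor is an assumption:

    messages = [
        LLMMessage(role="user", content="What's the weather in Paris?"),
        # Assistant turn that requested a tool call; converted to a "model" Content
        # whose function_call part is built with Part.from_dict().
        LLMMessage(
            role="assistant",
            content="",
            tool_calls=[{"function": {"name": "get_weather", "arguments": '{"city": "Paris"}'}}],
        ),
        # Tool output returns with role="tool"; tool_call_id carries the function name,
        # JSON-looking content is parsed, anything else is wrapped as {"result": ...}.
        LLMMessage(role="tool", tool_call_id="get_weather", content='{"temp_c": 18}'),
    ]
    contents = client._convert_messages_to_contents(messages)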
@@ -261,13 +539,36 @@ class VertexAIClient(BaseLLMClient, GoogleFunctionCallingMixin):
          model: Optional[str] = None,
          temperature: float = 0.7,
          max_tokens: Optional[int] = None,
+         context: Optional[Dict[str, Any]] = None,
          functions: Optional[List[Dict[str, Any]]] = None,
          tools: Optional[List[Dict[str, Any]]] = None,
          tool_choice: Optional[Any] = None,
          system_instruction: Optional[str] = None,
          **kwargs,
      ) -> LLMResponse:
-         """Generate text using Vertex AI"""
+         """
+         Generate text using Vertex AI.
+
+         Args:
+             messages: List of conversation messages
+             model: Model name (optional, uses default if not provided)
+             temperature: Sampling temperature (0.0 to 1.0)
+             max_tokens: Maximum tokens to generate
+             context: Optional context dictionary containing metadata such as:
+                 - user_id: User identifier for tracking/billing
+                 - tenant_id: Tenant identifier for multi-tenant setups
+                 - request_id: Request identifier for tracing
+                 - session_id: Session identifier
+                 - Any other custom metadata for observability or middleware
+             functions: List of function schemas (legacy format)
+             tools: List of tool schemas (new format, recommended)
+             tool_choice: Tool choice strategy
+             system_instruction: System instruction for the model
+             **kwargs: Additional provider-specific parameters
+
+         Returns:
+             LLMResponse with generated text and metadata
+         """
          self._init_vertex_ai()

          # Get model name from config if not provided
@@ -281,17 +582,37 @@ class VertexAIClient(BaseLLMClient, GoogleFunctionCallingMixin):
          try:
              # Extract system message from messages if present
              system_msg = None
+             system_cache_control = None
              user_messages = []
              for msg in messages:
                  if msg.role == "system":
                      system_msg = msg.content
+                     system_cache_control = msg.cache_control
                  else:
                      user_messages.append(msg)

              # Use explicit system_instruction parameter if provided, else use extracted system message
              final_system_instruction = system_instruction or system_msg

+             # Check if we should use CachedContent API for prompt caching
+             cached_content_id = None
+             if final_system_instruction and system_cache_control:
+                 # Create or get CachedContent for the system instruction
+                 # Extract TTL from cache_control if available (defaults to 3600 seconds)
+                 ttl_seconds = getattr(system_cache_control, 'ttl_seconds', None) or 3600
+                 cached_content_id = await self._create_or_get_cached_content(
+                     content=final_system_instruction,
+                     model_name=model_name,
+                     ttl_seconds=ttl_seconds,
+                 )
+                 if cached_content_id:
+                     self.logger.debug(f"Using CachedContent for prompt caching: {cached_content_id}")
+                     # When using CachedContent, we don't pass system_instruction to GenerativeModel
+                     # Instead, we'll pass cached_content_id to generate_content
+                     final_system_instruction = None
+
              # Initialize model WITH system instruction for prompt caching support
+             # Note: If using CachedContent, system_instruction will be None
              model_instance = GenerativeModel(
                  model_name,
                  system_instruction=final_system_instruction
@@ -362,13 +683,18 @@ class VertexAIClient(BaseLLMClient, GoogleFunctionCallingMixin):
                  "safety_settings": safety_settings,
              }

+             # Add cached_content if using CachedContent API for prompt caching
+             if cached_content_id:
+                 api_params["cached_content"] = cached_content_id
+                 self.logger.debug(f"Added cached_content to API params: {cached_content_id}")
+
              # Add tools if available
              if tools_for_api:
                  api_params["tools"] = tools_for_api

-             # Add any additional kwargs (but exclude tools/safety_settings to avoid conflicts)
+             # Add any additional kwargs (but exclude tools/safety_settings/cached_content to avoid conflicts)
              for key, value in kwargs.items():
-                 if key not in ["tools", "safety_settings"]:
+                 if key not in ["tools", "safety_settings", "cached_content"]:
                      api_params[key] = value

              response = await asyncio.get_event_loop().run_in_executor(
@@ -553,7 +879,9 @@ class VertexAIClient(BaseLLMClient, GoogleFunctionCallingMixin):

              # Vertex AI doesn't provide detailed token usage in the response
              # Use estimation method as fallback
-             input_tokens = self._count_tokens_estimate(prompt)
+             # Estimate input tokens from messages content
+             prompt_text = " ".join(msg.content for msg in messages if msg.content)
+             input_tokens = self._count_tokens_estimate(prompt_text)
              output_tokens = self._count_tokens_estimate(content)
              tokens_used = input_tokens + output_tokens

@@ -608,7 +936,9 @@ class VertexAIClient(BaseLLMClient, GoogleFunctionCallingMixin):
              ):
                  self.logger.warning(f"Vertex AI response issue: {str(e)}")
                  # Return a response indicating the issue
-                 estimated_prompt_tokens = self._count_tokens_estimate(prompt)
+                 # Estimate prompt tokens from messages content
+                 prompt_text = " ".join(msg.content for msg in messages if msg.content)
+                 estimated_prompt_tokens = self._count_tokens_estimate(prompt_text)
                  return LLMResponse(
                      content="[Response unavailable due to content processing issues or safety filters]",
                      provider=self.provider_name,
@@ -626,6 +956,7 @@ class VertexAIClient(BaseLLMClient, GoogleFunctionCallingMixin):
          model: Optional[str] = None,
          temperature: float = 0.7,
          max_tokens: Optional[int] = None,
+         context: Optional[Dict[str, Any]] = None,
          functions: Optional[List[Dict[str, Any]]] = None,
          tools: Optional[List[Dict[str, Any]]] = None,
          tool_choice: Optional[Any] = None,
@@ -641,6 +972,12 @@ class VertexAIClient(BaseLLMClient, GoogleFunctionCallingMixin):
              model: Model name (optional)
              temperature: Temperature for generation
              max_tokens: Maximum tokens to generate
+             context: Optional context dictionary containing metadata such as:
+                 - user_id: User identifier for tracking/billing
+                 - tenant_id: Tenant identifier for multi-tenant setups
+                 - request_id: Request identifier for tracing
+                 - session_id: Session identifier
+                 - Any other custom metadata for observability or middleware
              functions: List of function schemas (legacy format)
              tools: List of tool schemas (new format)
              tool_choice: Tool choice strategy (not used for Google Vertex AI)
@@ -664,17 +1001,37 @@ class VertexAIClient(BaseLLMClient, GoogleFunctionCallingMixin):
          try:
              # Extract system message from messages if present
              system_msg = None
+             system_cache_control = None
              user_messages = []
              for msg in messages:
                  if msg.role == "system":
                      system_msg = msg.content
+                     system_cache_control = msg.cache_control
                  else:
                      user_messages.append(msg)

              # Use explicit system_instruction parameter if provided, else use extracted system message
              final_system_instruction = system_instruction or system_msg

+             # Check if we should use CachedContent API for prompt caching
+             cached_content_id = None
+             if final_system_instruction and system_cache_control:
+                 # Create or get CachedContent for the system instruction
+                 # Extract TTL from cache_control if available (defaults to 3600 seconds)
+                 ttl_seconds = getattr(system_cache_control, 'ttl_seconds', None) or 3600
+                 cached_content_id = await self._create_or_get_cached_content(
+                     content=final_system_instruction,
+                     model_name=model_name,
+                     ttl_seconds=ttl_seconds,
+                 )
+                 if cached_content_id:
+                     self.logger.debug(f"Using CachedContent for prompt caching in streaming: {cached_content_id}")
+                     # When using CachedContent, we don't pass system_instruction to GenerativeModel
+                     # Instead, we'll pass cached_content_id to generate_content
+                     final_system_instruction = None
+
              # Initialize model WITH system instruction for prompt caching support
+             # Note: If using CachedContent, system_instruction will be None
              model_instance = GenerativeModel(
                  model_name,
                  system_instruction=final_system_instruction
@@ -738,6 +1095,12 @@ class VertexAIClient(BaseLLMClient, GoogleFunctionCallingMixin):
              # Use mixin method for Function Calling support
              from aiecs.llm.clients.openai_compatible_mixin import StreamChunk

+             # Add cached_content to kwargs if using CachedContent API
+             stream_kwargs = kwargs.copy()
+             if cached_content_id:
+                 stream_kwargs["cached_content"] = cached_content_id
+                 self.logger.debug(f"Added cached_content to streaming API params: {cached_content_id}")
+
              async for chunk in self._stream_text_with_function_calling(
                  model_instance=model_instance,
                  contents=contents,
@@ -745,7 +1108,7 @@ class VertexAIClient(BaseLLMClient, GoogleFunctionCallingMixin):
                  safety_settings=safety_settings,
                  tools=tools_for_api,
                  return_chunks=return_chunks,
-                 **kwargs,
+                 **stream_kwargs,
              ):
                  # Yield chunk (can be str or StreamChunk)
                  yield chunk
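A short consumption sketch for the streaming path above, reusing the client and messages from the earlier sketches; per the docstrings in this release, chunks are plain str tokens unless return_chunks=True, in which case StreamChunk objects carrying tool_calls information may also be yielded (handle_tool_calls is a hypothetical callback):

    async for chunk in client.stream_text(messages=messages, return_chunks=True):
        if isinstance(chunk, str):
            print(chunk, end="", flush=True)  # plain text token
        else:
            # StreamChunk; attributes beyond tool_calls are not shown in this diff
            handle_tool_calls(chunk.tool_calls)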
aiecs/llm/clients/xai_client.py

@@ -87,6 +87,7 @@ class XAIClient(BaseLLMClient, OpenAICompatibleFunctionCallingMixin):
          model: Optional[str] = None,
          temperature: float = 0.7,
          max_tokens: Optional[int] = None,
+         context: Optional[Dict[str, Any]] = None,
          functions: Optional[List[Dict[str, Any]]] = None,
          tools: Optional[List[Dict[str, Any]]] = None,
          tool_choice: Optional[Any] = None,
@@ -94,8 +95,27 @@ class XAIClient(BaseLLMClient, OpenAICompatibleFunctionCallingMixin):
      ) -> LLMResponse:
          """
          Generate text using xAI API via OpenAI library (supports all Grok models).
-
+
          xAI API is OpenAI-compatible, so it supports Function Calling.
+
+         Args:
+             messages: List of conversation messages
+             model: Model name (optional, uses default if not provided)
+             temperature: Sampling temperature (0.0 to 1.0)
+             max_tokens: Maximum tokens to generate
+             context: Optional context dictionary containing metadata such as:
+                 - user_id: User identifier for tracking/billing
+                 - tenant_id: Tenant identifier for multi-tenant setups
+                 - request_id: Request identifier for tracing
+                 - session_id: Session identifier
+                 - Any other custom metadata for observability or middleware
+             functions: List of function schemas (legacy format)
+             tools: List of tool schemas (new format, recommended)
+             tool_choice: Tool choice strategy
+             **kwargs: Additional provider-specific parameters
+
+         Returns:
+             LLMResponse with generated text and metadata
          """
          # Check API key availability
          api_key = self._get_api_key()
@@ -144,6 +164,7 @@ class XAIClient(BaseLLMClient, OpenAICompatibleFunctionCallingMixin):
          model: Optional[str] = None,
          temperature: float = 0.7,
          max_tokens: Optional[int] = None,
+         context: Optional[Dict[str, Any]] = None,
          functions: Optional[List[Dict[str, Any]]] = None,
          tools: Optional[List[Dict[str, Any]]] = None,
          tool_choice: Optional[Any] = None,
@@ -152,11 +173,28 @@ class XAIClient(BaseLLMClient, OpenAICompatibleFunctionCallingMixin):
      ) -> AsyncGenerator[Any, None]:
          """
          Stream text using xAI API via OpenAI library (supports all Grok models).
-
+
          xAI API is OpenAI-compatible, so it supports Function Calling.
-
+
          Args:
+             messages: List of conversation messages
+             model: Model name (optional, uses default if not provided)
+             temperature: Sampling temperature (0.0 to 1.0)
+             max_tokens: Maximum tokens to generate
+             context: Optional context dictionary containing metadata such as:
+                 - user_id: User identifier for tracking/billing
+                 - tenant_id: Tenant identifier for multi-tenant setups
+                 - request_id: Request identifier for tracing
+                 - session_id: Session identifier
+                 - Any other custom metadata for observability or middleware
+             functions: List of function schemas (legacy format)
+             tools: List of tool schemas (new format, recommended)
+             tool_choice: Tool choice strategy
              return_chunks: If True, returns StreamChunk objects with tool_calls info; if False, returns str tokens only
+             **kwargs: Additional provider-specific parameters
+
+         Yields:
+             str or StreamChunk: Text tokens or StreamChunk objects
          """
          # Check API key availability
          api_key = self._get_api_key()
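The context parameter added to generate_text and stream_text across clients in this release is a plain metadata dictionary; a hedged sketch using only the keys named in the docstrings above, with illustrative values:

    response = await client.generate_text(
        messages=messages,
        context={
            "user_id": "user-123",    # tracking/billing
            "tenant_id": "acme",      # multi-tenant setups
            "request_id": "req-42",   # tracing
            "session_id": "sess-7",
        },
    )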