praisonaiagents 0.0.144__py3-none-any.whl → 0.0.146__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- praisonaiagents/__init__.py +71 -7
- praisonaiagents/agent/__init__.py +2 -1
- praisonaiagents/agent/agent.py +358 -48
- praisonaiagents/agent/context_agent.py +2315 -0
- praisonaiagents/agents/agents.py +30 -12
- praisonaiagents/knowledge/knowledge.py +9 -1
- praisonaiagents/llm/__init__.py +40 -14
- praisonaiagents/llm/llm.py +485 -59
- praisonaiagents/llm/openai_client.py +98 -16
- praisonaiagents/memory/memory.py +84 -15
- praisonaiagents/task/task.py +7 -6
- praisonaiagents/telemetry/__init__.py +63 -3
- praisonaiagents/telemetry/integration.py +78 -10
- praisonaiagents/telemetry/performance_cli.py +397 -0
- praisonaiagents/telemetry/performance_monitor.py +573 -0
- praisonaiagents/telemetry/performance_utils.py +571 -0
- praisonaiagents/telemetry/telemetry.py +35 -11
- {praisonaiagents-0.0.144.dist-info → praisonaiagents-0.0.146.dist-info}/METADATA +9 -3
- {praisonaiagents-0.0.144.dist-info → praisonaiagents-0.0.146.dist-info}/RECORD +21 -17
- {praisonaiagents-0.0.144.dist-info → praisonaiagents-0.0.146.dist-info}/WHEEL +0 -0
- {praisonaiagents-0.0.144.dist-info → praisonaiagents-0.0.146.dist-info}/top_level.txt +0 -0
praisonaiagents/llm/llm.py
CHANGED
@@ -971,13 +971,39 @@ class LLM:
             # Provider doesn't support streaming with tools, use non-streaming
             use_streaming = False
 
+        # Track whether fallback was successful to avoid duplicate API calls
+        fallback_completed = False
+
         if use_streaming:
             # Streaming approach (with or without tools)
             tool_calls = []
             response_text = ""
+            streaming_success = False
 
-
-
+            # Wrap streaming with error handling for LiteLLM JSON parsing errors
+            try:
+                if verbose:
+                    # Verbose streaming: show display_generating during streaming
+                    with Live(display_generating("", current_time), console=console, refresh_per_second=4) as live:
+                        for chunk in litellm.completion(
+                            **self._build_completion_params(
+                                messages=messages,
+                                tools=formatted_tools,
+                                temperature=temperature,
+                                stream=True,
+                                output_json=output_json,
+                                output_pydantic=output_pydantic,
+                                **kwargs
+                            )
+                        ):
+                            if chunk and chunk.choices and chunk.choices[0].delta:
+                                delta = chunk.choices[0].delta
+                                response_text, tool_calls = self._process_stream_delta(
+                                    delta, response_text, tool_calls, formatted_tools
+                                )
+                                live.update(display_generating(response_text, current_time))
+                else:
+                    # Non-verbose streaming: no display_generating during streaming
                     for chunk in litellm.completion(
                         **self._build_completion_params(
                             messages=messages,
@@ -994,74 +1020,205 @@ class LLM:
                             response_text, tool_calls = self._process_stream_delta(
                                 delta, response_text, tool_calls, formatted_tools
                             )
-
+                streaming_success = True
+            except Exception as streaming_error:
+                # Handle streaming errors with recovery logic
+                if self._is_streaming_error_recoverable(streaming_error):
+                    if verbose:
+                        logging.warning(f"Streaming error (recoverable): {streaming_error}")
+                        logging.warning("Falling back to non-streaming mode")
+                    # Immediately perform non-streaming fallback with actual API call
+                    try:
+                        if verbose:
+                            # When verbose=True, always use streaming for better UX
+                            with Live(display_generating("", current_time), console=console, refresh_per_second=4, transient=True) as live:
+                                response_text = ""
+                                # Use streaming when verbose for progressive display
+                                for chunk in litellm.completion(
+                                    **self._build_completion_params(
+                                        messages=messages,
+                                        tools=formatted_tools,
+                                        temperature=temperature,
+                                        stream=True,  # Always stream when verbose=True
+                                        output_json=output_json,
+                                        output_pydantic=output_pydantic,
+                                        **kwargs
+                                    )
+                                ):
+                                    if chunk and chunk.choices and chunk.choices[0].delta:
+                                        delta = chunk.choices[0].delta
+                                        if hasattr(delta, "content") and delta.content:
+                                            response_text += delta.content
+                                            live.update(display_generating(response_text, current_time))
+
+                            # Clear the live display after completion
+                            console.print()
+
+                            # Create final response structure
+                            final_response = {
+                                "choices": [{
+                                    "message": {
+                                        "content": response_text,
+                                        "tool_calls": None
+                                    }
+                                }]
+                            }
+                        else:
+                            # For non-streaming + non-verbose: no display_generating (per user requirements)
+                            final_response = litellm.completion(
+                                **self._build_completion_params(
+                                    messages=messages,
+                                    tools=formatted_tools,
+                                    temperature=temperature,
+                                    stream=False,
+                                    output_json=output_json,
+                                    output_pydantic=output_pydantic,
+                                    **kwargs
+                                )
+                            )
+                            response_text = final_response["choices"][0]["message"]["content"]
+
+                        # Execute callbacks and display based on verbose setting
+                        if verbose and not interaction_displayed:
+                            # Display the complete response at once (this will trigger callbacks internally)
+                            display_interaction(
+                                original_prompt,
+                                response_text,
+                                markdown=markdown,
+                                generation_time=time.time() - current_time,
+                                console=console,
+                                agent_name=agent_name,
+                                agent_role=agent_role,
+                                agent_tools=agent_tools,
+                                task_name=task_name,
+                                task_description=task_description,
+                                task_id=task_id
+                            )
+                            interaction_displayed = True
+                            callback_executed = True
+                        elif not callback_executed:
+                            # Only execute callback if display_interaction hasn't been called
+                            execute_sync_callback(
+                                'interaction',
+                                message=original_prompt,
+                                response=response_text,
+                                markdown=markdown,
+                                generation_time=time.time() - current_time,
+                                agent_name=agent_name,
+                                agent_role=agent_role,
+                                agent_tools=agent_tools,
+                                task_name=task_name,
+                                task_description=task_description,
+                                task_id=task_id
+                            )
+                            callback_executed = True
+
+                        # Mark that fallback completed successfully
+                        fallback_completed = True
+                        streaming_success = False
+
+                    except Exception as fallback_error:
+                        # If non-streaming also fails, create a graceful fallback with partial streaming data
+                        logging.warning(f"Non-streaming fallback also failed: {fallback_error}")
+                        logging.warning("Using partial streaming response data")
+                        response_text = response_text or ""
+                        # Create a mock response with whatever partial data we have
+                        final_response = {
+                            "choices": [{
+                                "message": {
+                                    "content": response_text,
+                                    "tool_calls": tool_calls if tool_calls else None
+                                }
+                            }]
+                        }
+                        fallback_completed = True
+                        streaming_success = False
+                else:
+                    # For non-recoverable errors, re-raise immediately
+                    logging.error(f"Non-recoverable streaming error: {streaming_error}")
+                    raise streaming_error
+
+            if streaming_success:
+                response_text = response_text.strip() if response_text else ""
+
+                # Execute callbacks after streaming completes (only if not verbose, since verbose will call display_interaction later)
+                if not verbose and not callback_executed:
+                    execute_sync_callback(
+                        'interaction',
+                        message=original_prompt,
+                        response=response_text,
+                        markdown=markdown,
+                        generation_time=time.time() - current_time,
+                        agent_name=agent_name,
+                        agent_role=agent_role,
+                        agent_tools=agent_tools,
+                        task_name=task_name,
+                        task_description=task_description,
+                        task_id=task_id
+                    )
+                    callback_executed = True
+
+                # Create a mock final_response with the captured data
+                final_response = {
+                    "choices": [{
+                        "message": {
+                            "content": response_text,
+                            "tool_calls": tool_calls if tool_calls else None
+                        }
+                    }]
+                }
+
+        # Only execute non-streaming if we haven't used streaming AND fallback hasn't completed
+        if not use_streaming and not fallback_completed:
+            # Non-streaming approach (when tools require it, streaming is disabled, or streaming fallback)
+            if verbose:
+                # When verbose=True, always use streaming for better UX
+                with Live(display_generating("", current_time), console=console, refresh_per_second=4, transient=True) as live:
+                    response_text = ""
+                    # Use streaming when verbose for progressive display
+                    for chunk in litellm.completion(
+                        **self._build_completion_params(
+                            messages=messages,
+                            tools=formatted_tools,
+                            temperature=temperature,
+                            stream=True,  # Always stream when verbose=True
+                            output_json=output_json,
+                            output_pydantic=output_pydantic,
+                            **kwargs
+                        )
+                    ):
+                        if chunk and chunk.choices and chunk.choices[0].delta:
+                            delta = chunk.choices[0].delta
+                            if hasattr(delta, "content") and delta.content:
+                                response_text += delta.content
                                 live.update(display_generating(response_text, current_time))
-
+
+                # Clear the live display after completion
+                console.print()
+
+                # Create final response structure
+                final_response = {
+                    "choices": [{
+                        "message": {
+                            "content": response_text,
+                            "tool_calls": None
+                        }
+                    }]
+                }
             else:
-                #
-
+                # For non-streaming + non-verbose: no display_generating (per user requirements)
+                final_response = litellm.completion(
                     **self._build_completion_params(
                         messages=messages,
                         tools=formatted_tools,
                         temperature=temperature,
-                        stream=
+                        stream=False,
                         output_json=output_json,
                         output_pydantic=output_pydantic,
                         **kwargs
                     )
-                ):
-                    if chunk and chunk.choices and chunk.choices[0].delta:
-                        delta = chunk.choices[0].delta
-                        if delta.content:
-                            response_text += delta.content
-
-                        # Capture tool calls from streaming chunks if provider supports it
-                        if formatted_tools and self._supports_streaming_tools():
-                            tool_calls = self._process_tool_calls_from_stream(delta, tool_calls)
-
-            response_text = response_text.strip() if response_text else ""
-
-            # Execute callbacks after streaming completes (only if not verbose, since verbose will call display_interaction later)
-            if not verbose and not callback_executed:
-                execute_sync_callback(
-                    'interaction',
-                    message=original_prompt,
-                    response=response_text,
-                    markdown=markdown,
-                    generation_time=time.time() - current_time,
-                    agent_name=agent_name,
-                    agent_role=agent_role,
-                    agent_tools=agent_tools,
-                    task_name=task_name,
-                    task_description=task_description,
-                    task_id=task_id
                 )
-
-
-
-            # Create a mock final_response with the captured data
-            final_response = {
-                "choices": [{
-                    "message": {
-                        "content": response_text,
-                        "tool_calls": tool_calls if tool_calls else None
-                    }
-                }]
-            }
-        else:
-            # Non-streaming approach (when tools require it or streaming is disabled)
-            final_response = litellm.completion(
-                **self._build_completion_params(
-                    messages=messages,
-                    tools=formatted_tools,
-                    temperature=temperature,
-                    stream=False,
-                    output_json=output_json,
-                    output_pydantic=output_pydantic,
-                    **kwargs
-                )
-            )
-            response_text = final_response["choices"][0]["message"]["content"]
+            response_text = final_response["choices"][0]["message"]["content"]
 
         # Execute callbacks and display based on verbose setting
         if verbose and not interaction_displayed:
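Taken together, the two hunks above implement a streaming-first call with a guarded retry: the streaming attempt is wrapped in a try block, recoverable failures (for example LiteLLM JSON-parsing errors on malformed chunks) trigger a single non-streaming fallback call, and the new fallback_completed flag keeps the later non-streaming branch from issuing a duplicate API request. The following is a minimal, self-contained sketch of that control flow, not code from the package: call_model stands in for litellm.completion and is_recoverable for the class's _is_streaming_error_recoverable helper.

    # Sketch only: call_model and is_recoverable are assumed stand-ins for
    # litellm.completion and LLM._is_streaming_error_recoverable.
    def fetch_response(call_model, is_recoverable, messages, use_streaming=True):
        response_text = ""
        fallback_completed = False   # guards the non-streaming branch below
        streaming_success = False

        if use_streaming:
            try:
                for chunk in call_model(messages, stream=True):
                    response_text += chunk        # accumulate streamed content
                streaming_success = True
            except Exception as err:
                if not is_recoverable(err):
                    raise                         # non-recoverable: propagate
                # Recoverable (e.g. malformed-chunk JSON error): retry once without streaming.
                response_text = call_model(messages, stream=False)
                fallback_completed = True

        if not use_streaming and not fallback_completed:
            # Plain non-streaming path; skipped when the fallback already ran.
            response_text = call_model(messages, stream=False)

        return response_text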
@@ -1557,11 +1714,280 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
         total_time = time.time() - start_time
         logging.debug(f"get_response completed in {total_time:.2f} seconds")
 
+    def get_response_stream(
+        self,
+        prompt: Union[str, List[Dict]],
+        system_prompt: Optional[str] = None,
+        chat_history: Optional[List[Dict]] = None,
+        temperature: float = 0.2,
+        tools: Optional[List[Any]] = None,
+        output_json: Optional[BaseModel] = None,
+        output_pydantic: Optional[BaseModel] = None,
+        verbose: bool = False,  # Default to non-verbose for streaming
+        markdown: bool = True,
+        agent_name: Optional[str] = None,
+        agent_role: Optional[str] = None,
+        agent_tools: Optional[List[str]] = None,
+        task_name: Optional[str] = None,
+        task_description: Optional[str] = None,
+        task_id: Optional[str] = None,
+        execute_tool_fn: Optional[Callable] = None,
+        **kwargs
+    ):
+        """Generator that yields real-time response chunks from the LLM.
+
+        This method provides true streaming by yielding content chunks as they
+        are received from the underlying LLM, enabling real-time display of
+        responses without waiting for the complete response.
+
+        Args:
+            prompt: The prompt to send to the LLM
+            system_prompt: Optional system prompt
+            chat_history: Optional chat history
+            temperature: Sampling temperature
+            tools: Optional list of tools for function calling
+            output_json: Optional JSON schema for structured output
+            output_pydantic: Optional Pydantic model for structured output
+            verbose: Whether to enable verbose logging (default False for streaming)
+            markdown: Whether to enable markdown processing
+            agent_name: Optional agent name for logging
+            agent_role: Optional agent role for logging
+            agent_tools: Optional list of agent tools for logging
+            task_name: Optional task name for logging
+            task_description: Optional task description for logging
+            task_id: Optional task ID for logging
+            execute_tool_fn: Optional function for executing tools
+            **kwargs: Additional parameters
+
+        Yields:
+            str: Individual content chunks as they are received from the LLM
+
+        Raises:
+            Exception: If streaming fails or LLM call encounters an error
+        """
+        try:
+            import litellm
+
+            # Build messages using existing logic
+            messages, original_prompt = self._build_messages(
+                prompt=prompt,
+                system_prompt=system_prompt,
+                chat_history=chat_history,
+                output_json=output_json,
+                output_pydantic=output_pydantic
+            )
+
+            # Format tools for litellm
+            formatted_tools = self._format_tools_for_litellm(tools)
+
+            # Determine if we should use streaming based on tool support
+            use_streaming = True
+            if formatted_tools and not self._supports_streaming_tools():
+                # Provider doesn't support streaming with tools, fall back to non-streaming
+                use_streaming = False
+
+            if use_streaming:
+                # Real-time streaming approach with tool call support
+                try:
+                    tool_calls = []
+                    response_text = ""
+                    consecutive_errors = 0
+                    max_consecutive_errors = 3  # Fallback to non-streaming after 3 consecutive errors
+
+                    stream_iterator = litellm.completion(
+                        **self._build_completion_params(
+                            messages=messages,
+                            tools=formatted_tools,
+                            temperature=temperature,
+                            stream=True,
+                            output_json=output_json,
+                            output_pydantic=output_pydantic,
+                            **kwargs
+                        )
+                    )
+
+                    # Wrap the iteration with additional error handling for LiteLLM JSON parsing errors
+                    try:
+                        for chunk in stream_iterator:
+                            try:
+                                if chunk and chunk.choices and chunk.choices[0].delta:
+                                    delta = chunk.choices[0].delta
+
+                                    # Process both content and tool calls using existing helper
+                                    response_text, tool_calls = self._process_stream_delta(
+                                        delta, response_text, tool_calls, formatted_tools
+                                    )
+
+                                    # Yield content chunks in real-time as they arrive
+                                    if delta.content:
+                                        yield delta.content
+
+                                    # Reset consecutive error counter only after successful chunk processing
+                                    consecutive_errors = 0
+
+                            except Exception as chunk_error:
+                                consecutive_errors += 1
+
+                                # Log the specific error for debugging
+                                if verbose:
+                                    logging.warning(f"Chunk processing error ({consecutive_errors}/{max_consecutive_errors}): {chunk_error}")
+
+                                # Check if this error is recoverable using our helper method
+                                if self._is_streaming_error_recoverable(chunk_error):
+                                    if verbose:
+                                        logging.warning("Recoverable streaming error detected, skipping malformed chunk and continuing")
+
+                                    # Skip this malformed chunk and continue if we haven't hit the limit
+                                    if consecutive_errors < max_consecutive_errors:
+                                        continue
+                                    else:
+                                        # Too many recoverable errors, fallback to non-streaming
+                                        logging.warning(f"Too many consecutive streaming errors ({consecutive_errors}), falling back to non-streaming mode")
+                                        raise Exception(f"Streaming failed with {consecutive_errors} consecutive errors") from chunk_error
+                                else:
+                                    # For non-recoverable errors, re-raise immediately
+                                    logging.error(f"Non-recoverable streaming error: {chunk_error}")
+                                    raise chunk_error
+
+                    except Exception as iterator_error:
+                        # Handle errors that occur during stream iteration itself (e.g., JSON parsing in LiteLLM)
+                        error_msg = str(iterator_error).lower()
+
+                        # Check if this is a recoverable streaming error (including JSON parsing errors)
+                        if self._is_streaming_error_recoverable(iterator_error):
+                            if verbose:
+                                logging.warning(f"Stream iterator error detected (recoverable): {iterator_error}")
+                                logging.warning("Falling back to non-streaming mode due to stream iteration failure")
+
+                            # Force fallback to non-streaming for iterator-level errors
+                            raise Exception("Stream iteration failed with recoverable error, falling back to non-streaming") from iterator_error
+                        else:
+                            # For non-recoverable errors, re-raise immediately
+                            logging.error(f"Non-recoverable stream iterator error: {iterator_error}")
+                            raise iterator_error
+
+                    # After streaming completes, handle tool calls if present
+                    if tool_calls and execute_tool_fn:
+                        # Add assistant message with tool calls to conversation
+                        if self._is_ollama_provider():
+                            messages.append({
+                                "role": "assistant",
+                                "content": response_text
+                            })
+                        else:
+                            serializable_tool_calls = self._serialize_tool_calls(tool_calls)
+                            messages.append({
+                                "role": "assistant",
+                                "content": response_text,
+                                "tool_calls": serializable_tool_calls
+                            })
+
+                        # Execute tool calls and add results to conversation
+                        for tool_call in tool_calls:
+                            is_ollama = self._is_ollama_provider()
+                            function_name, arguments, tool_call_id = self._extract_tool_call_info(tool_call, is_ollama)
+
+                            try:
+                                # Execute the tool
+                                tool_result = execute_tool_fn(function_name, arguments)
+
+                                # Add tool result to messages
+                                tool_message = self._create_tool_message(function_name, tool_result, tool_call_id, is_ollama)
+                                messages.append(tool_message)
+
+                            except Exception as e:
+                                logging.error(f"Tool execution error for {function_name}: {e}")
+                                # Add error message to conversation
+                                error_message = self._create_tool_message(
+                                    function_name, f"Error executing tool: {e}", tool_call_id, is_ollama
+                                )
+                                messages.append(error_message)
+
+                        # Continue conversation after tool execution - get follow-up response
+                        try:
+                            follow_up_response = litellm.completion(
+                                **self._build_completion_params(
+                                    messages=messages,
+                                    tools=formatted_tools,
+                                    temperature=temperature,
+                                    stream=False,
+                                    **kwargs
+                                )
+                            )
+
+                            if follow_up_response and follow_up_response.choices:
+                                follow_up_content = follow_up_response.choices[0].message.content
+                                if follow_up_content:
+                                    # Yield the follow-up response after tool execution
+                                    yield follow_up_content
+                        except Exception as e:
+                            logging.error(f"Follow-up response failed: {e}")
+
+                except Exception as e:
+                    error_msg = str(e).lower()
+
+                    # Provide more specific error messages based on the error type
+                    if any(keyword in error_msg for keyword in ['json', 'expecting property name', 'parse', 'decode']):
+                        logging.warning(f"Streaming failed due to JSON parsing errors (likely malformed chunks from provider): {e}")
+                    elif 'connection' in error_msg or 'timeout' in error_msg:
+                        logging.warning(f"Streaming failed due to connection issues: {e}")
+                    else:
+                        logging.error(f"Streaming failed with unexpected error: {e}")
+
+                    # Fall back to non-streaming if streaming fails
+                    use_streaming = False
+
+            if not use_streaming:
+                # Fall back to non-streaming and yield the complete response
+                try:
+                    response = litellm.completion(
+                        **self._build_completion_params(
+                            messages=messages,
+                            tools=formatted_tools,
+                            temperature=temperature,
+                            stream=False,
+                            output_json=output_json,
+                            output_pydantic=output_pydantic,
+                            **kwargs
+                        )
+                    )
+
+                    if response and response.choices:
+                        content = response.choices[0].message.content
+                        if content:
+                            # Yield the complete response as a single chunk
+                            yield content
+
+                except Exception as e:
+                    logging.error(f"Non-streaming fallback failed: {e}")
+                    raise
+
+        except Exception as e:
+            logging.error(f"Error in get_response_stream: {e}")
+            raise
+
     def _is_gemini_model(self) -> bool:
         """Check if the model is a Gemini model."""
         if not self.model:
             return False
         return any(prefix in self.model.lower() for prefix in ['gemini', 'gemini/', 'google/gemini'])
+
+    def _is_streaming_error_recoverable(self, error: Exception) -> bool:
+        """Check if a streaming error is recoverable (e.g., malformed chunk vs connection error)."""
+        error_msg = str(error).lower()
+
+        # JSON parsing errors are often recoverable (skip malformed chunk and continue)
+        json_error_keywords = ['json', 'expecting property name', 'parse', 'decode', 'invalid json']
+        if any(keyword in error_msg for keyword in json_error_keywords):
+            return True
+
+        # Connection errors might be temporary but are less recoverable in streaming context
+        connection_error_keywords = ['connection', 'timeout', 'network', 'http']
+        if any(keyword in error_msg for keyword in connection_error_keywords):
+            return False
+
+        # Other errors are generally not recoverable
+        return False
 
     async def get_response_async(
         self,
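For orientation, the new get_response_stream method is a generator, so a caller consumes it by iterating over the yielded chunks. A minimal usage sketch follows; the import path matches the module shown in this diff, but the constructor arguments are assumptions for illustration and are not taken from the diff.

    # Hypothetical usage of the get_response_stream generator added in 0.0.146.
    # Only the method parameters come from the diff; the constructor call is assumed.
    from praisonaiagents.llm.llm import LLM

    llm = LLM(model="gpt-4o-mini")  # assumed constructor usage

    for chunk in llm.get_response_stream(
        prompt="Summarise this release in one sentence.",
        temperature=0.2,
        verbose=False,
    ):
        print(chunk, end="", flush=True)  # chunks arrive as the provider streams them

    print()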