PyPI - agentle - Versions diffs - 0.9.4__py3-none-any.whl → 0.9.28__py3-none-any.whl - Mend

agentle: 0.9.4 (py3-none-any.whl) → 0.9.28 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (85)

agentle/agents/agent.py +175 -10
agentle/agents/agent_run_output.py +8 -1
agentle/agents/apis/__init__.py +79 -6
agentle/agents/apis/api.py +342 -73
agentle/agents/apis/api_key_authentication.py +43 -0
agentle/agents/apis/api_key_location.py +11 -0
agentle/agents/apis/api_metrics.py +16 -0
agentle/agents/apis/auth_type.py +17 -0
agentle/agents/apis/authentication.py +32 -0
agentle/agents/apis/authentication_base.py +42 -0
agentle/agents/apis/authentication_config.py +117 -0
agentle/agents/apis/basic_authentication.py +34 -0
agentle/agents/apis/bearer_authentication.py +52 -0
agentle/agents/apis/cache_strategy.py +12 -0
agentle/agents/apis/circuit_breaker.py +69 -0
agentle/agents/apis/circuit_breaker_error.py +7 -0
agentle/agents/apis/circuit_breaker_state.py +11 -0
agentle/agents/apis/endpoint.py +413 -254
agentle/agents/apis/file_upload.py +23 -0
agentle/agents/apis/hmac_authentication.py +56 -0
agentle/agents/apis/no_authentication.py +27 -0
agentle/agents/apis/oauth2_authentication.py +111 -0
agentle/agents/apis/oauth2_grant_type.py +12 -0
agentle/agents/apis/object_schema.py +86 -1
agentle/agents/apis/params/__init__.py +10 -1
agentle/agents/apis/params/boolean_param.py +44 -0
agentle/agents/apis/params/number_param.py +56 -0
agentle/agents/apis/rate_limit_error.py +7 -0
agentle/agents/apis/rate_limiter.py +57 -0
agentle/agents/apis/request_config.py +126 -4
agentle/agents/apis/request_hook.py +16 -0
agentle/agents/apis/response_cache.py +49 -0
agentle/agents/apis/retry_strategy.py +12 -0
agentle/agents/whatsapp/human_delay_calculator.py +462 -0
agentle/agents/whatsapp/models/audio_message.py +6 -4
agentle/agents/whatsapp/models/key.py +2 -2
agentle/agents/whatsapp/models/whatsapp_bot_config.py +375 -21
agentle/agents/whatsapp/models/whatsapp_response_base.py +31 -0
agentle/agents/whatsapp/models/whatsapp_webhook_payload.py +5 -1
agentle/agents/whatsapp/providers/base/whatsapp_provider.py +51 -0
agentle/agents/whatsapp/providers/evolution/evolution_api_provider.py +237 -10
agentle/agents/whatsapp/providers/meta/meta_whatsapp_provider.py +126 -0
agentle/agents/whatsapp/v2/batch_processor_manager.py +4 -0
agentle/agents/whatsapp/v2/bot_config.py +188 -0
agentle/agents/whatsapp/v2/message_limit.py +9 -0
agentle/agents/whatsapp/v2/payload.py +0 -0
agentle/agents/whatsapp/v2/whatsapp_bot.py +13 -0
agentle/agents/whatsapp/v2/whatsapp_cloud_api_provider.py +0 -0
agentle/agents/whatsapp/v2/whatsapp_provider.py +0 -0
agentle/agents/whatsapp/whatsapp_bot.py +827 -45
agentle/generations/providers/google/adapters/generate_generate_content_response_to_generation_adapter.py +13 -10
agentle/generations/providers/google/google_generation_provider.py +35 -5
agentle/generations/providers/openrouter/_adapters/openrouter_message_to_generated_assistant_message_adapter.py +35 -1
agentle/mcp/servers/stdio_mcp_server.py +23 -4
agentle/parsing/parsers/docx.py +8 -0
agentle/parsing/parsers/file_parser.py +4 -0
agentle/parsing/parsers/pdf.py +7 -1
agentle/storage/__init__.py +11 -0
agentle/storage/file_storage_manager.py +44 -0
agentle/storage/local_file_storage_manager.py +122 -0
agentle/storage/s3_file_storage_manager.py +124 -0
agentle/tts/audio_format.py +6 -0
agentle/tts/elevenlabs_tts_provider.py +108 -0
agentle/tts/output_format_type.py +26 -0
agentle/tts/speech_config.py +14 -0
agentle/tts/speech_result.py +15 -0
agentle/tts/tts_provider.py +16 -0
agentle/tts/voice_settings.py +30 -0
agentle/utils/parse_streaming_json.py +39 -13
agentle/voice_cloning/__init__.py +0 -0
agentle/voice_cloning/voice_cloner.py +0 -0
agentle/web/extractor.py +282 -148
{agentle-0.9.4.dist-info → agentle-0.9.28.dist-info}/METADATA +1 -1
{agentle-0.9.4.dist-info → agentle-0.9.28.dist-info}/RECORD +78 -39
agentle/tts/real_time/definitions/audio_data.py +0 -20
agentle/tts/real_time/definitions/speech_config.py +0 -27
agentle/tts/real_time/definitions/speech_result.py +0 -14
agentle/tts/real_time/definitions/tts_stream_chunk.py +0 -15
agentle/tts/real_time/definitions/voice_gender.py +0 -9
agentle/tts/real_time/definitions/voice_info.py +0 -18
agentle/tts/real_time/real_time_speech_to_text_provider.py +0 -66
/agentle/{tts/real_time → agents/whatsapp/v2}/__init__.py +0 -0
/agentle/{tts/real_time/definitions/__init__.py → agents/whatsapp/v2/in_memory_batch_processor_manager.py} +0 -0
{agentle-0.9.4.dist-info → agentle-0.9.28.dist-info}/WHEEL +0 -0
{agentle-0.9.4.dist-info → agentle-0.9.28.dist-info}/licenses/LICENSE +0 -0

agentle/agents/whatsapp/whatsapp_bot.py CHANGED Viewed

@@ -33,6 +33,7 @@ from agentle.agents.whatsapp.models.whatsapp_document_message import (
 from agentle.agents.whatsapp.models.whatsapp_image_message import WhatsAppImageMessage
 from agentle.agents.whatsapp.models.whatsapp_media_message import WhatsAppMediaMessage
 from agentle.agents.whatsapp.models.whatsapp_message import WhatsAppMessage
+from agentle.agents.whatsapp.models.whatsapp_response_base import WhatsAppResponseBase
 from agentle.agents.whatsapp.models.whatsapp_session import WhatsAppSession
 from agentle.agents.whatsapp.models.whatsapp_text_message import WhatsAppTextMessage
 from agentle.agents.whatsapp.models.whatsapp_video_message import WhatsAppVideoMessage
@@ -43,6 +44,7 @@ from agentle.agents.whatsapp.providers.base.whatsapp_provider import WhatsAppPro
 from agentle.agents.whatsapp.providers.evolution.evolution_api_provider import (
     EvolutionAPIProvider,
 )
+from agentle.agents.whatsapp.human_delay_calculator import HumanDelayCalculator
 from agentle.generations.models.message_parts.file import FilePart
 from agentle.generations.models.message_parts.text import TextPart
 from agentle.generations.models.message_parts.tool_execution_suggestion import (
@@ -54,7 +56,8 @@ from agentle.generations.models.messages.generated_assistant_message import (
 from agentle.generations.models.messages.user_message import UserMessage
 from agentle.generations.tools.tool import Tool
 from agentle.generations.tools.tool_execution_result import ToolExecutionResult
+from agentle.storage.file_storage_manager import FileStorageManager
+from agentle.tts.tts_provider import TtsProvider
 if TYPE_CHECKING:
     from blacksheep import Application
@@ -126,19 +129,47 @@ class CallbackWithContext:
     context: dict[str, Any] = field(default_factory=dict)
-class WhatsAppBot(BaseModel):
+class WhatsAppBot[T_Schema: WhatsAppResponseBase = WhatsAppResponseBase](BaseModel):
     """
     WhatsApp bot that wraps an Agentle agent with enhanced message batching and spam protection.
-    Now uses the Agent's conversation store directly instead of managing contexts separately.
+    Now supports structured outputs through generic type parameter T_Schema.
+    The schema must extend WhatsAppResponseBase to ensure a 'response' field is always present.
+    Examples:
+    ```python
+        # Basic usage (no structured output)
+        agent = Agent(...)
+        bot = WhatsAppBot(agent=agent, provider=provider)
+        # With structured output
+        class MyResponse(WhatsAppResponseBase):
+            sentiment: Literal["happy", "sad", "neutral"]
+            urgency_level: int
+        agent = Agent[MyResponse](
+            response_schema=MyResponse,
+            instructions="Extract sentiment and urgency from the conversation..."
+        )
+        bot = WhatsAppBot[MyResponse](agent=agent, provider=provider)
+        # Access structured data in callbacks
+        async def my_callback(phone, chat_id, response, context):
+            if response and response.parsed:
+                print(f"Sentiment: {response.parsed.sentiment}")
+                print(f"Urgency: {response.parsed.urgency_level}")
+                # response.parsed.response is automatically sent to WhatsApp
+        bot.add_response_callback(my_callback)
+    ```
     """
-    agent: Agent[Any]
+    agent: Agent[T_Schema]
     provider: WhatsAppProvider
+    tts_provider: TtsProvider | None = Field(default=None)
+    file_storage_manager: FileStorageManager | None = Field(default=None)
     config: WhatsAppBotConfig = Field(default_factory=WhatsAppBotConfig)
-    # REMOVED: context_manager field - no longer needed
     _running: bool = PrivateAttr(default=False)
     _webhook_handlers: MutableSequence[Callable[..., Any]] = PrivateAttr(
         default_factory=list
@@ -153,6 +184,7 @@ class WhatsAppBot(BaseModel):
     _response_callbacks: MutableSequence[CallbackWithContext] = PrivateAttr(
         default_factory=list
     )
+    _delay_calculator: HumanDelayCalculator | None = PrivateAttr(default=None)
     model_config = ConfigDict(arbitrary_types_allowed=True)
@@ -164,6 +196,55 @@ class WhatsAppBot(BaseModel):
                 + "Please set agent.conversation_store before creating WhatsAppBot."
             )
+        # Log configuration validation
+        validation_issues = self.config.validate_config()
+        if validation_issues:
+            logger.warning(
+                f"[CONFIG_VALIDATION] Configuration has {len(validation_issues)} validation issue(s):"
+            )
+            for issue in validation_issues:
+                logger.warning(f"[CONFIG_VALIDATION]   - {issue}")
+        else:
+            logger.info("[CONFIG_VALIDATION] Configuration validation passed")
+        # Initialize delay calculator if human delays are enabled
+        if self.config.enable_human_delays:
+            logger.info(
+                "[DELAY_CONFIG] ═══════════ HUMAN-LIKE DELAYS ENABLED ═══════════"
+            )
+            logger.info(
+                "[DELAY_CONFIG] Read delay bounds: "
+                + f"[{self.config.min_read_delay_seconds:.2f}s - {self.config.max_read_delay_seconds:.2f}s]"
+            )
+            logger.info(
+                "[DELAY_CONFIG] Typing delay bounds: "
+                + f"[{self.config.min_typing_delay_seconds:.2f}s - {self.config.max_typing_delay_seconds:.2f}s]"
+            )
+            logger.info(
+                "[DELAY_CONFIG] Send delay bounds: "
+                + f"[{self.config.min_send_delay_seconds:.2f}s - {self.config.max_send_delay_seconds:.2f}s]"
+            )
+            logger.info(
+                "[DELAY_CONFIG] Delay behavior settings: "
+                + f"jitter_enabled={self.config.enable_delay_jitter}, "
+                + f"show_typing={self.config.show_typing_during_delay}, "
+                + f"batch_compression={self.config.batch_read_compression_factor:.2f}"
+            )
+            # Initialize delay calculator
+            self._delay_calculator = HumanDelayCalculator(self.config)
+            logger.info("[DELAY_CONFIG] Delay calculator initialized successfully")
+            logger.info(
+                "[DELAY_CONFIG] ═══════════════════════════════════════════════"
+            )
+        else:
+            logger.info(
+                "[DELAY_CONFIG] Human-like delays disabled (enable_human_delays=False)"
+            )
+            logger.debug(
+                "[DELAY_CONFIG] To enable delays, set enable_human_delays=True in WhatsAppBotConfig"
+            )
     def start(self) -> None:
         """Start the WhatsApp bot."""
         run_sync(self.start_async)
@@ -224,7 +305,53 @@ class WhatsAppBot(BaseModel):
     ) -> GeneratedAssistantMessage[Any] | None:
         """
         Handle incoming WhatsApp message with enhanced error handling and batching.
+        This is the main entry point for processing incoming WhatsApp messages. It handles
+        rate limiting, spam protection, message batching, and applies human-like delays
+        to simulate realistic behavior patterns.
+        Message Processing Flow:
+            1. Retrieve or create user session
+            2. Check rate limiting (if spam protection enabled)
+            3. Apply read delay (if human delays enabled) - simulates reading time
+            4. Mark message as read (if auto_read_messages enabled)
+            5. Send welcome message (if first interaction)
+            6. Process message (with batching if enabled) or immediately
+            7. Return generated response
+        Human-Like Delays:
+            When enable_human_delays is True, this method applies a read delay before
+            marking the message as read. The delay simulates the time a human would take
+            to read and comprehend the incoming message, creating a realistic gap between
+            message receipt and read receipt.
+            For batched messages, a batch read delay is applied instead, which accounts
+            for reading multiple messages in sequence with compression for faster batch
+            reading.
+        Args:
+            message: The incoming WhatsApp message to process.
+            chat_id: Optional custom chat identifier for conversation tracking.
+                    If not provided, uses the sender's phone number.
+        Returns:
+            Generated assistant response message, or None if processing failed or
+            was rate limited.
+        Raises:
+            Exceptions are caught and logged. User-facing errors trigger error messages.
+        Example:
+            >>> message = WhatsAppTextMessage(
+            ...     from_number="1234567890",
+            ...     text="Hello!",
+            ...     id="msg_123"
+            ... )
+            >>> response = await bot.handle_message(message)
+            >>> if response:
+            ...     print(f"Response: {response.text}")
         """
         logger.info("[MESSAGE_HANDLER] ═══════════ MESSAGE HANDLER ENTRY ═══════════")
         logger.info(
             f"[MESSAGE_HANDLER] Received message from {message.from_number}: ID={message.id}, Type={type(message).__name__}"
@@ -271,6 +398,9 @@ class WhatsAppBot(BaseModel):
                     await self.provider.update_session(session)
                     return None
+            # Apply read delay before marking message as read (simulates human reading time)
+            await self._apply_read_delay(message)
             # Mark as read if configured (only after rate limiting check passes)
             if self.config.auto_read_messages:
                 logger.debug(f"[MESSAGE_HANDLER] Marking message {message.id} as read")
@@ -1201,8 +1331,55 @@ class WhatsAppBot(BaseModel):
     async def _process_message_batch(
         self, phone_number: PhoneNumber, session: WhatsAppSession, processing_token: str
-    ) -> GeneratedAssistantMessage[Any] | None:
-        """Process a batch of messages for a user with enhanced timeout protection."""
+    ) -> GeneratedAssistantMessage[T_Schema] | None:
+        """Process a batch of messages for a user with enhanced timeout protection.
+        This method processes multiple messages that were received in quick succession
+        as a single batch. It applies batch-specific delays and combines all messages
+        into a single conversation context for more coherent responses.
+        Batch Processing Flow:
+            1. Validate pending messages exist
+            2. Mark session as sending to prevent cleanup
+            3. Apply batch read delay (if human delays enabled) - simulates reading all messages
+            4. Convert message batch to agent input
+            5. Generate single response for entire batch
+            6. Send response to user
+            7. Mark all messages as read
+            8. Update session state
+            9. Execute response callbacks
+        Human-Like Delays:
+            When enable_human_delays is True, this method applies a batch read delay
+            at the start of processing. The delay simulates the time a human would take
+            to read multiple messages in sequence, accounting for:
+            - Individual reading time for each message
+            - Brief pauses between messages (0.5s each)
+            - Compression factor (default 0.7x) for faster batch reading
+            This creates a realistic gap before the batch is processed, making the bot
+            appear more human-like when handling rapid message sequences.
+        Args:
+            phone_number: Phone number of the user whose messages are being processed.
+            session: The user's WhatsApp session containing pending messages.
+            processing_token: Unique token to prevent duplicate batch processing.
+        Returns:
+            Generated assistant response for the batch, or None if processing failed
+            or no messages were pending.
+        Raises:
+            Exceptions are caught and logged. Session state is cleaned up on errors.
+        Example:
+            >>> # Called automatically by batch processor task
+            >>> response = await self._process_message_batch(
+            ...     phone_number="1234567890",
+            ...     session=session,
+            ...     processing_token="batch_123"
+            ... )
+        """
         logger.info("[BATCH_PROCESSING] ═══════════ BATCH PROCESSING START ═══════════")
         logger.info(
             f"[BATCH_PROCESSING] Phone: {phone_number}, Token: {processing_token}"
@@ -1236,14 +1413,8 @@ class WhatsAppBot(BaseModel):
             session.context_data["sending_started_at"] = datetime.now().isoformat()
             await self.provider.update_session(session)
-            # Show typing indicator
-            if self.config.typing_indicator:
-                logger.debug(
-                    f"[BATCH_PROCESSING] Sending typing indicator to {phone_number}"
-                )
-                await self.provider.send_typing_indicator(
-                    phone_number, self.config.typing_duration
-                )
+            # Note: Typing indicator is now sent in _send_response after TTS decision
+            # to avoid sending it before determining if audio should be sent
             # Get all pending messages
             pending_messages = session.clear_pending_messages()
@@ -1251,6 +1422,9 @@ class WhatsAppBot(BaseModel):
                 f"[BATCH_PROCESSING] 📦 Processing batch of {len(pending_messages)} messages for {phone_number}"
             )
+            # Apply batch read delay before processing (simulates human reading multiple messages)
+            await self._apply_batch_read_delay(list(pending_messages))
             # Convert message batch to agent input
             logger.debug(
                 f"[BATCH_PROCESSING] Converting message batch to agent input for {phone_number}"
@@ -1359,7 +1533,7 @@ class WhatsAppBot(BaseModel):
         message: WhatsAppMessage,
         session: WhatsAppSession,
         chat_id: ChatId | None = None,
-    ) -> GeneratedAssistantMessage[Any]:
+    ) -> GeneratedAssistantMessage[T_Schema]:
         """Process a single message immediately with quote message support."""
         logger.info(
             "[SINGLE_MESSAGE] ═══════════ SINGLE MESSAGE PROCESSING START ═══════════"
@@ -2062,16 +2236,72 @@ class WhatsAppBot(BaseModel):
     async def _send_response(
         self,
         to: PhoneNumber,
-        response: GeneratedAssistantMessage[Any] | str,
+        response: GeneratedAssistantMessage[T_Schema] | str,
         reply_to: str | None = None,
     ) -> None:
-        """Send response message(s) to user with enhanced error handling and retry logic."""
-        # Extract text from GeneratedAssistantMessage if needed
-        response_text = (
-            response.text
-            if isinstance(response, GeneratedAssistantMessage)
-            else response
-        )
+        """Send response message(s) to user with enhanced error handling and retry logic.
+        This method handles the complete response sending flow including text-to-speech,
+        human-like delays, typing indicators, message splitting, and error handling.
+        Response Sending Flow:
+            1. Extract and format response text
+            2. Attempt TTS audio generation (if configured and chance succeeds)
+            3. Apply typing delay (if human delays enabled and TTS not sent)
+            4. Show typing indicator (if configured and not already shown during delay)
+            5. Split long messages if needed
+            6. Send each message part with send delay between parts
+            7. Handle errors with retry logic
+        Human-Like Delays:
+            When enable_human_delays is True, this method applies two types of delays:
+            1. Typing Delay: Applied before sending the response to simulate the time
+               a human would take to compose and type the message. The delay is based
+               on response length and includes composition planning time.
+            2. Send Delay: Applied immediately before each message transmission to
+               simulate the brief final review time before hitting send. This delay
+               is applied to each message part independently.
+            If TTS audio is successfully sent, the typing delay is skipped since the
+            audio generation time already provides a natural delay.
+        Args:
+            to: Phone number of the recipient.
+            response: The response to send. Can be a GeneratedAssistantMessage or string.
+            reply_to: Optional message ID to reply to (for message quoting).
+        Raises:
+            Exceptions are caught and logged. Failed messages trigger retry logic
+            if configured.
+        Example:
+            >>> response = GeneratedAssistantMessage(text="Hello! How can I help?")
+            >>> await self._send_response(
+            ...     to="1234567890",
+            ...     response=response,
+            ...     reply_to="msg_123"
+            ... )
+        """
+        response_text = ""
+        if isinstance(response, GeneratedAssistantMessage):
+            # Check if we have structured output (parsed)
+            if response.parsed:
+                # Use the 'response' field from structured output
+                response_text = response.parsed.response
+                logger.debug(
+                    "[SEND_RESPONSE] Using structured output 'response' field "
+                    + f"(schema: {type(response.parsed).__name__})"
+                )
+            else:
+                # Fallback to text field
+                response_text = response.text
+                logger.debug("[SEND_RESPONSE] Using standard text response")
+        else:
+            # Direct string
+            response_text = response
         # Apply WhatsApp-specific markdown formatting
         response_text = self._format_whatsapp_markdown(response_text)
@@ -2080,10 +2310,217 @@ class WhatsAppBot(BaseModel):
             f"[SEND_RESPONSE] Sending response to {to} (length: {len(response_text)}, reply_to: {reply_to})"
         )
+        # Track if TTS was successfully sent (to skip typing delay for audio)
+        tts_sent_successfully = False
+        # Check if we should send audio via TTS
+        should_attempt_tts = (
+            self.tts_provider
+            and self.config.speech_config
+            and self.config.speech_play_chance > 0
+            and self._validate_tts_configuration()
+        )
+        if should_attempt_tts:
+            import random
+            # Determine if we should play speech based on chance
+            should_play_speech = random.random() < self.config.speech_play_chance
+            if should_play_speech:
+                logger.info(
+                    f"[TTS] Attempting to send audio response to {to} (chance: {self.config.speech_play_chance * 100}%)"
+                )
+                try:
+                    # Show recording indicator while synthesizing
+                    if self.config.typing_indicator:
+                        logger.debug(
+                            f"[TTS] Sending recording indicator to {to} during synthesis"
+                        )
+                        # Use a more appropriate duration for recording indicator
+                        # Based on text length: minimum 2s, maximum 10s, or estimated synthesis time
+                        estimated_duration = max(
+                            2, min(10, len(response_text) // 50 + 2)
+                        )
+                        await self.provider.send_recording_indicator(
+                            to, estimated_duration
+                        )
+                    # Synthesize speech
+                    # We know these are not None due to validation above
+                    assert self.tts_provider is not None
+                    assert self.config.speech_config is not None
+                    speech_result = await self.tts_provider.synthesize_async(
+                        response_text, config=self.config.speech_config
+                    )
+                    # Try to upload to file storage if available
+                    audio_url = None
+                    if self.file_storage_manager:
+                        try:
+                            import base64
+                            import time
+                            # Decode base64 to bytes
+                            audio_bytes = base64.b64decode(speech_result.audio)
+                            # Generate unique filename
+                            timestamp = int(time.time())
+                            extension = self._get_audio_extension(speech_result.format)
+                            filename = f"tts_{timestamp}.{extension}"
+                            # Upload to storage
+                            audio_url = await self.file_storage_manager.upload_file(
+                                file_data=audio_bytes,
+                                filename=filename,
+                                mime_type=str(speech_result.mime_type),
+                            )
+                            logger.info(f"[TTS] Audio uploaded to storage: {audio_url}")
+                        except Exception as e:
+                            logger.warning(
+                                f"[TTS] Failed to upload to storage, falling back to base64: {e}"
+                            )
+                            audio_url = None
+                    # Send audio message (URL or base64)
+                    if audio_url:
+                        # Try URL method first
+                        try:
+                            await self.provider.send_audio_message_by_url(
+                                to=to,
+                                audio_url=audio_url,
+                                quoted_message_id=reply_to
+                                if self.config.quote_messages
+                                else None,
+                            )
+                            logger.info(f"[TTS] Audio sent via URL to {to}")
+                        except Exception as e:
+                            logger.warning(
+                                f"[TTS] URL method failed, falling back to base64: {e}"
+                            )
+                            # Fallback to base64
+                            await self.provider.send_audio_message(
+                                to=to,
+                                audio_base64=speech_result.audio,
+                                quoted_message_id=reply_to
+                                if self.config.quote_messages
+                                else None,
+                            )
+                            logger.info(f"[TTS] Audio sent via base64 to {to}")
+                    else:
+                        # Use base64 method (current behavior)
+                        await self.provider.send_audio_message(
+                            to=to,
+                            audio_base64=speech_result.audio,
+                            quoted_message_id=reply_to
+                            if self.config.quote_messages
+                            else None,
+                        )
+                        logger.info(f"[TTS] Audio sent via base64 to {to}")
+                    logger.info(
+                        f"[TTS] Successfully sent audio response to {to}",
+                        extra={
+                            "to_number": to,
+                            "text_length": len(response_text),
+                            "mime_type": str(speech_result.mime_type),
+                            "format": str(speech_result.format),
+                        },
+                    )
+                    # Audio sent successfully, mark flag and return early
+                    tts_sent_successfully = True
+                    logger.info(
+                        "[TTS] Skipping typing delay since TTS audio was sent successfully"
+                    )
+                    return
+                except Exception as e:
+                    # Check if this is a specific Evolution API media upload error
+                    error_message = str(e).lower()
+                    if "media upload failed" in error_message or "400" in error_message:
+                        logger.warning(
+                            f"[TTS] Evolution API media upload failed for {to}, falling back to text: {e}",
+                            extra={
+                                "to_number": to,
+                                "error_type": type(e).__name__,
+                                "error": str(e),
+                                "fallback_reason": "evolution_api_media_upload_failed",
+                            },
+                        )
+                    else:
+                        logger.warning(
+                            f"[TTS] Failed to send audio response to {to}, falling back to text: {e}",
+                            extra={
+                                "to_number": to,
+                                "error_type": type(e).__name__,
+                                "error": str(e),
+                                "fallback_reason": "tts_synthesis_or_send_failed",
+                            },
+                        )
+                    # Fall through to send text message instead
         # Split messages by line breaks and length
         messages = self._split_message_by_line_breaks(response_text)
         logger.info(f"[SEND_RESPONSE] Split response into {len(messages)} parts")
+        # Apply typing delay before sending messages (simulates human typing time)
+        # This should be done before the typing indicator to coordinate properly
+        # Note: This is only reached if TTS was not used or if TTS failed and fell back to text
+        if should_attempt_tts and not tts_sent_successfully:
+            logger.info(
+                "[SEND_RESPONSE] TTS failed, applying typing delay for text fallback"
+            )
+        await self._apply_typing_delay(response_text, to)
+        # Show typing indicator ONCE before sending all messages
+        # Only send typing indicator if we're not attempting TTS or if TTS failed
+        # Skip if typing delay already handled the indicator
+        typing_delay_handled_indicator = (
+            self.config.enable_human_delays
+            and self.config.show_typing_during_delay
+            and self.config.typing_indicator
+        )
+        if typing_delay_handled_indicator:
+            logger.debug(
+                "[SEND_RESPONSE] Skipping redundant typing indicator - already sent during typing delay"
+            )
+        if (
+            self.config.typing_indicator
+            and not should_attempt_tts
+            and not typing_delay_handled_indicator
+        ):
+            try:
+                logger.debug(
+                    f"[SEND_RESPONSE] Sending typing indicator to {to} before sending {len(messages)} message(s)"
+                )
+                await self.provider.send_typing_indicator(
+                    to, self.config.typing_duration
+                )
+            except Exception as e:
+                # Don't let typing indicator failures break message sending
+                logger.warning(f"[SEND_RESPONSE] Failed to send typing indicator: {e}")
+        elif (
+            self.config.typing_indicator
+            and should_attempt_tts
+            and not typing_delay_handled_indicator
+        ):
+            # TTS was attempted but failed, send typing indicator for text fallback
+            # Skip if typing delay already handled the indicator
+            try:
+                logger.debug(
+                    f"[SEND_RESPONSE] TTS failed, sending typing indicator to {to} for text fallback"
+                )
+                await self.provider.send_typing_indicator(
+                    to, self.config.typing_duration
+                )
+            except Exception as e:
+                # Don't let typing indicator failures break message sending
+                logger.warning(f"[SEND_RESPONSE] Failed to send typing indicator: {e}")
         # Track sending state to handle partial failures
         successfully_sent_count = 0
         failed_parts: list[dict[str, Any]] = []
@@ -2093,21 +2530,6 @@ class WhatsAppBot(BaseModel):
                 f"[SEND_RESPONSE] Sending message part {i + 1}/{len(messages)} to {to}"
             )
-            # Show typing indicator before each message if configured
-            if self.config.typing_indicator:
-                try:
-                    logger.debug(
-                        f"[SEND_RESPONSE] Sending typing indicator to {to} for message {i + 1}"
-                    )
-                    await self.provider.send_typing_indicator(
-                        to, self.config.typing_duration
-                    )
-                except Exception as e:
-                    # Don't let typing indicator failures break message sending
-                    logger.warning(
-                        f"[SEND_RESPONSE] Failed to send typing indicator: {e}"
-                    )
             # Only quote the first message if quote_messages is enabled
             quoted_id = reply_to if i == 0 else None
@@ -2118,6 +2540,9 @@ class WhatsAppBot(BaseModel):
             for attempt in range(max_retries + 1):
                 try:
+                    # Apply send delay before transmitting message (simulates final review)
+                    await self._apply_send_delay()
                     sent_message = await self.provider.send_text_message(
                         to=to, text=msg, quoted_message_id=quoted_id
                     )
@@ -2158,15 +2583,30 @@ class WhatsAppBot(BaseModel):
             # Delay between messages (respecting typing duration + small buffer)
             if i < len(messages) - 1:
                 # Use typing duration if typing indicator is enabled, otherwise use a small delay
-                delay = (
+                inter_message_delay = (
                     self.config.typing_duration + 0.5
                     if self.config.typing_indicator
                     else 1.0
                 )
-                logger.debug(
-                    f"[SEND_RESPONSE] Waiting {delay}s before sending next message part"
-                )
-                await asyncio.sleep(delay)
+                # Calculate total delay including send delay if human delays are enabled
+                if self.config.enable_human_delays and self._delay_calculator:
+                    # Send delay will be applied before next message, so log total expected delay
+                    estimated_send_delay = (
+                        self.config.min_send_delay_seconds
+                        + self.config.max_send_delay_seconds
+                    ) / 2
+                    total_delay = inter_message_delay + estimated_send_delay
+                    logger.debug(
+                        f"[SEND_RESPONSE] Inter-message delay: {inter_message_delay:.2f}s "
+                        + f"(+ ~{estimated_send_delay:.2f}s send delay = ~{total_delay:.2f}s total)"
+                    )
+                else:
+                    logger.debug(
+                        f"[SEND_RESPONSE] Waiting {inter_message_delay}s before sending next message part"
+                    )
+                await asyncio.sleep(inter_message_delay)
         # Log final sending results
         if failed_parts:
@@ -2196,6 +2636,48 @@ class WhatsAppBot(BaseModel):
                 f"[SEND_RESPONSE] Successfully sent all {len(messages)} message parts to {to}"
             )
+    def _validate_tts_configuration(self) -> bool:
+        """Report whether text-to-speech synthesis can be attempted.
+
+        Three values must be truthy, checked in this order:
+        ``self.config.speech_config``, its ``voice_id`` and ``self.tts_provider``.
+        The first one found missing is logged and ends the check.
+
+        Returns:
+            True when all three are present. False when one is missing, or
+            when reading the configuration raises (the error is logged).
+        """
+        try:
+            speech_config = self.config.speech_config
+            if not speech_config:
+                logger.debug("[TTS_VALIDATION] No speech_config provided")
+                return False
+            # A voice must be selected before anything can be synthesised.
+            voice_id = speech_config.voice_id
+            if not voice_id:
+                logger.warning(
+                    "[TTS_VALIDATION] speech_config.voice_id is required but not provided"
+                )
+                return False
+            # The settings alone are not enough; a provider has to be attached.
+            if not self.tts_provider:
+                logger.warning("[TTS_VALIDATION] TTS provider is not configured")
+                return False
+            logger.debug(
+                f"[TTS_VALIDATION] TTS configuration is valid: voice_id={voice_id}"
+            )
+            return True
+        except Exception as exc:
+            # Validation is advisory: any failure is treated as "TTS unavailable".
+            logger.warning(
+                f"[TTS_VALIDATION] Failed to validate TTS configuration: {exc}"
+            )
+            return False
+    def _get_audio_extension(self, format_type: Any) -> str:
+        """Map a TTS output format to an audio file extension.
+
+        The format is converted with ``str()`` and searched, ignoring case, for
+        "mp3", "wav" and "ogg" in that order; the first match is returned.
+
+        Args:
+            format_type: Format identifier. Any object whose ``str()`` names the
+                format works, e.g. a plain string or an enum member.
+
+        Returns:
+            "mp3", "wav" or "ogg". A format naming none of them falls back to
+            "mp3".
+        """
+        # Lower-case first so upper-case names (e.g. "WAV" or an enum's
+        # "Format.WAV") are recognised instead of silently becoming "mp3".
+        format_str = str(format_type).lower()
+        for extension in ("mp3", "wav", "ogg"):
+            if extension in format_str:
+                return extension
+        return "mp3"  # default
     def _split_message_by_line_breaks(self, text: str) -> Sequence[str]:
         """Split message by line breaks first, then by length if needed with enhanced validation."""
         if not text or not text.strip():
@@ -2519,6 +3001,306 @@ class WhatsAppBot(BaseModel):
                 f"[RATE_LIMIT_ERROR] Failed to send rate limit message to {to}: {e}"
             )
+    async def _apply_read_delay(self, message: WhatsAppMessage) -> None:
+        """Pause for a human-like reading time before the message is marked read.
+
+        The text to "read" is ``message.text`` for a ``WhatsAppTextMessage``,
+        the caption (or an empty string) for a ``WhatsAppMediaMessage``, and an
+        empty string for any other message type. The pause length comes from
+        ``self._delay_calculator.calculate_read_delay`` and is awaited with
+        ``asyncio.sleep``, so other tasks keep running meanwhile.
+
+        Nothing is awaited when ``config.enable_human_delays`` is off or no
+        delay calculator is set; only a debug line is logged.
+
+        Args:
+            message: The incoming message whose text or caption sizes the pause.
+
+        Raises:
+            asyncio.CancelledError: Logged and re-raised so task cancellation
+                propagates. Any other exception is logged with its traceback
+                and suppressed, so the pause is skipped or cut short.
+        """
+        if not (self.config.enable_human_delays and self._delay_calculator):
+            logger.debug("[HUMAN_DELAY] ⏱️  Read delay skipped (delays disabled)")
+            return
+        try:
+            kind_name = type(message).__name__
+            if isinstance(message, WhatsAppTextMessage):
+                readable = message.text
+            elif isinstance(message, WhatsAppMediaMessage):
+                # For media, the caption is the text used (empty when absent).
+                readable = message.caption or ""
+            else:
+                readable = ""
+            pause = self._delay_calculator.calculate_read_delay(readable)
+            logger.info(
+                f"[HUMAN_DELAY] ⏱️  Starting read delay: {pause:.2f}s "
+                f"for {len(readable)} chars (message_type={kind_name}, message_id={message.id})"
+            )
+            await asyncio.sleep(pause)
+            logger.info(
+                f"[HUMAN_DELAY] ⏱️  Read delay completed: {pause:.2f}s "
+                f"(message_id={message.id})"
+            )
+        except asyncio.CancelledError:
+            logger.warning(
+                f"[HUMAN_DELAY] ⏱️  Read delay cancelled for message {message.id}"
+            )
+            raise  # cancellation must reach the caller
+        except Exception as exc:
+            # Best effort: a failed delay must never block message handling.
+            logger.error(
+                f"[HUMAN_DELAY] ⏱️  Error applying read delay for message {message.id}: {exc}",
+                exc_info=True,
+            )
+    async def _apply_typing_delay(self, response_text: str, to: PhoneNumber) -> None:
+        """Pause for a human-like typing time before the response is sent.
+
+        The pause length comes from
+        ``self._delay_calculator.calculate_typing_delay(response_text)`` and is
+        awaited with ``asyncio.sleep``. When both
+        ``config.show_typing_during_delay`` and ``config.typing_indicator`` are
+        set, a typing indicator is requested from the provider first, for the
+        pause length truncated to whole seconds. A failure to send the
+        indicator is logged and does not prevent the pause.
+
+        Nothing is awaited when ``config.enable_human_delays`` is off or no
+        delay calculator is set; only a debug line is logged.
+
+        Args:
+            response_text: The text about to be sent; it sizes the pause.
+            to: Recipient of the response and of the typing indicator.
+
+        Raises:
+            asyncio.CancelledError: Logged and re-raised so task cancellation
+                propagates. Any other exception is logged with its traceback
+                and suppressed, so the pause is skipped or cut short.
+        """
+        if not (self.config.enable_human_delays and self._delay_calculator):
+            logger.debug("[HUMAN_DELAY] ⌨️  Typing delay skipped (delays disabled)")
+            return
+        try:
+            pause = self._delay_calculator.calculate_typing_delay(response_text)
+            logger.info(
+                f"[HUMAN_DELAY] ⌨️  Starting typing delay: {pause:.2f}s "
+                f"for {len(response_text)} chars (to={to})"
+            )
+            wants_indicator = (
+                self.config.show_typing_during_delay and self.config.typing_indicator
+            )
+            if wants_indicator:
+                try:
+                    # NOTE(review): int() truncates toward zero, so a pause under
+                    # one second requests a 0-second indicator. Confirm the
+                    # provider accepts that.
+                    indicator_seconds = int(pause)
+                    logger.debug(
+                        f"[HUMAN_DELAY] ⌨️  Sending typing indicator for {indicator_seconds}s to {to}"
+                    )
+                    await self.provider.send_typing_indicator(to, indicator_seconds)
+                except Exception as indicator_error:
+                    # The indicator is cosmetic; the pause still happens without it.
+                    logger.warning(
+                        f"[HUMAN_DELAY] ⌨️  Failed to send typing indicator during delay to {to}: "
+                        f"{indicator_error}"
+                    )
+            await asyncio.sleep(pause)
+            logger.info(
+                f"[HUMAN_DELAY] ⌨️  Typing delay completed: {pause:.2f}s (to={to})"
+            )
+        except asyncio.CancelledError:
+            logger.warning(f"[HUMAN_DELAY] ⌨️  Typing delay cancelled for {to}")
+            raise  # cancellation must reach the caller
+        except Exception as exc:
+            # Best effort: a failed delay must never block the response.
+            logger.error(
+                f"[HUMAN_DELAY] ⌨️  Error applying typing delay for {to}: {exc}",
+                exc_info=True,
+            )
+    async def _apply_send_delay(self) -> None:
+        """Pause briefly just before a message is transmitted.
+
+        The pause length comes from
+        ``self._delay_calculator.calculate_send_delay()`` and is awaited with
+        ``asyncio.sleep``, so other tasks keep running meanwhile.
+
+        Nothing is awaited when ``config.enable_human_delays`` is off or no
+        delay calculator is set; only a debug line is logged.
+
+        Raises:
+            asyncio.CancelledError: Logged and re-raised so task cancellation
+                propagates. Any other exception is logged with its traceback
+                and suppressed, so the pause is skipped or cut short.
+        """
+        if not (self.config.enable_human_delays and self._delay_calculator):
+            logger.debug("[HUMAN_DELAY] 📤 Send delay skipped (delays disabled)")
+            return
+        try:
+            pause = self._delay_calculator.calculate_send_delay()
+            logger.info(f"[HUMAN_DELAY] 📤 Starting send delay: {pause:.2f}s")
+            await asyncio.sleep(pause)
+            # Completion is logged at debug level, unlike the start line.
+            logger.debug(f"[HUMAN_DELAY] 📤 Send delay completed: {pause:.2f}s")
+        except asyncio.CancelledError:
+            logger.warning("[HUMAN_DELAY] 📤 Send delay cancelled")
+            raise  # cancellation must reach the caller
+        except Exception as exc:
+            # Best effort: a failed delay must never block sending.
+            logger.error(
+                f"[HUMAN_DELAY] 📤 Error applying send delay: {exc}", exc_info=True
+            )
+    async def _apply_batch_read_delay(self, messages: list[dict[str, Any]]) -> None:
+        """Pause for a human-like reading time covering a whole message batch.
+
+        Each dict contributes at most one text: the ``"text"`` value when its
+        ``"type"`` is ``"WhatsAppTextMessage"``, or the ``"caption"`` value when
+        the type is an image, document, audio or video message. Empty values
+        and other types contribute nothing. The collected texts go to
+        ``self._delay_calculator.calculate_batch_read_delay`` and the result is
+        awaited with ``asyncio.sleep``.
+
+        Nothing is awaited when ``config.enable_human_delays`` is off or no
+        delay calculator is set; only a debug line is logged.
+
+        Args:
+            messages: Batch entries as dicts; the keys read here are ``"type"``,
+                ``"text"`` and ``"caption"``.
+
+        Raises:
+            asyncio.CancelledError: Logged and re-raised so task cancellation
+                propagates. Any other exception is logged with its traceback
+                and suppressed, so the pause is skipped or cut short.
+        """
+        if not (self.config.enable_human_delays and self._delay_calculator):
+            logger.debug("[HUMAN_DELAY] 📚 Batch read delay skipped (delays disabled)")
+            return
+        try:
+            # A tuple keeps equality-based membership for any "type" value.
+            captioned_types = (
+                "WhatsAppImageMessage",
+                "WhatsAppDocumentMessage",
+                "WhatsAppAudioMessage",
+                "WhatsAppVideoMessage",
+            )
+            readable_texts: list[str] = []
+            for entry in messages:
+                entry_type = entry.get("type")
+                if entry_type == "WhatsAppTextMessage":
+                    content = entry.get("text", "")
+                elif entry_type in captioned_types:
+                    # For media, the caption is the text used.
+                    content = entry.get("caption", "")
+                else:
+                    continue
+                if content:
+                    readable_texts.append(content)
+            char_total = sum(len(content) for content in readable_texts)
+            pause = self._delay_calculator.calculate_batch_read_delay(readable_texts)
+            logger.info(
+                f"[HUMAN_DELAY] 📚 Starting batch read delay: {pause:.2f}s "
+                f"for {len(messages)} messages ({char_total} total chars)"
+            )
+            await asyncio.sleep(pause)
+            logger.info(
+                f"[HUMAN_DELAY] 📚 Batch read delay completed: {pause:.2f}s "
+                f"for {len(messages)} messages"
+            )
+        except asyncio.CancelledError:
+            logger.warning(
+                f"[HUMAN_DELAY] 📚 Batch read delay cancelled for {len(messages)} messages"
+            )
+            raise  # cancellation must reach the caller
+        except Exception as exc:
+            # Best effort: a failed delay must never block batch processing.
+            logger.error(
+                f"[HUMAN_DELAY] 📚 Error applying batch read delay for {len(messages)} messages: {exc}",
+                exc_info=True,
+            )
     def _split_message(self, text: str) -> Sequence[str]:
         """Split long message into chunks."""
         if len(text) <= self.config.max_message_length:

agentle 0.9.4__py3-none-any.whl → 0.9.28__py3-none-any.whl

agentle 0.9.4__py3-none-any.whl → 0.9.28__py3-none-any.whl