agentle 0.9.4__py3-none-any.whl → 0.9.28__py3-none-any.whl
This diff shows the changes between two publicly released versions of this package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
- agentle/agents/agent.py +175 -10
- agentle/agents/agent_run_output.py +8 -1
- agentle/agents/apis/__init__.py +79 -6
- agentle/agents/apis/api.py +342 -73
- agentle/agents/apis/api_key_authentication.py +43 -0
- agentle/agents/apis/api_key_location.py +11 -0
- agentle/agents/apis/api_metrics.py +16 -0
- agentle/agents/apis/auth_type.py +17 -0
- agentle/agents/apis/authentication.py +32 -0
- agentle/agents/apis/authentication_base.py +42 -0
- agentle/agents/apis/authentication_config.py +117 -0
- agentle/agents/apis/basic_authentication.py +34 -0
- agentle/agents/apis/bearer_authentication.py +52 -0
- agentle/agents/apis/cache_strategy.py +12 -0
- agentle/agents/apis/circuit_breaker.py +69 -0
- agentle/agents/apis/circuit_breaker_error.py +7 -0
- agentle/agents/apis/circuit_breaker_state.py +11 -0
- agentle/agents/apis/endpoint.py +413 -254
- agentle/agents/apis/file_upload.py +23 -0
- agentle/agents/apis/hmac_authentication.py +56 -0
- agentle/agents/apis/no_authentication.py +27 -0
- agentle/agents/apis/oauth2_authentication.py +111 -0
- agentle/agents/apis/oauth2_grant_type.py +12 -0
- agentle/agents/apis/object_schema.py +86 -1
- agentle/agents/apis/params/__init__.py +10 -1
- agentle/agents/apis/params/boolean_param.py +44 -0
- agentle/agents/apis/params/number_param.py +56 -0
- agentle/agents/apis/rate_limit_error.py +7 -0
- agentle/agents/apis/rate_limiter.py +57 -0
- agentle/agents/apis/request_config.py +126 -4
- agentle/agents/apis/request_hook.py +16 -0
- agentle/agents/apis/response_cache.py +49 -0
- agentle/agents/apis/retry_strategy.py +12 -0
- agentle/agents/whatsapp/human_delay_calculator.py +462 -0
- agentle/agents/whatsapp/models/audio_message.py +6 -4
- agentle/agents/whatsapp/models/key.py +2 -2
- agentle/agents/whatsapp/models/whatsapp_bot_config.py +375 -21
- agentle/agents/whatsapp/models/whatsapp_response_base.py +31 -0
- agentle/agents/whatsapp/models/whatsapp_webhook_payload.py +5 -1
- agentle/agents/whatsapp/providers/base/whatsapp_provider.py +51 -0
- agentle/agents/whatsapp/providers/evolution/evolution_api_provider.py +237 -10
- agentle/agents/whatsapp/providers/meta/meta_whatsapp_provider.py +126 -0
- agentle/agents/whatsapp/v2/batch_processor_manager.py +4 -0
- agentle/agents/whatsapp/v2/bot_config.py +188 -0
- agentle/agents/whatsapp/v2/message_limit.py +9 -0
- agentle/agents/whatsapp/v2/payload.py +0 -0
- agentle/agents/whatsapp/v2/whatsapp_bot.py +13 -0
- agentle/agents/whatsapp/v2/whatsapp_cloud_api_provider.py +0 -0
- agentle/agents/whatsapp/v2/whatsapp_provider.py +0 -0
- agentle/agents/whatsapp/whatsapp_bot.py +827 -45
- agentle/generations/providers/google/adapters/generate_generate_content_response_to_generation_adapter.py +13 -10
- agentle/generations/providers/google/google_generation_provider.py +35 -5
- agentle/generations/providers/openrouter/_adapters/openrouter_message_to_generated_assistant_message_adapter.py +35 -1
- agentle/mcp/servers/stdio_mcp_server.py +23 -4
- agentle/parsing/parsers/docx.py +8 -0
- agentle/parsing/parsers/file_parser.py +4 -0
- agentle/parsing/parsers/pdf.py +7 -1
- agentle/storage/__init__.py +11 -0
- agentle/storage/file_storage_manager.py +44 -0
- agentle/storage/local_file_storage_manager.py +122 -0
- agentle/storage/s3_file_storage_manager.py +124 -0
- agentle/tts/audio_format.py +6 -0
- agentle/tts/elevenlabs_tts_provider.py +108 -0
- agentle/tts/output_format_type.py +26 -0
- agentle/tts/speech_config.py +14 -0
- agentle/tts/speech_result.py +15 -0
- agentle/tts/tts_provider.py +16 -0
- agentle/tts/voice_settings.py +30 -0
- agentle/utils/parse_streaming_json.py +39 -13
- agentle/voice_cloning/__init__.py +0 -0
- agentle/voice_cloning/voice_cloner.py +0 -0
- agentle/web/extractor.py +282 -148
- {agentle-0.9.4.dist-info → agentle-0.9.28.dist-info}/METADATA +1 -1
- {agentle-0.9.4.dist-info → agentle-0.9.28.dist-info}/RECORD +78 -39
- agentle/tts/real_time/definitions/audio_data.py +0 -20
- agentle/tts/real_time/definitions/speech_config.py +0 -27
- agentle/tts/real_time/definitions/speech_result.py +0 -14
- agentle/tts/real_time/definitions/tts_stream_chunk.py +0 -15
- agentle/tts/real_time/definitions/voice_gender.py +0 -9
- agentle/tts/real_time/definitions/voice_info.py +0 -18
- agentle/tts/real_time/real_time_speech_to_text_provider.py +0 -66
- /agentle/{tts/real_time → agents/whatsapp/v2}/__init__.py +0 -0
- /agentle/{tts/real_time/definitions/__init__.py → agents/whatsapp/v2/in_memory_batch_processor_manager.py} +0 -0
- {agentle-0.9.4.dist-info → agentle-0.9.28.dist-info}/WHEEL +0 -0
- {agentle-0.9.4.dist-info → agentle-0.9.28.dist-info}/licenses/LICENSE +0 -0
agentle/web/extractor.py
CHANGED
Deleted lines whose text was elided in the rendered diff are marked with `…`.

```diff
@@ -1,12 +1,18 @@
-from …
+from __future__ import annotations
+
+import asyncio
 from collections.abc import Sequence
 from textwrap import dedent
+from typing import TYPE_CHECKING
 
 from html_to_markdown import convert
-from …
+from rsb.coroutines.run_sync import run_sync
 from rsb.models import Field
 from rsb.models.base_model import BaseModel
+from rsb.models.config_dict import ConfigDict
 
+from agentle.generations.models.generation.generation import Generation
+from agentle.generations.providers.base.generation_provider import GenerationProvider
 from agentle.prompts.models.prompt import Prompt
 from agentle.responses.definitions.reasoning import Reasoning
 from agentle.responses.responder import Responder
@@ -15,6 +21,10 @@ from agentle.web.actions.action import Action
 from agentle.web.extraction_preferences import ExtractionPreferences
 from agentle.web.extraction_result import ExtractionResult
 
+if TYPE_CHECKING:
+    from playwright.async_api import Browser, Geolocation, ViewportSize
+
+
 _INSTRUCTIONS = Prompt.from_text(
     dedent("""\
         <character>
@@ -52,36 +62,37 @@ _PROMPT = Prompt.from_text(
 
 # HTML -> MD -> LLM (Structured Output)
 class Extractor(BaseModel):
-    llm: Responder = Field(…
+    llm: Responder | GenerationProvider = Field(
+        ..., description="The responder to use for the extractor."
+    )
     reasoning: Reasoning | None = Field(default=None)
     model: str | None = Field(default=None)
     max_output_tokens: int | None = Field(default=None)
 
-…
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+
+    def extract_markdown(
         self,
+        browser: Browser,
         urls: Sequence[str],
-        output: type[T],
-        prompt: str | None = None,
         extraction_preferences: ExtractionPreferences | None = None,
         ignore_invalid_urls: bool = True,
-    ) -> …
+    ) -> tuple[str, str]:
         return run_sync(
-            self.…
-…
-…
+            self.extract_markdown_async,
+            browser=browser,
+            urls=urls,
+            extraction_preferences=extraction_preferences,
+            ignore_invalid_urls=ignore_invalid_urls,
         )
 
-…
-    async def extract_async[T: BaseModel](
+    async def extract_markdown_async(
         self,
+        browser: Browser,
         urls: Sequence[str],
-        output: type[T],
-        prompt: str | None = None,
         extraction_preferences: ExtractionPreferences | None = None,
         ignore_invalid_urls: bool = True,
-    ) -> …
-        from playwright import async_api
-…
+    ) -> tuple[str, str]:
         _preferences = extraction_preferences or ExtractionPreferences()
         _actions: Sequence[Action] = _preferences.actions or []
 
@@ -91,136 +102,244 @@ class Extractor(BaseModel):
             # This is a placeholder for proxy configuration
             pass
 
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-        geolocation…
-…
-…
-            geolocation = Geolocation(
-                latitude=getattr(_preferences.location, "latitude", 0),
-                longitude=getattr(_preferences.location, "longitude", 0),
-            )
-            permissions = ["geolocation"]
-…
-        context = await browser.new_context(
-            viewport=viewport,
-            user_agent=user_agent,
-            is_mobile=is_mobile,
-            extra_http_headers=_preferences.headers,
-            ignore_https_errors=_preferences.skip_tls_verification,
-            geolocation=geolocation,
-            permissions=permissions,
+        # Build context options properly based on preferences
+        if _preferences.mobile:
+            viewport: ViewportSize | None = ViewportSize(width=375, height=667)
+            user_agent = "Mozilla/5.0 (iPhone; CPU iPhone OS 14_0 like Mac OS X) AppleWebKit/605.1.15"
+            is_mobile = True
+        else:
+            viewport = None
+            user_agent = None
+            is_mobile = None
+
+        # Handle geolocation
+        geolocation: Geolocation | None = None
+        permissions = None
+        if _preferences.location:
+            geolocation = Geolocation(
+                latitude=getattr(_preferences.location, "latitude", 0),
+                longitude=getattr(_preferences.location, "longitude", 0),
             )
+            permissions = ["geolocation"]
+
+        context = await browser.new_context(
+            viewport=viewport,
+            user_agent=user_agent,
+            is_mobile=is_mobile,
+            extra_http_headers=_preferences.headers,
+            ignore_https_errors=_preferences.skip_tls_verification,
+            geolocation=geolocation,
+            permissions=permissions,
+        )
 
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-                )
-                else route.continue_(),
+        # Block ads if specified
+        if _preferences.block_ads:
+            await context.route(
+                "**/*",
+                lambda route: route.abort()
+                if route.request.resource_type in ["image", "media", "font"]
+                and any(
+                    ad_domain in route.request.url
+                    for ad_domain in [
+                        "doubleclick.net",
+                        "googlesyndication.com",
+                        "adservice.google.com",
+                        "ads",
+                        "analytics",
+                        "tracking",
+                    ]
                 )
+                else route.continue_(),
+            )
+
+        page = await context.new_page()
+
+        for url in urls:
+            # Set timeout if specified
+            timeout = _preferences.timeout_ms if _preferences.timeout_ms else 30000
 
-…
+            try:
+                await page.goto(url, timeout=timeout)
 
-…
-…
-…
+                # Wait for specified time if configured
+                if _preferences.wait_for_ms:
+                    await page.wait_for_timeout(_preferences.wait_for_ms)
 
-…
-…
+                # Execute actions
+                for action in _actions:
+                    await action.execute(page)
 
-…
-…
-…
+            except Exception as e:
+                if ignore_invalid_urls:
+                    print(f"Warning: Failed to load {url}: {e}")
+                    continue
+                else:
+                    raise
 
-…
-        for action in _actions:
-            await action.execute(page)
+        html = await page.content()
 
-…
-…
-…
-…
-…
-…
+        # Process HTML based on preferences - consolidate all BeautifulSoup operations
+        if (
+            _preferences.remove_base_64_images
+            or _preferences.include_tags
+            or _preferences.exclude_tags
+            or _preferences.only_main_content
+        ):
+            from bs4 import BeautifulSoup
 
-…
+            soup = BeautifulSoup(html, "html.parser")
 
-            #…
+            # Remove base64 images first
             if _preferences.remove_base_64_images:
                 import re
 
-…
-…
-…
-…
-…
+                # Debug: Check what we have before processing
+                all_imgs = soup.find_all("img")
+                print(f"DEBUG: Found {len(all_imgs)} img tags total")
+                base64_count = 0
+                for img in all_imgs:
+                    src = img.attrs.get("src") if hasattr(img, "attrs") else None  # type: ignore[union-attr]
+                    if isinstance(src, str) and "data:image/" in src:
+                        base64_count += 1
+                        print(f"DEBUG: Found base64 img: {src[:100]}...")
+                print(f"DEBUG: {base64_count} images have base64 data")
+
+                # First, remove any anchor tags that contain img children with base64
+                # (must be done before removing img tags themselves)
+                removed_anchors = 0
+                for a_tag in soup.find_all("a"):
+                    imgs = a_tag.find_all("img")  # type: ignore[union-attr]
+                    for img in imgs:
+                        src = img.attrs.get("src") if hasattr(img, "attrs") else None  # type: ignore[union-attr]
+                        if isinstance(src, str) and src.startswith("data:image/"):
+                            # Remove the entire anchor tag if it contains base64 image
+                            a_tag.decompose()
+                            removed_anchors += 1
+                            break
+                print(
+                    f"DEBUG: Removed {removed_anchors} anchor tags with base64 images"
+                )
+
+                # Remove standalone img tags with base64 src
+                removed_imgs = 0
+                for img in soup.find_all("img"):
+                    src = img.attrs.get("src") if hasattr(img, "attrs") else None  # type: ignore[union-attr]
+                    if isinstance(src, str) and src.startswith("data:image/"):
+                        img.decompose()
+                        removed_imgs += 1
+                print(f"DEBUG: Removed {removed_imgs} standalone img tags")
+
+                # Remove any element with base64 in href (like anchor tags with image data)
+                for elem in soup.find_all(attrs={"href": True}):
+                    href = elem.attrs.get("href") if hasattr(elem, "attrs") else None  # type: ignore[union-attr]
+                    if isinstance(href, str) and href.startswith("data:image/"):
+                        elem.decompose()
+
+                # Remove any element with base64 in style attribute
+                for elem in soup.find_all(attrs={"style": True}):
+                    style = elem.attrs.get("style") if hasattr(elem, "attrs") else None  # type: ignore[union-attr]
+                    if isinstance(style, str) and "data:image/" in style:
+                        elem.decompose()
+
+                # Remove SVG tags (they often contain base64 or are converted to base64 by markdown)
+                for svg in soup.find_all("svg"):
+                    svg.decompose()
+
+                # Remove any anchor tags that contain SVG children
+                for a_tag in soup.find_all("a"):
+                    if a_tag.find("svg"):  # type: ignore[union-attr]
+                        a_tag.decompose()
+
+                # Final check: see if any base64 remains in the HTML string
+                html_str = str(soup)
+                remaining = len(re.findall(r'data:image/[^"\')\s]+', html_str))
+                print(
+                    f"DEBUG: After processing, {remaining} base64 data URIs remain in HTML"
+                )
+
+            # Extract main content if requested
+            if _preferences.only_main_content:
+                main_content = (
+                    soup.find("main")
+                    or soup.find("article")
+                    or soup.find("div", {"id": "content"})
+                    or soup.find("div", {"class": "content"})
                 )
+                if main_content:
+                    soup = main_content  # type: ignore[assignment]
 
-            #…
-            if _preferences.…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-            user_instructions=prompt or "Not provided.", markdown=markdown
+            # Exclude specific tags
+            if _preferences.exclude_tags:
+                for tag in _preferences.exclude_tags:
+                    for element in soup.find_all(tag):  # type: ignore[union-attr]
+                        element.decompose()
+
+            # Include only specific tags
+            if _preferences.include_tags:
+                new_soup = BeautifulSoup("", "html.parser")
+                for tag in _preferences.include_tags:
+                    for element in soup.find_all(tag):  # type: ignore[union-attr]
+                        new_soup.append(element)  # type: ignore[arg-type]
+                soup = new_soup
+
+            html = str(soup)
+
+        # Convert to markdown
+        markdown = convert(html)
+        return html, markdown
+
+    def extract[T: BaseModel](
+        self,
+        browser: Browser,
+        urls: Sequence[str],
+        output: type[T],
+        prompt: str | None = None,
+        extraction_preferences: ExtractionPreferences | None = None,
+        ignore_invalid_urls: bool = True,
+    ) -> ExtractionResult[T]:
+        return run_sync(
+            self.extract_async(
+                browser=browser,
+                urls=urls,
+                output=output,
+                prompt=prompt,
+                extraction_preferences=extraction_preferences,
+                ignore_invalid_urls=ignore_invalid_urls,
             )
+        )
+
+    @needs("playwright")
+    async def extract_async[T: BaseModel](
+        self,
+        browser: Browser,
+        urls: Sequence[str],
+        output: type[T],
+        prompt: str | None = None,
+        extraction_preferences: ExtractionPreferences | None = None,
+        ignore_invalid_urls: bool = True,
+    ) -> ExtractionResult[T]:
+        _preferences = extraction_preferences or ExtractionPreferences()
 
+        html, markdown = await self.extract_markdown_async(
+            browser=browser,
+            urls=urls,
+            extraction_preferences=_preferences,
+            ignore_invalid_urls=ignore_invalid_urls,
+        )
+
+        # Prepare and send prompt
+        _prompt = _PROMPT.compile(
+            user_instructions=prompt or "Not provided.", markdown=markdown
+        )
+
+        if isinstance(self.llm, GenerationProvider):
+            response = await self.llm.generate_by_prompt_async(
+                prompt=_prompt,
+                model=self.model,
+                developer_prompt=_INSTRUCTIONS,
+                response_schema=output,
+            )
+        else:
             response = await self.llm.respond_async(
                 input=_prompt,
                 model=self.model,
@@ -229,19 +348,26 @@ class Extractor(BaseModel):
                 text_format=output,
             )
 
-…
+        output_parsed = (
+            response.parsed
+            if isinstance(response, Generation)
+            else response.output_parsed
+        )
 
-…
-            urls=urls,
-            html=html,
-            markdown=markdown,
-            extraction_preferences=_preferences,
-            output_parsed=response.output_parsed,
-        )
+        await browser.close()
 
+        return ExtractionResult[T](
+            urls=urls,
+            html=html,
+            markdown=markdown,
+            extraction_preferences=_preferences,
+            output_parsed=output_parsed,
+        )
 
-…
+
+async def test() -> None:
     from dotenv import load_dotenv
+    from playwright import async_api
 
     load_dotenv()
 
@@ -251,8 +377,8 @@ if __name__ == "__main__":
     possiveis_redirecionamentos: list[str]
 
     extractor = Extractor(
-        llm=Responder.…
-        model="…
+        llm=Responder.openrouter(),
+        model="google/gemini-2.5-flash",
    )
 
     # Example with custom extraction preferences
@@ -264,12 +390,20 @@ if __name__ == "__main__":
         timeout_ms=15000,
     )
 
-…
-…
-        output=PossiveisRedirecionamentos,
-        prompt="Extract the possible redirects from the page.",
-        extraction_preferences=preferences,
-    )
+    async with async_api.async_playwright() as p:
+        browser = await p.chromium.launch(headless=True)
 
-…
-…
+        result = await extractor.extract_async(
+            browser=browser,
+            urls=[site_uniube],
+            output=PossiveisRedirecionamentos,
+            prompt="Extract the possible redirects from the page.",
+            extraction_preferences=preferences,
+        )
+
+        for link in result.output_parsed.possiveis_redirecionamentos:
+            print(f"Link: {link}")
+
+
+if __name__ == "__main__":
+    asyncio.run(test())
```