agentle 0.9.25__py3-none-any.whl → 0.9.27__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,6 +14,7 @@ Provides advanced features for managing collections of related endpoints with:
14
14
  from __future__ import annotations
15
15
 
16
16
  import logging
17
+ import re
17
18
  from collections.abc import (
18
19
  Coroutine,
19
20
  Mapping,
@@ -513,13 +514,30 @@ class API(BaseModel):
513
514
  continue
514
515
 
515
516
  # Create endpoint
516
- endpoint_name: str = cast(
517
- str,
518
- (
519
- operation_id
520
- or f"{method}_{path.replace('/', '_').replace('{', '').replace('}', '')}"
521
- ),
522
- )
517
+ # Generate a valid function name from the path
518
+ if operation_id:
519
+ endpoint_name = operation_id
520
+ else:
521
+ # Clean the path to create a valid function name
522
+ # Remove leading/trailing slashes and replace special chars
523
+ clean_path = (
524
+ path.strip("/")
525
+ .replace("/", "_")
526
+ .replace("{", "")
527
+ .replace("}", "")
528
+ .replace("-", "_")
529
+ )
530
+ # Remove any consecutive underscores
531
+ clean_path = re.sub(r"_+", "_", clean_path)
532
+ # Ensure it doesn't start with a number
533
+ if clean_path and clean_path[0].isdigit():
534
+ clean_path = f"n{clean_path}"
535
+ # If empty after cleaning, use a generic name
536
+ if not clean_path:
537
+ clean_path = "root"
538
+ endpoint_name = f"{method.lower()}_{clean_path}"
539
+
540
+ endpoint_name = cast(str, endpoint_name)
523
541
 
524
542
  endpoint_description: str = cast(
525
543
  str,
@@ -359,7 +359,7 @@ class Endpoint(BaseModel):
359
359
  await self._auth_handler.refresh_if_needed()
360
360
  await self._auth_handler.apply_auth(None, url, headers, query_params) # type: ignore
361
361
 
362
- # Prepare connector
362
+ # Prepare connector kwargs (will be used to create fresh connector for each attempt)
363
363
  connector_kwargs: dict[str, Any] = {
364
364
  "limit": 10,
365
365
  "limit_per_host": 5,
@@ -369,8 +369,6 @@ class Endpoint(BaseModel):
369
369
  if not self.request_config.verify_ssl:
370
370
  connector_kwargs["ssl"] = False
371
371
 
372
- connector = aiohttp.TCPConnector(**connector_kwargs)
373
-
374
372
  # Prepare timeout
375
373
  timeout = aiohttp.ClientTimeout(
376
374
  total=self.request_config.timeout,
@@ -381,9 +379,11 @@ class Endpoint(BaseModel):
381
379
  # Define the request function for circuit breaker
382
380
  async def make_single_request() -> Any:
383
381
  """Make a single request attempt."""
384
- async with aiohttp.ClientSession(
385
- connector=connector, timeout=timeout
386
- ) as session:
382
+ # Create a fresh connector for each request attempt to avoid "Session is closed" errors on retries
383
+ connector = aiohttp.TCPConnector(**connector_kwargs)
384
+ session = None
385
+ try:
386
+ session = aiohttp.ClientSession(connector=connector, timeout=timeout)
387
387
  # Prepare request kwargs
388
388
  request_kwargs: dict[str, Any] = {
389
389
  "headers": headers,
@@ -486,6 +486,12 @@ class Endpoint(BaseModel):
486
486
  await self._response_cache.set(url, kwargs, result)
487
487
 
488
488
  return result
489
+ finally:
490
+ # Always close the session to prevent "Session is closed" errors on retries
491
+ if session is not None:
492
+ await session.close()
493
+ # Give the connector time to close properly
494
+ await asyncio.sleep(0.01)
489
495
 
490
496
  # Execute with retries
491
497
  last_exception = None
@@ -569,11 +575,11 @@ class Endpoint(BaseModel):
569
575
 
570
576
  if hasattr(param, "enum") and param.enum:
571
577
  param_info["enum"] = list(param.enum)
572
-
578
+
573
579
  # Add constraints for number/primitive types
574
580
  if hasattr(param, "parameter_schema") and param.parameter_schema:
575
581
  from agentle.agents.apis.primitive_schema import PrimitiveSchema
576
-
582
+
577
583
  schema = param.parameter_schema
578
584
  # Only PrimitiveSchema has minimum, maximum, format
579
585
  if isinstance(schema, PrimitiveSchema):
@@ -0,0 +1,31 @@
1
+ # Em: agentle/agents/whatsapp/models/whatsapp_response_base.py
2
+
3
+ from rsb.models.base_model import BaseModel
4
+ from rsb.models.field import Field
5
+
6
+
7
+ class WhatsAppResponseBase(BaseModel):
8
+ """
9
+ Base class for WhatsApp bot structured responses.
10
+
11
+ This class ensures that all structured outputs from the WhatsApp bot
12
+ contain a 'response' field with the text to be sent to the user.
13
+
14
+ Developers can extend this class to add additional structured data
15
+ that they want to extract from the conversation.
16
+
17
+ Example:
18
+ ```python
19
+ class CustomerServiceResponse(WhatsAppResponseBase):
20
+ response: str # Inherited - text to send to user
21
+ sentiment: Literal["happy", "neutral", "frustrated", "angry"]
22
+ urgency: int = Field(ge=1, le=5, description="Urgency level 1-5")
23
+ requires_human: bool = False
24
+ suggested_actions: list[str] = Field(default_factory=list)
25
+ ```
26
+ """
27
+
28
+ response: str = Field(
29
+ ...,
30
+ description="The text response that will be sent to the WhatsApp user. This field is required.",
31
+ )
@@ -33,6 +33,7 @@ from agentle.agents.whatsapp.models.whatsapp_document_message import (
33
33
  from agentle.agents.whatsapp.models.whatsapp_image_message import WhatsAppImageMessage
34
34
  from agentle.agents.whatsapp.models.whatsapp_media_message import WhatsAppMediaMessage
35
35
  from agentle.agents.whatsapp.models.whatsapp_message import WhatsAppMessage
36
+ from agentle.agents.whatsapp.models.whatsapp_response_base import WhatsAppResponseBase
36
37
  from agentle.agents.whatsapp.models.whatsapp_session import WhatsAppSession
37
38
  from agentle.agents.whatsapp.models.whatsapp_text_message import WhatsAppTextMessage
38
39
  from agentle.agents.whatsapp.models.whatsapp_video_message import WhatsAppVideoMessage
@@ -128,14 +129,42 @@ class CallbackWithContext:
128
129
  context: dict[str, Any] = field(default_factory=dict)
129
130
 
130
131
 
131
- class WhatsAppBot(BaseModel):
132
+ class WhatsAppBot[T_Schema: WhatsAppResponseBase = WhatsAppResponseBase](BaseModel):
132
133
  """
133
134
  WhatsApp bot that wraps an Agentle agent with enhanced message batching and spam protection.
134
135
 
135
- Now uses the Agent's conversation store directly instead of managing contexts separately.
136
+ Now supports structured outputs through generic type parameter T_Schema.
137
+ The schema must extend WhatsAppResponseBase to ensure a 'response' field is always present.
138
+
139
+ Examples:
140
+ ```python
141
+ # Basic usage (no structured output)
142
+ agent = Agent(...)
143
+ bot = WhatsAppBot(agent=agent, provider=provider)
144
+
145
+ # With structured output
146
+ class MyResponse(WhatsAppResponseBase):
147
+ sentiment: Literal["happy", "sad", "neutral"]
148
+ urgency_level: int
149
+
150
+ agent = Agent[MyResponse](
151
+ response_schema=MyResponse,
152
+ instructions="Extract sentiment and urgency from the conversation..."
153
+ )
154
+ bot = WhatsAppBot[MyResponse](agent=agent, provider=provider)
155
+
156
+ # Access structured data in callbacks
157
+ async def my_callback(phone, chat_id, response, context):
158
+ if response and response.parsed:
159
+ print(f"Sentiment: {response.parsed.sentiment}")
160
+ print(f"Urgency: {response.parsed.urgency_level}")
161
+ # response.parsed.response is automatically sent to WhatsApp
162
+
163
+ bot.add_response_callback(my_callback)
164
+ ```
136
165
  """
137
166
 
138
- agent: Agent[Any]
167
+ agent: Agent[T_Schema]
139
168
  provider: WhatsAppProvider
140
169
  tts_provider: TtsProvider | None = Field(default=None)
141
170
  file_storage_manager: FileStorageManager | None = Field(default=None)
@@ -1302,7 +1331,7 @@ class WhatsAppBot(BaseModel):
1302
1331
 
1303
1332
  async def _process_message_batch(
1304
1333
  self, phone_number: PhoneNumber, session: WhatsAppSession, processing_token: str
1305
- ) -> GeneratedAssistantMessage[Any] | None:
1334
+ ) -> GeneratedAssistantMessage[T_Schema] | None:
1306
1335
  """Process a batch of messages for a user with enhanced timeout protection.
1307
1336
 
1308
1337
  This method processes multiple messages that were received in quick succession
@@ -1504,7 +1533,7 @@ class WhatsAppBot(BaseModel):
1504
1533
  message: WhatsAppMessage,
1505
1534
  session: WhatsAppSession,
1506
1535
  chat_id: ChatId | None = None,
1507
- ) -> GeneratedAssistantMessage[Any]:
1536
+ ) -> GeneratedAssistantMessage[T_Schema]:
1508
1537
  """Process a single message immediately with quote message support."""
1509
1538
  logger.info(
1510
1539
  "[SINGLE_MESSAGE] ═══════════ SINGLE MESSAGE PROCESSING START ═══════════"
@@ -2207,7 +2236,7 @@ class WhatsAppBot(BaseModel):
2207
2236
  async def _send_response(
2208
2237
  self,
2209
2238
  to: PhoneNumber,
2210
- response: GeneratedAssistantMessage[Any] | str,
2239
+ response: GeneratedAssistantMessage[T_Schema] | str,
2211
2240
  reply_to: str | None = None,
2212
2241
  ) -> None:
2213
2242
  """Send response message(s) to user with enhanced error handling and retry logic.
@@ -2255,12 +2284,24 @@ class WhatsAppBot(BaseModel):
2255
2284
  ... reply_to="msg_123"
2256
2285
  ... )
2257
2286
  """
2258
- # Extract text from GeneratedAssistantMessage if needed
2259
- response_text = (
2260
- response.text
2261
- if isinstance(response, GeneratedAssistantMessage)
2262
- else response
2263
- )
2287
+ response_text = ""
2288
+
2289
+ if isinstance(response, GeneratedAssistantMessage):
2290
+ # Check if we have structured output (parsed)
2291
+ if response.parsed:
2292
+ # Use the 'response' field from structured output
2293
+ response_text = response.parsed.response
2294
+ logger.debug(
2295
+ "[SEND_RESPONSE] Using structured output 'response' field "
2296
+ + f"(schema: {type(response.parsed).__name__})"
2297
+ )
2298
+ else:
2299
+ # Fallback to text field
2300
+ response_text = response.text
2301
+ logger.debug("[SEND_RESPONSE] Using standard text response")
2302
+ else:
2303
+ # Direct string
2304
+ response_text = response
2264
2305
 
2265
2306
  # Apply WhatsApp-specific markdown formatting
2266
2307
  response_text = self._format_whatsapp_markdown(response_text)
agentle/web/extractor.py CHANGED
@@ -1,8 +1,11 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
1
4
  from collections.abc import Sequence
2
5
  from textwrap import dedent
6
+ from typing import TYPE_CHECKING
3
7
 
4
8
  from html_to_markdown import convert
5
- from playwright.async_api import Geolocation, ViewportSize
6
9
  from rsb.coroutines.run_sync import run_sync
7
10
  from rsb.models import Field
8
11
  from rsb.models.base_model import BaseModel
@@ -18,6 +21,10 @@ from agentle.web.actions.action import Action
18
21
  from agentle.web.extraction_preferences import ExtractionPreferences
19
22
  from agentle.web.extraction_result import ExtractionResult
20
23
 
24
+ if TYPE_CHECKING:
25
+ from playwright.async_api import Browser, Geolocation, ViewportSize
26
+
27
+
21
28
  _INSTRUCTIONS = Prompt.from_text(
22
29
  dedent("""\
23
30
  <character>
@@ -64,31 +71,28 @@ class Extractor(BaseModel):
64
71
 
65
72
  model_config = ConfigDict(arbitrary_types_allowed=True)
66
73
 
67
- def extract[T: BaseModel](
74
+ def extract_markdown(
68
75
  self,
76
+ browser: Browser,
69
77
  urls: Sequence[str],
70
- output: type[T],
71
- prompt: str | None = None,
72
78
  extraction_preferences: ExtractionPreferences | None = None,
73
79
  ignore_invalid_urls: bool = True,
74
- ) -> ExtractionResult[T]:
80
+ ) -> tuple[str, str]:
75
81
  return run_sync(
76
- self.extract_async(
77
- urls, output, prompt, extraction_preferences, ignore_invalid_urls
78
- )
82
+ self.extract_markdown_async,
83
+ browser=browser,
84
+ urls=urls,
85
+ extraction_preferences=extraction_preferences,
86
+ ignore_invalid_urls=ignore_invalid_urls,
79
87
  )
80
88
 
81
- @needs("playwright")
82
- async def extract_async[T: BaseModel](
89
+ async def extract_markdown_async(
83
90
  self,
91
+ browser: Browser,
84
92
  urls: Sequence[str],
85
- output: type[T],
86
- prompt: str | None = None,
87
93
  extraction_preferences: ExtractionPreferences | None = None,
88
94
  ignore_invalid_urls: bool = True,
89
- ) -> ExtractionResult[T]:
90
- from playwright import async_api
91
-
95
+ ) -> tuple[str, str]:
92
96
  _preferences = extraction_preferences or ExtractionPreferences()
93
97
  _actions: Sequence[Action] = _preferences.actions or []
94
98
 
@@ -98,171 +102,272 @@ class Extractor(BaseModel):
98
102
  # This is a placeholder for proxy configuration
99
103
  pass
100
104
 
101
- async with async_api.async_playwright() as p:
102
- browser = await p.chromium.launch(headless=True)
103
-
104
- # Build context options properly based on preferences
105
- if _preferences.mobile:
106
- viewport: ViewportSize | None = ViewportSize(width=375, height=667)
107
- user_agent = "Mozilla/5.0 (iPhone; CPU iPhone OS 14_0 like Mac OS X) AppleWebKit/605.1.15"
108
- is_mobile = True
109
- else:
110
- viewport = None
111
- user_agent = None
112
- is_mobile = None
113
-
114
- # Handle geolocation
115
- geolocation: Geolocation | None = None
116
- permissions = None
117
- if _preferences.location:
118
- geolocation = Geolocation(
119
- latitude=getattr(_preferences.location, "latitude", 0),
120
- longitude=getattr(_preferences.location, "longitude", 0),
121
- )
122
- permissions = ["geolocation"]
123
-
124
- context = await browser.new_context(
125
- viewport=viewport,
126
- user_agent=user_agent,
127
- is_mobile=is_mobile,
128
- extra_http_headers=_preferences.headers,
129
- ignore_https_errors=_preferences.skip_tls_verification,
130
- geolocation=geolocation,
131
- permissions=permissions,
105
+ # Build context options properly based on preferences
106
+ if _preferences.mobile:
107
+ viewport: ViewportSize | None = ViewportSize(width=375, height=667)
108
+ user_agent = "Mozilla/5.0 (iPhone; CPU iPhone OS 14_0 like Mac OS X) AppleWebKit/605.1.15"
109
+ is_mobile = True
110
+ else:
111
+ viewport = None
112
+ user_agent = None
113
+ is_mobile = None
114
+
115
+ # Handle geolocation
116
+ geolocation: Geolocation | None = None
117
+ permissions = None
118
+ if _preferences.location:
119
+ geolocation = Geolocation(
120
+ latitude=getattr(_preferences.location, "latitude", 0),
121
+ longitude=getattr(_preferences.location, "longitude", 0),
132
122
  )
123
+ permissions = ["geolocation"]
124
+
125
+ context = await browser.new_context(
126
+ viewport=viewport,
127
+ user_agent=user_agent,
128
+ is_mobile=is_mobile,
129
+ extra_http_headers=_preferences.headers,
130
+ ignore_https_errors=_preferences.skip_tls_verification,
131
+ geolocation=geolocation,
132
+ permissions=permissions,
133
+ )
133
134
 
134
- # Block ads if specified
135
- if _preferences.block_ads:
136
- await context.route(
137
- "**/*",
138
- lambda route: route.abort()
139
- if route.request.resource_type in ["image", "media", "font"]
140
- and any(
141
- ad_domain in route.request.url
142
- for ad_domain in [
143
- "doubleclick.net",
144
- "googlesyndication.com",
145
- "adservice.google.com",
146
- "ads",
147
- "analytics",
148
- "tracking",
149
- ]
150
- )
151
- else route.continue_(),
135
+ # Block ads if specified
136
+ if _preferences.block_ads:
137
+ await context.route(
138
+ "**/*",
139
+ lambda route: route.abort()
140
+ if route.request.resource_type in ["image", "media", "font"]
141
+ and any(
142
+ ad_domain in route.request.url
143
+ for ad_domain in [
144
+ "doubleclick.net",
145
+ "googlesyndication.com",
146
+ "adservice.google.com",
147
+ "ads",
148
+ "analytics",
149
+ "tracking",
150
+ ]
152
151
  )
152
+ else route.continue_(),
153
+ )
154
+
155
+ page = await context.new_page()
156
+
157
+ for url in urls:
158
+ # Set timeout if specified
159
+ timeout = _preferences.timeout_ms if _preferences.timeout_ms else 30000
153
160
 
154
- page = await context.new_page()
161
+ try:
162
+ await page.goto(url, timeout=timeout)
155
163
 
156
- for url in urls:
157
- # Set timeout if specified
158
- timeout = _preferences.timeout_ms if _preferences.timeout_ms else 30000
164
+ # Wait for specified time if configured
165
+ if _preferences.wait_for_ms:
166
+ await page.wait_for_timeout(_preferences.wait_for_ms)
159
167
 
160
- try:
161
- await page.goto(url, timeout=timeout)
168
+ # Execute actions
169
+ for action in _actions:
170
+ await action.execute(page)
162
171
 
163
- # Wait for specified time if configured
164
- if _preferences.wait_for_ms:
165
- await page.wait_for_timeout(_preferences.wait_for_ms)
172
+ except Exception as e:
173
+ if ignore_invalid_urls:
174
+ print(f"Warning: Failed to load {url}: {e}")
175
+ continue
176
+ else:
177
+ raise
166
178
 
167
- # Execute actions
168
- for action in _actions:
169
- await action.execute(page)
179
+ html = await page.content()
170
180
 
171
- except Exception as e:
172
- if ignore_invalid_urls:
173
- print(f"Warning: Failed to load {url}: {e}")
174
- continue
175
- else:
176
- raise
181
+ # Process HTML based on preferences - consolidate all BeautifulSoup operations
182
+ if (
183
+ _preferences.remove_base_64_images
184
+ or _preferences.include_tags
185
+ or _preferences.exclude_tags
186
+ or _preferences.only_main_content
187
+ ):
188
+ from bs4 import BeautifulSoup
177
189
 
178
- html = await page.content()
190
+ soup = BeautifulSoup(html, "html.parser")
179
191
 
180
- # Process HTML based on preferences
192
+ # Remove base64 images first
181
193
  if _preferences.remove_base_64_images:
182
194
  import re
183
195
 
184
- html = re.sub(
185
- r'<img[^>]+src="data:image/[^"]+"[^>]*>',
186
- "",
187
- html,
188
- flags=re.IGNORECASE,
196
+ # Debug: Check what we have before processing
197
+ all_imgs = soup.find_all("img")
198
+ print(f"DEBUG: Found {len(all_imgs)} img tags total")
199
+ base64_count = 0
200
+ for img in all_imgs:
201
+ src = img.attrs.get("src") if hasattr(img, "attrs") else None # type: ignore[union-attr]
202
+ if isinstance(src, str) and "data:image/" in src:
203
+ base64_count += 1
204
+ print(f"DEBUG: Found base64 img: {src[:100]}...")
205
+ print(f"DEBUG: {base64_count} images have base64 data")
206
+
207
+ # First, remove any anchor tags that contain img children with base64
208
+ # (must be done before removing img tags themselves)
209
+ removed_anchors = 0
210
+ for a_tag in soup.find_all("a"):
211
+ imgs = a_tag.find_all("img") # type: ignore[union-attr]
212
+ for img in imgs:
213
+ src = img.attrs.get("src") if hasattr(img, "attrs") else None # type: ignore[union-attr]
214
+ if isinstance(src, str) and src.startswith("data:image/"):
215
+ # Remove the entire anchor tag if it contains base64 image
216
+ a_tag.decompose()
217
+ removed_anchors += 1
218
+ break
219
+ print(
220
+ f"DEBUG: Removed {removed_anchors} anchor tags with base64 images"
189
221
  )
190
222
 
191
- # Filter HTML by tags if specified
192
- if _preferences.include_tags or _preferences.exclude_tags:
193
- from bs4 import BeautifulSoup
194
-
195
- soup = BeautifulSoup(html, "html.parser")
196
-
197
- if _preferences.only_main_content:
198
- # Try to find main content area
199
- main_content = (
200
- soup.find("main")
201
- or soup.find("article")
202
- or soup.find("div", {"id": "content"})
203
- or soup.find("div", {"class": "content"})
204
- )
205
- if main_content:
206
- soup = BeautifulSoup(str(main_content), "html.parser")
207
-
208
- if _preferences.exclude_tags:
209
- for tag in _preferences.exclude_tags:
210
- for element in soup.find_all(tag):
211
- element.decompose()
212
-
213
- if _preferences.include_tags:
214
- # Keep only specified tags
215
- new_soup = BeautifulSoup("", "html.parser")
216
- for tag in _preferences.include_tags:
217
- for element in soup.find_all(tag):
218
- new_soup.append(element)
219
- soup = new_soup
220
-
221
- html = str(soup)
222
-
223
- # Convert to markdown
224
- markdown = convert(html)
225
-
226
- # Prepare and send prompt
227
- _prompt = _PROMPT.compile(
228
- user_instructions=prompt or "Not provided.", markdown=markdown
229
- )
230
-
231
- if isinstance(self.llm, GenerationProvider):
232
- response = await self.llm.generate_by_prompt_async(
233
- prompt=_prompt,
234
- model=self.model,
235
- developer_prompt=_INSTRUCTIONS,
236
- response_schema=output,
223
+ # Remove standalone img tags with base64 src
224
+ removed_imgs = 0
225
+ for img in soup.find_all("img"):
226
+ src = img.attrs.get("src") if hasattr(img, "attrs") else None # type: ignore[union-attr]
227
+ if isinstance(src, str) and src.startswith("data:image/"):
228
+ img.decompose()
229
+ removed_imgs += 1
230
+ print(f"DEBUG: Removed {removed_imgs} standalone img tags")
231
+
232
+ # Remove any element with base64 in href (like anchor tags with image data)
233
+ for elem in soup.find_all(attrs={"href": True}):
234
+ href = elem.attrs.get("href") if hasattr(elem, "attrs") else None # type: ignore[union-attr]
235
+ if isinstance(href, str) and href.startswith("data:image/"):
236
+ elem.decompose()
237
+
238
+ # Remove any element with base64 in style attribute
239
+ for elem in soup.find_all(attrs={"style": True}):
240
+ style = elem.attrs.get("style") if hasattr(elem, "attrs") else None # type: ignore[union-attr]
241
+ if isinstance(style, str) and "data:image/" in style:
242
+ elem.decompose()
243
+
244
+ # Remove SVG tags (they often contain base64 or are converted to base64 by markdown)
245
+ for svg in soup.find_all("svg"):
246
+ svg.decompose()
247
+
248
+ # Remove any anchor tags that contain SVG children
249
+ for a_tag in soup.find_all("a"):
250
+ if a_tag.find("svg"): # type: ignore[union-attr]
251
+ a_tag.decompose()
252
+
253
+ # Final check: see if any base64 remains in the HTML string
254
+ html_str = str(soup)
255
+ remaining = len(re.findall(r'data:image/[^"\')\s]+', html_str))
256
+ print(
257
+ f"DEBUG: After processing, {remaining} base64 data URIs remain in HTML"
237
258
  )
238
- else:
239
- response = await self.llm.respond_async(
240
- input=_prompt,
241
- model=self.model,
242
- instructions=_INSTRUCTIONS,
243
- reasoning=self.reasoning,
244
- text_format=output,
259
+
260
+ # Extract main content if requested
261
+ if _preferences.only_main_content:
262
+ main_content = (
263
+ soup.find("main")
264
+ or soup.find("article")
265
+ or soup.find("div", {"id": "content"})
266
+ or soup.find("div", {"class": "content"})
245
267
  )
268
+ if main_content:
269
+ soup = main_content # type: ignore[assignment]
246
270
 
247
- output_parsed = (
248
- response.parsed
249
- if isinstance(response, Generation)
250
- else response.output_parsed
251
- )
271
+ # Exclude specific tags
272
+ if _preferences.exclude_tags:
273
+ for tag in _preferences.exclude_tags:
274
+ for element in soup.find_all(tag): # type: ignore[union-attr]
275
+ element.decompose()
276
+
277
+ # Include only specific tags
278
+ if _preferences.include_tags:
279
+ new_soup = BeautifulSoup("", "html.parser")
280
+ for tag in _preferences.include_tags:
281
+ for element in soup.find_all(tag): # type: ignore[union-attr]
282
+ new_soup.append(element) # type: ignore[arg-type]
283
+ soup = new_soup
284
+
285
+ html = str(soup)
252
286
 
253
- await browser.close()
287
+ # Convert to markdown
288
+ markdown = convert(html)
289
+ return html, markdown
254
290
 
255
- return ExtractionResult[T](
291
+ def extract[T: BaseModel](
292
+ self,
293
+ browser: Browser,
294
+ urls: Sequence[str],
295
+ output: type[T],
296
+ prompt: str | None = None,
297
+ extraction_preferences: ExtractionPreferences | None = None,
298
+ ignore_invalid_urls: bool = True,
299
+ ) -> ExtractionResult[T]:
300
+ return run_sync(
301
+ self.extract_async(
302
+ browser=browser,
256
303
  urls=urls,
257
- html=html,
258
- markdown=markdown,
259
- extraction_preferences=_preferences,
260
- output_parsed=output_parsed,
304
+ output=output,
305
+ prompt=prompt,
306
+ extraction_preferences=extraction_preferences,
307
+ ignore_invalid_urls=ignore_invalid_urls,
261
308
  )
309
+ )
262
310
 
311
+ @needs("playwright")
312
+ async def extract_async[T: BaseModel](
313
+ self,
314
+ browser: Browser,
315
+ urls: Sequence[str],
316
+ output: type[T],
317
+ prompt: str | None = None,
318
+ extraction_preferences: ExtractionPreferences | None = None,
319
+ ignore_invalid_urls: bool = True,
320
+ ) -> ExtractionResult[T]:
321
+ _preferences = extraction_preferences or ExtractionPreferences()
263
322
 
264
- if __name__ == "__main__":
323
+ html, markdown = await self.extract_markdown_async(
324
+ browser=browser,
325
+ urls=urls,
326
+ extraction_preferences=_preferences,
327
+ ignore_invalid_urls=ignore_invalid_urls,
328
+ )
329
+
330
+ # Prepare and send prompt
331
+ _prompt = _PROMPT.compile(
332
+ user_instructions=prompt or "Not provided.", markdown=markdown
333
+ )
334
+
335
+ if isinstance(self.llm, GenerationProvider):
336
+ response = await self.llm.generate_by_prompt_async(
337
+ prompt=_prompt,
338
+ model=self.model,
339
+ developer_prompt=_INSTRUCTIONS,
340
+ response_schema=output,
341
+ )
342
+ else:
343
+ response = await self.llm.respond_async(
344
+ input=_prompt,
345
+ model=self.model,
346
+ instructions=_INSTRUCTIONS,
347
+ reasoning=self.reasoning,
348
+ text_format=output,
349
+ )
350
+
351
+ output_parsed = (
352
+ response.parsed
353
+ if isinstance(response, Generation)
354
+ else response.output_parsed
355
+ )
356
+
357
+ await browser.close()
358
+
359
+ return ExtractionResult[T](
360
+ urls=urls,
361
+ html=html,
362
+ markdown=markdown,
363
+ extraction_preferences=_preferences,
364
+ output_parsed=output_parsed,
365
+ )
366
+
367
+
368
+ async def test() -> None:
265
369
  from dotenv import load_dotenv
370
+ from playwright import async_api
266
371
 
267
372
  load_dotenv()
268
373
 
@@ -272,8 +377,8 @@ if __name__ == "__main__":
272
377
  possiveis_redirecionamentos: list[str]
273
378
 
274
379
  extractor = Extractor(
275
- llm=Responder.openai(),
276
- model="gpt-5-nano",
380
+ llm=Responder.openrouter(),
381
+ model="google/gemini-2.5-flash",
277
382
  )
278
383
 
279
384
  # Example with custom extraction preferences
@@ -285,12 +390,20 @@ if __name__ == "__main__":
285
390
  timeout_ms=15000,
286
391
  )
287
392
 
288
- result = extractor.extract(
289
- urls=[site_uniube],
290
- output=PossiveisRedirecionamentos,
291
- prompt="Extract the possible redirects from the page.",
292
- extraction_preferences=preferences,
293
- )
393
+ async with async_api.async_playwright() as p:
394
+ browser = await p.chromium.launch(headless=True)
395
+
396
+ result = await extractor.extract_async(
397
+ browser=browser,
398
+ urls=[site_uniube],
399
+ output=PossiveisRedirecionamentos,
400
+ prompt="Extract the possible redirects from the page.",
401
+ extraction_preferences=preferences,
402
+ )
403
+
404
+ for link in result.output_parsed.possiveis_redirecionamentos:
405
+ print(f"Link: {link}")
294
406
 
295
- for link in result.output_parsed.possiveis_redirecionamentos:
296
- print(f"Link: {link}")
407
+
408
+ if __name__ == "__main__":
409
+ asyncio.run(test())
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: agentle
3
- Version: 0.9.25
3
+ Version: 0.9.27
4
4
  Summary: ...
5
5
  Author-email: Arthur Brenno <64020210+arthurbrenno@users.noreply.github.com>
6
6
  License-File: LICENSE
@@ -63,7 +63,7 @@ agentle/agents/a2a/tasks/managment/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQe
63
63
  agentle/agents/a2a/tasks/managment/in_memory.py,sha256=_G5VuXqEPBMtE6XJg1d7WmqFr1qzd0-99FoqM_qMwAE,23841
64
64
  agentle/agents/a2a/tasks/managment/task_manager.py,sha256=rBCuzu4DqIs55xDnwXY0w5Rs9ybv6OJpgpugAQLhtoU,3112
65
65
  agentle/agents/apis/__init__.py,sha256=PX7oAe0hRGvyLB295DrBF1VBsqgp5ZmGI4BCZvLUozo,2811
66
- agentle/agents/apis/api.py,sha256=dl146HQwawq4ll8lEcWl7ZM46-FBBgmX_TXEopY-Le8,25709
66
+ agentle/agents/apis/api.py,sha256=PyYcZJJR0f8c7VceQMLzUfnvj3belNbCJJNmVdXfdXo,26610
67
67
  agentle/agents/apis/api_key_authentication.py,sha256=MtMA4qkCjM3ou42a1fDgKI4u3NQkB_Zr-gEA5_oysZ0,1311
68
68
  agentle/agents/apis/api_key_location.py,sha256=0pj_8rTkd0pkUJ2eP_Kur3AvT3JD8JpFIxQsWDzeg_c,188
69
69
  agentle/agents/apis/api_metrics.py,sha256=SyvJdvEMKp7rGij-tDZ6vxjwc26MIFrKrcckMc4Q1Zg,380
@@ -78,7 +78,7 @@ agentle/agents/apis/cache_strategy.py,sha256=uoAvmUm1EE8426anuLR4PoygLgO4SPN1Qm_
78
78
  agentle/agents/apis/circuit_breaker.py,sha256=9yopLPoZ6WMlPMWPoacayORoGN1CZq2EGWB0gCu3GOY,2371
79
79
  agentle/agents/apis/circuit_breaker_error.py,sha256=I5XyCWwFCXTDzhvS7CpMZwBRxMyCk96g7MfBLBqAvhg,127
80
80
  agentle/agents/apis/circuit_breaker_state.py,sha256=6IwcWWKNmE0cnpogcfsnhpy_EVIk7crQ7WiDN9YkkbE,277
81
- agentle/agents/apis/endpoint.py,sha256=CIcGzzQmvT1WVgpmFgUOvrF5DTQ4r0V0oOE6NPST6nc,22147
81
+ agentle/agents/apis/endpoint.py,sha256=4skIMje2oEbc6ONb-Ww66wh8VGP5tj1JZACE3p7U62E,22600
82
82
  agentle/agents/apis/endpoint_parameter.py,sha256=A_SVje6AyNeeJNDxWL__uGc4ZNZ2se6GvawS--kUYEU,20230
83
83
  agentle/agents/apis/endpoints_to_tools.py,sha256=5KzxRLjfUYx7MGKHe65NMPHyTKOcd8tdK0uYzfGnO5g,999
84
84
  agentle/agents/apis/file_upload.py,sha256=PzJ1197EKLCdBHrLDztgifM3WWFQZ8K2CKkTeeYpJas,587
@@ -137,7 +137,7 @@ agentle/agents/ui/__init__.py,sha256=IjHRV0k2DNwvFrEHebmsXiBvmITE8nQUnsR07h9tVkU
137
137
  agentle/agents/ui/streamlit.py,sha256=9afICL0cxtG1o2pWh6vH39-NdKiVfADKiXo405F2aB0,42829
138
138
  agentle/agents/whatsapp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
139
139
  agentle/agents/whatsapp/human_delay_calculator.py,sha256=BGCDeoNTPsMn4d_QYmG0BWGCG8SiUJC6Fk295ulAsAk,18268
140
- agentle/agents/whatsapp/whatsapp_bot.py,sha256=D51UD2Wbi47RKgxUU8J7iWsxNMqV99Rzujz6TV1DjGw,160179
140
+ agentle/agents/whatsapp/whatsapp_bot.py,sha256=7B1ZCQYMJ6qgRINFzSQVSshDr-4L1F5H3MHqJYI7w3g,161962
141
141
  agentle/agents/whatsapp/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
142
142
  agentle/agents/whatsapp/models/audio_message.py,sha256=kUqG1HdNW6DCYD-CqscJ6WHlAyv9ufmTSKMdjio9XWk,2705
143
143
  agentle/agents/whatsapp/models/context_info.py,sha256=sk80KuNE36S6VRnLh7n6UXmzZCXIB4E4lNxnRyVizg8,563
@@ -161,6 +161,7 @@ agentle/agents/whatsapp/models/whatsapp_media_message.py,sha256=-FY15vezGILzGMiU
161
161
  agentle/agents/whatsapp/models/whatsapp_message.py,sha256=QtGAJKOF1ykZycsNDld25gk-JUeg3uV7hNXx0ZXO0Rg,1217
162
162
  agentle/agents/whatsapp/models/whatsapp_message_status.py,sha256=jDWShdvSve5EhkgtDkh1jZmpRVNoXCokv4M6at1eSIU,214
163
163
  agentle/agents/whatsapp/models/whatsapp_message_type.py,sha256=GctIGOC1Bc_D_L0ehEmEwgxePFx0ioTEUoBlZEdxdG8,279
164
+ agentle/agents/whatsapp/models/whatsapp_response_base.py,sha256=IIDONx9Ipt593tAZvoc8dPDUISeNH-WOpRP1x_-Q6Gk,1145
164
165
  agentle/agents/whatsapp/models/whatsapp_session.py,sha256=9G1HC-A2G9jTdpwYy3w9bnYkOGK2vvA7kdYAf32oWMU,15640
165
166
  agentle/agents/whatsapp/models/whatsapp_text_message.py,sha256=GpSwFrPC4qpQlVCWKKgYjQJKNv0qvwgYfuoD3ttLzdQ,441
166
167
  agentle/agents/whatsapp/models/whatsapp_video_message.py,sha256=-d-4hnkkxyLVNoje3a1pOEAvzWqoCLFcBn70wUpnyXY,346
@@ -1003,7 +1004,7 @@ agentle/voice_cloning/voice_cloner.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJW
1003
1004
  agentle/web/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
1004
1005
  agentle/web/extraction_preferences.py,sha256=Xb4X6ZgnbDuu4Pp7cI0sdPcv6LaR1Q94FPTNEoHVTGg,985
1005
1006
  agentle/web/extraction_result.py,sha256=IsbRdT_wA9RVYGToCiz17XRoWMTtiFzxky96Zwqa4ZY,318
1006
- agentle/web/extractor.py,sha256=x3KlbJkKSnTaUD9vAx3n2jev-PKgcAlnGJxBPOdwFoM,10997
1007
+ agentle/web/extractor.py,sha256=ISNYVoofry47HtB0oDhmb2Eof15ZhTL-qdyes1mYSbQ,15585
1007
1008
  agentle/web/location.py,sha256=RZgqb2rW7wUdcbw3PnmDtfr4FkTSSovW0j70ZOvoRiw,64
1008
1009
  agentle/web/actions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
1009
1010
  agentle/web/actions/action.py,sha256=krxW5vXaqB1_JfnPpuo5cVJyANrlElu9P0B0TrF_aZs,723
@@ -1017,7 +1018,7 @@ agentle/web/actions/scroll.py,sha256=WqVVAORNDK3BL1oASZBPmXJYeSVkPgAOmWA8ibYO82I
1017
1018
  agentle/web/actions/viewport.py,sha256=KCwm88Pri19Qc6GLHC69HsRxmdJz1gEEAODfggC_fHo,287
1018
1019
  agentle/web/actions/wait.py,sha256=IKEywjf-KC4ni9Gkkv4wgc7bY-hk7HwD4F-OFWlyf2w,571
1019
1020
  agentle/web/actions/write_text.py,sha256=9mxfHcpKs_L7BsDnJvOYHQwG8M0GWe61SRJAsKk3xQ8,748
1020
- agentle-0.9.25.dist-info/METADATA,sha256=FQFBCmIzX4o197z2k9UyswQwuTBJOuAfRqEQelGDtwo,86849
1021
- agentle-0.9.25.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
1022
- agentle-0.9.25.dist-info/licenses/LICENSE,sha256=T90S9vqRS6qP-voULxAcvwEs558wRRo6dHuZrjgcOUI,1085
1023
- agentle-0.9.25.dist-info/RECORD,,
1021
+ agentle-0.9.27.dist-info/METADATA,sha256=6AlFskgDL86WD5HmWjtRGLwjygJgG2rkMsTbmFUQsJ0,86849
1022
+ agentle-0.9.27.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
1023
+ agentle-0.9.27.dist-info/licenses/LICENSE,sha256=T90S9vqRS6qP-voULxAcvwEs558wRRo6dHuZrjgcOUI,1085
1024
+ agentle-0.9.27.dist-info/RECORD,,