agentle 0.9.24__py3-none-any.whl → 0.9.26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentle/agents/apis/api.py +25 -7
- agentle/agents/apis/endpoint.py +14 -8
- agentle/agents/whatsapp/human_delay_calculator.py +462 -0
- agentle/agents/whatsapp/models/audio_message.py +6 -4
- agentle/agents/whatsapp/models/whatsapp_bot_config.py +352 -21
- agentle/agents/whatsapp/v2/__init__.py +0 -0
- agentle/agents/whatsapp/v2/batch_processor_manager.py +4 -0
- agentle/agents/whatsapp/v2/bot_config.py +188 -0
- agentle/agents/whatsapp/v2/in_memory_batch_processor_manager.py +0 -0
- agentle/agents/whatsapp/v2/message_limit.py +9 -0
- agentle/agents/whatsapp/v2/payload.py +0 -0
- agentle/agents/whatsapp/v2/whatsapp_bot.py +13 -0
- agentle/agents/whatsapp/v2/whatsapp_cloud_api_provider.py +0 -0
- agentle/agents/whatsapp/v2/whatsapp_provider.py +0 -0
- agentle/agents/whatsapp/whatsapp_bot.py +559 -12
- agentle/web/extractor.py +282 -165
- {agentle-0.9.24.dist-info → agentle-0.9.26.dist-info}/METADATA +1 -1
- {agentle-0.9.24.dist-info → agentle-0.9.26.dist-info}/RECORD +20 -10
- {agentle-0.9.24.dist-info → agentle-0.9.26.dist-info}/WHEEL +0 -0
- {agentle-0.9.24.dist-info → agentle-0.9.26.dist-info}/licenses/LICENSE +0 -0
agentle/web/extractor.py
CHANGED
|
@@ -1,11 +1,16 @@
|
|
|
1
|
-
from
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
2
4
|
from collections.abc import Sequence
|
|
3
5
|
from textwrap import dedent
|
|
6
|
+
from typing import TYPE_CHECKING
|
|
4
7
|
|
|
5
8
|
from html_to_markdown import convert
|
|
6
|
-
from
|
|
9
|
+
from rsb.coroutines.run_sync import run_sync
|
|
7
10
|
from rsb.models import Field
|
|
8
11
|
from rsb.models.base_model import BaseModel
|
|
12
|
+
from rsb.models.config_dict import ConfigDict
|
|
13
|
+
|
|
9
14
|
from agentle.generations.models.generation.generation import Generation
|
|
10
15
|
from agentle.generations.providers.base.generation_provider import GenerationProvider
|
|
11
16
|
from agentle.prompts.models.prompt import Prompt
|
|
@@ -16,6 +21,10 @@ from agentle.web.actions.action import Action
|
|
|
16
21
|
from agentle.web.extraction_preferences import ExtractionPreferences
|
|
17
22
|
from agentle.web.extraction_result import ExtractionResult
|
|
18
23
|
|
|
24
|
+
if TYPE_CHECKING:
|
|
25
|
+
from playwright.async_api import Browser, Geolocation, ViewportSize
|
|
26
|
+
|
|
27
|
+
|
|
19
28
|
_INSTRUCTIONS = Prompt.from_text(
|
|
20
29
|
dedent("""\
|
|
21
30
|
<character>
|
|
@@ -60,31 +69,30 @@ class Extractor(BaseModel):
|
|
|
60
69
|
model: str | None = Field(default=None)
|
|
61
70
|
max_output_tokens: int | None = Field(default=None)
|
|
62
71
|
|
|
63
|
-
|
|
72
|
+
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
73
|
+
|
|
74
|
+
def extract_markdown(
|
|
64
75
|
self,
|
|
76
|
+
browser: Browser,
|
|
65
77
|
urls: Sequence[str],
|
|
66
|
-
output: type[T],
|
|
67
|
-
prompt: str | None = None,
|
|
68
78
|
extraction_preferences: ExtractionPreferences | None = None,
|
|
69
79
|
ignore_invalid_urls: bool = True,
|
|
70
|
-
) ->
|
|
80
|
+
) -> tuple[str, str]:
|
|
71
81
|
return run_sync(
|
|
72
|
-
self.
|
|
73
|
-
|
|
74
|
-
|
|
82
|
+
self.extract_markdown_async,
|
|
83
|
+
browser=browser,
|
|
84
|
+
urls=urls,
|
|
85
|
+
extraction_preferences=extraction_preferences,
|
|
86
|
+
ignore_invalid_urls=ignore_invalid_urls,
|
|
75
87
|
)
|
|
76
88
|
|
|
77
|
-
|
|
78
|
-
async def extract_async[T: BaseModel](
|
|
89
|
+
async def extract_markdown_async(
|
|
79
90
|
self,
|
|
91
|
+
browser: Browser,
|
|
80
92
|
urls: Sequence[str],
|
|
81
|
-
output: type[T],
|
|
82
|
-
prompt: str | None = None,
|
|
83
93
|
extraction_preferences: ExtractionPreferences | None = None,
|
|
84
94
|
ignore_invalid_urls: bool = True,
|
|
85
|
-
) ->
|
|
86
|
-
from playwright import async_api
|
|
87
|
-
|
|
95
|
+
) -> tuple[str, str]:
|
|
88
96
|
_preferences = extraction_preferences or ExtractionPreferences()
|
|
89
97
|
_actions: Sequence[Action] = _preferences.actions or []
|
|
90
98
|
|
|
@@ -94,171 +102,272 @@ class Extractor(BaseModel):
|
|
|
94
102
|
# This is a placeholder for proxy configuration
|
|
95
103
|
pass
|
|
96
104
|
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
geolocation
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
geolocation = Geolocation(
|
|
115
|
-
latitude=getattr(_preferences.location, "latitude", 0),
|
|
116
|
-
longitude=getattr(_preferences.location, "longitude", 0),
|
|
117
|
-
)
|
|
118
|
-
permissions = ["geolocation"]
|
|
119
|
-
|
|
120
|
-
context = await browser.new_context(
|
|
121
|
-
viewport=viewport,
|
|
122
|
-
user_agent=user_agent,
|
|
123
|
-
is_mobile=is_mobile,
|
|
124
|
-
extra_http_headers=_preferences.headers,
|
|
125
|
-
ignore_https_errors=_preferences.skip_tls_verification,
|
|
126
|
-
geolocation=geolocation,
|
|
127
|
-
permissions=permissions,
|
|
105
|
+
# Build context options properly based on preferences
|
|
106
|
+
if _preferences.mobile:
|
|
107
|
+
viewport: ViewportSize | None = ViewportSize(width=375, height=667)
|
|
108
|
+
user_agent = "Mozilla/5.0 (iPhone; CPU iPhone OS 14_0 like Mac OS X) AppleWebKit/605.1.15"
|
|
109
|
+
is_mobile = True
|
|
110
|
+
else:
|
|
111
|
+
viewport = None
|
|
112
|
+
user_agent = None
|
|
113
|
+
is_mobile = None
|
|
114
|
+
|
|
115
|
+
# Handle geolocation
|
|
116
|
+
geolocation: Geolocation | None = None
|
|
117
|
+
permissions = None
|
|
118
|
+
if _preferences.location:
|
|
119
|
+
geolocation = Geolocation(
|
|
120
|
+
latitude=getattr(_preferences.location, "latitude", 0),
|
|
121
|
+
longitude=getattr(_preferences.location, "longitude", 0),
|
|
128
122
|
)
|
|
123
|
+
permissions = ["geolocation"]
|
|
124
|
+
|
|
125
|
+
context = await browser.new_context(
|
|
126
|
+
viewport=viewport,
|
|
127
|
+
user_agent=user_agent,
|
|
128
|
+
is_mobile=is_mobile,
|
|
129
|
+
extra_http_headers=_preferences.headers,
|
|
130
|
+
ignore_https_errors=_preferences.skip_tls_verification,
|
|
131
|
+
geolocation=geolocation,
|
|
132
|
+
permissions=permissions,
|
|
133
|
+
)
|
|
129
134
|
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
)
|
|
147
|
-
else route.continue_(),
|
|
135
|
+
# Block ads if specified
|
|
136
|
+
if _preferences.block_ads:
|
|
137
|
+
await context.route(
|
|
138
|
+
"**/*",
|
|
139
|
+
lambda route: route.abort()
|
|
140
|
+
if route.request.resource_type in ["image", "media", "font"]
|
|
141
|
+
and any(
|
|
142
|
+
ad_domain in route.request.url
|
|
143
|
+
for ad_domain in [
|
|
144
|
+
"doubleclick.net",
|
|
145
|
+
"googlesyndication.com",
|
|
146
|
+
"adservice.google.com",
|
|
147
|
+
"ads",
|
|
148
|
+
"analytics",
|
|
149
|
+
"tracking",
|
|
150
|
+
]
|
|
148
151
|
)
|
|
152
|
+
else route.continue_(),
|
|
153
|
+
)
|
|
154
|
+
|
|
155
|
+
page = await context.new_page()
|
|
149
156
|
|
|
150
|
-
|
|
157
|
+
for url in urls:
|
|
158
|
+
# Set timeout if specified
|
|
159
|
+
timeout = _preferences.timeout_ms if _preferences.timeout_ms else 30000
|
|
151
160
|
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
timeout = _preferences.timeout_ms if _preferences.timeout_ms else 30000
|
|
161
|
+
try:
|
|
162
|
+
await page.goto(url, timeout=timeout)
|
|
155
163
|
|
|
156
|
-
|
|
157
|
-
|
|
164
|
+
# Wait for specified time if configured
|
|
165
|
+
if _preferences.wait_for_ms:
|
|
166
|
+
await page.wait_for_timeout(_preferences.wait_for_ms)
|
|
158
167
|
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
168
|
+
# Execute actions
|
|
169
|
+
for action in _actions:
|
|
170
|
+
await action.execute(page)
|
|
162
171
|
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
172
|
+
except Exception as e:
|
|
173
|
+
if ignore_invalid_urls:
|
|
174
|
+
print(f"Warning: Failed to load {url}: {e}")
|
|
175
|
+
continue
|
|
176
|
+
else:
|
|
177
|
+
raise
|
|
166
178
|
|
|
167
|
-
|
|
168
|
-
if ignore_invalid_urls:
|
|
169
|
-
print(f"Warning: Failed to load {url}: {e}")
|
|
170
|
-
continue
|
|
171
|
-
else:
|
|
172
|
-
raise
|
|
179
|
+
html = await page.content()
|
|
173
180
|
|
|
174
|
-
|
|
181
|
+
# Process HTML based on preferences - consolidate all BeautifulSoup operations
|
|
182
|
+
if (
|
|
183
|
+
_preferences.remove_base_64_images
|
|
184
|
+
or _preferences.include_tags
|
|
185
|
+
or _preferences.exclude_tags
|
|
186
|
+
or _preferences.only_main_content
|
|
187
|
+
):
|
|
188
|
+
from bs4 import BeautifulSoup
|
|
175
189
|
|
|
176
|
-
|
|
190
|
+
soup = BeautifulSoup(html, "html.parser")
|
|
191
|
+
|
|
192
|
+
# Remove base64 images first
|
|
177
193
|
if _preferences.remove_base_64_images:
|
|
178
194
|
import re
|
|
179
195
|
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
196
|
+
# Debug: Check what we have before processing
|
|
197
|
+
all_imgs = soup.find_all("img")
|
|
198
|
+
print(f"DEBUG: Found {len(all_imgs)} img tags total")
|
|
199
|
+
base64_count = 0
|
|
200
|
+
for img in all_imgs:
|
|
201
|
+
src = img.attrs.get("src") if hasattr(img, "attrs") else None # type: ignore[union-attr]
|
|
202
|
+
if isinstance(src, str) and "data:image/" in src:
|
|
203
|
+
base64_count += 1
|
|
204
|
+
print(f"DEBUG: Found base64 img: {src[:100]}...")
|
|
205
|
+
print(f"DEBUG: {base64_count} images have base64 data")
|
|
206
|
+
|
|
207
|
+
# First, remove any anchor tags that contain img children with base64
|
|
208
|
+
# (must be done before removing img tags themselves)
|
|
209
|
+
removed_anchors = 0
|
|
210
|
+
for a_tag in soup.find_all("a"):
|
|
211
|
+
imgs = a_tag.find_all("img") # type: ignore[union-attr]
|
|
212
|
+
for img in imgs:
|
|
213
|
+
src = img.attrs.get("src") if hasattr(img, "attrs") else None # type: ignore[union-attr]
|
|
214
|
+
if isinstance(src, str) and src.startswith("data:image/"):
|
|
215
|
+
# Remove the entire anchor tag if it contains base64 image
|
|
216
|
+
a_tag.decompose()
|
|
217
|
+
removed_anchors += 1
|
|
218
|
+
break
|
|
219
|
+
print(
|
|
220
|
+
f"DEBUG: Removed {removed_anchors} anchor tags with base64 images"
|
|
185
221
|
)
|
|
186
222
|
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
# Prepare and send prompt
|
|
223
|
-
_prompt = _PROMPT.compile(
|
|
224
|
-
user_instructions=prompt or "Not provided.", markdown=markdown
|
|
225
|
-
)
|
|
226
|
-
|
|
227
|
-
if isinstance(self.llm, GenerationProvider):
|
|
228
|
-
response = await self.llm.generate_by_prompt_async(
|
|
229
|
-
prompt=_prompt,
|
|
230
|
-
model=self.model,
|
|
231
|
-
developer_prompt=_INSTRUCTIONS,
|
|
232
|
-
response_schema=output,
|
|
223
|
+
# Remove standalone img tags with base64 src
|
|
224
|
+
removed_imgs = 0
|
|
225
|
+
for img in soup.find_all("img"):
|
|
226
|
+
src = img.attrs.get("src") if hasattr(img, "attrs") else None # type: ignore[union-attr]
|
|
227
|
+
if isinstance(src, str) and src.startswith("data:image/"):
|
|
228
|
+
img.decompose()
|
|
229
|
+
removed_imgs += 1
|
|
230
|
+
print(f"DEBUG: Removed {removed_imgs} standalone img tags")
|
|
231
|
+
|
|
232
|
+
# Remove any element with base64 in href (like anchor tags with image data)
|
|
233
|
+
for elem in soup.find_all(attrs={"href": True}):
|
|
234
|
+
href = elem.attrs.get("href") if hasattr(elem, "attrs") else None # type: ignore[union-attr]
|
|
235
|
+
if isinstance(href, str) and href.startswith("data:image/"):
|
|
236
|
+
elem.decompose()
|
|
237
|
+
|
|
238
|
+
# Remove any element with base64 in style attribute
|
|
239
|
+
for elem in soup.find_all(attrs={"style": True}):
|
|
240
|
+
style = elem.attrs.get("style") if hasattr(elem, "attrs") else None # type: ignore[union-attr]
|
|
241
|
+
if isinstance(style, str) and "data:image/" in style:
|
|
242
|
+
elem.decompose()
|
|
243
|
+
|
|
244
|
+
# Remove SVG tags (they often contain base64 or are converted to base64 by markdown)
|
|
245
|
+
for svg in soup.find_all("svg"):
|
|
246
|
+
svg.decompose()
|
|
247
|
+
|
|
248
|
+
# Remove any anchor tags that contain SVG children
|
|
249
|
+
for a_tag in soup.find_all("a"):
|
|
250
|
+
if a_tag.find("svg"): # type: ignore[union-attr]
|
|
251
|
+
a_tag.decompose()
|
|
252
|
+
|
|
253
|
+
# Final check: see if any base64 remains in the HTML string
|
|
254
|
+
html_str = str(soup)
|
|
255
|
+
remaining = len(re.findall(r'data:image/[^"\')\s]+', html_str))
|
|
256
|
+
print(
|
|
257
|
+
f"DEBUG: After processing, {remaining} base64 data URIs remain in HTML"
|
|
233
258
|
)
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
259
|
+
|
|
260
|
+
# Extract main content if requested
|
|
261
|
+
if _preferences.only_main_content:
|
|
262
|
+
main_content = (
|
|
263
|
+
soup.find("main")
|
|
264
|
+
or soup.find("article")
|
|
265
|
+
or soup.find("div", {"id": "content"})
|
|
266
|
+
or soup.find("div", {"class": "content"})
|
|
241
267
|
)
|
|
268
|
+
if main_content:
|
|
269
|
+
soup = main_content # type: ignore[assignment]
|
|
242
270
|
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
271
|
+
# Exclude specific tags
|
|
272
|
+
if _preferences.exclude_tags:
|
|
273
|
+
for tag in _preferences.exclude_tags:
|
|
274
|
+
for element in soup.find_all(tag): # type: ignore[union-attr]
|
|
275
|
+
element.decompose()
|
|
276
|
+
|
|
277
|
+
# Include only specific tags
|
|
278
|
+
if _preferences.include_tags:
|
|
279
|
+
new_soup = BeautifulSoup("", "html.parser")
|
|
280
|
+
for tag in _preferences.include_tags:
|
|
281
|
+
for element in soup.find_all(tag): # type: ignore[union-attr]
|
|
282
|
+
new_soup.append(element) # type: ignore[arg-type]
|
|
283
|
+
soup = new_soup
|
|
284
|
+
|
|
285
|
+
html = str(soup)
|
|
248
286
|
|
|
249
|
-
|
|
287
|
+
# Convert to markdown
|
|
288
|
+
markdown = convert(html)
|
|
289
|
+
return html, markdown
|
|
250
290
|
|
|
251
|
-
|
|
291
|
+
def extract[T: BaseModel](
|
|
292
|
+
self,
|
|
293
|
+
browser: Browser,
|
|
294
|
+
urls: Sequence[str],
|
|
295
|
+
output: type[T],
|
|
296
|
+
prompt: str | None = None,
|
|
297
|
+
extraction_preferences: ExtractionPreferences | None = None,
|
|
298
|
+
ignore_invalid_urls: bool = True,
|
|
299
|
+
) -> ExtractionResult[T]:
|
|
300
|
+
return run_sync(
|
|
301
|
+
self.extract_async(
|
|
302
|
+
browser=browser,
|
|
252
303
|
urls=urls,
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
extraction_preferences=
|
|
256
|
-
|
|
304
|
+
output=output,
|
|
305
|
+
prompt=prompt,
|
|
306
|
+
extraction_preferences=extraction_preferences,
|
|
307
|
+
ignore_invalid_urls=ignore_invalid_urls,
|
|
257
308
|
)
|
|
309
|
+
)
|
|
310
|
+
|
|
311
|
+
@needs("playwright")
|
|
312
|
+
async def extract_async[T: BaseModel](
|
|
313
|
+
self,
|
|
314
|
+
browser: Browser,
|
|
315
|
+
urls: Sequence[str],
|
|
316
|
+
output: type[T],
|
|
317
|
+
prompt: str | None = None,
|
|
318
|
+
extraction_preferences: ExtractionPreferences | None = None,
|
|
319
|
+
ignore_invalid_urls: bool = True,
|
|
320
|
+
) -> ExtractionResult[T]:
|
|
321
|
+
_preferences = extraction_preferences or ExtractionPreferences()
|
|
258
322
|
|
|
323
|
+
html, markdown = await self.extract_markdown_async(
|
|
324
|
+
browser=browser,
|
|
325
|
+
urls=urls,
|
|
326
|
+
extraction_preferences=_preferences,
|
|
327
|
+
ignore_invalid_urls=ignore_invalid_urls,
|
|
328
|
+
)
|
|
259
329
|
|
|
260
|
-
|
|
330
|
+
# Prepare and send prompt
|
|
331
|
+
_prompt = _PROMPT.compile(
|
|
332
|
+
user_instructions=prompt or "Not provided.", markdown=markdown
|
|
333
|
+
)
|
|
334
|
+
|
|
335
|
+
if isinstance(self.llm, GenerationProvider):
|
|
336
|
+
response = await self.llm.generate_by_prompt_async(
|
|
337
|
+
prompt=_prompt,
|
|
338
|
+
model=self.model,
|
|
339
|
+
developer_prompt=_INSTRUCTIONS,
|
|
340
|
+
response_schema=output,
|
|
341
|
+
)
|
|
342
|
+
else:
|
|
343
|
+
response = await self.llm.respond_async(
|
|
344
|
+
input=_prompt,
|
|
345
|
+
model=self.model,
|
|
346
|
+
instructions=_INSTRUCTIONS,
|
|
347
|
+
reasoning=self.reasoning,
|
|
348
|
+
text_format=output,
|
|
349
|
+
)
|
|
350
|
+
|
|
351
|
+
output_parsed = (
|
|
352
|
+
response.parsed
|
|
353
|
+
if isinstance(response, Generation)
|
|
354
|
+
else response.output_parsed
|
|
355
|
+
)
|
|
356
|
+
|
|
357
|
+
await browser.close()
|
|
358
|
+
|
|
359
|
+
return ExtractionResult[T](
|
|
360
|
+
urls=urls,
|
|
361
|
+
html=html,
|
|
362
|
+
markdown=markdown,
|
|
363
|
+
extraction_preferences=_preferences,
|
|
364
|
+
output_parsed=output_parsed,
|
|
365
|
+
)
|
|
366
|
+
|
|
367
|
+
|
|
368
|
+
async def test() -> None:
|
|
261
369
|
from dotenv import load_dotenv
|
|
370
|
+
from playwright import async_api
|
|
262
371
|
|
|
263
372
|
load_dotenv()
|
|
264
373
|
|
|
@@ -268,8 +377,8 @@ if __name__ == "__main__":
|
|
|
268
377
|
possiveis_redirecionamentos: list[str]
|
|
269
378
|
|
|
270
379
|
extractor = Extractor(
|
|
271
|
-
llm=Responder.
|
|
272
|
-
model="
|
|
380
|
+
llm=Responder.openrouter(),
|
|
381
|
+
model="google/gemini-2.5-flash",
|
|
273
382
|
)
|
|
274
383
|
|
|
275
384
|
# Example with custom extraction preferences
|
|
@@ -281,12 +390,20 @@ if __name__ == "__main__":
|
|
|
281
390
|
timeout_ms=15000,
|
|
282
391
|
)
|
|
283
392
|
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
393
|
+
async with async_api.async_playwright() as p:
|
|
394
|
+
browser = await p.chromium.launch(headless=True)
|
|
395
|
+
|
|
396
|
+
result = await extractor.extract_async(
|
|
397
|
+
browser=browser,
|
|
398
|
+
urls=[site_uniube],
|
|
399
|
+
output=PossiveisRedirecionamentos,
|
|
400
|
+
prompt="Extract the possible redirects from the page.",
|
|
401
|
+
extraction_preferences=preferences,
|
|
402
|
+
)
|
|
403
|
+
|
|
404
|
+
for link in result.output_parsed.possiveis_redirecionamentos:
|
|
405
|
+
print(f"Link: {link}")
|
|
290
406
|
|
|
291
|
-
|
|
292
|
-
|
|
407
|
+
|
|
408
|
+
if __name__ == "__main__":
|
|
409
|
+
asyncio.run(test())
|
|
@@ -63,7 +63,7 @@ agentle/agents/a2a/tasks/managment/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQe
|
|
|
63
63
|
agentle/agents/a2a/tasks/managment/in_memory.py,sha256=_G5VuXqEPBMtE6XJg1d7WmqFr1qzd0-99FoqM_qMwAE,23841
|
|
64
64
|
agentle/agents/a2a/tasks/managment/task_manager.py,sha256=rBCuzu4DqIs55xDnwXY0w5Rs9ybv6OJpgpugAQLhtoU,3112
|
|
65
65
|
agentle/agents/apis/__init__.py,sha256=PX7oAe0hRGvyLB295DrBF1VBsqgp5ZmGI4BCZvLUozo,2811
|
|
66
|
-
agentle/agents/apis/api.py,sha256=
|
|
66
|
+
agentle/agents/apis/api.py,sha256=PyYcZJJR0f8c7VceQMLzUfnvj3belNbCJJNmVdXfdXo,26610
|
|
67
67
|
agentle/agents/apis/api_key_authentication.py,sha256=MtMA4qkCjM3ou42a1fDgKI4u3NQkB_Zr-gEA5_oysZ0,1311
|
|
68
68
|
agentle/agents/apis/api_key_location.py,sha256=0pj_8rTkd0pkUJ2eP_Kur3AvT3JD8JpFIxQsWDzeg_c,188
|
|
69
69
|
agentle/agents/apis/api_metrics.py,sha256=SyvJdvEMKp7rGij-tDZ6vxjwc26MIFrKrcckMc4Q1Zg,380
|
|
@@ -78,7 +78,7 @@ agentle/agents/apis/cache_strategy.py,sha256=uoAvmUm1EE8426anuLR4PoygLgO4SPN1Qm_
|
|
|
78
78
|
agentle/agents/apis/circuit_breaker.py,sha256=9yopLPoZ6WMlPMWPoacayORoGN1CZq2EGWB0gCu3GOY,2371
|
|
79
79
|
agentle/agents/apis/circuit_breaker_error.py,sha256=I5XyCWwFCXTDzhvS7CpMZwBRxMyCk96g7MfBLBqAvhg,127
|
|
80
80
|
agentle/agents/apis/circuit_breaker_state.py,sha256=6IwcWWKNmE0cnpogcfsnhpy_EVIk7crQ7WiDN9YkkbE,277
|
|
81
|
-
agentle/agents/apis/endpoint.py,sha256=
|
|
81
|
+
agentle/agents/apis/endpoint.py,sha256=4skIMje2oEbc6ONb-Ww66wh8VGP5tj1JZACE3p7U62E,22600
|
|
82
82
|
agentle/agents/apis/endpoint_parameter.py,sha256=A_SVje6AyNeeJNDxWL__uGc4ZNZ2se6GvawS--kUYEU,20230
|
|
83
83
|
agentle/agents/apis/endpoints_to_tools.py,sha256=5KzxRLjfUYx7MGKHe65NMPHyTKOcd8tdK0uYzfGnO5g,999
|
|
84
84
|
agentle/agents/apis/file_upload.py,sha256=PzJ1197EKLCdBHrLDztgifM3WWFQZ8K2CKkTeeYpJas,587
|
|
@@ -136,9 +136,10 @@ agentle/agents/templates/data_collection/progressive_profilling_agent.py,sha256=
|
|
|
136
136
|
agentle/agents/ui/__init__.py,sha256=IjHRV0k2DNwvFrEHebmsXiBvmITE8nQUnsR07h9tVkU,7
|
|
137
137
|
agentle/agents/ui/streamlit.py,sha256=9afICL0cxtG1o2pWh6vH39-NdKiVfADKiXo405F2aB0,42829
|
|
138
138
|
agentle/agents/whatsapp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
139
|
-
agentle/agents/whatsapp/
|
|
139
|
+
agentle/agents/whatsapp/human_delay_calculator.py,sha256=BGCDeoNTPsMn4d_QYmG0BWGCG8SiUJC6Fk295ulAsAk,18268
|
|
140
|
+
agentle/agents/whatsapp/whatsapp_bot.py,sha256=D51UD2Wbi47RKgxUU8J7iWsxNMqV99Rzujz6TV1DjGw,160179
|
|
140
141
|
agentle/agents/whatsapp/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
141
|
-
agentle/agents/whatsapp/models/audio_message.py,sha256=
|
|
142
|
+
agentle/agents/whatsapp/models/audio_message.py,sha256=kUqG1HdNW6DCYD-CqscJ6WHlAyv9ufmTSKMdjio9XWk,2705
|
|
142
143
|
agentle/agents/whatsapp/models/context_info.py,sha256=sk80KuNE36S6VRnLh7n6UXmzZCXIB4E4lNxnRyVizg8,563
|
|
143
144
|
agentle/agents/whatsapp/models/data.py,sha256=iertlmc2zbzjuIdTubkO-4RuwB0ni1qx7UvvW7Dlhuo,1245
|
|
144
145
|
agentle/agents/whatsapp/models/device_list_metadata.py,sha256=Nki7esmgi9Zq70L2t4yjJH77clvaljStchVGrXiDlbU,808
|
|
@@ -151,7 +152,7 @@ agentle/agents/whatsapp/models/message_context_info.py,sha256=msCSuu8uMN3G9GDaXd
|
|
|
151
152
|
agentle/agents/whatsapp/models/quoted_message.py,sha256=QC4sp7eLPE9g9i-_f3avb0sDO7gKpkzZR2qkbxqptts,1073
|
|
152
153
|
agentle/agents/whatsapp/models/video_message.py,sha256=-wujSwdYaE3tst7K_rUYCvx6v4lySTW1JZ5burF6slg,3422
|
|
153
154
|
agentle/agents/whatsapp/models/whatsapp_audio_message.py,sha256=AAcnjzJC1O5VjyWZaSWpG_tmZFc2-CdcPn9abjyLrpc,378
|
|
154
|
-
agentle/agents/whatsapp/models/whatsapp_bot_config.py,sha256=
|
|
155
|
+
agentle/agents/whatsapp/models/whatsapp_bot_config.py,sha256=hkbOAdSZbSt634mZEARuPQSer7wf3biL7dL9TFW-a7o,37164
|
|
155
156
|
agentle/agents/whatsapp/models/whatsapp_contact.py,sha256=6iO6xmFs7z9hd1N9kZzGyNHYvCaUoCHn3Yi1DAJN4YU,240
|
|
156
157
|
agentle/agents/whatsapp/models/whatsapp_document_message.py,sha256=ECM_hXF-3IbC9itbtZI0eA_XRNXFVefw9Mr-Lo_lrH0,323
|
|
157
158
|
agentle/agents/whatsapp/models/whatsapp_image_message.py,sha256=xOAPRRSgqj9gQ2ZZOGdFWfOgtmNpE1W8mIUAmB5YTpo,314
|
|
@@ -177,6 +178,15 @@ agentle/agents/whatsapp/providers/twilio/__init__.py,sha256=47DEQpj8HBSa-_TImW-5
|
|
|
177
178
|
agentle/agents/whatsapp/providers/twilio/twilio_api_config.py,sha256=Xd9Ui5w3eAMfB12zJb_uQfzMFN5Gbx1Dgz7OIlUk0Yo,1580
|
|
178
179
|
agentle/agents/whatsapp/providers/twilio/twilio_api_provider.py,sha256=7QbHEuQGozNVTf8xpxOR5RkYeA3om60nUlYbB4zmn5Y,29250
|
|
179
180
|
agentle/agents/whatsapp/providers/twilio/twilio_webhook_parser.py,sha256=j9KCOZlbv9horcaFeQJ9fM9bGOr1YWsGqBWGuIhkFqY,10274
|
|
181
|
+
agentle/agents/whatsapp/v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
182
|
+
agentle/agents/whatsapp/v2/batch_processor_manager.py,sha256=2cwnFSstbfgIVdlHeMyf5qAr4fEQx_V7Jlt_4O5scWA,55
|
|
183
|
+
agentle/agents/whatsapp/v2/bot_config.py,sha256=eRuDZgrb5EvnRUh8qWSOcyoWZfjOZ2dTEAWZtdHCYVc,7705
|
|
184
|
+
agentle/agents/whatsapp/v2/in_memory_batch_processor_manager.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
185
|
+
agentle/agents/whatsapp/v2/message_limit.py,sha256=ea-ISLPNtm-jGsMLKF-gr0hQPDpB7HLqX7N5mh1WN28,199
|
|
186
|
+
agentle/agents/whatsapp/v2/payload.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
187
|
+
agentle/agents/whatsapp/v2/whatsapp_bot.py,sha256=B6wrsv7oJswATxsjvwgBLJxCpFjpgf9crzuhcLmmz8g,413
|
|
188
|
+
agentle/agents/whatsapp/v2/whatsapp_cloud_api_provider.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
189
|
+
agentle/agents/whatsapp/v2/whatsapp_provider.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
180
190
|
agentle/autonomous_systems/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
181
191
|
agentle/autonomous_systems/agent.py,sha256=tGrYTD4Rs4hgCF4mfHhjgMLPM0rNQZ5JACU_kA2cXe8,5078
|
|
182
192
|
agentle/autonomous_systems/agent_input_type.py,sha256=mnzELBiKwgj0EYYM4U8GE50XRkys3m6ds_VR7aS-SGA,201
|
|
@@ -993,7 +1003,7 @@ agentle/voice_cloning/voice_cloner.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJW
|
|
|
993
1003
|
agentle/web/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
994
1004
|
agentle/web/extraction_preferences.py,sha256=Xb4X6ZgnbDuu4Pp7cI0sdPcv6LaR1Q94FPTNEoHVTGg,985
|
|
995
1005
|
agentle/web/extraction_result.py,sha256=IsbRdT_wA9RVYGToCiz17XRoWMTtiFzxky96Zwqa4ZY,318
|
|
996
|
-
agentle/web/extractor.py,sha256=
|
|
1006
|
+
agentle/web/extractor.py,sha256=ISNYVoofry47HtB0oDhmb2Eof15ZhTL-qdyes1mYSbQ,15585
|
|
997
1007
|
agentle/web/location.py,sha256=RZgqb2rW7wUdcbw3PnmDtfr4FkTSSovW0j70ZOvoRiw,64
|
|
998
1008
|
agentle/web/actions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
999
1009
|
agentle/web/actions/action.py,sha256=krxW5vXaqB1_JfnPpuo5cVJyANrlElu9P0B0TrF_aZs,723
|
|
@@ -1007,7 +1017,7 @@ agentle/web/actions/scroll.py,sha256=WqVVAORNDK3BL1oASZBPmXJYeSVkPgAOmWA8ibYO82I
|
|
|
1007
1017
|
agentle/web/actions/viewport.py,sha256=KCwm88Pri19Qc6GLHC69HsRxmdJz1gEEAODfggC_fHo,287
|
|
1008
1018
|
agentle/web/actions/wait.py,sha256=IKEywjf-KC4ni9Gkkv4wgc7bY-hk7HwD4F-OFWlyf2w,571
|
|
1009
1019
|
agentle/web/actions/write_text.py,sha256=9mxfHcpKs_L7BsDnJvOYHQwG8M0GWe61SRJAsKk3xQ8,748
|
|
1010
|
-
agentle-0.9.
|
|
1011
|
-
agentle-0.9.
|
|
1012
|
-
agentle-0.9.
|
|
1013
|
-
agentle-0.9.
|
|
1020
|
+
agentle-0.9.26.dist-info/METADATA,sha256=ZCnSN_aDQlrUPevRNIgjpDU64lRUcusv_5bx3ZcDFP0,86849
|
|
1021
|
+
agentle-0.9.26.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
1022
|
+
agentle-0.9.26.dist-info/licenses/LICENSE,sha256=T90S9vqRS6qP-voULxAcvwEs558wRRo6dHuZrjgcOUI,1085
|
|
1023
|
+
agentle-0.9.26.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|