lumivor 0.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lumivor/README.md +51 -0
- lumivor/__init__.py +25 -0
- lumivor/agent/message_manager/service.py +252 -0
- lumivor/agent/message_manager/tests.py +246 -0
- lumivor/agent/message_manager/views.py +37 -0
- lumivor/agent/prompts.py +208 -0
- lumivor/agent/service.py +1017 -0
- lumivor/agent/tests.py +204 -0
- lumivor/agent/views.py +272 -0
- lumivor/browser/browser.py +208 -0
- lumivor/browser/context.py +993 -0
- lumivor/browser/tests/screenshot_test.py +38 -0
- lumivor/browser/tests/test_clicks.py +77 -0
- lumivor/browser/views.py +48 -0
- lumivor/controller/registry/service.py +140 -0
- lumivor/controller/registry/views.py +71 -0
- lumivor/controller/service.py +557 -0
- lumivor/controller/views.py +47 -0
- lumivor/dom/__init__.py +0 -0
- lumivor/dom/buildDomTree.js +428 -0
- lumivor/dom/history_tree_processor/service.py +112 -0
- lumivor/dom/history_tree_processor/view.py +33 -0
- lumivor/dom/service.py +100 -0
- lumivor/dom/tests/extraction_test.py +44 -0
- lumivor/dom/tests/process_dom_test.py +40 -0
- lumivor/dom/views.py +187 -0
- lumivor/logging_config.py +128 -0
- lumivor/telemetry/service.py +114 -0
- lumivor/telemetry/views.py +51 -0
- lumivor/utils.py +54 -0
- lumivor-0.1.7.dist-info/METADATA +100 -0
- lumivor-0.1.7.dist-info/RECORD +34 -0
- lumivor-0.1.7.dist-info/WHEEL +4 -0
- lumivor-0.1.7.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,557 @@
|
|
1
|
+
import asyncio
|
2
|
+
import logging
|
3
|
+
|
4
|
+
from main_content_extractor import MainContentExtractor
|
5
|
+
from playwright.async_api import Page
|
6
|
+
|
7
|
+
from lumivor.agent.views import ActionModel, ActionResult
|
8
|
+
from lumivor.browser.context import BrowserContext
|
9
|
+
from lumivor.controller.registry.service import Registry
|
10
|
+
from lumivor.controller.views import (
|
11
|
+
ClickElementAction,
|
12
|
+
DoneAction,
|
13
|
+
ExtractPageContentAction,
|
14
|
+
GoToUrlAction,
|
15
|
+
InputTextAction,
|
16
|
+
OpenTabAction,
|
17
|
+
ScrollAction,
|
18
|
+
SearchGoogleAction,
|
19
|
+
SendKeysAction,
|
20
|
+
SwitchTabAction,
|
21
|
+
)
|
22
|
+
from lumivor.utils import time_execution_async, time_execution_sync
|
23
|
+
|
24
|
+
logger = logging.getLogger(__name__)
|
25
|
+
|
26
|
+
|
27
|
+
class Controller:
|
28
|
+
def __init__(self):
    """Create a controller that owns a fresh action registry.

    A new ``Registry`` is instantiated and the built-in browser actions
    are registered on it immediately.
    """
    self.registry = Registry()
    self._register_default_actions()
|
33
|
+
|
34
|
+
def _register_default_actions(self):
|
35
|
+
"""Register all default browser actions"""
|
36
|
+
|
37
|
+
# Basic Navigation Actions
|
38
|
+
@self.registry.action(
    'Search Google in the current tab',
    param_model=SearchGoogleAction,
    requires_browser=True,
)
async def search_google(params: SearchGoogleAction, browser: BrowserContext):
    """Run a Google search for ``params.query`` in the current tab.

    The query is percent-encoded before it is placed in the URL so that
    characters such as ``&``, ``#``, ``+``, ``%`` or spaces are sent to
    Google as part of the search terms instead of being parsed as URL
    syntax (which would silently truncate or alter the search).

    Returns:
        An ``ActionResult`` whose ``extracted_content`` is the confirmation
        message (showing the original, unencoded query), flagged for
        inclusion in memory.
    """
    # Function-scope import: keeps the fix self-contained in this block.
    from urllib.parse import quote_plus

    page = await browser.get_current_page()
    await page.goto(f'https://www.google.com/search?q={quote_plus(params.query)}')
    await page.wait_for_load_state()
    msg = f'🔍 Searched for "{params.query}" in Google'
    logger.info(msg)
    return ActionResult(extracted_content=msg, include_in_memory=True)
|
50
|
+
|
51
|
+
@self.registry.action(
    'Navigate to URL in the current tab',
    param_model=GoToUrlAction,
    requires_browser=True,
)
async def go_to_url(params: GoToUrlAction, browser: BrowserContext):
    """Load ``params.url`` in the current tab and wait for its load state.

    Returns an ``ActionResult`` carrying the confirmation message, flagged
    for inclusion in memory.
    """
    current_page = await browser.get_current_page()
    await current_page.goto(params.url)
    await current_page.wait_for_load_state()

    summary = f'🔗 Navigated to {params.url}'
    logger.info(summary)
    return ActionResult(include_in_memory=True, extracted_content=summary)
|
61
|
+
|
62
|
+
@self.registry.action('Go back', requires_browser=True)
async def go_back(browser: BrowserContext):
    """Step the current tab one entry back in its history and wait for load.

    Returns an ``ActionResult`` carrying the confirmation message, flagged
    for inclusion in memory.
    """
    current_page = await browser.get_current_page()
    await current_page.go_back()
    await current_page.wait_for_load_state()

    summary = '🔙 Navigated back'
    logger.info(summary)
    return ActionResult(include_in_memory=True, extracted_content=summary)
|
70
|
+
|
71
|
+
# Element Interaction Actions
|
72
|
+
@self.registry.action(
    'Click element',
    param_model=ClickElementAction,
    requires_browser=True,
)
async def click_element(params: ClickElementAction, browser: BrowserContext):
    """Click the element registered under ``params.index`` in the cached state.

    The index is resolved through the session's cached selector map.
    Elements that ``browser.is_file_uploader`` reports as upload controls
    are not clicked; a message explaining that is returned instead. If the
    number of pages in the context grew after the click, the controller
    switches to the last tab.

    Raises:
        Exception: if ``params.index`` is not in the cached selector map.

    Returns:
        An ``ActionResult`` with the confirmation message, or one with
        ``error`` set to the exception text when the click itself fails.
    """
    session = await browser.get_session()
    cached = session.cached_state
    target_index = params.index

    if target_index not in cached.selector_map:
        raise Exception(
            f'Element with index {target_index} does not exist - retry or use alternative actions'
        )

    element_node = cached.selector_map[target_index]
    # Remember the page count so a tab opened by the click can be detected.
    pages_before = len(session.context.pages)

    # Upload controls are reported back instead of being clicked.
    if await browser.is_file_uploader(element_node):
        upload_msg = f'Index {target_index} - has an element which opens file upload dialog. To upload files please use a specific function to upload files '
        logger.info(upload_msg)
        return ActionResult(extracted_content=upload_msg, include_in_memory=True)

    try:
        await browser._click_element_node(element_node)
        click_msg = f'🖱️  Clicked index {target_index}'
        logger.info(click_msg)
        logger.debug(f'Element xpath: {element_node.xpath}')

        opened_new_tab = len(session.context.pages) > pages_before
        if opened_new_tab:
            notice = 'New tab opened - switching to it'
            click_msg = f'{click_msg} - {notice}'
            logger.info(notice)
            await browser.switch_to_tab(-1)

        return ActionResult(extracted_content=click_msg, include_in_memory=True)
    except Exception as e:
        # Any failure while clicking is reported as a stale element.
        logger.warning(
            f'Element no longer available with index {target_index} - most likely the page changed'
        )
        return ActionResult(error=str(e))
|
114
|
+
|
115
|
+
@self.registry.action(
    'Input text into a input interactive element',
    param_model=InputTextAction,
    requires_browser=True,
)
async def input_text(params: InputTextAction, browser: BrowserContext):
    """Type ``params.text`` into the element registered under ``params.index``.

    The index is resolved through the session's cached selector map.

    Raises:
        Exception: if ``params.index`` is not in the cached selector map.

    Returns:
        An ``ActionResult`` carrying the confirmation message, flagged for
        inclusion in memory.
    """
    session = await browser.get_session()
    cached = session.cached_state
    target_index = params.index

    if target_index not in cached.selector_map:
        raise Exception(
            f'Element index {target_index} does not exist - retry or use alternative actions'
        )

    element_node = cached.selector_map[target_index]
    await browser._input_text_element_node(element_node, params.text)

    summary = f'⌨️  Input "{params.text}" into index {target_index}'
    logger.info(summary)
    logger.debug(f'Element xpath: {element_node.xpath}')
    return ActionResult(include_in_memory=True, extracted_content=summary)
|
136
|
+
|
137
|
+
# Tab Management Actions
|
138
|
+
@self.registry.action(
    'Switch tab',
    param_model=SwitchTabAction,
    requires_browser=True,
)
async def switch_tab(params: SwitchTabAction, browser: BrowserContext):
    """Make the tab identified by ``params.page_id`` the current one.

    After switching, waits for the newly current page to reach its load
    state before reporting success.
    """
    tab_id = params.page_id
    await browser.switch_to_tab(tab_id)

    # The page is fetched only after the switch so it refers to the new tab.
    active_page = await browser.get_current_page()
    await active_page.wait_for_load_state()

    summary = f'🔄 Switched to tab {tab_id}'
    logger.info(summary)
    return ActionResult(include_in_memory=True, extracted_content=summary)
|
147
|
+
|
148
|
+
@self.registry.action(
    'Open url in new tab',
    param_model=OpenTabAction,
    requires_browser=True,
)
async def open_tab(params: OpenTabAction, browser: BrowserContext):
    """Open ``params.url`` in a new tab via ``browser.create_new_tab``.

    Returns an ``ActionResult`` carrying the confirmation message, flagged
    for inclusion in memory.
    """
    target_url = params.url
    await browser.create_new_tab(target_url)

    summary = f'🔗 Opened new tab with {target_url}'
    logger.info(summary)
    return ActionResult(include_in_memory=True, extracted_content=summary)
|
156
|
+
|
157
|
+
# Content Actions
|
158
|
+
@self.registry.action(
    'Extract page content to get the text or markdown ',
    param_model=ExtractPageContentAction,
    requires_browser=True,
)
async def extract_content(params: ExtractPageContentAction, browser: BrowserContext):
    """Extract the main content of the current page in the requested format.

    The page HTML is passed to ``MainContentExtractor.extract`` with
    ``params.value`` as the output format. The full extracted content is
    logged at INFO level and returned as ``extracted_content``; unlike the
    navigation actions, ``include_in_memory`` is not set here.
    """
    current_page = await browser.get_current_page()
    page_html = await current_page.content()

    extracted = MainContentExtractor.extract(  # type: ignore
        html=page_html,
        output_format=params.value,
    )

    report = f'📄  Extracted page content\n: {extracted}\n'
    logger.info(report)
    return ActionResult(extracted_content=report)
|
173
|
+
|
174
|
+
@self.registry.action('Complete task', param_model=DoneAction)
async def done(params: DoneAction):
    """Signal task completion, returning ``params.text`` as the final content.

    This action does not need a browser; it only builds an ``ActionResult``
    with ``is_done=True``.
    """
    final_text = params.text
    return ActionResult(extracted_content=final_text, is_done=True)
|
177
|
+
|
178
|
+
@self.registry.action(
    'Scroll down the page by pixel amount - if no amount is specified, scroll down one page',
    param_model=ScrollAction,
    requires_browser=True,
)
async def scroll_down(params: ScrollAction, browser: BrowserContext):
    """Scroll the current page down.

    With ``params.amount`` set, scrolls by that many pixels through
    ``window.scrollBy``; otherwise presses the ``PageDown`` key.
    """
    current_page = await browser.get_current_page()
    pixels = params.amount

    if pixels is None:
        await current_page.keyboard.press('PageDown')
        distance = 'one page'
    else:
        await current_page.evaluate(f'window.scrollBy(0, {pixels});')
        distance = f'{pixels} pixels'

    summary = f'🔍 Scrolled down the page by {distance}'
    logger.info(summary)
    return ActionResult(include_in_memory=True, extracted_content=summary)
|
198
|
+
|
199
|
+
# scroll up
|
200
|
+
@self.registry.action(
    'Scroll up the page by pixel amount - if no amount is specified, scroll up one page',
    param_model=ScrollAction,
    requires_browser=True,
)
async def scroll_up(params: ScrollAction, browser: BrowserContext):
    """Scroll the current page up.

    With ``params.amount`` set, scrolls by that many pixels through
    ``window.scrollBy``; otherwise presses the ``PageUp`` key.

    The amount is negated in Python rather than by prefixing ``-`` in the
    generated JavaScript: a textual prefix turns a negative amount into
    ``--5``, which is a JavaScript syntax error. For non-negative amounts
    the generated script is unchanged, except that ``0`` now renders as
    ``0`` instead of ``-0``.

    Returns:
        An ``ActionResult`` carrying the confirmation message, flagged for
        inclusion in memory.
    """
    page = await browser.get_current_page()
    if params.amount is not None:
        await page.evaluate(f'window.scrollBy(0, {-params.amount});')
    else:
        await page.keyboard.press('PageUp')

    amount = f'{params.amount} pixels' if params.amount is not None else 'one page'
    msg = f'🔍 Scrolled up the page by {amount}'
    logger.info(msg)
    return ActionResult(
        extracted_content=msg,
        include_in_memory=True,
    )
|
220
|
+
|
221
|
+
# send keys
|
222
|
+
@self.registry.action(
    'Send strings of special keys like Backspace, Insert, PageDown, Delete, Enter, Shortcuts such as `Control+o`, `Control+Shift+T` are supported as well. This gets used in keyboard.press. Be aware of different operating systems and their shortcuts',
    param_model=SendKeysAction,
    requires_browser=True,
)
async def send_keys(params: SendKeysAction, browser: BrowserContext):
    """Press the key or key combination in ``params.keys`` on the current page.

    The string is handed unchanged to the page's ``keyboard.press``.
    """
    current_page = await browser.get_current_page()
    key_combo = params.keys
    await current_page.keyboard.press(key_combo)

    summary = f'⌨️  Sent keys: {key_combo}'
    logger.info(summary)
    return ActionResult(include_in_memory=True, extracted_content=summary)
|
234
|
+
|
235
|
+
@self.registry.action(
    description='If you dont find something which you want to interact with, scroll to it',
    requires_browser=True,
)
async def scroll_to_text(text: str, browser: BrowserContext):  # type: ignore
    """Scroll the first visible element containing ``text`` into view.

    Three locator strategies are tried in order (``get_by_text``, a
    ``text=`` selector, and an XPath ``contains(text(), ...)``). The first
    one that matches a visible element wins. A failing strategy is logged
    at DEBUG level and the next one is tried.

    Returns:
        An ``ActionResult`` with a success or "not found" message in
        ``extracted_content``, or with ``error`` set if something outside
        the individual locator attempts fails.
    """
    page = await browser.get_current_page()
    try:
        candidates = (
            page.get_by_text(text, exact=False),
            page.locator(f'text={text}'),
            page.locator(f"//*[contains(text(), '{text}')]"),
        )

        for candidate in candidates:
            try:
                # Skip strategies that match nothing or only hidden elements.
                matched = await candidate.count() > 0 and await candidate.first.is_visible()
                if matched:
                    await candidate.first.scroll_into_view_if_needed()
                    # Give the scroll a moment to settle before reporting.
                    await asyncio.sleep(0.5)
                    found_msg = f'🔍  Scrolled to text: {text}'
                    logger.info(found_msg)
                    return ActionResult(extracted_content=found_msg, include_in_memory=True)
            except Exception as e:
                logger.debug(f'Locator attempt failed: {str(e)}')

        missing_msg = f"Text '{text}' not found or not visible on page"
        logger.info(missing_msg)
        return ActionResult(extracted_content=missing_msg, include_in_memory=True)

    except Exception as e:
        failure_msg = f"Failed to scroll to text '{text}': {str(e)}"
        logger.error(failure_msg)
        return ActionResult(error=failure_msg, include_in_memory=True)
|
271
|
+
|
272
|
+
@self.registry.action(
    description='Get all options from a native dropdown',
    requires_browser=True,
)
async def get_dropdown_options(index: int, browser: BrowserContext) -> ActionResult:
    """List the options of the native dropdown registered under ``index``.

    The element's XPath is evaluated in every frame of the current page.
    Each frame where it resolves contributes one ``"<index>: <text>
    (value=<value>)"`` line per option. A frame whose evaluation raises is
    logged at DEBUG level and skipped.

    Returns:
        An ``ActionResult`` whose ``extracted_content`` is the
        newline-joined option list, a "no options" notice, or an error
        description. ``include_in_memory`` is always set.
    """
    page = await browser.get_current_page()
    selector_map = await browser.get_selector_map()
    dom_element = selector_map[index]

    # Runs inside each frame: resolve the XPath and describe the <select>.
    read_options_js = """
        (xpath) => {
            const select = document.evaluate(xpath, document, null,
                XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
            if (!select) return null;

            return {
                options: Array.from(select.options).map(opt => ({
                    text: opt.text.trim(),
                    value: opt.value,
                    index: opt.index
                })),
                id: select.id,
                name: select.name
            };
        }
    """

    try:
        collected = []

        for frame_no, frame in enumerate(page.frames):
            try:
                found = await frame.evaluate(read_options_js, dom_element.xpath)

                if found:
                    logger.debug(f'Found dropdown in frame {frame_no}')
                    logger.debug(f"Dropdown ID: {found['id']}, Name: {found['name']}")

                    # Build the full list first so a malformed entry adds nothing.
                    lines = [
                        f"{opt['index']}: {opt['text']} (value={opt['value']})"
                        for opt in found['options']
                    ]
                    collected.extend(lines)

            except Exception as frame_e:
                logger.debug(f'Frame {frame_no} evaluation failed: {str(frame_e)}')

        if collected:
            msg = '\n'.join(collected)
        else:
            msg = 'No options found in any frame for dropdown'
        logger.info(msg)
        return ActionResult(extracted_content=msg, include_in_memory=True)

    except Exception as e:
        logger.error(f'Failed to get dropdown options: {str(e)}')
        msg = f'Error getting options: {str(e)}'
        logger.info(msg)
        return ActionResult(extracted_content=msg, include_in_memory=True)
|
345
|
+
|
346
|
+
@self.registry.action(
    description='Select dropdown option for interactive element index by the text of the option you want to select',
    requires_browser=True,
)
async def select_dropdown_option(
    index: int,
    text: str,
    browser: BrowserContext,
) -> ActionResult:
    """Select the option whose trimmed text equals ``text`` in a native ``<select>``.

    The element registered under ``index`` must have tag name ``select``;
    otherwise a message saying so is returned. Its XPath is then tried in
    every frame of the current page until one frame contains the dropdown
    and the option is selected there.

    Changes from the earlier implementation:
      * frames are numbered with ``enumerate``; previously the ``continue``
        on a non-select match skipped the manual counter increment, so
        later log lines reported the wrong frame number;
      * the success message now has its closing parenthesis;
      * the synthetic ``change`` event is dispatched with ``bubbles: true``
        so that listeners attached to ancestor elements are notified too.

    Returns:
        An ``ActionResult`` with a success or "could not select" message in
        ``extracted_content``, or with ``error`` set if an unexpected
        exception escapes the per-frame handling.
    """
    page = await browser.get_current_page()
    selector_map = await browser.get_selector_map()
    dom_element = selector_map[index]

    # Validate that we're working with a select element
    if dom_element.tag_name != 'select':
        logger.error(
            f'Element is not a select! Tag: {dom_element.tag_name}, Attributes: {dom_element.attributes}'
        )
        msg = f'Cannot select option: Element with index {index} is a {dom_element.tag_name}, not a select'
        return ActionResult(extracted_content=msg, include_in_memory=True)

    logger.debug(f"Attempting to select '{text}' using xpath: {dom_element.xpath}")
    logger.debug(f'Element attributes: {dom_element.attributes}')
    logger.debug(f'Element tag: {dom_element.tag_name}')

    # Both scripts are loop-invariant, so they are defined once up front.
    # Probe: does this frame contain the dropdown, and is it really a <select>?
    find_dropdown_js = """
        (xpath) => {
            try {
                const select = document.evaluate(xpath, document, null,
                    XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
                if (!select) return null;
                if (select.tagName.toLowerCase() !== 'select') {
                    return {
                        error: `Found element but it's a ${select.tagName}, not a SELECT`,
                        found: false
                    };
                }
                return {
                    id: select.id,
                    name: select.name,
                    found: true,
                    tagName: select.tagName,
                    optionCount: select.options.length,
                    currentValue: select.value,
                    availableOptions: Array.from(select.options).map(o => o.text.trim())
                };
            } catch (e) {
                return {error: e.toString(), found: false};
            }
        }
    """

    # Select: set the value and announce the change to the page.
    select_option_js = """
        (params) => {
            try {
                const select = document.evaluate(params.xpath, document, null,
                    XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
                if (!select || select.tagName.toLowerCase() !== 'select') {
                    return {success: false, error: 'Select not found or invalid element type'};
                }

                const option = Array.from(select.options)
                    .find(opt => opt.text.trim() === params.text);

                if (!option) {
                    return {
                        success: false,
                        error: 'Option not found',
                        availableOptions: Array.from(select.options).map(o => o.text.trim())
                    };
                }

                select.value = option.value;
                select.dispatchEvent(new Event('change', {bubbles: true}));
                return {
                    success: true,
                    selectedValue: option.value,
                    selectedText: option.text.trim()
                };
            } catch (e) {
                return {success: false, error: e.toString()};
            }
        }
    """

    try:
        for frame_index, frame in enumerate(page.frames):
            try:
                logger.debug(f'Trying frame {frame_index} URL: {frame.url}')

                # First verify we can find the dropdown in this frame
                dropdown_info = await frame.evaluate(find_dropdown_js, dom_element.xpath)

                if dropdown_info:
                    if not dropdown_info.get('found'):
                        logger.error(f"Frame {frame_index} error: {dropdown_info.get('error')}")
                        continue

                    logger.debug(f'Found dropdown in frame {frame_index}: {dropdown_info}')

                    params = {'xpath': dom_element.xpath, 'text': text}
                    result = await frame.evaluate(select_option_js, params)
                    logger.debug(f'Selection result: {result}')

                    if result.get('success'):
                        msg = f"Selected option '{text}' (value={result.get('selectedValue')})"
                        logger.info(msg + f' in frame {frame_index}')
                        return ActionResult(extracted_content=msg, include_in_memory=True)
                    else:
                        logger.error(f"Selection failed: {result.get('error')}")
                        if 'availableOptions' in result:
                            logger.error(f"Available options: {result['availableOptions']}")

            except Exception as frame_e:
                # A failing frame must not prevent the remaining frames from being tried.
                logger.error(f'Frame {frame_index} attempt failed: {str(frame_e)}')
                logger.error(f'Frame type: {type(frame)}')
                logger.error(f'Frame URL: {frame.url}')

        msg = f"Could not select option '{text}' in any frame"
        logger.info(msg)
        return ActionResult(extracted_content=msg, include_in_memory=True)

    except Exception as e:
        msg = f'Selection failed: {str(e)}'
        logger.error(msg)
        return ActionResult(error=msg, include_in_memory=True)
|
492
|
+
|
493
|
+
def action(self, description: str, **kwargs):
    """Return a decorator that registers a custom action on this controller.

    This simply forwards to ``self.registry.action``.

    @param description: Text telling the LLM what the function does
        (better description == better function calling)
    @param kwargs: Passed through unchanged to ``self.registry.action``
    """
    register = self.registry.action
    return register(description, **kwargs)
|
499
|
+
|
500
|
+
@time_execution_async('--multi-act')
async def multi_act(
    self, actions: list[ActionModel], browser_context: BrowserContext
) -> list[ActionResult]:
    """Execute ``actions`` in order and return the results obtained so far.

    Before any action after the first that carries an element index, the
    page state is re-read. If it now contains an element whose branch-path
    hash was not in the cached state, the remaining actions are abandoned,
    because new elements have appeared on the page.

    Execution also stops after an action that reports ``is_done`` or an
    ``error``, and after the last action. Between actions the coroutine
    sleeps for ``browser_context.config.wait_between_actions``.
    """
    session = await browser_context.get_session()
    known_hashes = {
        node.hash.branch_path_hash
        for node in session.cached_state.selector_map.values()
    }
    await browser_context.remove_highlights()

    outcomes = []
    total = len(actions)

    for position, action in enumerate(actions):
        if action.get_index() is not None and position != 0:
            fresh_state = await browser_context.get_state()
            fresh_hashes = {
                node.hash.branch_path_hash
                for node in fresh_state.selector_map.values()
            }
            # Any hash missing from the cached set means new elements on the page.
            if fresh_hashes - known_hashes:
                logger.info(f'Something new appeared after action {position} / {total}')
                break

        outcome = await self.act(action, browser_context)
        outcomes.append(outcome)
        logger.debug(f'Executed action {position + 1} / {total}')

        is_last = position == total - 1
        if outcome.is_done or outcome.error or is_last:
            break

        await asyncio.sleep(browser_context.config.wait_between_actions)

    return outcomes
|
535
|
+
|
536
|
+
@time_execution_async('--act')
async def act(self, action: ActionModel, browser_context: BrowserContext) -> ActionResult:
    """Execute the single action carried by ``action``.

    ``action`` is dumped with ``exclude_unset=True`` and the first entry
    whose parameters are not ``None`` is dispatched through
    ``self.registry.execute_action``. Its return value is normalised:

      * ``ActionResult`` -> returned as is
      * ``str``          -> wrapped as ``extracted_content``
      * ``None``         -> an empty ``ActionResult``

    If no entry has parameters, an empty ``ActionResult`` is returned.

    This coroutine is wrapped with ``time_execution_async``, matching
    ``multi_act``. It previously used ``time_execution_sync``, which by its
    name is meant for plain functions and would presumably time only the
    creation of the coroutine object (the helper itself is not visible here).
    The former ``try/except Exception as e: raise e`` wrapper re-raised
    unchanged and has been removed; exceptions still propagate to the caller.

    Raises:
        ValueError: if the registered action returns an unsupported type.
    """
    for action_name, params in action.model_dump(exclude_unset=True).items():
        if params is None:
            continue

        result = await self.registry.execute_action(
            action_name, params, browser=browser_context
        )
        if isinstance(result, str):
            return ActionResult(extracted_content=result)
        if isinstance(result, ActionResult):
            return result
        if result is None:
            return ActionResult()
        raise ValueError(f'Invalid action result type: {type(result)} of {result}')

    return ActionResult()
|
@@ -0,0 +1,47 @@
|
|
1
|
+
from typing import Literal, Optional
|
2
|
+
|
3
|
+
from pydantic import BaseModel
|
4
|
+
|
5
|
+
|
6
|
+
# Action Input Models
|
7
|
+
class SearchGoogleAction(BaseModel):
    # Parameters for the controller's "Search Google in the current tab" action.
    # Search terms; the controller places them in the Google search URL.
    query: str
|
9
|
+
|
10
|
+
|
11
|
+
class GoToUrlAction(BaseModel):
    # Parameters for the controller's "Navigate to URL in the current tab" action.
    # URL the controller passes to page.goto for the current tab.
    url: str
|
13
|
+
|
14
|
+
|
15
|
+
class ClickElementAction(BaseModel):
    # Parameters for the controller's "Click element" action.
    # Key of the target element in the cached state's selector map.
    index: int
    # Optional XPath of the element.
    # NOTE(review): the controller's click action reads only `index` -
    # confirm which consumer uses this field.
    xpath: Optional[str] = None
|
18
|
+
|
19
|
+
|
20
|
+
class InputTextAction(BaseModel):
    # Parameters for the controller's "Input text" action.
    # Key of the target element in the cached state's selector map.
    index: int
    # Text the controller types into the element.
    text: str
    # Optional XPath of the element.
    # NOTE(review): the controller's input action reads only `index` and
    # `text` - confirm which consumer uses this field.
    xpath: Optional[str] = None
|
24
|
+
|
25
|
+
|
26
|
+
class DoneAction(BaseModel):
    # Parameters for the controller's "Complete task" action.
    # Final text; returned as extracted_content of the is_done ActionResult.
    text: str
|
28
|
+
|
29
|
+
|
30
|
+
class SwitchTabAction(BaseModel):
    # Parameters for the controller's "Switch tab" action.
    # Tab identifier the controller passes to BrowserContext.switch_to_tab.
    page_id: int
|
32
|
+
|
33
|
+
|
34
|
+
class OpenTabAction(BaseModel):
    # Parameters for the controller's "Open url in new tab" action.
    # URL the controller passes to BrowserContext.create_new_tab.
    url: str
|
36
|
+
|
37
|
+
|
38
|
+
class ExtractPageContentAction(BaseModel):
    # Parameters for the controller's "Extract page content" action.
    # Output format handed to MainContentExtractor.extract as `output_format`;
    # restricted to the three literals below, defaulting to 'text'.
    value: Literal['text', 'markdown', 'html'] = 'text'
|
40
|
+
|
41
|
+
|
42
|
+
class ScrollAction(BaseModel):
    # Parameters shared by the controller's scroll-down and scroll-up actions.
    amount: Optional[int] = None  # The number of pixels to scroll. If None, scroll down/up one page
|
44
|
+
|
45
|
+
|
46
|
+
class SendKeysAction(BaseModel):
    # Parameters for the controller's "send keys" action.
    # Key or key combination (e.g. `Control+o`) forwarded to keyboard.press.
    keys: str
|
lumivor/dom/__init__.py
ADDED
File without changes
|