lumivor 0.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,557 @@
1
+ import asyncio
2
+ import logging
3
+
4
+ from main_content_extractor import MainContentExtractor
5
+ from playwright.async_api import Page
6
+
7
+ from lumivor.agent.views import ActionModel, ActionResult
8
+ from lumivor.browser.context import BrowserContext
9
+ from lumivor.controller.registry.service import Registry
10
+ from lumivor.controller.views import (
11
+ ClickElementAction,
12
+ DoneAction,
13
+ ExtractPageContentAction,
14
+ GoToUrlAction,
15
+ InputTextAction,
16
+ OpenTabAction,
17
+ ScrollAction,
18
+ SearchGoogleAction,
19
+ SendKeysAction,
20
+ SwitchTabAction,
21
+ )
22
+ from lumivor.utils import time_execution_async, time_execution_sync
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
+
27
+ class Controller:
28
def __init__(self) -> None:
    """Create the action registry and fill it with the built-in browser actions."""
    registry = Registry()
    self.registry = registry
    # Registration relies on ``self.registry`` being set, so it runs last.
    self._register_default_actions()
33
+
34
+ def _register_default_actions(self):
35
+ """Register all default browser actions"""
36
+
37
+ # Basic Navigation Actions
38
@self.registry.action(
    'Search Google in the current tab',
    param_model=SearchGoogleAction,
    requires_browser=True,
)
async def search_google(params: SearchGoogleAction, browser: BrowserContext):
    """Run a Google search for ``params.query`` in the current page.

    The query is percent-encoded before it is placed in the URL so that
    characters such as ``&``, ``#``, ``+`` or spaces are sent as part of the
    search term instead of being parsed as URL syntax.

    Returns an ``ActionResult`` whose ``extracted_content`` is the logged
    summary message (which shows the query exactly as the caller supplied it).
    """
    # Function-scope import: keeps the new dependency local to this action.
    from urllib.parse import quote_plus

    page = await browser.get_current_page()
    await page.goto(f'https://www.google.com/search?q={quote_plus(params.query)}')
    await page.wait_for_load_state()
    msg = f'🔍 Searched for "{params.query}" in Google'
    logger.info(msg)
    return ActionResult(extracted_content=msg, include_in_memory=True)
50
+
51
@self.registry.action(
    'Navigate to URL in the current tab',
    param_model=GoToUrlAction,
    requires_browser=True,
)
async def go_to_url(params: GoToUrlAction, browser: BrowserContext):
    """Load ``params.url`` in the current page and wait for its load state.

    Returns an ``ActionResult`` carrying the logged summary message.
    """
    current_page = await browser.get_current_page()
    destination = params.url
    await current_page.goto(destination)
    await current_page.wait_for_load_state()

    summary = f'🔗 Navigated to {destination}'
    logger.info(summary)
    return ActionResult(extracted_content=summary, include_in_memory=True)
61
+
62
@self.registry.action('Go back', requires_browser=True)
async def go_back(browser: BrowserContext):
    """Step the current page back one history entry and wait for its load state.

    Returns an ``ActionResult`` carrying the logged summary message.
    """
    current_page = await browser.get_current_page()
    await current_page.go_back()
    await current_page.wait_for_load_state()

    summary = '🔙 Navigated back'
    logger.info(summary)
    return ActionResult(extracted_content=summary, include_in_memory=True)
70
+
71
+ # Element Interaction Actions
72
@self.registry.action(
    'Click element', param_model=ClickElementAction, requires_browser=True
)
async def click_element(params: ClickElementAction, browser: BrowserContext):
    """Click the element stored under ``params.index`` in the cached selector map.

    Behaviour, in order:
    * raises ``Exception`` when the index is not in the cached selector map;
    * returns without clicking when ``browser.is_file_uploader`` reports that
      the element opens a file-upload dialog;
    * otherwise clicks, and if the number of pages in the context grew,
      switches to the last tab.

    Any exception raised while clicking is logged as a warning and returned
    as ``ActionResult(error=...)`` rather than propagated.
    """
    session = await browser.get_session()
    state = session.cached_state

    if params.index not in state.selector_map:
        # Replacement fields are kept on one line: splitting them across lines
        # inside a single-quoted f-string only parses on Python 3.12+.
        raise Exception(
            f'Element with index {params.index} does not exist - retry or use alternative actions'
        )

    element_node = state.selector_map[params.index]
    # Page count before the click, used afterwards to detect a newly opened tab.
    initial_pages = len(session.context.pages)

    # if element has file uploader then dont click
    if await browser.is_file_uploader(element_node):
        msg = (
            f'Index {params.index} - has an element which opens file upload dialog. '
            'To upload files please use a specific function to upload files '
        )
        logger.info(msg)
        return ActionResult(extracted_content=msg, include_in_memory=True)

    try:
        await browser._click_element_node(element_node)
        msg = f'🖱️ Clicked index {params.index}'
        logger.info(msg)
        logger.debug(f'Element xpath: {element_node.xpath}')
        if len(session.context.pages) > initial_pages:
            new_tab_msg = 'New tab opened - switching to it'
            msg += f' - {new_tab_msg}'
            logger.info(new_tab_msg)
            await browser.switch_to_tab(-1)
        return ActionResult(extracted_content=msg, include_in_memory=True)
    except Exception as e:
        logger.warning(
            f'Element no longer available with index {params.index} - most likely the page changed'
        )
        return ActionResult(error=str(e))
114
+
115
@self.registry.action(
    'Input text into a input interactive element',
    param_model=InputTextAction,
    requires_browser=True,
)
async def input_text(params: InputTextAction, browser: BrowserContext):
    """Type ``params.text`` into the element stored under ``params.index``.

    Raises ``Exception`` when the index is not in the cached selector map.
    Returns an ``ActionResult`` carrying the logged summary message.
    """
    session = await browser.get_session()
    state = session.cached_state

    if params.index not in state.selector_map:
        # Replacement field kept on one line so the f-string also parses on
        # interpreters older than Python 3.12.
        raise Exception(
            f'Element index {params.index} does not exist - retry or use alternative actions'
        )

    element_node = state.selector_map[params.index]
    await browser._input_text_element_node(element_node, params.text)
    msg = f'⌨️ Input "{params.text}" into index {params.index}'
    logger.info(msg)
    logger.debug(f'Element xpath: {element_node.xpath}')
    return ActionResult(extracted_content=msg, include_in_memory=True)
136
+
137
+ # Tab Management Actions
138
@self.registry.action('Switch tab', param_model=SwitchTabAction, requires_browser=True)
async def switch_tab(params: SwitchTabAction, browser: BrowserContext):
    """Switch to the tab identified by ``params.page_id`` and wait for its load state.

    Returns an ``ActionResult`` carrying the logged summary message.
    """
    tab_id = params.page_id
    await browser.switch_to_tab(tab_id)

    # The page is fetched only after switching, so it is the newly selected tab.
    active_page = await browser.get_current_page()
    await active_page.wait_for_load_state()

    summary = f'🔄 Switched to tab {tab_id}'
    logger.info(summary)
    return ActionResult(extracted_content=summary, include_in_memory=True)
147
+
148
@self.registry.action(
    'Open url in new tab',
    param_model=OpenTabAction,
    requires_browser=True,
)
async def open_tab(params: OpenTabAction, browser: BrowserContext):
    """Open ``params.url`` in a new tab via ``browser.create_new_tab``.

    Returns an ``ActionResult`` carrying the logged summary message.
    """
    destination = params.url
    await browser.create_new_tab(destination)

    summary = f'🔗 Opened new tab with {destination}'
    logger.info(summary)
    return ActionResult(extracted_content=summary, include_in_memory=True)
156
+
157
+ # Content Actions
158
@self.registry.action(
    'Extract page content to get the text or markdown ',
    param_model=ExtractPageContentAction,
    requires_browser=True,
)
async def extract_content(params: ExtractPageContentAction, browser: BrowserContext):
    """Extract the main content of the current page in the requested format.

    The page HTML is passed to ``MainContentExtractor.extract`` with
    ``params.value`` as the output format. The extracted content is embedded
    in the returned message; unlike most other actions, the result does not
    set ``include_in_memory``.
    """
    current_page = await browser.get_current_page()
    html = await current_page.content()

    content = MainContentExtractor.extract(  # type: ignore
        html=html,
        output_format=params.value,
    )

    summary = f'📄 Extracted page content\n: {content}\n'
    logger.info(summary)
    return ActionResult(extracted_content=summary)
173
+
174
@self.registry.action('Complete task', param_model=DoneAction)
async def done(params: DoneAction):
    """Mark the task as finished, returning ``params.text`` as the extracted content."""
    final_text = params.text
    return ActionResult(is_done=True, extracted_content=final_text)
177
+
178
@self.registry.action(
    'Scroll down the page by pixel amount - if no amount is specified, scroll down one page',
    param_model=ScrollAction,
    requires_browser=True,
)
async def scroll_down(params: ScrollAction, browser: BrowserContext):
    """Scroll the current page down.

    With ``params.amount`` set, scrolls by that many pixels through
    ``window.scrollBy``; otherwise presses the ``PageDown`` key.
    Returns an ``ActionResult`` carrying the logged summary message.
    """
    page = await browser.get_current_page()
    if params.amount is not None:
        await page.evaluate(f'window.scrollBy(0, {params.amount});')
        # Single-line replacement field: the multi-line form needs Python 3.12+.
        amount = f'{params.amount} pixels'
    else:
        await page.keyboard.press('PageDown')
        amount = 'one page'

    msg = f'🔍 Scrolled down the page by {amount}'
    logger.info(msg)
    return ActionResult(
        extracted_content=msg,
        include_in_memory=True,
    )
198
+
199
+ # scroll up
200
@self.registry.action(
    'Scroll up the page by pixel amount - if no amount is specified, scroll up one page',
    param_model=ScrollAction,
    requires_browser=True,
)
async def scroll_up(params: ScrollAction, browser: BrowserContext):
    """Scroll the current page up.

    With ``params.amount`` set, scrolls by the negated amount through
    ``window.scrollBy``; otherwise presses the ``PageUp`` key.
    Returns an ``ActionResult`` carrying the logged summary message.
    """
    page = await browser.get_current_page()
    if params.amount is not None:
        # Negate in Python rather than prefixing '-' in the script text: a
        # negative amount would otherwise render as `--N`, which JavaScript
        # rejects. Non-negative amounts produce the same script as before.
        await page.evaluate(f'window.scrollBy(0, {-params.amount});')
        # Single-line replacement field: the multi-line form needs Python 3.12+.
        amount = f'{params.amount} pixels'
    else:
        await page.keyboard.press('PageUp')
        amount = 'one page'

    msg = f'🔍 Scrolled up the page by {amount}'
    logger.info(msg)
    return ActionResult(
        extracted_content=msg,
        include_in_memory=True,
    )
220
+
221
+ # send keys
222
@self.registry.action(
    'Send strings of special keys like Backspace, Insert, PageDown, Delete, Enter, Shortcuts such as `Control+o`, `Control+Shift+T` are supported as well. This gets used in keyboard.press. Be aware of different operating systems and their shortcuts',
    param_model=SendKeysAction,
    requires_browser=True,
)
async def send_keys(params: SendKeysAction, browser: BrowserContext):
    """Press ``params.keys`` on the current page's keyboard.

    The string is handed unchanged to ``page.keyboard.press``.
    Returns an ``ActionResult`` carrying the logged summary message.
    """
    keys = params.keys
    current_page = await browser.get_current_page()
    await current_page.keyboard.press(keys)

    summary = f'⌨️ Sent keys: {keys}'
    logger.info(summary)
    return ActionResult(extracted_content=summary, include_in_memory=True)
234
+
235
@self.registry.action(
    description='If you dont find something which you want to interact with, scroll to it',
    requires_browser=True,
)
async def scroll_to_text(text: str, browser: BrowserContext):  # type: ignore
    """Scroll the first visible element containing ``text`` into view.

    Three locator strategies are tried in order (``get_by_text``, a ``text=``
    selector, and an XPath ``contains`` query). The first one that matches a
    visible element wins. A failing strategy is logged at debug level and
    skipped. If none succeeds, a "not found" message is returned as content;
    an unexpected failure outside the per-strategy handling is returned as an
    error result.
    """
    page = await browser.get_current_page()
    try:
        # All candidate locators are built up front, before any is queried.
        candidates = (
            page.get_by_text(text, exact=False),
            page.locator(f'text={text}'),
            page.locator(f"//*[contains(text(), '{text}')]"),
        )

        for candidate in candidates:
            try:
                # Skip strategies that match nothing or only hidden elements.
                if await candidate.count() <= 0:
                    continue
                if not await candidate.first.is_visible():
                    continue

                await candidate.first.scroll_into_view_if_needed()
                # Short pause so the scroll can finish.
                await asyncio.sleep(0.5)
                found_msg = f'🔍 Scrolled to text: {text}'
                logger.info(found_msg)
                return ActionResult(extracted_content=found_msg, include_in_memory=True)
            except Exception as e:
                logger.debug(f'Locator attempt failed: {str(e)}')
                continue

        missing_msg = f"Text '{text}' not found or not visible on page"
        logger.info(missing_msg)
        return ActionResult(extracted_content=missing_msg, include_in_memory=True)

    except Exception as e:
        failure_msg = f"Failed to scroll to text '{text}': {str(e)}"
        logger.error(failure_msg)
        return ActionResult(error=failure_msg, include_in_memory=True)
271
+
272
@self.registry.action(
    description='Get all options from a native dropdown',
    requires_browser=True,
)
async def get_dropdown_options(index: int, browser: BrowserContext) -> ActionResult:
    """Get all options from a native dropdown

    The element's xpath (looked up via ``index`` in the browser's selector
    map) is evaluated in every frame of the current page. Options from each
    frame where the xpath resolves are collected as
    ``"<index>: <text> (value=<value>)"`` lines.

    Returns an ``ActionResult`` whose content is the newline-joined option
    list, a "no options" message, or an error description. Failures in this
    action are reported as content, not through the ``error`` field.
    A missing ``index`` raises ``KeyError`` from the selector-map lookup.
    """
    page = await browser.get_current_page()
    selector_map = await browser.get_selector_map()
    dom_element = selector_map[index]

    # Runs inside each frame: resolves the xpath and serialises the options.
    list_options_js = """
        (xpath) => {
            const select = document.evaluate(xpath, document, null,
                XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
            if (!select) return null;

            return {
                options: Array.from(select.options).map(opt => ({
                    text: opt.text.trim(),
                    value: opt.value,
                    index: opt.index
                })),
                id: select.id,
                name: select.name
            };
        }
    """

    try:
        # Frame-aware approach since we know it works
        all_options = []

        for frame_index, frame in enumerate(page.frames):
            try:
                options = await frame.evaluate(list_options_js, dom_element.xpath)

                if options:
                    logger.debug(f'Found dropdown in frame {frame_index}')
                    # f-strings are kept on one line each: replacement fields
                    # split across lines only parse on Python 3.12+.
                    logger.debug(f"Dropdown ID: {options['id']}, Name: {options['name']}")

                    all_options.extend(
                        f"{opt['index']}: {opt['text']} (value={opt['value']})"
                        for opt in options['options']
                    )

            except Exception as frame_e:
                # A frame that cannot be evaluated is skipped, not fatal.
                logger.debug(f'Frame {frame_index} evaluation failed: {str(frame_e)}')

        if all_options:
            msg = '\n'.join(all_options)
        else:
            msg = 'No options found in any frame for dropdown'
        logger.info(msg)
        return ActionResult(extracted_content=msg, include_in_memory=True)

    except Exception as e:
        logger.error(f'Failed to get dropdown options: {str(e)}')
        msg = f'Error getting options: {str(e)}'
        logger.info(msg)
        return ActionResult(extracted_content=msg, include_in_memory=True)
345
+
346
@self.registry.action(
    description='Select dropdown option for interactive element index by the text of the option you want to select',
    requires_browser=True,
)
async def select_dropdown_option(
    index: int,
    text: str,
    browser: BrowserContext,
) -> ActionResult:
    """Select dropdown option by the text of the option you want to select

    The element's xpath (looked up via ``index`` in the browser's selector
    map) is evaluated in every frame of the current page. In the first frame
    where it resolves to a ``<select>``, the option whose trimmed text equals
    ``text`` is selected and a ``change`` event is dispatched.

    Returns an ``ActionResult`` with:
    * a success message as content when an option was selected;
    * an explanatory message as content when the element is not a ``select``
      or no frame could perform the selection;
    * ``error`` set when an unexpected exception escapes the frame loop.
    A missing ``index`` raises ``KeyError`` from the selector-map lookup.
    """
    page = await browser.get_current_page()
    selector_map = await browser.get_selector_map()
    dom_element = selector_map[index]

    # Validate that we're working with a select element
    if dom_element.tag_name != 'select':
        # All f-strings below keep their replacement fields on one line;
        # splitting them across lines only parses on Python 3.12+.
        logger.error(
            f'Element is not a select! Tag: {dom_element.tag_name}, Attributes: {dom_element.attributes}'
        )
        msg = f'Cannot select option: Element with index {index} is a {dom_element.tag_name}, not a select'
        return ActionResult(extracted_content=msg, include_in_memory=True)

    logger.debug(f"Attempting to select '{text}' using xpath: {dom_element.xpath}")
    logger.debug(f'Element attributes: {dom_element.attributes}')
    logger.debug(f'Element tag: {dom_element.tag_name}')

    # Both scripts are loop-invariant, so they are defined once up front.
    # Probe: does the xpath resolve to a <select> in this frame?
    find_dropdown_js = """
        (xpath) => {
            try {
                const select = document.evaluate(xpath, document, null,
                    XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
                if (!select) return null;
                if (select.tagName.toLowerCase() !== 'select') {
                    return {
                        error: `Found element but it's a ${select.tagName}, not a SELECT`,
                        found: false
                    };
                }
                return {
                    id: select.id,
                    name: select.name,
                    found: true,
                    tagName: select.tagName,
                    optionCount: select.options.length,
                    currentValue: select.value,
                    availableOptions: Array.from(select.options).map(o => o.text.trim())
                };
            } catch (e) {
                return {error: e.toString(), found: false};
            }
        }
    """

    # Select: pick the option by exact trimmed text and fire `change`.
    select_option_js = """
        (params) => {
            try {
                const select = document.evaluate(params.xpath, document, null,
                    XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
                if (!select || select.tagName.toLowerCase() !== 'select') {
                    return {success: false, error: 'Select not found or invalid element type'};
                }

                const option = Array.from(select.options)
                    .find(opt => opt.text.trim() === params.text);

                if (!option) {
                    return {
                        success: false,
                        error: 'Option not found',
                        availableOptions: Array.from(select.options).map(o => o.text.trim())
                    };
                }

                select.value = option.value;
                select.dispatchEvent(new Event('change'));
                return {
                    success: true,
                    selectedValue: option.value,
                    selectedText: option.text.trim()
                };
            } catch (e) {
                return {success: false, error: e.toString()};
            }
        }
    """

    try:
        # enumerate() keeps the frame number correct on every path; a manual
        # counter would be skipped by the `continue` below.
        for frame_index, frame in enumerate(page.frames):
            try:
                logger.debug(f'Trying frame {frame_index} URL: {frame.url}')

                # First verify we can find the dropdown in this frame
                dropdown_info = await frame.evaluate(find_dropdown_js, dom_element.xpath)

                # A null result means the xpath did not resolve in this frame.
                if not dropdown_info:
                    continue

                if not dropdown_info.get('found'):
                    logger.error(f"Frame {frame_index} error: {dropdown_info.get('error')}")
                    continue

                logger.debug(f'Found dropdown in frame {frame_index}: {dropdown_info}')

                params = {'xpath': dom_element.xpath, 'text': text}
                result = await frame.evaluate(select_option_js, params)
                logger.debug(f'Selection result: {result}')

                if result.get('success'):
                    # The closing parenthesis after the value balances the
                    # "(value=..." opened in the message.
                    msg = f"Selected option '{text}' (value={result.get('selectedValue')})"
                    logger.info(msg + f' in frame {frame_index}')
                    return ActionResult(extracted_content=msg, include_in_memory=True)

                # Selection failed in this frame: log it and try the next one.
                logger.error(f"Selection failed: {result.get('error')}")
                if 'availableOptions' in result:
                    logger.error(f"Available options: {result['availableOptions']}")

            except Exception as frame_e:
                logger.error(f'Frame {frame_index} attempt failed: {str(frame_e)}')
                logger.error(f'Frame type: {type(frame)}')
                logger.error(f'Frame URL: {frame.url}')

        msg = f"Could not select option '{text}' in any frame"
        logger.info(msg)
        return ActionResult(extracted_content=msg, include_in_memory=True)

    except Exception as e:
        msg = f'Selection failed: {str(e)}'
        logger.error(msg)
        return ActionResult(error=msg, include_in_memory=True)
492
+
493
def action(self, description: str, **kwargs):
    """Decorator for registering custom actions

    Thin pass-through to ``self.registry.action``: the description and every
    keyword argument are forwarded unchanged, and whatever the registry
    returns is handed back to the caller.

    @param description: Describe the LLM what the function does (better description == better function calling)
    """
    register = self.registry.action
    return register(description, **kwargs)
499
+
500
@time_execution_async('--multi-act')
async def multi_act(
    self, actions: list[ActionModel], browser_context: BrowserContext
) -> list[ActionResult]:
    """Execute multiple actions

    Actions run in order through ``self.act``. Execution stops early when:
    * an action other than the first reports an index (``get_index()``) and
      the freshly fetched page state contains element path hashes that were
      not in the state cached before the batch started;
    * the latest result has ``is_done`` or ``error`` set;
    * the last action has been executed.

    Between actions the coroutine sleeps for
    ``browser_context.config.wait_between_actions``.

    Returns the results of the actions that were actually executed.
    """
    results = []

    session = await browser_context.get_session()
    cached_selector_map = session.cached_state.selector_map
    # Snapshot of element path hashes taken before any action runs.
    cached_path_hashes = {
        e.hash.branch_path_hash for e in cached_selector_map.values()
    }
    await browser_context.remove_highlights()

    for i, action in enumerate(actions):
        if action.get_index() is not None and i != 0:
            new_state = await browser_context.get_state()
            new_path_hashes = {
                e.hash.branch_path_hash for e in new_state.selector_map.values()
            }
            # hash all elements. if it is a subset of cached_state its fine -
            # else break (new elements on page)
            if not new_path_hashes.issubset(cached_path_hashes):
                # next action requires index but there are new elements on the page
                # (f-string kept on one line: the multi-line form needs Python 3.12+)
                logger.info(f'Something new appeared after action {i} / {len(actions)}')
                break

        results.append(await self.act(action, browser_context))

        logger.debug(f'Executed action {i + 1} / {len(actions)}')
        if results[-1].is_done or results[-1].error or i == len(actions) - 1:
            break

        await asyncio.sleep(browser_context.config.wait_between_actions)

    return results
535
+
536
# NOTE(review): this coroutine was wrapped with time_execution_sync; the async
# variant is used here to match multi_act. Presumably the sync wrapper only
# timed creation of the coroutine object - confirm against lumivor.utils.
@time_execution_async('--act')
async def act(self, action: ActionModel, browser_context: BrowserContext) -> ActionResult:
    """Execute an action

    The first entry in ``action.model_dump(exclude_unset=True)`` whose
    parameters are not ``None`` is dispatched to
    ``self.registry.execute_action`` with the browser context. The outcome is
    normalised to an ``ActionResult``:
    * ``str``          -> wrapped as ``extracted_content``;
    * ``ActionResult`` -> returned as is;
    * ``None``         -> empty ``ActionResult``.

    Returns an empty ``ActionResult`` when no entry has parameters.

    Raises:
        ValueError: if the registry returns any other type.
        Exception: anything raised by the registry propagates unchanged.
    """
    for action_name, params in action.model_dump(exclude_unset=True).items():
        if params is None:
            continue

        result = await self.registry.execute_action(
            action_name, params, browser=browser_context
        )
        if isinstance(result, str):
            return ActionResult(extracted_content=result)
        if isinstance(result, ActionResult):
            return result
        if result is None:
            return ActionResult()
        # f-string kept on one line: the multi-line form needs Python 3.12+.
        raise ValueError(f'Invalid action result type: {type(result)} of {result}')

    return ActionResult()
@@ -0,0 +1,47 @@
1
+ from typing import Literal, Optional
2
+
3
+ from pydantic import BaseModel
4
+
5
+
6
+ # Action Input Models
7
# Parameters for a Google search action.
class SearchGoogleAction(BaseModel):
    # Search terms to look up.
    query: str
9
+
10
+
11
# Parameters for navigating to a URL.
class GoToUrlAction(BaseModel):
    # Address to navigate to.
    url: str
13
+
14
+
15
# Parameters for clicking an element.
class ClickElementAction(BaseModel):
    # Index identifying the element to click.
    index: int
    # Optional xpath of the element; defaults to None.
    xpath: Optional[str] = None
18
+
19
+
20
# Parameters for typing text into an element.
class InputTextAction(BaseModel):
    # Index identifying the element to type into.
    index: int
    # Text to enter.
    text: str
    # Optional xpath of the element; defaults to None.
    xpath: Optional[str] = None
24
+
25
+
26
# Parameters for completing a task.
class DoneAction(BaseModel):
    # Text supplied when the task is completed.
    text: str
28
+
29
+
30
# Parameters for switching tabs.
class SwitchTabAction(BaseModel):
    # Identifier of the tab to switch to.
    page_id: int
32
+
33
+
34
# Parameters for opening a new tab.
class OpenTabAction(BaseModel):
    # Address to open in the new tab.
    url: str
36
+
37
+
38
# Parameters for extracting page content.
class ExtractPageContentAction(BaseModel):
    # Output format; one of 'text', 'markdown' or 'html'. Defaults to 'text'.
    value: Literal['text', 'markdown', 'html'] = 'text'
40
+
41
+
42
# Parameters for scrolling the page up or down.
class ScrollAction(BaseModel):
    # The number of pixels to scroll. If None, scroll down/up one page.
    amount: Optional[int] = None
44
+
45
+
46
# Parameters for sending keyboard input.
class SendKeysAction(BaseModel):
    # Key or key combination to send.
    keys: str
File without changes