lumivor 0.1.7__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
lumivor/agent/tests.py ADDED
@@ -0,0 +1,204 @@
1
+ import pytest
2
+
3
+ from lumivor.agent.views import (
4
+ ActionResult,
5
+ AgentBrain,
6
+ AgentHistory,
7
+ AgentHistoryList,
8
+ AgentOutput,
9
+ )
10
+ from lumivor.browser.views import BrowserState, BrowserStateHistory, TabInfo
11
+ from lumivor.controller.registry.service import Registry
12
+ from lumivor.controller.views import ClickElementAction, DoneAction, ExtractPageContentAction
13
+ from lumivor.dom.views import DOMElementNode
14
+
15
+
16
@pytest.fixture
def sample_browser_state():
    """Provide a minimal single-tab BrowserState rooted at an empty DOM node."""
    page_url = 'https://example.com'
    page_title = 'Example Page'

    only_tab = TabInfo(url=page_url, title=page_title, page_id=1)
    # A childless, parentless root node is enough for tests that only need a tree.
    root_node = DOMElementNode(
        tag_name='root',
        is_visible=True,
        parent=None,
        xpath='',
        attributes={},
        children=[],
    )

    return BrowserState(
        url=page_url,
        title=page_title,
        tabs=[only_tab],
        screenshot='screenshot1.png',
        element_tree=root_node,
        selector_map={},
    )
34
+
35
+
36
@pytest.fixture
def action_registry():
    """Return the dynamic action model built from three no-op test actions."""
    reg = Registry()

    # The handlers below do nothing; only their registration matters for the tests.
    @reg.action(
        description='Click an element',
        requires_browser=True,
        param_model=ClickElementAction,
    )
    def click_element(params: ClickElementAction, browser=None):
        return None

    @reg.action(
        description='Extract page content',
        requires_browser=True,
        param_model=ExtractPageContentAction,
    )
    def extract_page_content(params: ExtractPageContentAction, browser=None):
        return None

    @reg.action(description='Mark task as done', param_model=DoneAction)
    def done(params: DoneAction):
        return None

    # Dynamic ActionModel covering every action registered above
    action_model = reg.create_action_model()
    return action_model
61
+
62
+
63
@pytest.fixture
def sample_history(action_registry):
    """Build a three-step history: click, extract (with an error), then done."""

    def snapshot(url, title, page_id, screenshot):
        # Browser state recorded for one step; no element interaction is stored.
        return BrowserStateHistory(
            url=url,
            title=title,
            tabs=[TabInfo(url=url, title=title, page_id=page_id)],
            screenshot=screenshot,
            interacted_element=[],
        )

    # Actions use the nested {action_name: params} structure
    click_action = action_registry(click_element={'index': 1})
    extract_action = action_registry(extract_page_content={'value': 'text'})
    done_action = action_registry(done={'text': 'Task completed'})

    click_step = AgentHistory(
        model_output=AgentOutput(
            current_state=AgentBrain(
                evaluation_previous_goal='None',
                memory='Started task',
                next_goal='Click button',
            ),
            action=[click_action],
        ),
        result=[ActionResult(is_done=False)],
        state=snapshot('https://example.com', 'Page 1', 1, 'screenshot1.png'),
    )

    extract_step = AgentHistory(
        model_output=AgentOutput(
            current_state=AgentBrain(
                evaluation_previous_goal='Clicked button',
                memory='Button clicked',
                next_goal='Extract content',
            ),
            action=[extract_action],
        ),
        result=[
            ActionResult(
                is_done=False,
                extracted_content='Extracted text',
                error='Failed to extract completely',
            )
        ],
        state=snapshot('https://example.com/page2', 'Page 2', 2, 'screenshot2.png'),
    )

    done_step = AgentHistory(
        model_output=AgentOutput(
            current_state=AgentBrain(
                evaluation_previous_goal='Extracted content',
                memory='Content extracted',
                next_goal='Finish task',
            ),
            action=[done_action],
        ),
        result=[
            ActionResult(is_done=True, extracted_content='Task completed', error=None)
        ],
        state=snapshot('https://example.com/page2', 'Page 2', 2, 'screenshot3.png'),
    )

    return AgentHistoryList(history=[click_step, extract_step, done_step])
137
+
138
+
139
def test_last_model_output(sample_history: AgentHistoryList):
    """The last action of the sample history is the 'done' action."""
    expected = {'done': {'text': 'Task completed'}}
    final_action = sample_history.last_action()
    print(final_action)
    assert final_action == expected
143
+
144
+
145
def test_get_errors(sample_history: AgentHistoryList):
    """Only the extraction step of the sample history carries an error."""
    collected = sample_history.errors()
    assert len(collected) == 1
    first_error = collected[0]
    assert first_error == 'Failed to extract completely'
149
+
150
+
151
def test_final_result(sample_history: AgentHistoryList):
    """The final result is the extracted content of the last step's last result."""
    outcome = sample_history.final_result()
    assert outcome == 'Task completed'
153
+
154
+
155
def test_is_done(sample_history: AgentHistoryList):
    """The sample history ends with a result flagged as done."""
    # Identity check instead of `== True`: pins an actual bool, not merely a
    # value that compares equal to True.
    assert sample_history.is_done() is True
157
+
158
+
159
def test_urls(sample_history: AgentHistoryList):
    """Both URLs visited in the sample history are reported."""
    visited = sample_history.urls()
    for expected_url in ('https://example.com', 'https://example.com/page2'):
        assert expected_url in visited
163
+
164
+
165
def test_all_screenshots(sample_history: AgentHistoryList):
    """Screenshots are returned once per step, in step order."""
    expected = ['screenshot1.png', 'screenshot2.png', 'screenshot3.png']
    shots = sample_history.screenshots()
    assert len(shots) == 3
    assert shots == expected
170
+
171
+
172
def test_all_model_outputs(sample_history: AgentHistoryList):
    """Every action of every step is dumped, in order, without None-valued fields."""
    outputs = sample_history.model_actions()
    assert len(outputs) == 3
    # The fixture creates the click action with only `index` set, and
    # model_actions() dumps with exclude_none=True, so no `xpath` key can
    # appear here (this matches what test_action_creation asserts for the
    # same construction).
    assert outputs[0] == {'click_element': {'index': 1}}
    assert outputs[1] == {'extract_page_content': {'value': 'text'}}
    assert outputs[2] == {'done': {'text': 'Task completed'}}
179
+
180
+
181
def test_all_model_outputs_filtered(sample_history: AgentHistoryList):
    """Filtering by 'click_element' keeps just the single click action."""
    clicks = sample_history.model_actions_filtered(include=['click_element'])
    assert len(clicks) == 1
    click_params = clicks[0]['click_element']
    assert click_params['index'] == 1
185
+
186
+
187
def test_empty_history():
    """An empty history has no last action, no result, is not done, and has no URLs."""
    empty_history = AgentHistoryList(history=[])
    assert empty_history.last_action() is None
    assert empty_history.final_result() is None
    # Identity check instead of `== False`: 0 or another falsy stand-in
    # would not satisfy it.
    assert empty_history.is_done() is False
    assert len(empty_history.urls()) == 0
193
+
194
+
195
+ # Add a test to verify action creation
196
def test_action_creation(action_registry):
    """An action built from the dynamic model dumps back to its nested params."""
    expected_dump = {'click_element': {'index': 1}}
    click_action = action_registry(click_element={'index': 1})
    dumped = click_action.model_dump(exclude_none=True)
    assert dumped == expected_dump
201
+
202
+
203
+ # run this with:
204
+ # pytest lumivor/agent/tests.py
lumivor/agent/views.py ADDED
@@ -0,0 +1,272 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import traceback
5
+ from dataclasses import dataclass
6
+ from pathlib import Path
7
+ from typing import Any, Dict, Optional, Type
8
+
9
+ from openai import RateLimitError
10
+ from pydantic import BaseModel, ConfigDict, Field, ValidationError, create_model
11
+
12
+ from lumivor.browser.views import BrowserStateHistory
13
+ from lumivor.controller.registry.views import ActionModel
14
+ from lumivor.dom.history_tree_processor.service import (
15
+ DOMElementNode,
16
+ DOMHistoryElement,
17
+ HistoryTreeProcessor,
18
+ )
19
+ from lumivor.dom.views import SelectorMap
20
+
21
+
22
@dataclass
class AgentStepInfo:
    # Plain pair of integers describing a step position.
    # NOTE(review): presumably the current step index and the step budget of
    # an agent run — confirm against the code that constructs this.
    step_number: int
    max_steps: int
26
+
27
+
28
class ActionResult(BaseModel):
    """Result of executing an action"""

    # Read by AgentHistoryList.is_done() on the last result of the last step.
    is_done: Optional[bool] = False
    # Collected by AgentHistoryList.extracted_content() / final_result().
    extracted_content: Optional[str] = None
    # Collected by AgentHistoryList.errors() when non-empty.
    error: Optional[str] = None
    # whether to include in past messages as context or not
    include_in_memory: bool = False
36
+
37
+
38
class AgentBrain(BaseModel):
    """Current state of the agent"""

    # Three required free-text fields; AgentHistoryList.model_thoughts()
    # returns one AgentBrain per step that has a model output.
    evaluation_previous_goal: str
    memory: str
    next_goal: str
44
+
45
+
46
class AgentOutput(BaseModel):
    """Output model for agent

    @dev note: this model is extended with custom actions in AgentService. You can also use some fields that are not in this model as provided by the linter, as long as they are registered in the DynamicActions model.
    """

    model_config = ConfigDict(arbitrary_types_allowed=True)

    current_state: AgentBrain
    action: list[ActionModel]

    @staticmethod
    def type_with_custom_actions(custom_actions: Type[ActionModel]) -> Type['AgentOutput']:
        """Return an AgentOutput subclass whose ``action`` items are ``custom_actions``."""
        # Required field (no default), typed to the caller-supplied action model
        action_field = (list[custom_actions], Field(...))
        extended_model = create_model(
            'AgentOutput',
            __base__=AgentOutput,
            __module__=AgentOutput.__module__,
            action=action_field,
        )
        return extended_model
67
+
68
+
69
class AgentHistory(BaseModel):
    """History item for agent actions"""

    model_output: AgentOutput | None
    result: list[ActionResult]
    state: BrowserStateHistory

    model_config = ConfigDict(
        arbitrary_types_allowed=True, protected_namespaces=())

    @staticmethod
    def get_interacted_element(
        model_output: AgentOutput, selector_map: SelectorMap
    ) -> list[DOMHistoryElement | None]:
        """Map each action of ``model_output`` to the element it targets.

        Returns one entry per action, in order: the history form of the
        element found in ``selector_map`` under the action's index, or
        ``None`` when the action has no index or the index is not mapped.
        """
        elements = []
        for action in model_output.action:
            index = action.get_index()
            # Compare against None explicitly: a bare truthiness test would
            # treat a valid index of 0 as "no index".
            if index is not None and index in selector_map:
                el: DOMElementNode = selector_map[index]
                elements.append(
                    HistoryTreeProcessor.convert_dom_element_to_history_element(el))
            else:
                elements.append(None)
        return elements

    def model_dump(self, **kwargs) -> Dict[str, Any]:
        """Custom serialization handling circular references

        ``kwargs`` are accepted for signature compatibility but are not
        forwarded; actions and results are always dumped with
        ``exclude_none=True`` and the state via its own ``to_dict()``.
        """

        # Handle action serialization
        model_output_dump = None
        if self.model_output:
            action_dump = [
                action.model_dump(exclude_none=True) for action in self.model_output.action
            ]
            model_output_dump = {
                'current_state': self.model_output.current_state.model_dump(),
                'action': action_dump,  # This preserves the actual action data
            }

        return {
            'model_output': model_output_dump,
            'result': [r.model_dump(exclude_none=True) for r in self.result],
            'state': self.state.to_dict(),
        }
113
+
114
+
115
class AgentHistoryList(BaseModel):
    """List of agent history items"""

    history: list[AgentHistory]

    def __str__(self) -> str:
        """Representation of the AgentHistoryList object"""
        return f'AgentHistoryList(all_results={self.action_results()}, all_model_outputs={self.model_actions()})'

    def __repr__(self) -> str:
        """Representation of the AgentHistoryList object"""
        return self.__str__()

    def save_to_file(self, filepath: str | Path) -> None:
        """Save history to JSON file with proper serialization

        Parent directories are created when missing. Any error from
        directory creation, serialization or writing propagates to the
        caller unchanged.
        """
        Path(filepath).parent.mkdir(parents=True, exist_ok=True)
        data = self.model_dump()
        with open(filepath, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2)

    def model_dump(self, **kwargs) -> Dict[str, Any]:
        """Custom serialization that properly uses AgentHistory's model_dump"""
        return {
            'history': [h.model_dump(**kwargs) for h in self.history],
        }

    @classmethod
    def load_from_file(
        cls, filepath: str | Path, output_model: Type[AgentOutput]
    ) -> 'AgentHistoryList':
        """Load history from JSON file

        Each step's ``model_output`` dict is validated with ``output_model``
        so custom actions are restored; any other truthy ``model_output``
        value is replaced by ``None``.
        """
        with open(filepath, 'r', encoding='utf-8') as f:
            data = json.load(f)
        # loop through history and validate output_model actions to enrich with custom actions
        for h in data['history']:
            if h['model_output']:
                if isinstance(h['model_output'], dict):
                    h['model_output'] = output_model.model_validate(
                        h['model_output'])
                else:
                    h['model_output'] = None
            # Files that lack this key still need it present for validation
            if 'interacted_element' not in h['state']:
                h['state']['interacted_element'] = None
        return cls.model_validate(data)

    def last_action(self) -> None | dict:
        """Last action in history

        Returns ``None`` when there is no history, the last step has no
        model output, or that output contains no actions.
        """
        if not self.history:
            return None
        last_output = self.history[-1].model_output
        # Guard the empty action list as well, so [-1] cannot raise IndexError
        if last_output and last_output.action:
            return last_output.action[-1].model_dump(exclude_none=True)
        return None

    def errors(self) -> list[str]:
        """Get all errors from history"""
        errors = []
        for h in self.history:
            errors.extend([r.error for r in h.result if r.error])
        return errors

    def final_result(self) -> None | str:
        """Final result from history

        Returns the extracted content of the last result of the last step,
        or ``None`` when there is no history, the last step has no results,
        or that content is empty.
        """
        # Same emptiness guard as is_done(): a step with an empty result list
        # must not raise IndexError.
        if self.history and self.history[-1].result:
            content = self.history[-1].result[-1].extracted_content
            if content:
                return content
        return None

    def is_done(self) -> bool:
        """Check if the agent is done"""
        if (
            self.history
            and len(self.history[-1].result) > 0
            and self.history[-1].result[-1].is_done
        ):
            return self.history[-1].result[-1].is_done
        return False

    def has_errors(self) -> bool:
        """Check if the agent has any errors"""
        return len(self.errors()) > 0

    def urls(self) -> list[str]:
        """Get the URL of every step in order (repeats are kept, empty URLs skipped)"""
        return [h.state.url for h in self.history if h.state.url]

    def screenshots(self) -> list[str]:
        """Get all screenshots from history"""
        return [h.state.screenshot for h in self.history if h.state.screenshot]

    def action_names(self) -> list[str]:
        """Get all action names from history"""
        return [list(action.keys())[0] for action in self.model_actions()]

    def model_thoughts(self) -> list[AgentBrain]:
        """Get all thoughts from history"""
        return [h.model_output.current_state for h in self.history if h.model_output]

    def model_outputs(self) -> list[AgentOutput]:
        """Get all model outputs from history"""
        return [h.model_output for h in self.history if h.model_output]

    # get all actions with params
    def model_actions(self) -> list[dict]:
        """Get all actions from history"""
        outputs = []

        for h in self.history:
            if h.model_output:
                for action in h.model_output.action:
                    output = action.model_dump(exclude_none=True)
                    outputs.append(output)
        return outputs

    def action_results(self) -> list[ActionResult]:
        """Get all results from history"""
        results = []
        for h in self.history:
            results.extend([r for r in h.result if r])
        return results

    def extracted_content(self) -> list[str]:
        """Get all extracted content from history"""
        content = []
        for h in self.history:
            content.extend(
                [r.extracted_content for r in h.result if r.extracted_content])
        return content

    def model_actions_filtered(self, include: list[str] | None = None) -> list[dict]:
        """Get the model actions whose action name is listed in ``include``

        ``include`` defaults to no names (empty result). A ``None`` default
        replaces the former shared mutable ``[]`` default; passing a list
        behaves exactly as before.
        """
        wanted_names = [] if include is None else include
        outputs = self.model_actions()
        result = []
        for o in outputs:
            for i in wanted_names:
                if i == list(o.keys())[0]:
                    result.append(o)
        return result
253
+
254
+
255
class AgentError:
    """Container for agent error handling"""

    VALIDATION_ERROR = 'Invalid model output format. Please follow the correct schema.'
    RATE_LIMIT_ERROR = 'Rate limit reached. Waiting before retry.'
    NO_VALID_ACTION = 'No valid action found'

    @staticmethod
    def format_error(error: Exception, include_trace: bool = False) -> str:
        """Format error message based on error type and optionally include trace

        Validation errors get the schema hint plus details, and rate-limit
        errors get a fixed message; neither ever includes a trace. Any other
        error is rendered with ``str(error)``, followed by its stack trace
        when ``include_trace`` is true.
        """
        if isinstance(error, ValidationError):
            return f'{AgentError.VALIDATION_ERROR}\nDetails: {str(error)}'
        if isinstance(error, RateLimitError):
            return AgentError.RATE_LIMIT_ERROR
        if include_trace:
            # Format the traceback of the error that was passed in, not of
            # whatever exception happens to be handled at call time; inside
            # the matching `except` block both give the same text.
            trace = ''.join(
                traceback.format_exception(type(error), error, error.__traceback__)
            )
            return f'{str(error)}\nStacktrace:\n{trace}'
        return f'{str(error)}'
@@ -0,0 +1,208 @@
1
+ """
2
+ Playwright browser on steroids.
3
+ """
4
+
5
+ import asyncio
6
+ import logging
7
+ from dataclasses import dataclass, field
8
+
9
+ from playwright._impl._api_structures import ProxySettings
10
+ from playwright.async_api import Browser as PlaywrightBrowser
11
+ from playwright.async_api import (
12
+ Playwright,
13
+ async_playwright,
14
+ )
15
+
16
+ from lumivor.browser.context import BrowserContext, BrowserContextConfig
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
@dataclass
class BrowserConfig:
    """
    Configuration for the Browser.

    Default values:
        headless: False
            Whether to run browser in headless mode

        disable_security: True
            Disable browser security features

        extra_chromium_args: []
            Extra arguments to pass to the browser

        wss_url: None
            Connect to a browser instance via WebSocket

        chrome_instance_path: None
            Path to a Chrome instance to use to connect to your normal browser
            e.g. '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome'
            (no shell escaping: the path is passed as a single argv entry)

        proxy: None
            Proxy settings passed to the browser launch

        new_context_config: BrowserContextConfig()
            A fresh default context configuration per BrowserConfig instance
    """

    headless: bool = False
    disable_security: bool = True
    extra_chromium_args: list[str] = field(default_factory=list)
    chrome_instance_path: str | None = None
    wss_url: str | None = None

    proxy: ProxySettings | None = field(default=None)
    new_context_config: BrowserContextConfig = field(
        default_factory=BrowserContextConfig)
53
+
54
+
55
+ # @singleton: TODO - think about if singleton makes sense here
56
+ # @dev By default this is a singleton, but you can create multiple instances if you need to.
57
class Browser:
    """
    Playwright browser on steroids.

    This is persistent browser factory that can spawn multiple browser contexts.
    It is recommended to use only one instance of Browser per your application (RAM usage will grow otherwise).
    """

    def __init__(
        self,
        config: BrowserConfig | None = None,
    ):
        """Store the configuration; Playwright itself is started lazily.

        Args:
            config: Browser configuration. When omitted, a fresh default
                ``BrowserConfig`` is created for this instance, so instances
                never share one mutable default object.
        """
        logger.debug('Initializing new browser')
        self.config = config if config is not None else BrowserConfig()
        self.playwright: Playwright | None = None
        self.playwright_browser: PlaywrightBrowser | None = None

    async def new_context(
        self, config: BrowserContextConfig | None = None
    ) -> BrowserContext:
        """Create a browser context

        A fresh default ``BrowserContextConfig`` is used when ``config`` is
        omitted.
        """
        context_config = config if config is not None else BrowserContextConfig()
        return BrowserContext(config=context_config, browser=self)

    async def get_playwright_browser(self) -> PlaywrightBrowser:
        """Get the underlying Playwright browser, initializing it on first use"""
        if self.playwright_browser is None:
            return await self._init()

        return self.playwright_browser

    async def _init(self):
        """Initialize the browser session"""
        playwright = await async_playwright().start()
        browser = await self._setup_browser(playwright)

        self.playwright = playwright
        self.playwright_browser = browser

        return self.playwright_browser

    async def _setup_browser(self, playwright: Playwright) -> PlaywrightBrowser:
        """Sets up and returns a Playwright Browser instance with anti-detection measures.

        Connection strategy, in priority order:
        1. ``config.wss_url`` set: connect to that WebSocket endpoint.
        2. ``config.chrome_instance_path`` set: reuse a Chrome already
           listening on localhost:9222, otherwise start that binary with
           remote debugging enabled and connect over CDP.
        3. Otherwise: launch a bundled Chromium with the configured flags.

        Raises:
            RuntimeError: when the freshly started Chrome instance cannot be
                reached over CDP.
        """
        if self.config.wss_url:
            browser = await playwright.chromium.connect(self.config.wss_url)
            return browser
        elif self.config.chrome_instance_path:
            import subprocess

            import requests

            try:
                # Check if browser is already running
                # NOTE(review): this is a blocking HTTP call inside a coroutine
                # (bounded by the 2 second timeout).
                response = requests.get(
                    'http://localhost:9222/json/version', timeout=2)
                if response.status_code == 200:
                    logger.info('Reusing existing Chrome instance')
                    browser = await playwright.chromium.connect_over_cdp(
                        endpoint_url='http://localhost:9222',
                        timeout=20000,  # 20 second timeout for connection
                    )
                    return browser
            except requests.ConnectionError:
                logger.debug(
                    'No existing Chrome instance found, starting a new one')

            # Start a new Chrome instance
            subprocess.Popen(
                [
                    self.config.chrome_instance_path,
                    '--remote-debugging-port=9222',
                ],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )

            # Attempt to connect again after starting a new instance
            # NOTE(review): there is no wait between spawning Chrome and
            # connecting; confirm the CDP connect tolerates a port that is
            # not open yet.
            try:
                browser = await playwright.chromium.connect_over_cdp(
                    endpoint_url='http://localhost:9222',
                    timeout=20000,  # 20 second timeout for connection
                )
                return browser
            except Exception as e:
                logger.error(
                    f'Failed to start a new Chrome instance.: {str(e)}')
                # Chain the original failure so the root cause stays visible
                raise RuntimeError(
                    ' To start chrome in Debug mode, you need to close all existing Chrome instances and try again otherwise we can not connect to the instance.'
                ) from e

        else:
            try:
                disable_security_args = []
                if self.config.disable_security:
                    disable_security_args = [
                        '--disable-web-security',
                        '--disable-site-isolation-trials',
                        '--disable-features=IsolateOrigins,site-per-process',
                    ]

                browser = await playwright.chromium.launch(
                    headless=self.config.headless,
                    args=[
                        '--no-sandbox',
                        '--disable-blink-features=AutomationControlled',
                        '--disable-infobars',
                        '--disable-background-timer-throttling',
                        '--disable-popup-blocking',
                        '--disable-backgrounding-occluded-windows',
                        '--disable-renderer-backgrounding',
                        '--disable-window-activation',
                        '--disable-focus-on-load',
                        '--no-first-run',
                        '--no-default-browser-check',
                        '--no-startup-window',
                        '--window-position=0,0',
                        # '--window-size=1280,1000',
                    ]
                    + disable_security_args
                    + self.config.extra_chromium_args,
                    proxy=self.config.proxy,
                )

                return browser
            except Exception as e:
                logger.error(
                    f'Failed to initialize Playwright browser: {str(e)}')
                raise

    async def close(self):
        """Close the browser instance

        Best effort: failures are logged at debug level, and both handles
        are cleared regardless of the outcome.
        """
        try:
            if self.playwright_browser:
                await self.playwright_browser.close()
            if self.playwright:
                await self.playwright.stop()
        except Exception as e:
            logger.debug(f'Failed to close browser properly: {e}')
        finally:
            self.playwright_browser = None
            self.playwright = None

    def __del__(self):
        """Async cleanup when object is destroyed

        Schedules ``close()`` on the running event loop. When no loop is
        running, ``asyncio.get_running_loop()`` raises ``RuntimeError``; that
        is logged at debug level and cleanup is skipped.
        """
        try:
            if self.playwright_browser or self.playwright:
                # get_running_loop() only ever returns a loop that is running,
                # so no is_running() check or asyncio.run() fallback is needed.
                loop = asyncio.get_running_loop()
                loop.create_task(self.close())
        except Exception as e:
            logger.debug(f'Failed to cleanup browser in destructor: {e}')