lumivor 0.1.7__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
lumivor/README.md ADDED
@@ -0,0 +1,51 @@
1
+ # Codebase Structure
2
+
3
+ > The code structure is inspired by https://github.com/Netflix/dispatch.
4
+
5
+ A very good example of how to structure a scalable codebase can also be found in [this repo](https://github.com/zhanymkanov/fastapi-best-practices).
6
+
7
+ Just a brief document about how we should structure our backend codebase.
8
+
9
+ ## Code Structure
10
+
11
+ ```markdown
12
+ src/
13
+ /<service name>/
14
+ models.py
15
+ services.py
16
+ prompts.py
17
+ views.py
18
+ utils.py
19
+ routers.py
20
+
21
+ /_<subservice name>/
22
+ ```
23
+
24
+ ### Service.py
25
+
26
+ Always a single file, except if it becomes too long - more than ~500 lines, split it into \_subservices
27
+
28
+ ### Views.py
29
+
30
+ Always split the views into two parts
31
+
32
+ ```python
33
+ # All
34
+ ...
35
+
36
+ # Requests
37
+ ...
38
+
39
+ # Responses
40
+ ...
41
+ ```
42
+
43
+ If too long → split into multiple files
44
+
45
+ ### Prompts.py
46
+
47
+ Single file; if too long → split into multiple files (one prompt per file or so)
48
+
49
+ ### Routers.py
50
+
51
+ Never split into more than one file
lumivor/__init__.py ADDED
@@ -0,0 +1,25 @@
1
+ from lumivor.dom.service import DomService as DomService
2
+ from lumivor.controller.service import Controller as Controller
3
+ from lumivor.browser.browser import BrowserConfig as BrowserConfig
4
+ from lumivor.browser.browser import Browser as Browser
5
+ from lumivor.agent.views import AgentHistoryList as AgentHistoryList
6
+ from lumivor.agent.views import ActionResult as ActionResult
7
+ from lumivor.agent.views import ActionModel as ActionModel
8
+ from lumivor.agent.service import Agent as Agent
9
+ from lumivor.agent.prompts import SystemPrompt as SystemPrompt
10
+ from lumivor.logging_config import setup_logging
11
+
12
+ setup_logging()
13
+
14
+
15
+ __all__ = [
16
+ 'Agent',
17
+ 'Browser',
18
+ 'BrowserConfig',
19
+ 'Controller',
20
+ 'DomService',
21
+ 'SystemPrompt',
22
+ 'ActionResult',
23
+ 'ActionModel',
24
+ 'AgentHistoryList',
25
+ ]
@@ -0,0 +1,252 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from datetime import datetime
5
+ from typing import List, Optional, Type
6
+
7
+ from langchain_anthropic import ChatAnthropic
8
+ from langchain_core.language_models import BaseChatModel
9
+ from langchain_core.messages import (
10
+ AIMessage,
11
+ BaseMessage,
12
+ HumanMessage,
13
+ )
14
+ from langchain_openai import ChatOpenAI
15
+
16
+ from lumivor.agent.message_manager.views import MessageHistory, MessageMetadata
17
+ from lumivor.agent.prompts import AgentMessagePrompt, SystemPrompt
18
+ from lumivor.agent.views import ActionResult, AgentOutput, AgentStepInfo
19
+ from lumivor.browser.views import BrowserState
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+
24
class MessageManager:
    """Maintains the LLM conversation history for an agent run.

    Every message is stored together with an estimated input-token count so
    the history can be trimmed to fit within ``max_input_tokens`` before it
    is sent to the model.
    """

    def __init__(
        self,
        llm: BaseChatModel,
        task: str,
        action_descriptions: str,
        system_prompt_class: Type[SystemPrompt],
        max_input_tokens: int = 128000,
        estimated_tokens_per_character: int = 3,
        image_tokens: int = 800,
        include_attributes: Optional[list[str]] = None,
        max_error_length: int = 400,
        max_actions_per_step: int = 10,
        tool_call_in_content: bool = True,
    ):
        """Seed the history with the system prompt, an example tool call and the task.

        Args:
            llm: Chat model, used here only for token counting.
            task: The user task; added as the first human message.
            action_descriptions: Available-action descriptions passed to the system prompt.
            system_prompt_class: ``SystemPrompt`` subclass used to build the system message.
            max_input_tokens: Upper bound for the total estimated history tokens.
            estimated_tokens_per_character: Fallback chars→tokens divisor when no tokenizer exists.
            image_tokens: Flat token estimate charged per image attachment.
            include_attributes: DOM attributes to include in state messages (default: none).
            max_error_length: Action errors are truncated to their last this-many characters.
            max_actions_per_step: Forwarded to the system prompt.
            tool_call_in_content: If True, serialize tool calls into the message content
                instead of the ``tool_calls`` field (OpenAI rejects unanswered tool calls).
        """
        self.llm = llm
        self.system_prompt_class = system_prompt_class
        self.max_input_tokens = max_input_tokens
        self.history = MessageHistory()
        self.task = task
        self.action_descriptions = action_descriptions
        self.ESTIMATED_TOKENS_PER_CHARACTER = estimated_tokens_per_character
        self.IMG_TOKENS = image_tokens
        # Copy into a fresh list: the original `= []` default was a shared
        # mutable default argument.
        self.include_attributes = list(include_attributes) if include_attributes else []
        self.max_error_length = max_error_length

        system_message = self.system_prompt_class(
            self.action_descriptions,
            current_date=datetime.now(),
            max_actions_per_step=max_actions_per_step,
        ).get_system_message()

        self._add_message_with_tokens(system_message)
        self.system_prompt = system_message
        self.tool_call_in_content = tool_call_in_content
        # One example AgentOutput tool call so the model sees the expected format.
        tool_calls = [
            {
                'name': 'AgentOutput',
                'args': {
                    'current_state': {
                        'evaluation_previous_goal': 'Unknown - No previous actions to evaluate.',
                        'memory': '',
                        'next_goal': 'Obtain task from user',
                    },
                    'action': [],
                },
                'id': '',
                'type': 'tool_call',
            }
        ]
        if self.tool_call_in_content:
            # openai throws error if tool_calls are not responded -> move to content
            example_tool_call = AIMessage(
                content=f'{tool_calls}',
                tool_calls=[],
            )
        else:
            example_tool_call = AIMessage(
                content='',
                tool_calls=tool_calls,
            )

        self._add_message_with_tokens(example_tool_call)

        task_message = HumanMessage(content=f'Your task is: {task}')
        self._add_message_with_tokens(task_message)

    def add_state_message(
        self,
        state: BrowserState,
        result: Optional[List[ActionResult]] = None,
        step_info: Optional[AgentStepInfo] = None,
    ) -> None:
        """Add browser state as human message.

        Results flagged ``include_in_memory`` are added as their own permanent
        messages; everything else rides along with the (transient) state message.
        """
        # If a result should be kept in memory, add it directly to history and
        # then add the state without the result.
        if result:
            for r in result:
                if r.include_in_memory:
                    if r.extracted_content:
                        msg = HumanMessage(
                            content='Action result: ' + str(r.extracted_content))
                        self._add_message_with_tokens(msg)
                    if r.error:
                        # Keep only the tail of the error to bound its size.
                        msg = HumanMessage(
                            content='Action error: ' +
                            str(r.error)[-self.max_error_length:]
                        )
                        self._add_message_with_tokens(msg)
                    result = None  # if result is in history, we dont want to add it again

        # Otherwise add state message and result to next message (which will not stay in memory)
        state_message = AgentMessagePrompt(
            state,
            result,
            include_attributes=self.include_attributes,
            max_error_length=self.max_error_length,
            step_info=step_info,
        ).get_user_message()
        self._add_message_with_tokens(state_message)

    def _remove_last_state_message(self) -> None:
        """Remove the last state message from history (never system/task messages)."""
        if len(self.history.messages) > 2 and isinstance(
            self.history.messages[-1].message, HumanMessage
        ):
            self.history.remove_message()

    def add_model_output(self, model_output: AgentOutput) -> None:
        """Add model output as AI message."""
        tool_calls = [
            {
                'name': 'AgentOutput',
                'args': model_output.model_dump(mode='json', exclude_unset=True),
                'id': '',
                'type': 'tool_call',
            }
        ]
        if self.tool_call_in_content:
            msg = AIMessage(
                content=f'{tool_calls}',
                tool_calls=[],
            )
        else:
            msg = AIMessage(
                content='',
                tool_calls=tool_calls,
            )

        self._add_message_with_tokens(msg)

    def get_messages(self) -> List[BaseMessage]:
        """Get current message list, trimmed to fit within max tokens."""
        self.cut_messages()
        return [m.message for m in self.history.messages]

    def cut_messages(self):
        """Trim the last message so the total token estimate fits max_input_tokens.

        Strategy: first drop image attachments from the last message, then, if
        still over budget, cut a proportional slice off the end of its text.
        Raises ValueError when nearly the whole message would have to go.
        """
        diff = self.history.total_tokens - self.max_input_tokens
        if diff <= 0:
            return None

        msg = self.history.messages[-1]

        # If the content is a list (text parts + images), remove images first.
        if isinstance(msg.message.content, list):
            text = ''
            # Iterate over a copy: the original looped over the list it was
            # mutating with .remove(), which skips elements.
            for item in list(msg.message.content):
                if isinstance(item, dict) and 'image_url' in item:
                    msg.message.content.remove(item)
                    diff -= self.IMG_TOKENS
                    msg.metadata.input_tokens -= self.IMG_TOKENS
                    self.history.total_tokens -= self.IMG_TOKENS
                    logger.debug(
                        'Removed image with %d tokens - total tokens now: %d/%d',
                        self.IMG_TOKENS,
                        self.history.total_tokens,
                        self.max_input_tokens,
                    )
                elif isinstance(item, dict) and 'text' in item:
                    text += item['text']
            # Collapse the remaining text parts into a plain string.
            msg.message.content = text
            self.history.messages[-1] = msg

        if diff <= 0:
            return None

        # Still over budget: remove text from the last message proportionally
        # to the number of tokens that need to go.
        proportion_to_remove = diff / msg.metadata.input_tokens
        if proportion_to_remove > 0.99:
            raise ValueError(
                f'Max token limit reached - history is too long - reduce the system prompt or the task, or remove old messages. '
                f'proportion_to_remove: {proportion_to_remove}'
            )
        logger.debug(
            'Removing %.2f%% of the last message (%.2f / %.2f tokens)',
            proportion_to_remove * 100,
            proportion_to_remove * msg.metadata.input_tokens,
            msg.metadata.input_tokens,
        )

        content = msg.message.content
        characters_to_remove = int(len(content) * proportion_to_remove)
        # Slice via an explicit end index: `content[:-0]` would wipe the whole
        # string when characters_to_remove rounds down to 0.
        content = content[: len(content) - characters_to_remove]

        # Remove the old long message (and its token count) ...
        self.history.remove_message(index=-1)

        # ... and re-add it with the shortened content and a fresh count.
        msg = HumanMessage(content=content)
        self._add_message_with_tokens(msg)

        last_msg = self.history.messages[-1]

        logger.debug(
            'Added message with %d tokens - total tokens now: %d/%d - total messages: %d',
            last_msg.metadata.input_tokens,
            self.history.total_tokens,
            self.max_input_tokens,
            len(self.history.messages),
        )

    def _add_message_with_tokens(self, message: BaseMessage) -> None:
        """Add message with token count metadata."""
        token_count = self._count_tokens(message)
        metadata = MessageMetadata(input_tokens=token_count)
        self.history.add_message(message, metadata)

    def _count_tokens(self, message: BaseMessage) -> int:
        """Estimate tokens in a message (flat rate per image, counted text otherwise)."""
        tokens = 0
        if isinstance(message.content, list):
            for item in message.content:
                if isinstance(item, dict) and 'image_url' in item:
                    tokens += self.IMG_TOKENS
                elif isinstance(item, dict) and 'text' in item:
                    tokens += self._count_text_tokens(item['text'])
        else:
            tokens += self._count_text_tokens(message.content)
        return tokens

    def _count_text_tokens(self, text: str) -> int:
        """Count tokens in a text string via the model's tokenizer when available."""
        if isinstance(self.llm, (ChatOpenAI, ChatAnthropic)):
            try:
                return self.llm.get_num_tokens(text)
            except Exception:
                pass  # tokenizer unavailable -> fall through to the estimate
        # Rough character-based estimate if no tokenizer is available.
        return len(text) // self.ESTIMATED_TOKENS_PER_CHARACTER
@@ -0,0 +1,246 @@
1
+ import pytest
2
+ from langchain_anthropic import ChatAnthropic
3
+ from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
4
+ from langchain_openai import AzureChatOpenAI, ChatOpenAI
5
+
6
+ from lumivor.agent.message_manager.service import MessageManager
7
+ from lumivor.agent.prompts import SystemPrompt
8
+ from lumivor.agent.views import ActionResult
9
+ from lumivor.browser.views import BrowserState, TabInfo
10
+ from lumivor.dom.views import DOMElementNode, DOMTextNode
11
+
12
+
13
+ @pytest.fixture(
14
+ params=[
15
+ ChatOpenAI(model='gpt-4o-mini'),
16
+ AzureChatOpenAI(model='gpt-4o', api_version='2024-02-15-preview'),
17
+ ChatAnthropic(
18
+ model_name='claude-3-5-sonnet-20240620', timeout=100, temperature=0.0, stop=None
19
+ ),
20
+ ],
21
+ ids=['gpt-4o-mini', 'gpt-4o', 'claude-3-5-sonnet'],
22
+ )
23
+ def message_manager(request: pytest.FixtureRequest):
24
+ llm = request.param
25
+ task = 'Test task'
26
+ action_descriptions = 'Test actions'
27
+ return MessageManager(
28
+ llm=llm,
29
+ task=task,
30
+ action_descriptions=action_descriptions,
31
+ system_prompt_class=SystemPrompt,
32
+ max_input_tokens=1000,
33
+ estimated_tokens_per_character=3,
34
+ image_tokens=800,
35
+ )
36
+
37
+
38
def test_initial_messages(message_manager: MessageManager):
    """A fresh manager starts with exactly the system prompt and the task message."""
    history = message_manager.get_messages()

    assert len(history) == 2
    system_msg, task_msg = history
    assert isinstance(system_msg, SystemMessage)
    assert isinstance(task_msg, HumanMessage)
    assert 'Test task' in task_msg.content
45
+
46
+
47
def test_add_state_message(message_manager: MessageManager):
    """Adding a browser state appends one human message containing the URL."""
    dom_root = DOMElementNode(
        tag_name='div',
        xpath='//div',
        attributes={},
        children=[],
        is_visible=True,
        parent=None,
    )
    browser_state = BrowserState(
        url='https://test.com',
        title='Test Page',
        element_tree=dom_root,
        selector_map={},
        tabs=[TabInfo(page_id=1, url='https://test.com', title='Test Page')],
    )

    message_manager.add_state_message(browser_state)
    history = message_manager.get_messages()

    # System + task + the new state message.
    assert len(history) == 3
    state_msg = history[2]
    assert isinstance(state_msg, HumanMessage)
    assert 'https://test.com' in state_msg.content
69
+
70
+
71
def test_add_state_with_memory_result(message_manager: MessageManager):
    """A result marked include_in_memory becomes its own permanent message."""
    dom_root = DOMElementNode(
        tag_name='div',
        xpath='//div',
        attributes={},
        children=[],
        is_visible=True,
        parent=None,
    )
    browser_state = BrowserState(
        url='https://test.com',
        title='Test Page',
        element_tree=dom_root,
        selector_map={},
        tabs=[TabInfo(page_id=1, url='https://test.com', title='Test Page')],
    )
    memory_result = ActionResult(
        extracted_content='Important content', include_in_memory=True)

    message_manager.add_state_message(browser_state, memory_result)
    history = message_manager.get_messages()

    # Should have system, task, extracted content, and state messages.
    assert len(history) == 4
    content_msg, state_msg = history[2], history[3]
    assert isinstance(content_msg, HumanMessage)
    assert isinstance(state_msg, HumanMessage)
    assert 'Important content' in content_msg.content
    # The state message must not duplicate the remembered result.
    assert 'Important content' not in state_msg.content
99
+
100
+
101
def test_add_state_with_non_memory_result(message_manager: MessageManager):
    """A non-memory result is folded into the state message instead of kept separately."""
    dom_root = DOMElementNode(
        tag_name='div',
        xpath='//div',
        attributes={},
        children=[],
        is_visible=True,
        parent=None,
    )
    browser_state = BrowserState(
        url='https://test.com',
        title='Test Page',
        element_tree=dom_root,
        selector_map={},
        tabs=[TabInfo(page_id=1, url='https://test.com', title='Test Page')],
    )
    transient_result = ActionResult(
        extracted_content='Temporary content', include_in_memory=False)

    message_manager.add_state_message(browser_state, transient_result)
    history = message_manager.get_messages()

    # Should have system, task, and a single combined state+result message.
    assert len(history) == 3
    combined_msg = history[2]
    assert isinstance(combined_msg, HumanMessage)
    assert 'Temporary content' in combined_msg.content
127
+
128
+
129
@pytest.mark.skip('not sure how to fix this')
@pytest.mark.parametrize('max_tokens', [100000, 10000, 5000])
def test_token_overflow_handling_with_real_flow(message_manager: MessageManager, max_tokens):
    """Test handling of token overflow in a realistic message flow.

    Simulates many agent steps (growing state + alternating results), and on
    every step checks that trimming keeps the history near the token budget
    and that stored per-message token counts stay consistent.
    NOTE(review): currently skipped — assertions below may not match actual
    trimming behavior; confirm before un-skipping.
    """
    # Set more realistic token limit
    message_manager.max_input_tokens = max_tokens

    # Create a long sequence of interactions
    for i in range(200):  # Simulate 40 steps of interaction
        # Create state with varying content length
        state = BrowserState(
            url=f'https://test{i}.com',
            title=f'Test Page {i}',
            element_tree=DOMElementNode(
                tag_name='div',
                attributes={},
                children=[
                    DOMTextNode(
                        # Increasing content length
                        text=f'Content {j} ' * (10 + i),
                        is_visible=True,
                        parent=None,
                    )
                    for j in range(5)  # Multiple DOM items
                ],
                is_visible=True,
                parent=None,
                xpath='//div',
            ),
            selector_map={j: f'//div[{j}]' for j in range(5)},
            tabs=[
                TabInfo(page_id=1, url=f'https://test{i}.com', title=f'Test Page {i}')],
        )

        # Alternate between different types of results
        result = None
        if i % 2 == 0:  # Every other iteration
            result = ActionResult(
                extracted_content=f'Important content from step {i}' * 5,
                include_in_memory=i % 4 == 0,  # Include in memory every 4th message
            )

        # Add state message
        message_manager.add_state_message(state, result)

        try:
            messages = message_manager.get_messages()
        except ValueError as e:
            if 'Max token limit reached - history is too long' in str(e):
                return  # If error occurs, end the test
            else:
                raise e

        # Trimming keeps the total within the budget (plus a small tolerance).
        assert message_manager.history.total_tokens <= message_manager.max_input_tokens + 100

        last_msg = messages[-1]
        assert isinstance(last_msg, HumanMessage)

        if i % 4 == 0:
            # Memory results add an extra human message before the state message.
            assert isinstance(
                message_manager.history.messages[-2].message, HumanMessage)
        if i % 2 == 0 and not i % 4 == 0:
            # Non-memory result: the state message itself carries the URL text.
            if isinstance(last_msg.content, list):
                assert 'Current url: https://test' in last_msg.content[0]['text']
            else:
                assert 'Current url: https://test' in last_msg.content

        # Add model output every time
        from lumivor.agent.views import AgentBrain, AgentOutput
        from lumivor.controller.registry.views import ActionModel

        output = AgentOutput(
            current_state=AgentBrain(
                evaluation_previous_goal=f'Success in step {i}',
                memory=f'Memory from step {i}',
                next_goal=f'Goal for step {i+1}',
            ),
            action=[ActionModel()],
        )
        # The transient state message is replaced by the model's output.
        message_manager._remove_last_state_message()
        message_manager.add_model_output(output)

        # Get messages and verify after each addition
        messages = [m.message for m in message_manager.history.messages]

        # Verify token limit is respected

        # Verify essential messages are preserved
        # System prompt always first
        assert isinstance(messages[0], SystemMessage)
        assert isinstance(messages[1], HumanMessage)  # Task always second
        assert 'Test task' in messages[1].content

        # Verify structure of latest messages
        # Last message should be model output
        assert isinstance(messages[-1], AIMessage)
        # Should contain current step info
        assert f'step {i}' in messages[-1].content

        # Log token usage for debugging
        token_usage = message_manager.history.total_tokens
        token_limit = message_manager.max_input_tokens
        # print(f'Step {i}: Using {token_usage}/{token_limit} tokens')

        # go through all messages and verify that the token count and total tokens is correct
        total_tokens = 0
        real_tokens = []
        stored_tokens = []
        for msg in message_manager.history.messages:
            total_tokens += msg.metadata.input_tokens
            stored_tokens.append(msg.metadata.input_tokens)
            real_tokens.append(message_manager._count_tokens(msg.message))
        assert total_tokens == sum(real_tokens)
        assert stored_tokens == real_tokens
        assert message_manager.history.total_tokens == total_tokens
244
+
245
+
246
+ # pytest -s lumivor/agent/message_manager/tests.py
@@ -0,0 +1,37 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import List, Optional
4
+
5
+ from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, SystemMessage
6
+ from pydantic import BaseModel, Field
7
+
8
+
9
class MessageMetadata(BaseModel):
    """Metadata for a message including token counts."""

    # Estimated number of input tokens this message contributes to the history.
    input_tokens: int = 0
13
+
14
+
15
class ManagedMessage(BaseModel):
    """A message paired with its bookkeeping metadata (token count)."""

    # The underlying LangChain message (system/human/AI).
    message: BaseMessage
    # Token accounting for this message; defaults to zero tokens.
    metadata: MessageMetadata = Field(default_factory=MessageMetadata)
20
+
21
+
22
class MessageHistory(BaseModel):
    """Ordered message history that keeps a running total of input tokens."""

    messages: List[ManagedMessage] = Field(default_factory=list)
    total_tokens: int = 0

    def add_message(self, message: BaseMessage, metadata: MessageMetadata) -> None:
        """Append *message* with *metadata* and grow the token total."""
        managed = ManagedMessage(message=message, metadata=metadata)
        self.messages.append(managed)
        self.total_tokens += metadata.input_tokens

    def remove_message(self, index: int = -1) -> None:
        """Drop the message at *index* (last by default) and shrink the token total."""
        if not self.messages:
            return
        removed = self.messages.pop(index)
        self.total_tokens -= removed.metadata.input_tokens