browser-use-bridge 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- browser_use_bridge/__init__.py +22 -0
- browser_use_bridge/__main__.py +5 -0
- browser_use_bridge/agent/__init__.py +12 -0
- browser_use_bridge/agent/controller.py +202 -0
- browser_use_bridge/agent/message_manager/__init__.py +254 -0
- browser_use_bridge/agent/planner.py +115 -0
- browser_use_bridge/agent/retry.py +193 -0
- browser_use_bridge/agent/service.py +259 -0
- browser_use_bridge/agent/views.py +108 -0
- browser_use_bridge/browser/__init__.py +30 -0
- browser_use_bridge/browser/events.py +66 -0
- browser_use_bridge/browser/session.py +495 -0
- browser_use_bridge/browser/views.py +13 -0
- browser_use_bridge/browser/watchdogs/__init__.py +104 -0
- browser_use_bridge/checkpoint.py +167 -0
- browser_use_bridge/cli.py +562 -0
- browser_use_bridge/config.py +78 -0
- browser_use_bridge/dom/__init__.py +343 -0
- browser_use_bridge/history/__init__.py +3 -0
- browser_use_bridge/history/exporter.py +518 -0
- browser_use_bridge/llm/__init__.py +25 -0
- browser_use_bridge/llm/_openai_compatible.py +88 -0
- browser_use_bridge/llm/anthropic/__init__.py +3 -0
- browser_use_bridge/llm/anthropic_adapter.py +56 -0
- browser_use_bridge/llm/base.py +133 -0
- browser_use_bridge/llm/browser_use_bridge/__init__.py +0 -0
- browser_use_bridge/llm/custom/__init__.py +384 -0
- browser_use_bridge/llm/deepseek/__init__.py +31 -0
- browser_use_bridge/llm/glm/__init__.py +24 -0
- browser_use_bridge/llm/google/__init__.py +3 -0
- browser_use_bridge/llm/google_adapter.py +55 -0
- browser_use_bridge/llm/kimi/__init__.py +49 -0
- browser_use_bridge/llm/minimax/__init__.py +56 -0
- browser_use_bridge/llm/ollama/__init__.py +395 -0
- browser_use_bridge/llm/openai/__init__.py +3 -0
- browser_use_bridge/llm/openai_adapter.py +55 -0
- browser_use_bridge/llm/qwen/__init__.py +61 -0
- browser_use_bridge/mcp/__init__.py +350 -0
- browser_use_bridge/memory/__init__.py +12 -0
- browser_use_bridge/memory/bm25_backend.py +178 -0
- browser_use_bridge/memory/chromadb_backend.py +97 -0
- browser_use_bridge/memory/store.py +226 -0
- browser_use_bridge/observability.py +64 -0
- browser_use_bridge/skill_cli/__init__.py +0 -0
- browser_use_bridge/tools/__init__.py +169 -0
- browser_use_bridge/tools/actions/__init__.py +330 -0
- browser_use_bridge/tools/registry/__init__.py +128 -0
- browser_use_bridge/tui.py +274 -0
- browser_use_bridge/vision/__init__.py +400 -0
- browser_use_bridge-1.0.0.dist-info/METADATA +473 -0
- browser_use_bridge-1.0.0.dist-info/RECORD +55 -0
- browser_use_bridge-1.0.0.dist-info/WHEEL +5 -0
- browser_use_bridge-1.0.0.dist-info/entry_points.txt +2 -0
- browser_use_bridge-1.0.0.dist-info/licenses/LICENSE +21 -0
- browser_use_bridge-1.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
"""Package entry point: re-exports the names listed in ``__all__``."""

# Import order is kept exactly as published so that package initialisation
# happens in the same sequence.
from browser_use_bridge.agent import Agent
from browser_use_bridge.config import (
    BrowserProfile,
    BrowserUseConfig,
    BrowserViewport,
    load_config,
)
from browser_use_bridge.browser import BrowserSession, SessionManager
from browser_use_bridge.checkpoint import (
    Checkpoint,
    CheckpointManager,
    resume_from_checkpoint,
)
from browser_use_bridge.tui import BrowserUseTUI, DashboardState
from browser_use_bridge.tools import Tools

# Public names exposed by ``from browser_use_bridge import *``.
__all__ = [
    "BrowserProfile",
    "BrowserSession",
    "BrowserUseTUI",
    "BrowserUseConfig",
    "BrowserViewport",
    "Checkpoint",
    "CheckpointManager",
    "Agent",
    "DashboardState",
    "SessionManager",
    "Tools",
    "load_config",
    "resume_from_checkpoint",
]
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"""Agent sub-package: re-exports the names listed in ``__all__``."""

from browser_use_bridge.agent.message_manager import MessageManager
from browser_use_bridge.agent.service import Agent
from browser_use_bridge.agent.views import (
    ActionLoopDetector,
    AgentHistory,
    AgentHistoryList,
    AgentOutput,
)

# Public names exposed by ``from browser_use_bridge.agent import *``.
__all__ = [
    "ActionLoopDetector",
    "Agent",
    "AgentHistory",
    "AgentHistoryList",
    "AgentOutput",
    "MessageManager",
]
|
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import inspect
|
|
4
|
+
import uuid
|
|
5
|
+
from enum import Enum
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
9
|
+
|
|
10
|
+
from browser_use_bridge.agent.planner import Plan, PlanStep
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class ControllerState(Enum):
    """Lifecycle states a ``Controller`` moves through while running a plan."""

    # Set when a plan is accepted (also the controller's initial state).
    PLANNING = "planning"
    # Set immediately before a step's action is dispatched.
    EXECUTING = "executing"
    # Set while the action result is compared with the expected state.
    VERIFYING = "verifying"
    # Set when a failed step triggers a plan revision.
    REPLANNING = "replanning"
    # Terminal: every step of the active plan verified.
    DONE = "done"
    # Terminal: the run was aborted.
    FAILED = "failed"
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class StateTransition(BaseModel):
    """One entry of a controller's transition log."""

    # Unknown keys are kept rather than rejected.
    model_config = ConfigDict(extra="allow")

    # State that was entered.
    state: ControllerState
    # Free-form explanation supplied by whoever triggered the transition.
    reason: str = ""
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class StepResult(BaseModel):
    """Outcome of executing (and verifying) a single plan step."""

    # Unknown keys are kept rather than rejected.
    model_config = ConfigDict(extra="allow")

    # Identifier used to look the result up again later.
    pending_action_id: str
    # Sub-goal text of the step that was executed.
    sub_goal: str
    # Action payload that was dispatched.
    action: dict[str, Any]
    # Raw value returned by the last attempt.
    result: Any = None
    # Whether the expected state was observed.
    verified: bool = False
    # Number of attempts made for this step.
    attempts: int = 1
    # Explanation of the verification failure; empty on success.
    reason: str = ""
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class ControllerResult(BaseModel):
    """Summary returned after a plan has finished or been aborted."""

    # Unknown keys are kept rather than rejected.
    model_config = ConfigDict(extra="allow")

    # True when every step of the final plan verified.
    success: bool
    # Per-step outcomes collected during the run.
    step_results: list[StepResult] = Field(default_factory=list)
    # Reason for the abort; ``None`` when the run succeeded.
    failure_reason: str | None = None
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class Controller(BaseModel):
    """Executes planned actions and records observable state transitions.

    Every plan step is dispatched through ``tools.execute_action`` and the
    returned value is compared with the step's ``expected_state``. Each state
    change is appended to ``transition_history`` and each step outcome to
    ``step_results``; neither list is cleared between ``execute_plan`` calls.
    """

    model_config = ConfigDict(arbitrary_types_allowed=True)

    # Object exposing ``execute_action(action, **kwargs)`` (sync or async).
    tools: Any
    # Optional object exposing ``revise(plan, failed_step, reason)``.
    planner: Any | None = None
    # Forwarded to ``execute_action`` when its signature accepts it.
    browser_session: Any | None = None
    state: ControllerState = ControllerState.PLANNING
    transition_history: list[StateTransition] = Field(default_factory=list)
    step_results: list[StepResult] = Field(default_factory=list)
    current_plan: Plan | None = None
    failure_reason: str | None = None
    # Upper bound on plan revisions per ``execute_plan`` call. ``None`` (the
    # default) keeps the historical unbounded behaviour; set a number to stop
    # a planner that keeps producing failing "replan" steps from looping forever.
    max_replans: int | None = Field(default=None, ge=0)

    async def execute_plan(self, plan: Plan) -> ControllerResult:
        """Run ``plan`` step by step, revising it when a step asks for a replan.

        A step that fails verification triggers a revision only when its
        ``fallback_strategy`` is ``"replan"``, a planner is configured and the
        ``max_replans`` budget (if any) is not exhausted. Any other failing
        step aborts the run.

        Returns:
            A ``ControllerResult`` with ``success=True`` once every step of the
            active plan verified, otherwise ``success=False`` plus the
            failure reason.
        """
        self.current_plan = plan
        self.failure_reason = None
        self._transition(ControllerState.PLANNING, "plan accepted")
        active_plan = plan
        replans_used = 0

        while True:
            for step in active_plan.steps:
                outcome = await self._execute_step(step)
                self.step_results.append(outcome)
                if outcome.verified:
                    continue

                may_replan = (
                    step.fallback_strategy == "replan"
                    and self.planner is not None
                    and (self.max_replans is None or replans_used < self.max_replans)
                )
                if may_replan:
                    replans_used += 1
                    self._transition(ControllerState.REPLANNING, outcome.reason)
                    active_plan = await self._revise_plan(active_plan, step, outcome.reason)
                    self.current_plan = active_plan
                    # Leave the for-loop without hitting ``else`` so the outer
                    # loop restarts on the revised plan.
                    break

                # NOTE(review): every strategy other than "replan" (including
                # "skip" and "retry") ends the run here.
                self.abort(outcome.reason or f"Step failed: {step.sub_goal}")
                return ControllerResult(
                    success=False,
                    step_results=self.step_results,
                    failure_reason=self.failure_reason,
                )
            else:
                # Reached only when no step requested a replan.
                self._transition(ControllerState.DONE, "plan complete")
                return ControllerResult(success=True, step_results=self.step_results)

    def step_result(self, pending_action_id: str) -> StepResult:
        """Return the first recorded result with the given id.

        Raises:
            KeyError: if no recorded result carries ``pending_action_id``.
        """
        for result in self.step_results:
            if result.pending_action_id == pending_action_id:
                return result
        raise KeyError(pending_action_id)

    def abort(self, reason: str = "aborted") -> None:
        """Record ``reason`` as the failure reason and enter ``FAILED``."""
        self.failure_reason = reason
        self._transition(ControllerState.FAILED, reason)

    def checkpoint(self) -> dict[str, Any]:
        """Return a plain-dict snapshot of state, plan, results and transitions."""
        return {
            "state": self.state.value,
            "failure_reason": self.failure_reason,
            "plan": self.current_plan.model_dump() if self.current_plan else None,
            "step_results": [result.model_dump() for result in self.step_results],
            "transition_history": [entry.model_dump() for entry in self.transition_history],
        }

    async def _execute_step(self, step: PlanStep) -> StepResult:
        """Execute one step, retrying up to ``step.max_retries`` extra times.

        All attempts share one generated ``pending_action_id``. The first
        verified attempt wins; otherwise the last result and reason are
        reported with ``verified=False``.
        """
        pending_action_id = f"step-{uuid.uuid4().hex[:12]}"
        attempts = step.max_retries + 1
        last_result: Any = None
        last_reason = ""
        for attempt in range(1, attempts + 1):
            self._transition(ControllerState.EXECUTING, step.sub_goal)
            last_result = await self._execute_action(step.action)
            self._transition(ControllerState.VERIFYING, step.expected_state)
            verified, last_reason = self._verify_result(step.expected_state, last_result)
            if verified:
                return StepResult(
                    pending_action_id=pending_action_id,
                    sub_goal=step.sub_goal,
                    action=step.action,
                    result=last_result,
                    verified=True,
                    attempts=attempt,
                )
        return StepResult(
            pending_action_id=pending_action_id,
            sub_goal=step.sub_goal,
            action=step.action,
            result=last_result,
            verified=False,
            attempts=attempts,
            reason=last_reason,
        )

    async def _execute_action(self, action: dict[str, Any]) -> Any:
        """Call ``tools.execute_action`` and await the result when needed.

        ``browser_session`` is passed as a keyword only when the executor
        declares that parameter or accepts ``**kwargs``.

        Raises:
            TypeError: if ``tools`` has no ``execute_action`` attribute.
        """
        executor = getattr(self.tools, "execute_action", None)
        if executor is None:
            raise TypeError("tools must provide execute_action(action, **kwargs)")
        kwargs: dict[str, Any] = {}
        parameters = inspect.signature(executor).parameters
        accepts_session = "browser_session" in parameters or any(
            parameter.kind == inspect.Parameter.VAR_KEYWORD for parameter in parameters.values()
        )
        if accepts_session:
            kwargs["browser_session"] = self.browser_session
        result = executor(action, **kwargs)
        if inspect.isawaitable(result):
            return await result
        return result

    async def _revise_plan(self, plan: Plan, failed_step: PlanStep, reason: str) -> Plan:
        """Ask the planner for a revised plan; supports sync and async planners."""
        revised = self.planner.revise(plan, failed_step, reason)
        if inspect.isawaitable(revised):
            revised = await revised
        return revised

    def _transition(self, state: ControllerState, reason: str = "") -> None:
        """Enter ``state`` and append the change to ``transition_history``."""
        self.state = state
        self.transition_history.append(StateTransition(state=state, reason=reason))

    @classmethod
    def _verify_result(cls, expected_state: str, result: Any) -> tuple[bool, str]:
        """Check ``result`` against a textual expectation.

        Recognised expectations, compared case-insensitively:

        * empty, ``ok``, ``ok is true``, ``success``, ``success is true``:
          pass unless the result is a dict with ``ok`` set to ``False``;
        * ``text contains <needle>`` / ``url contains <needle>``: pass when
          the needle occurs in the flattened result text;
        * anything else: pass when the whole expectation occurs in the
          flattened result text.

        Returns:
            ``(verified, reason)``; ``reason`` is empty on success.
        """
        if cls._result_failed(result):
            return False, "action result was not ok"
        expected = expected_state.strip()
        if not expected:
            return True, ""
        expected_lower = expected.lower()
        if expected_lower in {"ok", "ok is true", "success", "success is true"}:
            # An explicit failure was already ruled out above.
            return True, ""

        result_text = cls._result_text(result)
        for prefix, label in (("text contains ", "text"), ("url contains ", "url")):
            if expected_lower.startswith(prefix):
                needle = expected[len(prefix) :].strip().lower()
                if needle in result_text:
                    return True, ""
                return False, f"{label} did not contain {needle!r}"

        if expected_lower in result_text:
            return True, ""
        return False, f"expected state not observed: {expected}"

    @staticmethod
    def _result_failed(result: Any) -> bool:
        """Return True only for a dict result whose ``ok`` value is ``False``."""
        return isinstance(result, dict) and result.get("ok") is False

    @staticmethod
    def _result_text(result: Any) -> str:
        """Flatten ``result`` to lower-case text (dict values joined by spaces)."""
        if isinstance(result, dict):
            return " ".join(str(value) for value in result.values()).lower()
        return str(result).lower()
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
# Public API of the controller module.
__all__ = [
    "Controller",
    "ControllerResult",
    "ControllerState",
    "StateTransition",
    "StepResult",
]
|
|
@@ -0,0 +1,254 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
6
|
+
|
|
7
|
+
from browser_use_bridge.agent.views import AgentHistory
|
|
8
|
+
from browser_use_bridge.browser.views import BrowserStateSummary
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class MessageManager(BaseModel):
    """Builds the chat message lists for agent, planner and controller turns.

    The user message is assembled from the task, optional memory hits, the
    current browser state and the recorded step history, then trimmed to a
    character budget derived from ``max_tokens``.
    """

    model_config = ConfigDict(arbitrary_types_allowed=True)

    task: str
    # Budget used by ``_fit_to_budget`` (four characters are allowed per token).
    max_tokens: int = 4000
    # Number of most recent steps rendered in full; older ones are summarised.
    keep_recent_steps: int = 3
    histories: list[AgentHistory] = Field(default_factory=list)
    # Optional object exposing ``search(query, top_k=...)``.
    memory_store: Any | None = None
    memory_top_k: int = 5

    def add_history(self, history: AgentHistory) -> None:
        """Append one step record to ``histories``."""
        self.histories.append(history)

    def build_messages(
        self,
        current_state: BrowserStateSummary,
        nudge: str | None = None,
        screenshots: list[Any] | None = None,
    ) -> list[dict[str, Any]]:
        """Return the system + user messages for an agent step.

        When ``screenshots`` is non-empty the user content becomes a list of
        parts (the text first, then a caption and image part per screenshot);
        otherwise it is the plain text.
        """
        content = self._render_content(current_state, nudge)
        user_content: str | list[dict[str, Any]]
        if screenshots:
            user_content = [{"type": "text", "text": content}]
            user_content.extend(self._format_screenshot_parts(screenshots))
        else:
            user_content = content
        return [
            {
                "role": "system",
                "content": (
                    "You are a browser automation agent. Return structured output "
                    "with thinking, evaluation, memory, next_goal, and actions."
                ),
            },
            {"role": "user", "content": user_content},
        ]

    def build_planner_messages(
        self,
        task: str,
        browser_state: BrowserStateSummary,
        history: Any | None = None,
    ) -> list[dict[str, Any]]:
        """Return the system + user messages for a planner turn.

        ``task`` (and ``history`` when given) temporarily replace the
        manager's own values while the content is rendered; the originals are
        restored even if rendering raises.
        """
        original_task = self.task
        original_histories = self.histories
        try:
            self.task = task
            if history is not None:
                # Accepts either an object exposing ``.histories`` or an
                # iterable of entries.
                self.histories = list(getattr(history, "histories", history))
            content = self._render_content(browser_state, None)
        finally:
            self.task = original_task
            self.histories = original_histories
        return [
            {
                "role": "system",
                "content": "You are a planner. Return structured plan steps with sub-goals and expected states.",
            },
            {"role": "user", "content": content},
        ]

    def build_controller_messages(
        self,
        plan_step: str,
        step_results: list[Any] | None = None,
    ) -> list[dict[str, Any]]:
        """Return the system + user messages describing a plan step and its results."""
        lines = [
            f"Plan step: {plan_step}",
            "Step results:",
        ]
        for result in step_results or []:
            if hasattr(result, "model_dump"):
                result = result.model_dump()
            lines.append(f"- {result}")
        return [
            {
                "role": "system",
                "content": "You are a controller. Use action results to verify planned execution.",
            },
            {"role": "user", "content": "\n".join(lines)},
        ]

    def _render_content(self, state: BrowserStateSummary, nudge: str | None) -> str:
        """Join the non-empty content parts and trim them to the budget."""
        parts = self._build_content_parts(state, nudge)
        return self._fit_to_budget("\n\n".join(part for part in parts if part))

    def _build_content_parts(
        self,
        current_state: BrowserStateSummary,
        nudge: str | None,
    ) -> list[str]:
        """Return the ordered prompt sections; entries may be empty strings."""
        older, recent = self._split_history()
        parts = [
            f"Task: {self.task}",
            self._format_memory_context(),
            self._format_current_state(current_state),
        ]
        if older:
            parts.append(self._summarize_history(older))
        if recent:
            parts.append(self._format_recent_history(recent))
        if nudge:
            parts.append(f"Nudge: {nudge}")
        return parts

    def _split_history(self) -> tuple[list[AgentHistory], list[AgentHistory]]:
        """Split ``histories`` into (older, recent) around ``keep_recent_steps``."""
        if self.keep_recent_steps <= 0:
            return self.histories, []
        return self.histories[:-self.keep_recent_steps], self.histories[-self.keep_recent_steps :]

    def _format_current_state(self, state: BrowserStateSummary) -> str:
        """Render url, title and elements of ``state`` as a bullet block."""
        return "\n".join(
            [
                "Current browser state:",
                f"- url: {state.url}",
                f"- title: {state.title}",
                f"- elements: {self._format_elements(state.elements)}",
            ]
        )

    def _format_memory_context(self) -> str:
        """Render memory hits for the task, or ``""`` when there is nothing to show."""
        if self.memory_store is None:
            return ""
        try:
            entries = self.memory_store.search(self.task, top_k=self.memory_top_k)
        except Exception:
            # Best effort: a failing memory backend must not break prompt
            # building, but leave a trace for debugging.
            import logging

            logging.getLogger(__name__).debug(
                "memory search failed; omitting memory context", exc_info=True
            )
            return ""
        if not entries:
            return ""
        lines = ["Relevant memory:"]
        for entry in entries:
            # ``type`` may be an enum (use its value) or a plain value.
            memory_type = getattr(getattr(entry, "type", ""), "value", getattr(entry, "type", ""))
            text = str(getattr(entry, "text", "")).strip()
            if text:
                lines.append(f"- [{memory_type}] {text}")
        if len(lines) == 1:
            # Every hit had empty text: do not emit a dangling header.
            return ""
        return "\n".join(lines)

    def _summarize_history(self, histories: list[AgentHistory]) -> str:
        """Render one compact line per older step that has model output.

        Returns ``""`` when no step has model output, so no empty section
        header ends up in the prompt.
        """
        lines = ["Compressed older history:"]
        for index, history in enumerate(histories):
            output = history.model_output
            if output is None:
                continue
            candidates = (
                output.evaluation,
                output.memory,
                output.next_goal,
                self._format_actions(output.actions),
            )
            summary = " | ".join(bit for bit in candidates if bit)
            lines.append(f"- step {index}: {summary}")
        if len(lines) == 1:
            return ""
        return "\n".join(lines)

    def _format_recent_history(self, histories: list[AgentHistory]) -> str:
        """Render the most recent steps in full, numbered by absolute position."""
        lines = ["Recent uncompressed history:"]
        # ``histories`` is the tail of ``self.histories``.
        start = len(self.histories) - len(histories)
        for offset, history in enumerate(histories):
            output = history.model_output
            state = history.state
            lines.append(f"- step {start + offset}:")
            if state is not None:
                lines.append(f"  state: {state.title} ({state.url})")
            if output is not None:
                lines.append(f"  thinking: {output.thinking}")
                lines.append(f"  evaluation: {output.evaluation}")
                lines.append(f"  memory: {output.memory}")
                lines.append(f"  next_goal: {output.next_goal}")
                lines.append(f"  actions: {self._format_actions(output.actions)}")
        return "\n".join(lines)

    def _fit_to_budget(self, content: str) -> str:
        """Trim ``content`` to the character budget, keeping head and tail.

        The budget is ``max_tokens * 4`` characters with a floor of 1000. The
        first third is kept, a marker is inserted, and the remainder of the
        budget (minus 40 characters reserved for the marker) is taken from
        the end.
        """
        max_chars = max(self.max_tokens * 4, 1000)
        if len(content) <= max_chars:
            return content
        keep_head = max_chars // 3
        keep_tail = max_chars - keep_head - 40
        return f"{content[:keep_head]}\n...[context compressed]...\n{content[-keep_tail:]}"

    @staticmethod
    def _format_actions(actions: list[Any]) -> str:
        """Join the string form of each action with commas."""
        return ", ".join(str(action) for action in actions)

    @staticmethod
    def _format_elements(elements: list[Any]) -> str:
        """Render at most the first 20 elements, one per line.

        Dict-like elements with an ``index`` become ``[index] <tag> text``;
        everything else falls back to ``str(element)``.
        """
        if not elements:
            return "[]"
        rows: list[str] = []
        for element in elements[:20]:
            if hasattr(element, "model_dump"):
                element = element.model_dump()
            if isinstance(element, dict) and element.get("index") is not None:
                index = element.get("index")
                tag = element.get("tag") or element.get("tag_name") or element.get("role") or "element"
                text = element.get("text") or element.get("label") or element.get("aria_label") or ""
                rows.append(f"[{index}] <{tag}> {text}".strip())
            else:
                rows.append(str(element))
        return "\n".join(rows)

    @staticmethod
    def _format_screenshot_parts(screenshots: list[Any]) -> list[dict[str, Any]]:
        """Return a caption part and an image part for every screenshot.

        The caption lists the indexed bounding boxes; the image part carries
        the screenshot as a base64 data URL.
        """
        parts: list[dict[str, Any]] = []
        for screenshot in screenshots:
            content_type = getattr(screenshot, "content_type", "image/jpeg")
            # Tolerate a missing or ``None`` box list.
            boxes = getattr(screenshot, "bounding_boxes", None) or []
            box_lines = []
            for box in boxes:
                index = getattr(box, "index", None)
                label = getattr(box, "label", None) or getattr(box, "text", None) or ""
                if index is not None:
                    box_lines.append(f"[{index}] {label}".strip())
            parts.append(
                {
                    "type": "text",
                    "text": "\n".join([f"Annotated screenshot ({content_type}):", *box_lines]),
                }
            )
            parts.append(
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:{content_type};base64,{getattr(screenshot, 'base64_data', '')}"
                    },
                }
            )
        return parts
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
# Public API of the message_manager package.
__all__ = [
    "MessageManager",
]
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any, Literal
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
6
|
+
|
|
7
|
+
from browser_use_bridge.agent.views import AgentHistoryList
|
|
8
|
+
from browser_use_bridge.browser.views import BrowserStateSummary
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# Closed set of values accepted for ``PlanStep.fallback_strategy``.
FallbackStrategy = Literal[
    "retry",
    "skip",
    "fallback",
    "abort",
    "replan",
]
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class PlanStep(BaseModel):
    """A single planned action together with how to judge and recover it."""

    # Unknown keys are kept rather than rejected.
    model_config = ConfigDict(extra="allow")

    # Human-readable goal of this step.
    sub_goal: str
    # Textual expectation the action result is checked against.
    expected_state: str
    # Action payload; defaults to an empty dict.
    action: dict[str, Any] = Field(default_factory=dict)
    # Extra attempts allowed after the first one; must be >= 0.
    max_retries: int = Field(default=1, ge=0)
    # What to do when the step cannot be verified.
    fallback_strategy: FallbackStrategy = "replan"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class Plan(BaseModel):
    """An ordered list of steps for one task."""

    # Unknown keys are kept rather than rejected.
    model_config = ConfigDict(extra="allow")

    # Task text the plan was built for.
    task: str
    # Steps in execution order; empty by default.
    steps: list[PlanStep] = Field(default_factory=list)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class PlanningContext(BaseModel):
    """Inputs handed to the planner when decomposing a task."""

    # Arbitrary (non-pydantic) types are permitted and unknown keys are kept.
    model_config = ConfigDict(arbitrary_types_allowed=True, extra="allow")

    # Task text to decompose.
    task: str
    # Snapshot of the browser the plan starts from.
    browser_state: BrowserStateSummary
    # Prior agent history; a fresh empty list object by default.
    history: AgentHistoryList = Field(default_factory=AgentHistoryList)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class Planner:
    """Deterministic task decomposer used by the separated agent loop."""

    async def decompose(self, context: PlanningContext) -> Plan:
        """Build a one-step plan for ``context.task``.

        The single step carries the heuristically chosen action, allows one
        retry and asks for a replan when it cannot be verified.
        """
        action = self._choose_action(context)
        step = PlanStep(
            sub_goal=self._sub_goal_for(context.task, action),
            expected_state=self._expected_state_for(context.task, action),
            action=action,
            max_retries=1,
            fallback_strategy="replan",
        )
        return Plan(task=context.task, steps=[step])

    async def revise(self, plan: Plan, failed_step: PlanStep, reason: str) -> Plan:
        """Return a recovery plan that finishes the task as unsuccessful.

        The replacement plan has one ``done`` step reporting ``reason``; it is
        not retried and aborts if it fails.
        """
        recovery = PlanStep(
            sub_goal=f"Recover from failed step: {failed_step.sub_goal}",
            expected_state="ok is true",
            action={"done": {"success": False, "text": reason}},
            max_retries=0,
            fallback_strategy="abort",
        )
        return Plan(task=plan.task, steps=[recovery])

    def _choose_action(self, context: PlanningContext) -> dict[str, Any]:
        """Pick the next action from the task text and the visible elements.

        Order of preference:

        1. click the first indexed element that is a ``button``/``a`` tag, or,
           when the task mentions "click"/"continue", whose text is empty,
           occurs in the task, or contains "continue";
        2. navigate to the first whitespace-separated token of the task that
           starts with ``http://``, ``https://`` or ``file://``;
        3. otherwise finish with a successful ``done`` action.
        """
        task_lower = context.task.lower()
        task_wants_click = "click" in task_lower or "continue" in task_lower

        # NOTE(review): elements are examined before URLs, so a clickable
        # element wins over a URL in the task. Kept as-is.
        for element in context.browser_state.elements:
            data = self._element_dict(element)
            index = data.get("index")
            if index is None:
                continue
            text = str(data.get("text") or "").lower()
            tag = str(data.get("tag") or data.get("tag_name") or "").lower()
            if tag in {"button", "a"}:
                return {"click": {"index": index}}
            if task_wants_click and (not text or text in task_lower or "continue" in text):
                return {"click": {"index": index}}

        for token in context.task.split():
            if token.startswith(("http://", "https://", "file://")):
                # Drop sentence punctuation stuck to the URL.
                return {"navigate": {"url": token.strip(".,")}}
        return {"done": {"success": True, "text": context.task}}

    @staticmethod
    def _sub_goal_for(task: str, action: dict[str, Any]) -> str:
        """Return the sub-goal sentence matching the action kind."""
        if "click" in action:
            return f"Click the target element for: {task}"
        if "navigate" in action:
            return f"Navigate to the requested page for: {task}"
        return f"Finish task: {task}"

    @staticmethod
    def _expected_state_for(task: str, action: dict[str, Any]) -> str:
        """Return an expectation string for the action.

        Navigation expects the target URL to show up in the result
        (``url contains <url>``). Click and every other action expect
        ``ok is true``: free-form prose such as "task progresses toward ..."
        could only be matched by a result-substring check, so a click step
        would fail verification and be retried.
        """
        if "navigate" in action:
            url = action["navigate"].get("url", "")
            return f"url contains {url}"
        return "ok is true"

    @staticmethod
    def _element_dict(element: Any) -> dict[str, Any]:
        """Return ``element`` as a dict, or ``{}`` when it cannot be converted."""
        if hasattr(element, "model_dump"):
            element = element.model_dump(exclude_none=True)
        return element if isinstance(element, dict) else {}
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
# Public API of the planner module.
__all__ = [
    "FallbackStrategy",
    "Plan",
    "PlanStep",
    "Planner",
    "PlanningContext",
]
|