browser-use-bridge 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- browser_use_bridge-1.0.0/LICENSE +21 -0
- browser_use_bridge-1.0.0/PKG-INFO +473 -0
- browser_use_bridge-1.0.0/README.md +420 -0
- browser_use_bridge-1.0.0/browser_use_bridge/__init__.py +22 -0
- browser_use_bridge-1.0.0/browser_use_bridge/__main__.py +5 -0
- browser_use_bridge-1.0.0/browser_use_bridge/agent/__init__.py +12 -0
- browser_use_bridge-1.0.0/browser_use_bridge/agent/controller.py +202 -0
- browser_use_bridge-1.0.0/browser_use_bridge/agent/message_manager/__init__.py +254 -0
- browser_use_bridge-1.0.0/browser_use_bridge/agent/planner.py +115 -0
- browser_use_bridge-1.0.0/browser_use_bridge/agent/retry.py +193 -0
- browser_use_bridge-1.0.0/browser_use_bridge/agent/service.py +259 -0
- browser_use_bridge-1.0.0/browser_use_bridge/agent/views.py +108 -0
- browser_use_bridge-1.0.0/browser_use_bridge/browser/__init__.py +30 -0
- browser_use_bridge-1.0.0/browser_use_bridge/browser/events.py +66 -0
- browser_use_bridge-1.0.0/browser_use_bridge/browser/session.py +495 -0
- browser_use_bridge-1.0.0/browser_use_bridge/browser/views.py +13 -0
- browser_use_bridge-1.0.0/browser_use_bridge/browser/watchdogs/__init__.py +104 -0
- browser_use_bridge-1.0.0/browser_use_bridge/checkpoint.py +167 -0
- browser_use_bridge-1.0.0/browser_use_bridge/cli.py +562 -0
- browser_use_bridge-1.0.0/browser_use_bridge/config.py +78 -0
- browser_use_bridge-1.0.0/browser_use_bridge/dom/__init__.py +343 -0
- browser_use_bridge-1.0.0/browser_use_bridge/history/__init__.py +3 -0
- browser_use_bridge-1.0.0/browser_use_bridge/history/exporter.py +518 -0
- browser_use_bridge-1.0.0/browser_use_bridge/llm/__init__.py +25 -0
- browser_use_bridge-1.0.0/browser_use_bridge/llm/_openai_compatible.py +88 -0
- browser_use_bridge-1.0.0/browser_use_bridge/llm/anthropic/__init__.py +3 -0
- browser_use_bridge-1.0.0/browser_use_bridge/llm/anthropic_adapter.py +56 -0
- browser_use_bridge-1.0.0/browser_use_bridge/llm/base.py +133 -0
- browser_use_bridge-1.0.0/browser_use_bridge/llm/browser_use_bridge/__init__.py +0 -0
- browser_use_bridge-1.0.0/browser_use_bridge/llm/custom/__init__.py +384 -0
- browser_use_bridge-1.0.0/browser_use_bridge/llm/deepseek/__init__.py +31 -0
- browser_use_bridge-1.0.0/browser_use_bridge/llm/glm/__init__.py +24 -0
- browser_use_bridge-1.0.0/browser_use_bridge/llm/google/__init__.py +3 -0
- browser_use_bridge-1.0.0/browser_use_bridge/llm/google_adapter.py +55 -0
- browser_use_bridge-1.0.0/browser_use_bridge/llm/kimi/__init__.py +49 -0
- browser_use_bridge-1.0.0/browser_use_bridge/llm/minimax/__init__.py +56 -0
- browser_use_bridge-1.0.0/browser_use_bridge/llm/ollama/__init__.py +395 -0
- browser_use_bridge-1.0.0/browser_use_bridge/llm/openai/__init__.py +3 -0
- browser_use_bridge-1.0.0/browser_use_bridge/llm/openai_adapter.py +55 -0
- browser_use_bridge-1.0.0/browser_use_bridge/llm/qwen/__init__.py +61 -0
- browser_use_bridge-1.0.0/browser_use_bridge/mcp/__init__.py +350 -0
- browser_use_bridge-1.0.0/browser_use_bridge/memory/__init__.py +12 -0
- browser_use_bridge-1.0.0/browser_use_bridge/memory/bm25_backend.py +178 -0
- browser_use_bridge-1.0.0/browser_use_bridge/memory/chromadb_backend.py +97 -0
- browser_use_bridge-1.0.0/browser_use_bridge/memory/store.py +226 -0
- browser_use_bridge-1.0.0/browser_use_bridge/observability.py +64 -0
- browser_use_bridge-1.0.0/browser_use_bridge/skill_cli/__init__.py +0 -0
- browser_use_bridge-1.0.0/browser_use_bridge/tools/__init__.py +169 -0
- browser_use_bridge-1.0.0/browser_use_bridge/tools/actions/__init__.py +330 -0
- browser_use_bridge-1.0.0/browser_use_bridge/tools/registry/__init__.py +128 -0
- browser_use_bridge-1.0.0/browser_use_bridge/tui.py +274 -0
- browser_use_bridge-1.0.0/browser_use_bridge/vision/__init__.py +400 -0
- browser_use_bridge-1.0.0/browser_use_bridge.egg-info/PKG-INFO +473 -0
- browser_use_bridge-1.0.0/browser_use_bridge.egg-info/SOURCES.txt +71 -0
- browser_use_bridge-1.0.0/browser_use_bridge.egg-info/dependency_links.txt +1 -0
- browser_use_bridge-1.0.0/browser_use_bridge.egg-info/entry_points.txt +2 -0
- browser_use_bridge-1.0.0/browser_use_bridge.egg-info/requires.txt +41 -0
- browser_use_bridge-1.0.0/browser_use_bridge.egg-info/top_level.txt +1 -0
- browser_use_bridge-1.0.0/pyproject.toml +95 -0
- browser_use_bridge-1.0.0/setup.cfg +4 -0
- browser_use_bridge-1.0.0/tests/test_cli_sprint7.py +96 -0
- browser_use_bridge-1.0.0/tests/test_llm_sprint9.py +207 -0
- browser_use_bridge-1.0.0/tests/test_mcp_sprint8.py +106 -0
- browser_use_bridge-1.0.0/tests/test_sprint10.py +105 -0
- browser_use_bridge-1.0.0/tests/test_sprint11.py +142 -0
- browser_use_bridge-1.0.0/tests/test_sprint13.py +296 -0
- browser_use_bridge-1.0.0/tests/test_sprint14.py +137 -0
- browser_use_bridge-1.0.0/tests/test_sprint15.py +153 -0
- browser_use_bridge-1.0.0/tests/test_sprint16.py +123 -0
- browser_use_bridge-1.0.0/tests/test_sprint17.py +141 -0
- browser_use_bridge-1.0.0/tests/test_sprint18.py +117 -0
- browser_use_bridge-1.0.0/tests/test_sprint19.py +164 -0
- browser_use_bridge-1.0.0/tests/test_tui.py +43 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 YuSec2021
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,473 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: browser-use-bridge
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: AI browser automation bridge for Chinese LLMs, custom model providers, and OpenAI-compatible endpoints
|
|
5
|
+
Author-email: YuSec2021 <gushangdianer@gmail.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/YuSec2021/browser-use-bridge
|
|
8
|
+
Project-URL: Repository, https://github.com/YuSec2021/browser-use-bridge
|
|
9
|
+
Project-URL: Bug Tracker, https://github.com/YuSec2021/browser-use-bridge/issues
|
|
10
|
+
Keywords: browser,automation,ai,playwright,llm,chinese-llm,kimi,qwen,glm,minimax,deepseek,custom-model,openai-compatible,国产大模型,浏览器自动化
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Operating System :: OS Independent
|
|
15
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
16
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
17
|
+
Requires-Python: >=3.11
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
License-File: LICENSE
|
|
20
|
+
Requires-Dist: playwright>=1.40.0
|
|
21
|
+
Requires-Dist: pydantic>=2.0.0
|
|
22
|
+
Requires-Dist: click>=8.1.0
|
|
23
|
+
Requires-Dist: python-dotenv>=1.0.0
|
|
24
|
+
Requires-Dist: httpx>=0.27.0
|
|
25
|
+
Requires-Dist: aiofiles>=23.0.0
|
|
26
|
+
Requires-Dist: textual>=0.50.0
|
|
27
|
+
Requires-Dist: openai>=1.30.0
|
|
28
|
+
Provides-Extra: cn
|
|
29
|
+
Requires-Dist: dashscope>=1.20.0; extra == "cn"
|
|
30
|
+
Requires-Dist: zhipuai>=2.1.0; extra == "cn"
|
|
31
|
+
Requires-Dist: anthropic>=0.30.0; extra == "cn"
|
|
32
|
+
Provides-Extra: kimi
|
|
33
|
+
Requires-Dist: openai>=1.30.0; extra == "kimi"
|
|
34
|
+
Provides-Extra: qwen
|
|
35
|
+
Requires-Dist: dashscope>=1.20.0; extra == "qwen"
|
|
36
|
+
Provides-Extra: glm
|
|
37
|
+
Requires-Dist: zhipuai>=2.1.0; extra == "glm"
|
|
38
|
+
Provides-Extra: minimax
|
|
39
|
+
Requires-Dist: openai>=1.30.0; extra == "minimax"
|
|
40
|
+
Provides-Extra: deepseek
|
|
41
|
+
Requires-Dist: openai>=1.30.0; extra == "deepseek"
|
|
42
|
+
Provides-Extra: ollama
|
|
43
|
+
Requires-Dist: ollama>=0.2.0; extra == "ollama"
|
|
44
|
+
Provides-Extra: dev
|
|
45
|
+
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
|
46
|
+
Requires-Dist: pytest-asyncio>=0.23.0; extra == "dev"
|
|
47
|
+
Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
|
|
48
|
+
Requires-Dist: build; extra == "dev"
|
|
49
|
+
Requires-Dist: twine; extra == "dev"
|
|
50
|
+
Provides-Extra: all
|
|
51
|
+
Requires-Dist: browser-use-bridge[cn,dev,ollama]; extra == "all"
|
|
52
|
+
Dynamic: license-file
|
|
53
|
+
|
|
54
|
+
# Browser Use Bridge
|
|
55
|
+
|
|
56
|
+
**English** | [中文](#中文说明)
|
|
57
|
+
|
|
58
|
+
---
|
|
59
|
+
|
|
60
|
+
AI browser automation bridge with first-class support for Chinese LLMs, custom model providers, and any OpenAI-compatible endpoint.
|
|
61
|
+
|
|
62
|
+
Built on top of [browser-use](https://github.com/browser-use/browser-use) — extending it with Chinese LLM adapters, vision understanding, memory, checkpointing, and more.
|
|
63
|
+
|
|
64
|
+
[PyPI version](https://pypi.org/project/browser-use-bridge/)
|
|
65
|
+
[Python versions](https://pypi.org/project/browser-use-bridge/)
|
|
66
|
+
[License: MIT](LICENSE)
|
|
67
|
+
|
|
68
|
+
---
|
|
69
|
+
|
|
70
|
+
## What's Different from browser-use
|
|
71
|
+
|
|
72
|
+
`browser-use-bridge` is a fork of [browser-use](https://github.com/browser-use/browser-use) with the following additions and changes:
|
|
73
|
+
|
|
74
|
+
### Added
|
|
75
|
+
|
|
76
|
+
| Feature | Details |
|
|
77
|
+
|---|---|
|
|
78
|
+
| **Chinese LLM adapters** | Native support for Kimi (Moonshot), Qwen (DashScope), GLM (Zhipu), MiniMax, DeepSeek — no LangChain required |
|
|
79
|
+
| **Custom model provider** | `ChatCustom`: point at any OpenAI-compatible endpoint with `base_url` + `api_key` |
|
|
80
|
+
| **Ollama local models** | `ChatOllama` with health checking, model discovery, streaming, and vision model support |
|
|
81
|
+
| **Vision understanding** | `VisionService`: screenshot → annotated image → Vision LLM analysis; automatic fallback when DOM is sparse |
|
|
82
|
+
| **Planner / Controller separation** | Two-agent architecture: Planner decomposes tasks into sub-goals; Controller executes and verifies each step |
|
|
83
|
+
| **Memory store** | BM25 keyword retrieval (zero deps) or ChromaDB vector backend; injected into Agent context automatically |
|
|
84
|
+
| **Checkpoint / Resume** | `CheckpointManager`: save task state at any step, resume after interruption |
|
|
85
|
+
| **History export** | `HistoryExporter`: export completed runs as JSON, self-contained HTML timeline, or animated GIF |
|
|
86
|
+
| **Structured retry** | `RetryController`: exponential backoff, error classification, loop detection with page fingerprinting |
|
|
87
|
+
| **Updated default models** | Kimi `kimi-2.6`, Qwen `qwen3.6-plus`, GLM `glm-5.1`, MiniMax `MiniMax-M2.7`, DeepSeek `deepseek-v4-pro` |
|
|
88
|
+
| **Independent packaging** | Published as `browser-use-bridge` on PyPI with optional dependency groups per provider |
|
|
89
|
+
|
|
90
|
+
### Changed
|
|
91
|
+
|
|
92
|
+
| Aspect | browser-use | browser-use-bridge |
|
|
93
|
+
|---|---|---|
|
|
94
|
+
| Package name | `browser_use` | `browser_use_bridge` |
|
|
95
|
+
| CLI command | `browser-use` | `browser-use-bridge` |
|
|
96
|
+
| LLM base class | LangChain `BaseChatModel` | Lightweight custom `BaseChatModel` (no LangChain dependency) |
|
|
97
|
+
| Provider auto-detection | — | Detects Chinese gateways from `base_url` pattern |
|
|
98
|
+
|
|
99
|
+
---
|
|
100
|
+
|
|
101
|
+
## Installation
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
pip install browser-use-bridge
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
Install with optional provider SDKs (Chinese LLMs, Anthropic, Ollama):
|
|
108
|
+
|
|
109
|
+
```bash
|
|
110
|
+
pip install "browser-use-bridge[cn]" # Qwen (DashScope) + GLM (Zhipu) + Anthropic
|
|
111
|
+
pip install "browser-use-bridge[kimi]" # Moonshot Kimi
|
|
112
|
+
pip install "browser-use-bridge[deepseek]" # DeepSeek
|
|
113
|
+
pip install "browser-use-bridge[minimax]" # MiniMax
|
|
114
|
+
pip install "browser-use-bridge[ollama]" # Ollama local models
|
|
115
|
+
pip install "browser-use-bridge[all]" # Everything
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
---
|
|
119
|
+
|
|
120
|
+
## Quick Start
|
|
121
|
+
|
|
122
|
+
### Python API
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
import asyncio
|
|
126
|
+
|
|
127
|
+
from browser_use_bridge import Agent, BrowserSession
|
|
128
|
+
from browser_use_bridge.llm import ChatKimi
|
|
129
|
+
|
|
130
|
+
async def main():
|
|
131
|
+
session = BrowserSession()
|
|
132
|
+
try:
|
|
133
|
+
await session.start()
|
|
134
|
+
agent = Agent(
|
|
135
|
+
task="Search for the latest AI news and summarize the top 3 results",
|
|
136
|
+
llm=ChatKimi(model="kimi-2.6", api_key="your-key"),
|
|
137
|
+
browser_session=session,
|
|
138
|
+
)
|
|
139
|
+
history = await agent.run()
|
|
140
|
+
return history
|
|
141
|
+
finally:
|
|
142
|
+
await session.close()
|
|
143
|
+
|
|
144
|
+
history = asyncio.run(main())
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
### With Memory and Checkpoint
|
|
148
|
+
|
|
149
|
+
```python
|
|
150
|
+
import asyncio
|
|
151
|
+
|
|
152
|
+
from browser_use_bridge import Agent
|
|
153
|
+
from browser_use_bridge.browser import BrowserSession
|
|
154
|
+
from browser_use_bridge.llm import ChatQwen
|
|
155
|
+
from browser_use_bridge.memory import MemoryStore
|
|
156
|
+
from browser_use_bridge.checkpoint import CheckpointManager
|
|
157
|
+
|
|
158
|
+
async def main():
|
|
159
|
+
session = BrowserSession()
|
|
160
|
+
checkpoint_manager = CheckpointManager(autosave_every_steps=5)
|
|
161
|
+
try:
|
|
162
|
+
await session.start()
|
|
163
|
+
agent = Agent(
|
|
164
|
+
task="Fill in the registration form at example.com",
|
|
165
|
+
llm=ChatQwen(model="qwen3.6-plus"),
|
|
166
|
+
browser_session=session,
|
|
167
|
+
memory_store=MemoryStore(),
|
|
168
|
+
)
|
|
169
|
+
history = await agent.run()
|
|
170
|
+
checkpoint_manager.save(
|
|
171
|
+
task_id="registration-form",
|
|
172
|
+
step_counter=len(history.histories),
|
|
173
|
+
current_url=await session.get_current_url(),
|
|
174
|
+
agent_history=history.model_dump(mode="json"),
|
|
175
|
+
label="completed",
|
|
176
|
+
)
|
|
177
|
+
return history
|
|
178
|
+
finally:
|
|
179
|
+
await session.close()
|
|
180
|
+
|
|
181
|
+
history = asyncio.run(main())
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
### CLI
|
|
185
|
+
|
|
186
|
+
```bash
|
|
187
|
+
# Run a task
|
|
188
|
+
browser-use-bridge run --task "Open baidu.com and search for Python" --provider kimi
|
|
189
|
+
|
|
190
|
+
# List all registered tools
|
|
191
|
+
browser-use-bridge list-tools
|
|
192
|
+
|
|
193
|
+
# Start MCP server for Claude Desktop
|
|
194
|
+
browser-use-bridge mcp --stdio
|
|
195
|
+
|
|
196
|
+
# Resume an interrupted task
|
|
197
|
+
browser-use-bridge resume <checkpoint_id>
|
|
198
|
+
|
|
199
|
+
# List saved checkpoints
|
|
200
|
+
browser-use-bridge checkpoint list
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
### Export History
|
|
204
|
+
|
|
205
|
+
```python
|
|
206
|
+
from browser_use_bridge.history import HistoryExporter
|
|
207
|
+
|
|
208
|
+
exporter = HistoryExporter(output_dir="history-exports")
|
|
209
|
+
artifacts = exporter.export("<checkpoint_id>", format="html")
|
|
210
|
+
print(artifacts["html"])
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
### Custom / Local Model
|
|
214
|
+
|
|
215
|
+
```python
|
|
216
|
+
from browser_use_bridge.llm import ChatCustom
|
|
217
|
+
|
|
218
|
+
# Any OpenAI-compatible endpoint
|
|
219
|
+
llm = ChatCustom(
|
|
220
|
+
model="my-model",
|
|
221
|
+
base_url="http://localhost:8080/v1",
|
|
222
|
+
api_key="optional",
|
|
223
|
+
)
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
---
|
|
227
|
+
|
|
228
|
+
## Supported Providers
|
|
229
|
+
|
|
230
|
+
| Provider | Class | Default Model | Install |
|
|
231
|
+
|---|---|---|---|
|
|
232
|
+
| OpenAI | `ChatOpenAI` | `gpt-4o` | built-in |
|
|
233
|
+
| Anthropic | `ChatAnthropic` | `claude-sonnet-4-20250514` | `[cn]` |
|
|
234
|
+
| Google Gemini | `ChatGoogle` | `gemini-2.0-flash` | built-in |
|
|
235
|
+
| Kimi (Moonshot) | `ChatKimi` | `kimi-2.6` | built-in |
|
|
236
|
+
| Qwen (DashScope) | `ChatQwen` | `qwen3.6-plus` | `[cn]` |
|
|
237
|
+
| GLM (Zhipu) | `ChatGLM` | `glm-5.1` | `[cn]` |
|
|
238
|
+
| MiniMax | `ChatMiniMax` | `MiniMax-M2.7` | built-in |
|
|
239
|
+
| DeepSeek | `ChatDeepSeek` | `deepseek-v4-pro` | built-in |
|
|
240
|
+
| Ollama (local) | `ChatOllama` | `llama3` | `[ollama]` |
|
|
241
|
+
| Custom endpoint | `ChatCustom` | configurable | built-in |
|
|
242
|
+
|
|
243
|
+
---
|
|
244
|
+
|
|
245
|
+
## Environment Variables
|
|
246
|
+
|
|
247
|
+
Create a `.env` file in your project root:
|
|
248
|
+
|
|
249
|
+
```env
|
|
250
|
+
MOONSHOT_API_KEY=your-kimi-key
|
|
251
|
+
DASHSCOPE_API_KEY=your-qwen-key
|
|
252
|
+
ZHIPU_API_KEY=your-glm-key
|
|
253
|
+
MINIMAX_API_KEY=your-minimax-key
|
|
254
|
+
DEEPSEEK_API_KEY=your-deepseek-key
|
|
255
|
+
OPENAI_API_KEY=your-openai-key
|
|
256
|
+
```
|
|
257
|
+
|
|
258
|
+
---
|
|
259
|
+
|
|
260
|
+
## License
|
|
261
|
+
|
|
262
|
+
MIT — see [LICENSE](LICENSE).
|
|
263
|
+
|
|
264
|
+
The original [browser-use](https://github.com/browser-use/browser-use) project is also MIT-licensed.
|
|
265
|
+
|
|
266
|
+
---
|
|
267
|
+
|
|
268
|
+
# 中文说明
|
|
269
|
+
|
|
270
|
+
[English](#browser-use-bridge) | **中文**
|
|
271
|
+
|
|
272
|
+
---
|
|
273
|
+
|
|
274
|
+
基于 [browser-use](https://github.com/browser-use/browser-use) 构建的 AI 浏览器自动化框架,新增国产大模型支持、视觉理解、记忆存储、断点续传等能力。
|
|
275
|
+
|
|
276
|
+
---
|
|
277
|
+
|
|
278
|
+
## 相比 browser-use 的改动说明
|
|
279
|
+
|
|
280
|
+
`browser-use-bridge` 是 [browser-use](https://github.com/browser-use/browser-use) 的 Fork 版本,主要改动如下:
|
|
281
|
+
|
|
282
|
+
### 新增功能
|
|
283
|
+
|
|
284
|
+
| 功能 | 说明 |
|
|
285
|
+
|---|---|
|
|
286
|
+
| **国产大模型适配器** | 原生支持 Kimi(月之暗面)、通义千问(DashScope)、智谱 GLM、MiniMax、DeepSeek,无需 LangChain |
|
|
287
|
+
| **自定义模型提供商** | `ChatCustom`:通过 `base_url` + `api_key` 接入任意 OpenAI 兼容接口 |
|
|
288
|
+
| **Ollama 本地模型** | `ChatOllama`:含健康检查、模型发现、流式输出、视觉模型支持 |
|
|
289
|
+
| **视觉理解模块** | `VisionService`:截图 → 标注图像 → Vision LLM 分析;DOM 稀少时自动降级到视觉模式 |
|
|
290
|
+
| **Planner / Controller 分离** | 双 Agent 架构:Planner 将任务分解为子目标,Controller 逐步执行并验证 |
|
|
291
|
+
| **记忆存储** | BM25 关键词检索(零依赖)或 ChromaDB 向量后端;自动注入 Agent 上下文 |
|
|
292
|
+
| **断点续传** | `CheckpointManager`:任意步骤保存任务状态,中断后可恢复 |
|
|
293
|
+
| **历史回放导出** | `HistoryExporter`:导出为 JSON、自包含 HTML 时间线、或 GIF 动画 |
|
|
294
|
+
| **结构化重试** | `RetryController`:指数退避、错误分级、基于页面指纹的循环检测 |
|
|
295
|
+
| **最新默认模型** | Kimi `kimi-2.6`、千问 `qwen3.6-plus`、GLM `glm-5.1`、MiniMax `MiniMax-M2.7`、DeepSeek `deepseek-v4-pro` |
|
|
296
|
+
| **独立 PyPI 发布** | 以 `browser-use-bridge` 发布,各模型 SDK 按需安装 |
|
|
297
|
+
|
|
298
|
+
### 变更对比
|
|
299
|
+
|
|
300
|
+
| 方面 | browser-use | browser-use-bridge |
|
|
301
|
+
|---|---|---|
|
|
302
|
+
| 包名 | `browser_use` | `browser_use_bridge` |
|
|
303
|
+
| CLI 命令 | `browser-use` | `browser-use-bridge` |
|
|
304
|
+
| LLM 基类 | LangChain `BaseChatModel` | 轻量自研 `BaseChatModel`(无 LangChain 依赖) |
|
|
305
|
+
| 国产模型接入 | 不支持 | 原生支持,含 API Key 自动读取 |
|
|
306
|
+
|
|
307
|
+
---
|
|
308
|
+
|
|
309
|
+
## 安装
|
|
310
|
+
|
|
311
|
+
```bash
|
|
312
|
+
pip install browser-use-bridge
|
|
313
|
+
```
|
|
314
|
+
|
|
315
|
+
安装可选的模型 SDK(国产大模型、Anthropic、Ollama):
|
|
316
|
+
|
|
317
|
+
```bash
|
|
318
|
+
pip install "browser-use-bridge[cn]" # 千问 + GLM + Anthropic
|
|
319
|
+
pip install "browser-use-bridge[kimi]" # Kimi(月之暗面)
|
|
320
|
+
pip install "browser-use-bridge[deepseek]" # DeepSeek
|
|
321
|
+
pip install "browser-use-bridge[minimax]" # MiniMax
|
|
322
|
+
pip install "browser-use-bridge[ollama]" # Ollama 本地模型
|
|
323
|
+
pip install "browser-use-bridge[all]" # 全部安装
|
|
324
|
+
```
|
|
325
|
+
|
|
326
|
+
---
|
|
327
|
+
|
|
328
|
+
## 快速开始
|
|
329
|
+
|
|
330
|
+
### Python API
|
|
331
|
+
|
|
332
|
+
```python
|
|
333
|
+
import asyncio
|
|
334
|
+
|
|
335
|
+
from browser_use_bridge import Agent
|
|
336
|
+
from browser_use_bridge.browser import BrowserSession
|
|
337
|
+
from browser_use_bridge.llm import ChatKimi
|
|
338
|
+
|
|
339
|
+
async def main():
|
|
340
|
+
session = BrowserSession()
|
|
341
|
+
try:
|
|
342
|
+
await session.start()
|
|
343
|
+
agent = Agent(
|
|
344
|
+
task="搜索最新的 AI 新闻,总结前 3 条结果",
|
|
345
|
+
llm=ChatKimi(model="kimi-2.6", api_key="your-key"),
|
|
346
|
+
browser_session=session,
|
|
347
|
+
)
|
|
348
|
+
history = await agent.run()
|
|
349
|
+
return history
|
|
350
|
+
finally:
|
|
351
|
+
await session.close()
|
|
352
|
+
|
|
353
|
+
history = asyncio.run(main())
|
|
354
|
+
```
|
|
355
|
+
|
|
356
|
+
### 带记忆和断点续传
|
|
357
|
+
|
|
358
|
+
```python
|
|
359
|
+
import asyncio
|
|
360
|
+
|
|
361
|
+
from browser_use_bridge import Agent
|
|
362
|
+
from browser_use_bridge.browser import BrowserSession
|
|
363
|
+
from browser_use_bridge.llm import ChatQwen
|
|
364
|
+
from browser_use_bridge.memory import MemoryStore
|
|
365
|
+
from browser_use_bridge.checkpoint import CheckpointManager
|
|
366
|
+
|
|
367
|
+
async def main():
|
|
368
|
+
session = BrowserSession()
|
|
369
|
+
checkpoint_manager = CheckpointManager(autosave_every_steps=5)
|
|
370
|
+
try:
|
|
371
|
+
await session.start()
|
|
372
|
+
agent = Agent(
|
|
373
|
+
task="填写 example.com 的注册表单",
|
|
374
|
+
llm=ChatQwen(model="qwen3.6-plus"),
|
|
375
|
+
browser_session=session,
|
|
376
|
+
memory_store=MemoryStore(),
|
|
377
|
+
)
|
|
378
|
+
history = await agent.run()
|
|
379
|
+
checkpoint_manager.save(
|
|
380
|
+
task_id="registration-form",
|
|
381
|
+
step_counter=len(history.histories),
|
|
382
|
+
current_url=await session.get_current_url(),
|
|
383
|
+
agent_history=history.model_dump(mode="json"),
|
|
384
|
+
label="completed",
|
|
385
|
+
)
|
|
386
|
+
return history
|
|
387
|
+
finally:
|
|
388
|
+
await session.close()
|
|
389
|
+
|
|
390
|
+
history = asyncio.run(main())
|
|
391
|
+
```
|
|
392
|
+
|
|
393
|
+
### CLI
|
|
394
|
+
|
|
395
|
+
```bash
|
|
396
|
+
# 执行任务
|
|
397
|
+
browser-use-bridge run --task "打开百度搜索 Python" --provider kimi
|
|
398
|
+
|
|
399
|
+
# 列出所有工具
|
|
400
|
+
browser-use-bridge list-tools
|
|
401
|
+
|
|
402
|
+
# 启动 MCP 服务(供 Claude Desktop 使用)
|
|
403
|
+
browser-use-bridge mcp --stdio
|
|
404
|
+
|
|
405
|
+
# 恢复中断的任务
|
|
406
|
+
browser-use-bridge resume <checkpoint_id>
|
|
407
|
+
|
|
408
|
+
# 列出已保存的断点
|
|
409
|
+
browser-use-bridge checkpoint list
|
|
410
|
+
```
|
|
411
|
+
|
|
412
|
+
### 导出历史
|
|
413
|
+
|
|
414
|
+
```python
|
|
415
|
+
from browser_use_bridge.history import HistoryExporter
|
|
416
|
+
|
|
417
|
+
exporter = HistoryExporter(output_dir="history-exports")
|
|
418
|
+
artifacts = exporter.export("<checkpoint_id>", format="html")
|
|
419
|
+
print(artifacts["html"])
|
|
420
|
+
```
|
|
421
|
+
|
|
422
|
+
### 自定义 / 本地模型
|
|
423
|
+
|
|
424
|
+
```python
|
|
425
|
+
from browser_use_bridge.llm import ChatCustom
|
|
426
|
+
|
|
427
|
+
# 任意 OpenAI 兼容接口
|
|
428
|
+
llm = ChatCustom(
|
|
429
|
+
model="my-model",
|
|
430
|
+
base_url="http://localhost:8080/v1",
|
|
431
|
+
api_key="optional",
|
|
432
|
+
)
|
|
433
|
+
```
|
|
434
|
+
|
|
435
|
+
---
|
|
436
|
+
|
|
437
|
+
## 支持的模型提供商
|
|
438
|
+
|
|
439
|
+
| 提供商 | 类名 | 默认模型 | 安装方式 |
|
|
440
|
+
|---|---|---|---|
|
|
441
|
+
| OpenAI | `ChatOpenAI` | `gpt-4o` | 内置 |
|
|
442
|
+
| Anthropic | `ChatAnthropic` | `claude-sonnet-4-20250514` | `[cn]` |
|
|
443
|
+
| Google Gemini | `ChatGoogle` | `gemini-2.0-flash` | 内置 |
|
|
444
|
+
| Kimi(月之暗面) | `ChatKimi` | `kimi-2.6` | 内置 |
|
|
445
|
+
| 通义千问(DashScope) | `ChatQwen` | `qwen3.6-plus` | `[cn]` |
|
|
446
|
+
| 智谱 GLM | `ChatGLM` | `glm-5.1` | `[cn]` |
|
|
447
|
+
| MiniMax | `ChatMiniMax` | `MiniMax-M2.7` | 内置 |
|
|
448
|
+
| DeepSeek | `ChatDeepSeek` | `deepseek-v4-pro` | 内置 |
|
|
449
|
+
| Ollama(本地) | `ChatOllama` | `llama3` | `[ollama]` |
|
|
450
|
+
| 自定义接口 | `ChatCustom` | 可配置 | 内置 |
|
|
451
|
+
|
|
452
|
+
---
|
|
453
|
+
|
|
454
|
+
## 环境变量
|
|
455
|
+
|
|
456
|
+
在项目根目录创建 `.env` 文件:
|
|
457
|
+
|
|
458
|
+
```env
|
|
459
|
+
MOONSHOT_API_KEY=your-kimi-key
|
|
460
|
+
DASHSCOPE_API_KEY=your-qwen-key
|
|
461
|
+
ZHIPU_API_KEY=your-glm-key
|
|
462
|
+
MINIMAX_API_KEY=your-minimax-key
|
|
463
|
+
DEEPSEEK_API_KEY=your-deepseek-key
|
|
464
|
+
OPENAI_API_KEY=your-openai-key
|
|
465
|
+
```
|
|
466
|
+
|
|
467
|
+
---
|
|
468
|
+
|
|
469
|
+
## 开源协议
|
|
470
|
+
|
|
471
|
+
MIT — 详见 [LICENSE](LICENSE)。
|
|
472
|
+
|
|
473
|
+
原项目 [browser-use](https://github.com/browser-use/browser-use) 同样采用 MIT 协议。
|