cdp-bridge 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,285 @@
1
+ import json, threading, time, uuid, queue, socket, requests, traceback
2
+ from typing import Dict, Any, Optional, List
3
+ from simple_websocket_server import WebSocketServer, WebSocket
4
+ from bs4 import BeautifulSoup
5
+ import bottle, random
6
+ from bottle import route, template, request, response
7
+
8
class Session:
    """Connection state for a single browser tab.

    A session is one of three transport types, taken from ``info['type']``
    (default ``'ws'``):

    * ``'ws'`` / ``'ext_ws'`` -- ``client`` is a WebSocket handler stored in
      ``ws_client``.
    * ``'http'`` -- ``client`` is a queue stored in ``http_queue``; the
      session counts as disconnected once ``connect_at`` is older than
      ``HTTP_IDLE_TIMEOUT`` seconds.
    """

    # Seconds without a refreshed ``connect_at`` before an HTTP session is
    # treated as gone.
    HTTP_IDLE_TIMEOUT = 60

    def __init__(self, session_id, info, client=None):
        self.id = session_id
        self.info = info
        self.connect_at = time.time()
        self.disconnect_at = None  # None means "currently connected"
        self.type = info.get('type', 'ws')
        self.ws_client = client if self.type in ('ws', 'ext_ws') else None
        self.http_queue = client if self.type == 'http' else None

    @property
    def url(self):
        """URL recorded in ``info`` (empty string when the key is absent)."""
        return self.info.get('url', '')

    def is_active(self):
        """Return True while the session is connected.

        An HTTP session that has been idle too long is marked disconnected
        here. The ``disconnect_at is None`` guard matters: without it this
        method and ``mark_disconnected`` call each other endlessly for a
        stale HTTP session.
        """
        if (self.type == 'http'
                and self.disconnect_at is None
                and time.time() - self.connect_at > self.HTTP_IDLE_TIMEOUT):
            self.mark_disconnected()
        return self.disconnect_at is None

    def reconnect(self, client, info):
        """Re-attach ``client`` with fresh ``info`` and mark the session live."""
        self.info = info
        self.type = info.get('type', 'ws')
        if self.type in ('ws', 'ext_ws'):
            self.ws_client = client
            self.http_queue = None
        elif self.type == 'http':
            self.http_queue = client
            # Drop the old socket so closing it later cannot mark this
            # (now HTTP) session as disconnected.
            self.ws_client = None
        self.connect_at = time.time()
        self.disconnect_at = None

    def mark_disconnected(self):
        """Record the disconnect time once; repeated calls are no-ops.

        Keeping the first timestamp means the age of the disconnect is
        measured from when the tab actually went away.
        """
        if self.disconnect_at is not None:
            return
        print(f"Tab disconnected: {self.url} (Session: {self.id})")
        self.disconnect_at = time.time()
35
+
36
+
37
class TMWebDriver:
    """Run JavaScript in browser tabs that connect back to this process.

    On construction the driver probes ``host:port+1``. If something already
    listens there the instance becomes a *remote* client that forwards every
    command to that process over HTTP (``/link``). Otherwise it becomes the
    master: it starts a WebSocket server on ``port`` and an HTTP server on
    ``port+1`` (long-poll, result and link endpoints), both on daemon threads.

    Attributes:
        sessions: session id -> ``Session``.
        results: execution id -> result dict posted back by the browser.
        acks: execution id -> True once the browser acknowledged delivery.
        default_session_id: session used when ``execute_js`` gets no id.
        latest_session_id: id of the most recently registered WS session.
    """

    def __init__(self, host: str = '127.0.0.1', port: int = 18765):
        self.host, self.port = host, port
        self.sessions, self.results, self.acks = {}, {}, {}
        self.default_session_id = None
        self.latest_session_id = None
        # Close the probe socket instead of leaving it to the garbage collector.
        with socket.socket() as probe:
            self.is_remote = probe.connect_ex((host, port + 1)) == 0
        if not self.is_remote:
            self.start_ws_server()
            self.start_http_server()
        else:
            self.remote = f'http://{self.host}:{self.port+1}/link'

    def start_http_server(self):
        """Start the Bottle app (long-poll, result, link) on ``port + 1``."""
        self.app = app = bottle.Bottle()

        @app.route('/api/longpoll', method=['GET', 'POST'])
        def long_poll():
            # request.json is None when the body is missing or not JSON.
            data = request.json or {}
            session_id = data.get('sessionId')
            session_info = {'url': data.get('url'), 'title': data.get('title', ''), 'type': 'http'}
            if session_id not in self.sessions:
                session = Session(session_id, session_info, queue.Queue())
                print(f"Browser http connected: {session.url} (Session: {session_id})")
                self.sessions[session_id] = session
            session = self.sessions[session_id]
            if session.disconnect_at is not None and session.type != 'http':
                session.reconnect(queue.Queue(), session_info)
            session.disconnect_at = None
            if session.type != 'http':
                return json.dumps({"id": "", "ret": "use ws"})
            pending = session.http_queue
            # Every poll refreshes connect_at, which keeps the session alive.
            session.connect_at = start_time = time.time()
            while time.time() - start_time < 5:
                try:
                    msg = pending.get(timeout=0.2)
                except queue.Empty:
                    continue
                # Handing the message to the poller counts as delivery.
                try:
                    self.acks[json.loads(msg).get('id', '')] = True
                except Exception:
                    traceback.print_exc()
                return msg
            return json.dumps({"id": "", "ret": "next long-poll"})

        @app.route('/api/result', method=['GET', 'POST'])
        def result():
            data = request.json or {}
            kind = data.get('type')
            if kind == 'result':
                self.results[data.get('id')] = {'success': True, 'data': data.get('result'), 'newTabs': data.get('newTabs', [])}
            elif kind == 'error':
                self.results[data.get('id')] = {'success': False, 'data': data.get('error'), 'newTabs': data.get('newTabs', [])}
            return 'ok'

        @app.route('/link', method=['GET', 'POST'])
        def link():
            data = request.json or {}
            cmd = data.get('cmd')
            if cmd == 'get_all_sessions':
                return json.dumps({'r': self.get_all_sessions()}, ensure_ascii=False)
            if cmd == 'find_session':
                url_pattern = data.get('url_pattern', '')
                return json.dumps({'r': self.find_session(url_pattern)}, ensure_ascii=False)
            if cmd == 'execute_js':
                session_id = data.get('sessionId')
                code = data.get('code')
                try:
                    # Parsed inside the try so a bad timeout is reported to
                    # the caller like any other execution error.
                    timeout = float(data.get('timeout', 10.0))
                    result = self.execute_js(code, timeout=timeout, session_id=session_id)
                    print('[remote result]', (str(code)[:50] + ' RESULT:' +str(result)[:50]).replace('\n', ' '))
                    return json.dumps({'r': result}, ensure_ascii=False)
                except Exception as e:
                    return json.dumps({'r': {'error': str(e)}}, ensure_ascii=False)
            return 'ok'

        def run():
            from wsgiref.simple_server import make_server, WSGIServer, WSGIRequestHandler
            from socketserver import ThreadingMixIn

            class _T(ThreadingMixIn, WSGIServer):
                pass

            class _H(WSGIRequestHandler):
                def log_request(self, *a):
                    # Suppress per-request access logging.
                    pass

            make_server(self.host, self.port+1, app, server_class=_T, handler_class=_H).serve_forever()

        http_thread = threading.Thread(target=run, daemon=True)
        http_thread.start()

    def clean_sessions(self):
        """Forget sessions that have been disconnected for over 600 seconds."""
        for sid, session in list(self.sessions.items()):
            if session.is_active():
                continue
            gone_at = session.disconnect_at
            if gone_at is not None and time.time() - gone_at > 600:
                self.sessions.pop(sid, None)

    def start_ws_server(self) -> None:
        """Start the WebSocket server on ``port`` in a daemon thread."""
        driver = self

        class JSExecutor(WebSocket):
            def handle(self) -> None:
                try:
                    data = json.loads(self.data)
                    kind = data.get('type')
                    if kind == 'ready':
                        session_id = data.get('sessionId')
                        session_info = {'url': data.get('url'), 'title': data.get('title', ''),
                                        'connected_at': time.time(), 'type': 'ws'}
                        driver._register_client(session_id, self, session_info)
                    elif kind in ['ext_ready', 'tabs_update']:
                        tabs = data.get('tabs', [])
                        current_tab_ids = {str(tab['id']) for tab in tabs}
                        print(f"Received tabs update: {current_tab_ids}")
                        # Extension sessions whose tab is no longer listed are gone.
                        for sid in list(driver.sessions.keys()):
                            sess = driver.sessions[sid]
                            if sess.type == 'ext_ws' and sid not in current_tab_ids:
                                sess.mark_disconnected()
                        for tab in tabs:
                            session_id = str(tab['id'])
                            session_info = {'url': tab.get('url'), 'title': tab.get('title', ''), 'connected_at': time.time(), 'type': 'ext_ws'}
                            sess = driver.sessions.get(session_id)
                            if sess and sess.is_active():
                                sess.info = session_info
                            else:
                                driver._register_client(session_id, self, session_info)
                    elif kind == 'ack':
                        driver.acks[data.get('id', '')] = True
                    elif kind == 'result':
                        driver.results[data.get('id')] = {'success': True, 'data': data.get('result'), 'newTabs': data.get('newTabs', [])}
                    elif kind == 'error':
                        driver.results[data.get('id')] = {'success': False, 'data': data.get('error'), 'newTabs': data.get('newTabs', [])}
                except Exception as e:
                    print(f"Error handling message: {e}")
                    if hasattr(self, 'data'):
                        print(self.data)

            def connected(self):
                # NOTE(review): the original built the string
                # "New connection from <address>" and discarded it (likely a
                # removed print). Kept silent; restore the print if wanted.
                pass

            def handle_close(self):
                print(f"WS Connection closed: {self.address}")
                driver._unregister_client(self)

        self.server = WebSocketServer(self.host, self.port, JSExecutor)
        server_thread = threading.Thread(target=self.server.serve_forever)
        server_thread.daemon = True
        server_thread.start()
        print(f"WebSocket server running on ws://{self.host}:{self.port}")

    def _register_client(self, session_id: str, client: "WebSocket", session_info) -> None:
        """Create or revive the session for ``session_id`` and track it as latest."""
        is_new_session = session_id not in self.sessions

        if is_new_session:
            session = Session(session_id, session_info, client)
            self.sessions[session_id] = session
            print(f"New tab connected: {session.url} (Session: {session_id})")
        else:
            session = self.sessions[session_id]
            session.reconnect(client, session_info)
            print(f"Tab reconnected: {session.url} (Session: {session_id})")

        self.latest_session_id = session_id
        if self.default_session_id is None:
            self.default_session_id = session_id

    def _unregister_client(self, client: "WebSocket") -> None:
        """Mark every session served by ``client`` as disconnected."""
        # Snapshot: other server threads may add or remove sessions meanwhile.
        for session in list(self.sessions.values()):
            if session.ws_client == client:
                session.mark_disconnected()

    def execute_js(self, code, timeout=15, session_id=None) -> Any:
        """Send ``code`` to a tab and wait for its result.

        Args:
            code: Payload delivered to the browser side.
            timeout: Seconds to wait; the clock restarts once an ACK arrives.
            session_id: Target session; defaults to ``default_session_id``.

        Returns:
            ``{'data': ..., 'newTabs': [...]}`` on success (``newTabs`` only
            when non-empty), or ``{'result': <message>, ...}`` when the wait
            timed out or the tab reloaded. In remote mode, whatever the master
            returned.

        Raises:
            ValueError: no usable session, or unsupported session type.
            Exception: the browser (or the master) reported an error.
        """
        if session_id is None:
            session_id = self.default_session_id
        if self.is_remote:
            print('remote_execute_js')
            response = self._remote_cmd({"cmd": "execute_js", "sessionId": session_id,
                                         "code": code, "timeout": str(timeout)}).get('r', {})
            if response.get('error'):
                raise Exception(response['error'])
            return response

        session = self.sessions.get(session_id)
        if not session or not session.is_active():
            # Give a reloading tab a moment to reconnect before falling back.
            time.sleep(3)
            session = self.sessions.get(session_id)
            if not session or not session.is_active():
                alive_sessions = [s for s in list(self.sessions.values()) if s.is_active()]
                if alive_sessions:
                    session = alive_sessions[0]
                    print(f"会话 {session_id} 未连接,自动切换到最新活动会话: {session.id}")
                    session_id = self.default_session_id = session.id
        if not session or not session.is_active():
            raise ValueError(f"会话ID {session_id} 未连接")

        tp = session.type
        if tp not in ('ws', 'http', 'ext_ws'):
            raise ValueError(f"Unsupported session type: {tp}")
        exec_id = str(uuid.uuid4())
        payload_dict = {'id': exec_id, 'code': code}
        if tp == 'ext_ws':
            payload_dict['tabId'] = int(session.id)
        payload = json.dumps(payload_dict)

        if tp in ['ws', 'ext_ws']:
            session.ws_client.send_message(payload)
        elif tp == 'http':
            session.http_queue.put(payload)

        start_time = time.time()
        self.clean_sessions()
        hasjump = acked = False

        try:
            while exec_id not in self.results:
                time.sleep(0.2)
                if not acked and exec_id in self.acks:
                    acked = True
                    start_time = time.time()
                if tp in ['ws', 'ext_ws']:
                    # A disconnect followed by a reconnect means the page reloaded.
                    if not session.is_active():
                        hasjump = True
                    if hasjump and session.is_active():
                        return {'result': f"Session {session_id} reloaded.", "closed":1}
                if time.time() - start_time > timeout:
                    if tp in ['ws', 'ext_ws']:
                        if hasjump:
                            return {'result': f"Session {session_id} reloaded and new page is loading...", 'closed':1}
                        if acked:
                            return {"result": f"No response data in {timeout}s (ACK received, script may still be running)"}
                        return {"result": f"No response data in {timeout}s (no ACK, script may not have been delivered)"}
                    elif tp == 'http':
                        if acked:
                            return {"result": f"Session {session_id} no response in {timeout}s (delivered but no result)"}
                        return {"result": f"Session {session_id} no response in {timeout}s (script not polled)"}
            result = self.results.pop(exec_id)
        finally:
            # Drop the ACK on every exit path so timed-out calls do not pile up.
            self.acks.pop(exec_id, None)

        if not result['success']:
            raise Exception(result['data'])
        rr = {'data': result['data']}
        newtabs = result.get('newTabs') or []
        for tab in newtabs:
            tab.pop('ts', None)
        if newtabs:
            rr['newTabs'] = newtabs
        return rr

    def _remote_cmd(self, cmd):
        """POST ``cmd`` to the master's ``/link`` endpoint and return its JSON."""
        try:
            return requests.post(self.remote, headers={"Content-Type": "application/json"}, json=cmd, timeout=30).json()
        except (ConnectionError, requests.exceptions.ConnectionError) as err:
            raise ConnectionError("TMWebDriver master未运行,看tmwebdriver_sop启动master") from err

    def get_all_sessions(self):
        """Return a list of ``{'id': ..., **info}`` dicts for active sessions."""
        if self.is_remote:
            return self._remote_cmd({"cmd": "get_all_sessions"}).get('r', [])
        return [{'id': session.id, **session.info} for session in list(self.sessions.values())
                if session.is_active()]

    def get_session_dict(self):
        """Return a mapping of active session id -> url."""
        return {session['id']: session['url'] for session in self.get_all_sessions()}

    def find_session(self, url_pattern: str):
        """Return ``(id, info)`` pairs for active sessions whose url contains the pattern.

        An empty pattern returns the most recently registered session, if
        any, without checking whether it is still active. Only local
        sessions are searched.
        """
        if url_pattern == '':
            session = self.sessions.get(self.latest_session_id)
            return [(session.id, session.info)] if session else []
        matching_sessions = []
        for session in list(self.sessions.values()):
            if not session.is_active():
                continue
            if 'url' in session.info and url_pattern in session.info['url']:
                matching_sessions.append((session.id, session.info))
        return matching_sessions

    def set_session(self, url_pattern: str) -> Optional[str]:
        """Make the first session matching ``url_pattern`` the default.

        Returns:
            The chosen session id, or None when nothing matched.
        """
        if self.is_remote:
            matched = self._remote_cmd({"cmd": "find_session", "url_pattern": url_pattern}).get('r', [])
        else:
            matched = self.find_session(url_pattern)
        if not matched:
            print(f"警告: 未找到URL包含 '{url_pattern}' 的会话")
            return None
        if len(matched) > 1:
            print(f"警告: 找到多个URL包含 '{url_pattern}' 的会话,选择第一个")
        self.default_session_id, info = matched[0]
        print(f"成功设置默认会话: {self.default_session_id}: {info['url']}")
        return self.default_session_id

    def jump(self, url, timeout=10):
        """Navigate the default session to ``url``."""
        # json.dumps emits a quoted, escaped JS string literal, so quotes or
        # backslashes in the URL cannot break out of the assignment.
        self.execute_js(f"window.location.href={json.dumps(url)}", timeout=timeout)
283
+
284
if __name__ == "__main__":
    # Script entry: build a driver on the default local endpoint.
    # NOTE(review): both servers run on daemon threads, so this process
    # presumably exits right after construction unless run interactively
    # -- confirm.
    driver = TMWebDriver('127.0.0.1', 18765)
cdp_bridge/__init__.py ADDED
@@ -0,0 +1,23 @@
1
+ import argparse
2
+ from importlib import resources
3
+
4
+ from .server import mcp
5
+
6
+
7
def main():
    """Console entry point: parse the command line, then serve MCP requests."""
    # No options are defined; parsing still provides --help and rejects
    # unexpected arguments.
    cli = argparse.ArgumentParser(description="Run the CDP Bridge MCP server for browser automation through the companion extension.")
    cli.parse_args()
    mcp.run()
14
+
15
+
16
def extension_path():
    """Write the location of the bundled Chrome extension folder to stdout."""
    package_root = resources.files(__package__)
    print(package_root / "tmwd_cdp_bridge")
20
+
21
+
22
# Start the server when this module is executed as the main program.
if __name__ == "__main__":
    main()
cdp_bridge/__main__.py ADDED
@@ -0,0 +1,5 @@
1
+ from . import main
2
+
3
+
4
# `python -m cdp_bridge` lands here; hand over to the package entry point.
if __name__ == "__main__":
    main()
cdp_bridge/server.py ADDED
@@ -0,0 +1,164 @@
1
+ import asyncio, json
2
+ import importlib
3
+
4
+ from mcp.server.fastmcp import FastMCP
5
+
6
+ from . import simphtml
7
+
8
+ mcp = FastMCP("tmwebdriver-bridge")
9
+
10
# Process-wide driver instance, created on first use by get_driver().
driver = None


def get_driver():
    """Return the shared TMWebDriver, building it on the first call."""
    global driver
    if driver is not None:
        return driver
    # Imported here rather than at module import time, as in the original.
    from .TMWebDriver import TMWebDriver
    driver = TMWebDriver()
    return driver
18
+
19
+
20
@mcp.tool()
async def browser_get_tabs() -> str:
    """Get all open browser tabs with their IDs, URLs, and titles."""
    def _collect():
        # Blocking driver work; executed off the event loop below.
        drv = get_driver()
        tab_list = drv.get_all_sessions()
        for entry in tab_list:
            # Drop bookkeeping fields from the reported tab entries.
            for hidden in ('connected_at', 'type'):
                entry.pop(hidden, None)
        payload = {"tabs": tab_list, "active_tab": drv.default_session_id}
        return json.dumps(payload, ensure_ascii=False)

    return await asyncio.to_thread(_collect)
31
+
32
+
33
@mcp.tool()
async def browser_scan(tabs_only: bool = False, switch_tab_id: str = "", text_only: bool = False) -> str:
    """Get simplified HTML content of the active tab plus tab list. The HTML is optimized for LLM consumption (stripped of scripts, styles, invisible elements).

    Args:
        tabs_only: Only return tab list without page content (saves tokens).
        switch_tab_id: Switch to this tab before scanning.
        text_only: Return plain text instead of simplified HTML.
    """
    def _run():
        # Blocking driver work; executed off the event loop below.
        d = get_driver()
        if len(d.get_all_sessions()) == 0:
            return json.dumps({"status": "error", "msg": "No browser tabs connected. Ensure Chrome extension is running."}, ensure_ascii=False)

        if switch_tab_id:
            d.default_session_id = switch_tab_id

        tabs = []
        for sess in d.get_all_sessions():
            sess.pop('connected_at', None)
            sess.pop('type', None)
            # A session can be registered with url=None; `or ''` keeps the
            # truncation from raising TypeError on such entries.
            sess['url'] = (sess.get('url') or '')[:80]
            tabs.append(sess)

        result = {
            "status": "success",
            "metadata": {"tabs_count": len(tabs), "tabs": tabs, "active_tab": d.default_session_id},
        }
        if not tabs_only:
            # NOTE(review): reloading simphtml on every call looks like a
            # development hot-reload aid; kept as-is -- confirm it is still
            # wanted, since calls may run concurrently in worker threads.
            importlib.reload(simphtml)
            result["content"] = simphtml.get_html(d, cutlist=True, maxchars=35000, text_only=text_only)
        return json.dumps(result, ensure_ascii=False, default=str)

    return await asyncio.to_thread(_run)
66
+
67
+
68
@mcp.tool()
async def browser_execute_js(script: str, switch_tab_id: str = "", no_monitor: bool = False) -> str:
    """Execute JavaScript in the browser and capture results plus DOM changes.

    Args:
        script: JavaScript code to execute (or JSON command for CDP operations).
        switch_tab_id: Switch to this tab before executing.
        no_monitor: Skip DOM change monitoring (faster, less info).
    """
    def _execute():
        # Blocking driver work; executed off the event loop below.
        drv = get_driver()
        if not len(drv.get_all_sessions()):
            failure = {"status": "error", "msg": "No browser tabs connected."}
            return json.dumps(failure, ensure_ascii=False)
        if switch_tab_id:
            drv.default_session_id = switch_tab_id
        # simphtml is reloaded before each execution, as in the original.
        importlib.reload(simphtml)
        outcome = simphtml.execute_js_rich(script, drv, no_monitor=no_monitor)
        return json.dumps(outcome, ensure_ascii=False, default=str)

    return await asyncio.to_thread(_execute)
87
+
88
+
89
@mcp.tool()
async def browser_switch_tab(tab_id: str) -> str:
    """Switch the active browser tab.

    Args:
        tab_id: The tab ID to switch to (from browser_get_tabs).
    """
    def _run():
        d = get_driver()
        wanted = str(tab_id)
        # get_all_sessions() lists only active sessions and also works when
        # the driver is a remote client, whose own `sessions` dict is empty.
        for sess in d.get_all_sessions():
            if str(sess.get('id')) == wanted:
                # Switch only after the tab is confirmed, so a bad id
                # leaves the previous default untouched.
                d.default_session_id = sess['id']
                return json.dumps({"status": "success", "active_tab": tab_id, "url": sess.get('url', '')}, ensure_ascii=False)
        return json.dumps({"status": "error", "msg": f"Tab {tab_id} not found or disconnected."}, ensure_ascii=False)

    return await asyncio.to_thread(_run)
104
+
105
+
106
@mcp.tool()
async def browser_navigate(url: str) -> str:
    """Navigate the active tab to a URL.

    Args:
        url: The URL to navigate to.
    """
    def _go():
        # Blocking driver work; executed off the event loop below.
        drv = get_driver()
        if not len(drv.get_all_sessions()):
            failure = {"status": "error", "msg": "No browser tabs connected."}
            return json.dumps(failure, ensure_ascii=False)
        drv.jump(url, timeout=10)
        # The reply only confirms that navigation was requested.
        confirmation = {"status": "success", "msg": f"Navigating to {url}"}
        return json.dumps(confirmation, ensure_ascii=False)

    return await asyncio.to_thread(_go)
120
+
121
+
122
@mcp.tool()
async def browser_screenshot(tab_id: str = "") -> str:
    """Take a screenshot of the active tab (returns base64 PNG).

    Args:
        tab_id: Optional tab ID to screenshot. Uses active tab if empty.
    """
    def _run():
        d = get_driver()
        if len(d.get_all_sessions()) == 0:
            return json.dumps({"status": "error", "msg": "No browser tabs connected."}, ensure_ascii=False)
        cmd = {"cmd": "cdp", "method": "Page.captureScreenshot", "params": {"format": "png"}}
        if tab_id:
            try:
                cmd["tabId"] = int(tab_id)
            except ValueError:
                # Report a non-numeric id as a tool error instead of raising.
                return json.dumps({"status": "error", "msg": f"Invalid tab_id {tab_id!r}: expected a numeric tab ID."}, ensure_ascii=False)
        result = d.execute_js(json.dumps(cmd))
        if 'data' not in result:
            # execute_js returns {'result': <message>} (no 'data' key) when it
            # timed out or the tab reloaded; that is not a screenshot.
            return json.dumps({"status": "error", "msg": result.get('result', 'No screenshot data returned.')}, ensure_ascii=False, default=str)
        data = result['data']
        if isinstance(data, dict) and 'data' in data:
            return json.dumps({"status": "success", "format": "png", "base64": data['data']}, ensure_ascii=False)
        return json.dumps({"status": "success", "data": data}, ensure_ascii=False, default=str)

    return await asyncio.to_thread(_run)
142
+
143
+
144
@mcp.tool()
async def browser_cookies(url: str = "") -> str:
    """Get cookies for the current page or a specific URL.

    Args:
        url: URL to get cookies for. If empty, gets cookies for the active tab's URL.
    """
    def _run():
        d = get_driver()
        if len(d.get_all_sessions()) == 0:
            return json.dumps({"status": "error", "msg": "No browser tabs connected."}, ensure_ascii=False)
        cmd = {"cmd": "cookies"}
        if url:
            cmd["url"] = url
        result = d.execute_js(json.dumps(cmd))
        if 'data' not in result:
            # execute_js returns {'result': <message>} (no 'data' key) when it
            # timed out or the tab reloaded; do not report that as an empty
            # but successful cookie list.
            return json.dumps({"status": "error", "msg": result.get('result', 'No cookie data returned.')}, ensure_ascii=False, default=str)
        return json.dumps({"status": "success", "cookies": result['data']}, ensure_ascii=False, default=str)

    return await asyncio.to_thread(_run)
161
+
162
+
163
# Serve MCP requests when this module is executed as the main program.
if __name__ == "__main__":
    mcp.run()