mcp-query-table 0.3.11__py3-none-any.whl → 0.3.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  from ._version import __version__
2
2
 
3
3
  from .enums import QueryType, Site, Provider
4
- from .tool import BrowserManager, query, chat
4
+ from .tool import query, chat
5
5
 
6
6
  TIMEOUT = 1000 * 60 * 3 # 3分钟,在抓取EventStream数据时等待数据返回,防止外层30秒超时
7
7
  TIMEOUT_60 = 1000 * 60 # 1分钟
@@ -1,5 +1,6 @@
1
- import getpass
1
+ import asyncio
2
2
 
3
+ from mcp_query_table.playwright_helper import get_chrome_use_data
3
4
  from mcp_query_table.server import serve
4
5
 
5
6
 
@@ -17,7 +18,7 @@ def main():
17
18
  parser.add_argument("--executable_path", type=str, help="浏览器路径",
18
19
  nargs="?", default=r'C:\Program Files\Google\Chrome\Application\chrome.exe')
19
20
  parser.add_argument("--user_data_dir", type=str, help="浏览器用户数据目录",
20
- nargs="?", default=rf'C:\Users\{getpass.getuser()}\AppData\Local\Google\Chrome\User Data')
21
+ nargs="?", default=f'{get_chrome_use_data()}')
21
22
  parser.add_argument("--transport", type=str, help="传输类型",
22
23
  default='stdio', choices=['stdio', 'sse', 'streamable-http'])
23
24
  parser.add_argument("--host", type=str, help="MCP服务端绑定地址",
@@ -25,9 +26,10 @@ def main():
25
26
  parser.add_argument("--port", type=int, help="MCP服务端绑定端口",
26
27
  default='8000')
27
28
  args = parser.parse_args()
28
- serve(args.format, args.endpoint,
29
- args.executable_path, args.user_data_dir,
30
- args.transport, args.host, args.port)
29
+
30
+ asyncio.run(serve(args.format,
31
+ args.endpoint, args.executable_path, args.user_data_dir,
32
+ args.transport, args.host, args.port))
31
33
 
32
34
 
33
35
  if __name__ == "__main__":
@@ -1 +1 @@
1
- __version__ = "0.3.11"
1
+ __version__ = "0.3.13"
@@ -0,0 +1,351 @@
1
+ import shutil
2
+ import subprocess
3
+ import sys
4
+ import time
5
+ from pathlib import Path
6
+ from typing import Literal
7
+ from urllib.parse import urlparse
8
+
9
+ import psutil
10
+ from loguru import logger
11
+ from playwright.async_api import async_playwright
12
+ from playwright.sync_api import sync_playwright
13
+ from playwright_stealth import Stealth
14
+
15
+
16
def get_chrome_path() -> str:
    """Return the conventional location of the Google Chrome executable.

    macOS and Windows map to fixed install paths and Linux to
    ``/usr/bin/google-chrome``.  On any other platform the executable is
    looked up on ``PATH`` with ``shutil.which`` (which may yield ``None``).
    The returned path is not checked for existence.
    """
    fixed_locations = {
        "darwin": "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
        "win32": r"C:\Program Files\Google\Chrome\Application\chrome.exe",
    }
    current = sys.platform
    if current in fixed_locations:
        return fixed_locations[current]
    if current.startswith("linux"):
        return "/usr/bin/google-chrome"
    # Unknown platform: fall back to whatever is on PATH.
    return shutil.which('google-chrome')
25
+
26
+
27
def get_edge_path() -> str:
    """Return the conventional location of the Microsoft Edge executable.

    macOS and Windows map to fixed install paths and Linux to
    ``/usr/bin/microsoft-edge``.  On any other platform the executable is
    looked up on ``PATH`` with ``shutil.which`` (which may yield ``None``).
    The returned path is not checked for existence.
    """
    fixed_locations = {
        "darwin": "/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge",
        "win32": r"C:\Program Files (x86)\Microsoft\Edge\Application\msedge.exe",
    }
    current = sys.platform
    if current in fixed_locations:
        return fixed_locations[current]
    if current.startswith("linux"):
        return "/usr/bin/microsoft-edge"
    # Unknown platform: fall back to whatever is on PATH.
    return shutil.which('microsoft-edge')
36
+
37
+
38
+ def get_edge_use_data():
39
+ """Edge用户配置路径"""
40
+ if sys.platform == "darwin":
41
+ return Path.home() / "Library/Application Support/Google/Chrome"
42
+ elif sys.platform == "win32":
43
+ return Path.home() / r"AppData\Local\Google\Chrome\User Data"
44
+ elif sys.platform.startswith("linux"):
45
+ return Path.home() / ".config/google-chrome"
46
+ return None
47
+
48
+
49
+ def get_chrome_use_data():
50
+ """Chrome用户配置路径"""
51
+ if sys.platform == "darwin":
52
+ return Path.home() / "Library/Application Support/Microsoft Edge"
53
+ elif sys.platform == "win32":
54
+ return Path.home() / r"AppData\Local\Microsoft\Edge\User Data"
55
+ elif sys.platform.startswith("linux"):
56
+ return Path.home() / ".config/microsoft-edge"
57
+ return None
58
+
59
+
60
def get_browser(proc_name: Literal["chrome.exe", "msedge.exe"], port: int | None = None) -> psutil.Process | None:
    """Find a running browser process, optionally one exposing a CDP port.

    Parameters
    ----------
    proc_name:
        Executable name to look for.  Matching is a case-insensitive
        substring test against each process name.
    port:
        When given, only a process whose command line carries exactly
        ``--remote-debugging-port=<port>`` is accepted.  When ``None`` the
        first process matching by name is returned.

    Returns
    -------
    psutil.Process or None
        The first matching process, or ``None`` when nothing matches.
    """
    wanted = proc_name.lower()
    flag = f"--remote-debugging-port={port}"
    for proc in psutil.process_iter(["name", "cmdline"]):
        # psutil substitutes None for attributes it was not allowed to read.
        name = (proc.info["name"] or "").lower()
        if wanted not in name:
            continue
        if port is None:
            return proc
        cmdline = proc.info["cmdline"] or []
        # Compare whole tokens so that port 922 does not match 9222; split()
        # also copes with a command line reported as one joined string.
        if any(flag in arg.split() for arg in cmdline):
            return proc
    return None
73
+
74
+
75
def kill_browsers(proc_name: Literal["chrome.exe", "msedge.exe"]) -> None:
    """Kill every running process whose name contains *proc_name*.

    Edge only honours the remote-debugging flag when it is started with no
    other Edge process alive, so all existing instances are terminated
    before a new one is launched.

    Parameters
    ----------
    proc_name:
        Executable name to look for.  Matching is a case-insensitive
        substring test against each process name.
    """
    wanted = proc_name.lower()
    for proc in psutil.process_iter(["name"]):
        # psutil substitutes None for attributes it was not allowed to read.
        name = (proc.info["name"] or "").lower()
        if wanted not in name:
            continue
        try:
            proc.kill()
        except psutil.NoSuchProcess:
            # The process exited between enumeration and the kill call.
            continue
        except psutil.AccessDenied:
            # Keep going so one protected process does not leave the rest alive.
            logger.warning("no permission to kill {} (pid={})", name, proc.pid)
85
+
86
+
87
def start_browser(browser_path: str, port: int, devtools: bool, user_data_dir: str | None = None):
    """Spawn a browser process with remote debugging enabled.

    Parameters
    ----------
    browser_path:
        Path of the browser executable to run.
    port:
        Value passed as ``--remote-debugging-port``.
    devtools:
        When true, DevTools is opened automatically for every tab.
    user_data_dir:
        Optional profile directory passed as ``--user-data-dir``.

    Returns
    -------
    subprocess.Popen
        Handle of the spawned process.
    """
    argv = [browser_path, f'--remote-debugging-port={port}', '--start-maximized']
    argv += ['--auto-open-devtools-for-tabs'] if devtools else []
    if user_data_dir:
        # Chrome cannot use its default profile here because of security restrictions.
        argv.append(f'--user-data-dir={user_data_dir}')
    return subprocess.Popen(argv)
96
+
97
+
98
def is_local_url(url: str) -> bool:
    """Return True when the host of *url* is the local machine.

    Only the host component is inspected, so a remote address that merely
    contains ``localhost`` or ``127.0.0.1`` somewhere (for example in a
    sub-domain or query string) is not treated as local.  Recognised hosts
    are ``localhost``, ``127.0.0.1`` and ``::1``; the comparison is
    case-insensitive.  A value without a scheme such as ``localhost:9222``
    is accepted as well.
    """
    # Without "//" urlparse would not recognise a network location.
    candidate = url if "://" in url else f"//{url}"
    try:
        host = urlparse(candidate).hostname
    except ValueError:
        # Malformed address (e.g. unbalanced IPv6 brackets).
        return False
    return host in ('localhost', '127.0.0.1', '::1')
104
+
105
+
106
def is_cdp_url(url: str) -> bool:
    """Return True when *url* should be treated as a CDP address.

    Anything that does not start with ``ws://`` or ``wss://`` counts as a
    CDP address; the prefix check is case-sensitive.
    """
    return not url.startswith(('ws://', 'wss://'))
111
+
112
+
113
def is_url(url: str) -> bool:
    """Return True unless *url* belongs to a browser-internal page.

    DevTools windows, extension pages and ``chrome://`` / ``edge://`` pages
    are rejected so that they are never reused as a working tab.
    """
    internal_prefixes = (
        "devtools://",
        "chrome-extension://",
        "extension://",
        "chrome://",
        "edge://",
    )
    return not url.startswith(internal_prefixes)
125
+
126
+
127
+ class BaseBrowser:
128
+ def __init__(self, endpoint: str | None, executable_path: str | None = None, devtools: bool = False, user_data_dir: str | None = None):
129
+ """连接参数
130
+
131
+ Parameters
132
+ ----------
133
+ endpoint:
134
+ 浏览器CDP地址或服务器WS地址
135
+ executable_path:
136
+ Chrome或Edge的绝对路径
137
+ devtools:
138
+ 是否显示开发者工具
139
+ user_data_dir
140
+ 用户数据。可解决登录问题
141
+
142
+ Examples
143
+ --------
144
+ # python退出chrome也自动退出,需提前`playwright install chromium`
145
+ async with AsyncBrowser(endpoint=None, user_data_dir="D:\\user_data") as browser:
146
+ pass
147
+
148
+ # 本地先启动chrome进程,然后使用CDP协议进行控制
149
+ # chrome一定要另外提供`user_data_dir`
150
+ # edge需要完全退出才能启动时启动CDP
151
+ async with AsyncBrowser(endpoint="http://127.0.0.1:9222", executable_path=get_chrome_path(), user_data_dir="D:\\user_data") as browser:
152
+ pass
153
+
154
+ # 连接到远程的Playwright Server
155
+ # 参考 https://playwright.dev/python/docs/docker#connecting-to-the-server
156
+ with SyncBrowser(endpoint="ws://127.0.0.1:3000") as browser:
157
+ pass
158
+
159
+ """
160
+ self.endpoint = endpoint
161
+ self.executable_path = executable_path
162
+ self.devtools = devtools
163
+ self.user_data_dir = user_data_dir
164
+
165
+ self.headless = False
166
+ self.playwright = None
167
+ self.browser = None
168
+
169
+ self.timeout = 20000
170
+ self.slow_mo = 1000
171
+
172
+ def _start_chrome(self, sleep: int = 5):
173
+ if self.executable_path:
174
+ name = Path(self.executable_path).name
175
+ port = urlparse(self.endpoint).port
176
+ proc = get_browser(name, port)
177
+ if proc is None:
178
+ kill_browsers(name)
179
+ ret = start_browser(self.executable_path, port, self.devtools, self.user_data_dir)
180
+ time.sleep(sleep)
181
+ else:
182
+ logger.warning("连接本地CDP时未提供executable_path,需手工启动带参数的浏览器")
183
+
184
+
185
class AsyncBrowser(BaseBrowser):
    """Async context manager yielding a launched or connected browser.

    On entry Playwright is started and, depending on ``endpoint``, a
    browser is launched locally, attached to over CDP, or reached through
    a Playwright server.  The returned browser object gets an extra
    ``get_page`` coroutine function attached to it.
    """

    async def __aenter__(self):
        try:
            await self._launch()
        except BaseException:
            # __aexit__ is not invoked when __aenter__ raises, so release
            # whatever was already started before propagating the error.
            await self.__aexit__(None, None, None)
            raise

        async def get_page():
            """Return a reusable page of the first context, or a new one."""
            contexts = self.browser.contexts
            if contexts:
                context = contexts[0]
            else:
                context = await self.browser.new_context()

            await Stealth().apply_stealth_async(context)

            # Reuse the first tab that is not a browser-internal page.
            for page in context.pages:
                if is_url(page.url):
                    return page

            return await context.new_page()

        self.browser.get_page = get_page

        return self.browser

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        try:
            if self.browser:
                await self.browser.close()
        finally:
            # Stop Playwright even when closing the browser failed.
            if self.playwright:
                await self.playwright.stop()

    async def _launch(self):
        """Start Playwright, then launch or connect according to ``endpoint``."""
        self.playwright = await async_playwright().start()
        if self.endpoint is None:
            await self._connect_to_launch()
            return
        elif is_local_url(self.endpoint) and is_cdp_url(self.endpoint):
            # NOTE: synchronous; may block (time.sleep) while the browser starts.
            self._start_chrome()

        kwargs = {
            "timeout": self.timeout,
            "slow_mo": self.slow_mo,
        }
        if is_cdp_url(self.endpoint):
            self.browser = await self.playwright.chromium.connect_over_cdp(self.endpoint, **kwargs)
        else:
            # https://playwright.dev/python/docs/docker#connecting-to-the-server
            self.browser = await self.playwright.chromium.connect(self.endpoint, **kwargs)

    async def _connect_to_launch(self) -> None:
        """Launch a browser directly through Playwright.

        Raises
        ------
        ConnectionError
            When launching with ``user_data_dir`` fails.
        """
        logger.info("executable_path={}", self.executable_path)
        kwargs = {
            "executable_path": self.executable_path,
            "headless": self.headless,
            "timeout": self.timeout,
            "slow_mo": self.slow_mo,
            # "devtools": self.devtools,
        }
        if self.user_data_dir:
            logger.info("user_data_dir={}", self.user_data_dir)
            try:
                context = await self.playwright.chromium.launch_persistent_context(user_data_dir=self.user_data_dir, **kwargs)
            except Exception as exc:
                # Only real errors are converted (not cancellation or
                # KeyboardInterrupt) and the original cause is kept.
                raise ConnectionError(f"launch失败,可能已经有浏览器已经打开了数据目录。{self.user_data_dir}") from exc
            # NOTE(review): some Playwright versions document
            # `context.browser` as None for persistent contexts -- confirm.
            self.browser = context.browser
        else:
            logger.warning("未指定浏览器用户数据目录,部分需要的网站可能无法使用")
            self.browser = await self.playwright.chromium.launch(**kwargs)
254
+
255
+
256
class SyncBrowser(BaseBrowser):
    """Context manager yielding a launched or connected browser (sync API).

    On entry Playwright is started and, depending on ``endpoint``, a
    browser is launched locally, attached to over CDP, or reached through
    a Playwright server.  The returned browser object gets an extra
    ``get_page`` function attached to it.
    """

    def __enter__(self):
        try:
            self._launch()
        except BaseException:
            # __exit__ is not invoked when __enter__ raises, so release
            # whatever was already started before propagating the error.
            self.__exit__(None, None, None)
            raise

        def get_page():
            """Return a reusable page of the first context, or a new one."""
            contexts = self.browser.contexts
            if contexts:
                context = contexts[0]
            else:
                context = self.browser.new_context()

            Stealth().apply_stealth_sync(context)

            # Reuse the first tab that is not a browser-internal page.
            for page in context.pages:
                if is_url(page.url):
                    return page

            return context.new_page()

        self.browser.get_page = get_page

        return self.browser

    def __exit__(self, exc_type, exc_val, exc_tb):
        try:
            if self.browser:
                self.browser.close()
        finally:
            # Stop Playwright even when closing the browser failed.
            if self.playwright:
                self.playwright.stop()

    def _launch(self):
        """Start Playwright, then launch or connect according to ``endpoint``."""
        self.playwright = sync_playwright().start()
        if self.endpoint is None:
            self._connect_to_launch()
            return
        elif is_local_url(self.endpoint) and is_cdp_url(self.endpoint):
            self._start_chrome()

        kwargs = {
            "timeout": self.timeout,
            "slow_mo": self.slow_mo,
        }
        if is_cdp_url(self.endpoint):
            self.browser = self.playwright.chromium.connect_over_cdp(self.endpoint, **kwargs)
        else:
            # https://playwright.dev/python/docs/docker#connecting-to-the-server
            self.browser = self.playwright.chromium.connect(self.endpoint, **kwargs)

    def _connect_to_launch(self) -> None:
        """Launch a browser directly through Playwright.

        Raises
        ------
        ConnectionError
            When launching with ``user_data_dir`` fails.
        """
        logger.info("executable_path={}", self.executable_path)
        kwargs = {
            "executable_path": self.executable_path,
            "headless": self.headless,
            "timeout": self.timeout,
            "slow_mo": self.slow_mo,
            # "devtools": self.devtools,
        }
        if self.user_data_dir:
            logger.info("user_data_dir={}", self.user_data_dir)
            try:
                context = self.playwright.chromium.launch_persistent_context(user_data_dir=self.user_data_dir, **kwargs)
            except Exception as exc:
                # Only real errors are converted (not KeyboardInterrupt)
                # and the original cause is kept.
                raise ConnectionError(f"launch失败,可能已经有浏览器已经打开了数据目录。{self.user_data_dir}") from exc
            # NOTE(review): some Playwright versions document
            # `context.browser` as None for persistent contexts -- confirm.
            self.browser = context.browser
        else:
            logger.warning("未指定浏览器用户数据目录,部分需要的网站可能无法使用")
            self.browser = self.playwright.chromium.launch(**kwargs)
326
+
327
+
328
async def async_main():
    """Manual demo: open the same site directly and through a proxied context.

    Connects to a local CDP endpoint, loads the page once in a page created
    on the browser and once in a page of a proxy-configured context, then
    waits for console input before leaving the context manager.
    """
    manager = AsyncBrowser(
        endpoint="http://127.0.0.1:9222",
        executable_path=get_chrome_path(),
        user_data_dir="D:\\user_data",
    )
    async with manager as browser:
        proxied_context = await browser.new_context(proxy={"server": "http://127.0.0.1:10808"})

        direct_page = await browser.new_page()
        await direct_page.goto("https://ipw.cn/")

        proxied_page = await proxied_context.new_page()
        await proxied_page.goto("https://ipw.cn/")

        # Keep the browser open until the operator presses Enter.
        input("AAA")
336
+
337
+
338
def sync_main():
    """Manual demo: open the same site directly and through a proxied context.

    Connects to a local CDP endpoint, loads the page once in a page created
    on the browser and once in a page of a proxy-configured context, then
    waits for console input before leaving the context manager.
    """
    manager = SyncBrowser(
        endpoint="http://127.0.0.1:9222",
        executable_path=get_chrome_path(),
        user_data_dir="D:\\user_data",
    )
    with manager as browser:
        proxied_context = browser.new_context(proxy={"server": "http://127.0.0.1:10808"})

        direct_page = browser.new_page()
        direct_page.goto("https://ipw.cn/")

        proxied_page = proxied_context.new_page()
        proxied_page.goto("https://ipw.cn/")

        # Keep the browser open until the operator presses Enter.
        input("BBB")
346
+
347
+ # if __name__ == "__main__":
348
+ # import asyncio
349
+ #
350
+ # asyncio.run(async_main())
351
+ # sync_main()
mcp_query_table/server.py CHANGED
@@ -1,12 +1,12 @@
1
1
  from typing import Annotated, List
2
2
 
3
+ import fastmcp
3
4
  from loguru import logger
4
- from mcp.server.fastmcp import FastMCP
5
5
  from pydantic import Field
6
6
 
7
7
  from mcp_query_table import QueryType, Site, query as qt_query, chat as qt_chat
8
8
  from mcp_query_table.enums import Provider
9
- from mcp_query_table.tool import BrowserManager
9
+ from mcp_query_table.playwright_helper import AsyncBrowser
10
10
 
11
11
 
12
12
  class QueryServer:
@@ -14,18 +14,16 @@ class QueryServer:
14
14
  self.format: str = "markdown"
15
15
  self.browser = None
16
16
 
17
- def start(self, format, endpoint, executable_path, user_data_dir):
17
+ async def start(self, format, endpoint, executable_path, user_data_dir):
18
18
  self.format: str = format
19
- self.browser = BrowserManager(endpoint=endpoint,
20
- executable_path=executable_path,
21
- user_data_dir=user_data_dir,
22
- devtools=False,
23
- headless=True)
19
+ self.browser = await AsyncBrowser(endpoint=endpoint,
20
+ executable_path=executable_path,
21
+ devtools=False,
22
+ user_data_dir=user_data_dir).__aenter__()
24
23
 
25
24
  async def query(self, query_input: str, query_type: QueryType, max_page: int, rename: bool, site: Site):
26
25
  page = await self.browser.get_page()
27
26
  df = await qt_query(page, query_input, query_type, max_page, rename, site)
28
- self.browser.release_page(page)
29
27
 
30
28
  if self.format == 'csv':
31
29
  return df.to_csv()
@@ -37,12 +35,10 @@ class QueryServer:
37
35
  async def chat(self, prompt: str, create: bool, files: List[str], provider: Provider):
38
36
  page = await self.browser.get_page()
39
37
  txt = await qt_chat(page, prompt, create, files, provider)
40
- self.browser.release_page(page)
41
38
  return txt
42
39
 
43
40
 
44
- # !!!log_level这一句非常重要,否则Cline/MCP Server/Tools工作不正常
45
- mcp = FastMCP("query_table_mcp", log_level="ERROR")
41
+ mcp = fastmcp.FastMCP("query_table_mcp")
46
42
  qsv = QueryServer()
47
43
 
48
44
 
@@ -71,16 +67,15 @@ async def chat(
71
67
  return await qsv.chat(prompt, create, files, provider)
72
68
 
73
69
 
74
- def serve(format, endpoint, executable_path, user_data_dir, transport, host, port):
75
- qsv.start(format, endpoint, executable_path, user_data_dir)
70
+ async def serve(format, endpoint, executable_path, user_data_dir, transport, host, port):
76
71
  logger.info(f"{endpoint=}")
77
72
  logger.info(f"{executable_path=}")
78
73
  logger.info(f"{user_data_dir=}")
79
- if transport == 'sse':
80
- logger.info(f"{transport=},{format=},{host=},{port=}")
81
- else:
82
- logger.info(f"{transport=},{format=}")
74
+ await qsv.start(format, endpoint, executable_path, user_data_dir)
83
75
 
84
- mcp.settings.host = host
85
- mcp.settings.port = port
86
- mcp.run(transport=transport)
76
+ if transport == "stdio":
77
+ logger.info(f"{transport=},{format=}")
78
+ await mcp.run_async(transport=transport)
79
+ else:
80
+ logger.info(f"{transport=},{format=},{host=},{port=}")
81
+ await mcp.run_async(transport=transport, host=host, port=port)
mcp_query_table/tool.py CHANGED
@@ -1,253 +1,11 @@
1
- import getpass
2
- import subprocess
3
- import sys
4
- import time
5
- from pathlib import Path
6
- from typing import Optional
7
- from urllib.parse import urlparse, quote
1
+ from urllib.parse import quote
8
2
 
9
3
  import pandas as pd
10
- from loguru import logger
11
- from playwright.async_api import async_playwright, Playwright, Page
12
- from playwright_stealth import Stealth
4
+ from playwright.async_api import Page
13
5
 
14
6
  from mcp_query_table.enums import QueryType, Site, Provider
15
7
 
16
8
 
17
- def create_detached_process(command):
18
- # 设置通用参数
19
- kwargs = {}
20
-
21
- if sys.platform == 'win32':
22
- kwargs.update({
23
- # 在PyCharm中运行还是会出现新建进程被关闭
24
- 'creationflags': subprocess.DETACHED_PROCESS | subprocess.CREATE_NEW_PROCESS_GROUP
25
- })
26
- else:
27
- # Unix-like 系统(Linux, macOS)特定设置
28
- kwargs.update({
29
- 'start_new_session': True # 创建新的会话
30
- })
31
- logger.info(f"Popen: {command}")
32
- return subprocess.Popen(command, **kwargs)
33
-
34
-
35
- def is_local_url(url: str) -> bool:
36
- """判断url是否是本地地址"""
37
- for local in ('localhost', '127.0.0.1'):
38
- if local in url.lower():
39
- return True
40
- return False
41
-
42
-
43
- def is_cdp_url(url: str) -> bool:
44
- """判断url是否是CDP地址"""
45
- if url.startswith('ws://') or url.startswith('wss://'):
46
- return False
47
- return True
48
-
49
-
50
- def get_executable_path(executable_path) -> Optional[str]:
51
- """获取浏览器可执行文件路径"""
52
- browsers = {
53
- "default": executable_path,
54
- "chrome.exe": r"C:\Program Files\Google\Chrome\Application\chrome.exe",
55
- "msedge.exe": r"C:\Program Files (x86)\Microsoft\Edge\Application\msedge.exe",
56
- }
57
- for k, v in browsers.items():
58
- if v is None:
59
- continue
60
- if Path(v).exists():
61
- return v
62
- return None
63
-
64
-
65
- def get_user_data_dir(user_data_dir) -> Optional[str]:
66
- """获取浏览器可用户目录"""
67
- browsers = {
68
- "default": user_data_dir,
69
- "chrome.exe": rf'C:\Users\{getpass.getuser()}\AppData\Local\Google\Chrome\User Data', # 使用默认配置文件时无法创建CDP
70
- "msedge.exe": rf"C:\Users\{getpass.getuser()}\AppData\Local\Microsoft\Edge\User Data",
71
- }
72
- for k, v in browsers.items():
73
- if v is None:
74
- continue
75
- if Path(v).exists():
76
- return v
77
- return None
78
-
79
-
80
- class BrowserManager:
81
- async def __aenter__(self):
82
- return self
83
-
84
- async def __aexit__(self, exc_type, exc_val, exc_tb):
85
- await self.cleanup()
86
-
87
- def __init__(self,
88
- endpoint: Optional[str],
89
- executable_path: Optional[str] = None,
90
- devtools: bool = False,
91
- headless: bool = True,
92
- user_data_dir: Optional[str] = None):
93
- """
94
-
95
- Parameters
96
- ----------
97
- endpoint:str or None
98
- 浏览器CDP地址/WS地址。
99
- 如果为None,则直接启动浏览器实例。可用无头模式。建议指定用户数据目录,否则可能无法使用某些需要登录的网站
100
- executable_path:str
101
- 浏览器可执行文件路径。推荐使用chrome,因为Microsoft Edge必须在任务管理器中完全退出才能启动调试端口
102
- devtools:bool
103
- 是否显示开发者工具
104
- headless:bool
105
- 是否无头模式启动浏览器
106
- user_data_dir:str
107
- 浏览器用户数据目录。无头模式。强烈建议指定用户数据目录,否则可能无法使用某些需要登录的网站
108
-
109
- """
110
- if devtools:
111
- headless = False
112
-
113
- self.endpoint = endpoint
114
- self.executable_path = executable_path
115
- self.devtools = devtools
116
- self.headless = headless
117
- self.user_data_dir = user_data_dir
118
-
119
- self.playwright: Optional[Playwright] = None
120
- self.browser = None
121
- self.context = None
122
- # 空闲page池
123
- self.pages = []
124
-
125
- async def cleanup(self):
126
- if self.browser:
127
- await self.browser.close()
128
- if self.playwright:
129
- await self.playwright.stop()
130
-
131
- async def _connect_to_local(self) -> None:
132
- """连接本地浏览器"""
133
- port = urlparse(self.endpoint).port
134
- executable_path = get_executable_path(self.executable_path)
135
- name = Path(executable_path).name
136
- command = [executable_path, f'--remote-debugging-port={port}', '--start-maximized']
137
- if self.devtools:
138
- command.append('--auto-open-devtools-for-tabs')
139
- if self.user_data_dir:
140
- command.append(f'--user-data-dir={self.user_data_dir}')
141
- else:
142
- logger.warning('Chrome必须另行指定`--user-data-dir`才能创建CDP连接')
143
-
144
- for i in range(2):
145
- try:
146
- self.browser = await self.playwright.chromium.connect_over_cdp(self.endpoint,
147
- timeout=10000, slow_mo=1000)
148
- break
149
- except:
150
- if i == 0:
151
- create_detached_process(command)
152
- time.sleep(5)
153
- continue
154
- if i == 1:
155
- raise ConnectionError(
156
- f"已提前打开了浏览器,但未开启远程调试端口?请关闭浏览器全部进程后重试 `taskkill /f /im {name}`")
157
-
158
- async def _connect_to_remote(self) -> None:
159
- """连接远程浏览器"""
160
- try:
161
- if is_cdp_url(self.endpoint):
162
- self.browser = await self.playwright.chromium.connect_over_cdp(self.endpoint,
163
- timeout=10000, slow_mo=1000)
164
- else:
165
- self.browser = await self.playwright.chromium.connect(self.endpoint,
166
- timeout=10000, slow_mo=1000)
167
- except:
168
- raise ConnectionError(f"连接远程浏览器失败,请检查CDP/WS地址和端口是否正确。{self.endpoint}")
169
-
170
- async def _connect_to_launch(self) -> None:
171
- logger.info("executable_path={}", self.executable_path)
172
- if self.user_data_dir:
173
- logger.info("user_data_dir={}", self.user_data_dir)
174
- try:
175
- self.context = await self.playwright.chromium.launch_persistent_context(
176
- user_data_dir=self.user_data_dir,
177
- executable_path=self.executable_path,
178
- headless=self.headless,
179
- devtools=self.devtools,
180
- timeout=10000, slow_mo=1000)
181
- except:
182
- raise ConnectionError(f"launch失败,可能已经有浏览器已经打开了数据目录。{self.user_data_dir}")
183
- else:
184
- logger.warning("未指定浏览器用户数据目录,部分需要的网站可能无法使用")
185
- self.browser = await self.playwright.chromium.launch(
186
- executable_path=self.executable_path,
187
- headless=self.headless,
188
- devtools=self.devtools)
189
-
190
- async def _launch(self) -> None:
191
- """启动浏览器,并连接CDP协议
192
-
193
- References
194
- ----------
195
- https://blog.csdn.net/qq_30576521/article/details/142370538
196
-
197
- """
198
- self.playwright = await async_playwright().start()
199
- if self.endpoint is None:
200
- await self._connect_to_launch()
201
- elif is_local_url(self.endpoint) and is_cdp_url(self.endpoint):
202
- await self._connect_to_local()
203
- else:
204
- await self._connect_to_remote()
205
-
206
- if self.browser is None:
207
- pass
208
- elif len(self.browser.contexts) == 0:
209
- self.context = await self.browser.new_context()
210
- else:
211
- self.context = self.browser.contexts[0]
212
- # 爱问财,无头模式,需要使用 stealth 插件
213
- await Stealth().apply_stealth_async(self.context)
214
-
215
- # 复用打开的page
216
- for page in self.context.pages:
217
- # 防止开发者工具被使用
218
- if page.url.startswith("devtools://"):
219
- continue
220
- # 防止chrome扩展被使用
221
- if page.url.startswith("chrome-extension://"):
222
- continue
223
- # 防止edge扩展被使用
224
- if page.url.startswith("extension://"):
225
- continue
226
- self.pages.append(page)
227
-
228
- async def get_page(self) -> Page:
229
- """获取可用Page。无空闲标签时会打开新标签"""
230
- if self.context is None:
231
- await self._launch()
232
-
233
- # 反复取第一个tab
234
- while len(self.pages) > 0:
235
- page = self.pages.pop()
236
- if page.is_closed():
237
- continue
238
- return page
239
-
240
- # 不够,新建一个
241
- return await self.context.new_page()
242
-
243
- def release_page(self, page) -> None:
244
- """用完的Page释放到池中。如果用完不放回,get_page会一直打开新标签"""
245
- if page.is_closed():
246
- return
247
- # 放回
248
- self.pages.append(page)
249
-
250
-
251
9
  async def query(
252
10
  page: Page,
253
11
  query_input: str = "收盘价>100元",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mcp_query_table
3
- Version: 0.3.11
3
+ Version: 0.3.13
4
4
  Summary: query table from website, support MCP
5
5
  Author-email: wukan <wu-kan@163.com>
6
6
  License: MIT License
@@ -29,11 +29,12 @@ Keywords: eastmoney,iwencai,mcp,playwright,table,tdx
29
29
  Classifier: Development Status :: 4 - Beta
30
30
  Classifier: Programming Language :: Python
31
31
  Requires-Python: >=3.10
32
+ Requires-Dist: fastmcp
32
33
  Requires-Dist: loguru
33
- Requires-Dist: mcp
34
34
  Requires-Dist: pandas
35
35
  Requires-Dist: playwright
36
36
  Requires-Dist: playwright-stealth>=2.0.0
37
+ Requires-Dist: psutil
37
38
  Requires-Dist: tabulate
38
39
  Description-Content-Type: text/markdown
39
40
 
@@ -1,9 +1,10 @@
1
- mcp_query_table/__init__.py,sha256=K-0DU2hpeRvM9ZAuky0aWZtJLuOgKg7ZRd-pL9noc0o,330
2
- mcp_query_table/__main__.py,sha256=Hl70DkzAY1wJNnrirjiMRXHhemptXXy_1Q3sASMTWSk,1472
3
- mcp_query_table/_version.py,sha256=TESjMH0a_iUkwdfWT4nyzKizSFmmCY2omxnS2XyT97Y,23
1
+ mcp_query_table/__init__.py,sha256=u325hAlABfQJZZerqmEGQxr5b4r1KJzx0BaP1I0vieY,314
2
+ mcp_query_table/__main__.py,sha256=8I1HC8EJ_JjHUZ2r3dlBt-bs9Sz5NIPT9mnpheiypEg,1532
3
+ mcp_query_table/_version.py,sha256=jXmhGysidmiPOxLAzcyzQjjT98GxaQk2jHMECTsHr04,23
4
4
  mcp_query_table/enums.py,sha256=7bu0m0zJBIfiS-eHGURw1ZHWNXgsq6gH1SztUhCgF-Y,678
5
- mcp_query_table/server.py,sha256=re3UnNAe75IJAH8oyJZAzYAzp_rg3uH1vIroKJ7k69w,3871
6
- mcp_query_table/tool.py,sha256=3wUZfvfA_4fNmc5sWi60gdGuqT5y7ZPKt-Nibw9NzY4,11737
5
+ mcp_query_table/playwright_helper.py,sha256=piFOHQ0cPTw3DrpmzdjaRZgr9r_bymYpaetJCWByjho,12151
6
+ mcp_query_table/server.py,sha256=nYHFvF74w0NtThfiCrcxO0tbDvWNn3KPTYJ8RoMGS1M,3710
7
+ mcp_query_table/tool.py,sha256=FY9LvCAbkYOhELHwr0WWTZzvf5HgrrmscST6EieV2iQ,2795
7
8
  mcp_query_table/utils.py,sha256=MUKcklPF9TkABhM8wN0-kW0iy9AlmjL6oycZyxB_Qk8,722
8
9
  mcp_query_table/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
10
  mcp_query_table/providers/baidu.py,sha256=S75D2zbpqG1r4Rxz7pJf5u2ZHNLO8nqV-LPEjlACtHg,3390
@@ -13,7 +14,7 @@ mcp_query_table/sites/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3h
13
14
  mcp_query_table/sites/eastmoney.py,sha256=wuM1rJkuQKrtL0a6ZyslMohPtucSgrs_jjaZWdkxhZo,4593
14
15
  mcp_query_table/sites/iwencai.py,sha256=oCxNuGxiYcaMREHGpNoqspmCLlXSkvTFw2_EsmJSzlw,5174
15
16
  mcp_query_table/sites/tdx.py,sha256=-u-rzhmYPW1m3zUFQsd-RUztafC43gQPhtmB6OuqA4M,4184
16
- mcp_query_table-0.3.11.dist-info/METADATA,sha256=s7yPOuqh4HbNpdz5CjRo5INmcSGwxm0neIr0XrM2quA,9830
17
- mcp_query_table-0.3.11.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
18
- mcp_query_table-0.3.11.dist-info/licenses/LICENSE,sha256=rbvv_CTd7biGwT21tvhgQ2zkbPFXOoON7WFQWEdElBA,1063
19
- mcp_query_table-0.3.11.dist-info/RECORD,,
17
+ mcp_query_table-0.3.13.dist-info/METADATA,sha256=gwelESHlSA4OD4Qc0rOi4Cthozw-rqvSwcuzUB2N-IQ,9856
18
+ mcp_query_table-0.3.13.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
19
+ mcp_query_table-0.3.13.dist-info/licenses/LICENSE,sha256=rbvv_CTd7biGwT21tvhgQ2zkbPFXOoON7WFQWEdElBA,1063
20
+ mcp_query_table-0.3.13.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: hatchling 1.27.0
2
+ Generator: hatchling 1.28.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any