mcp-query-table 0.3.6__py3-none-any.whl → 0.3.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mcp_query_table/__init__.py +4 -0
- mcp_query_table/__main__.py +9 -5
- mcp_query_table/_version.py +1 -1
- mcp_query_table/enums.py +1 -1
- mcp_query_table/providers/baidu.py +1 -1
- mcp_query_table/providers/n.py +5 -1
- mcp_query_table/providers/yuanbao.py +4 -1
- mcp_query_table/server.py +16 -8
- mcp_query_table/sites/iwencai.py +5 -2
- mcp_query_table/tool.py +70 -54
- mcp_query_table/utils.py +51 -0
- {mcp_query_table-0.3.6.dist-info → mcp_query_table-0.3.8.dist-info}/METADATA +24 -22
- mcp_query_table-0.3.8.dist-info/RECORD +19 -0
- {mcp_query_table-0.3.6.dist-info → mcp_query_table-0.3.8.dist-info}/WHEEL +1 -2
- mcp_query_table-0.3.6.dist-info/RECORD +0 -19
- mcp_query_table-0.3.6.dist-info/top_level.txt +0 -1
- {mcp_query_table-0.3.6.dist-info → mcp_query_table-0.3.8.dist-info}/licenses/LICENSE +0 -0
mcp_query_table/__init__.py
CHANGED
mcp_query_table/__main__.py
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
import getpass
|
|
2
|
+
|
|
1
3
|
from mcp_query_table.server import serve
|
|
2
4
|
|
|
3
5
|
|
|
@@ -11,10 +13,11 @@ def main():
|
|
|
11
13
|
parser.add_argument("--format", type=str, help="输出格式",
|
|
12
14
|
default='markdown', choices=['markdown', 'csv', 'json'])
|
|
13
15
|
parser.add_argument("--endpoint", type=str, help="浏览器CDP地址/WS地址",
|
|
14
|
-
default=
|
|
15
|
-
parser.add_argument("--executable_path", type=str, help="
|
|
16
|
-
default=r'C:\Program Files\Google\Chrome\Application\chrome.exe')
|
|
17
|
-
|
|
16
|
+
nargs="?", default=r'http://127.0.0.1:9222')
|
|
17
|
+
parser.add_argument("--executable_path", type=str, help="浏览器路径",
|
|
18
|
+
nargs="?", default=r'C:\Program Files\Google\Chrome\Application\chrome.exe')
|
|
19
|
+
parser.add_argument("--user_data_dir", type=str, help="浏览器用户数据目录",
|
|
20
|
+
nargs="?", default=rf'C:\Users\{getpass.getuser()}\AppData\Local\Google\Chrome\User Data')
|
|
18
21
|
parser.add_argument("--transport", type=str, help="传输类型",
|
|
19
22
|
default='stdio', choices=['stdio', 'sse'])
|
|
20
23
|
parser.add_argument("--host", type=str, help="MCP服务端绑定地址",
|
|
@@ -22,7 +25,8 @@ def main():
|
|
|
22
25
|
parser.add_argument("--port", type=int, help="MCP服务端绑定端口",
|
|
23
26
|
default='8000')
|
|
24
27
|
args = parser.parse_args()
|
|
25
|
-
serve(args.format, args.endpoint,
|
|
28
|
+
serve(args.format, args.endpoint,
|
|
29
|
+
args.executable_path, args.user_data_dir,
|
|
26
30
|
args.transport, args.host, args.port)
|
|
27
31
|
|
|
28
32
|
|
mcp_query_table/_version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.3.6"
|
|
1
|
+
__version__ = "0.3.8"
|
mcp_query_table/enums.py
CHANGED
|
@@ -8,7 +8,7 @@ import json
|
|
|
8
8
|
from playwright.async_api import Page
|
|
9
9
|
|
|
10
10
|
import mcp_query_table
|
|
11
|
-
from mcp_query_table.tool import split_images, GlobalVars
|
|
11
|
+
from mcp_query_table.utils import split_images, GlobalVars
|
|
12
12
|
|
|
13
13
|
_PAGE0_ = "https://chat.baidu.com/search"
|
|
14
14
|
_PAGE1_ = "https://chat.baidu.com/aichat/api/conversation"
|
mcp_query_table/providers/n.py
CHANGED
|
@@ -3,10 +3,11 @@
|
|
|
3
3
|
"""
|
|
4
4
|
import json
|
|
5
5
|
|
|
6
|
+
from loguru import logger
|
|
6
7
|
from playwright.async_api import Page
|
|
7
8
|
|
|
8
9
|
import mcp_query_table
|
|
9
|
-
from mcp_query_table.tool import is_image, GlobalVars
|
|
10
|
+
from mcp_query_table.utils import is_image, GlobalVars
|
|
10
11
|
|
|
11
12
|
_PAGE0_ = "https://www.n.cn"
|
|
12
13
|
_PAGE1_ = "https://www.n.cn/search"
|
|
@@ -78,6 +79,8 @@ async def chat(page: Page,
|
|
|
78
79
|
str
|
|
79
80
|
回答
|
|
80
81
|
"""
|
|
82
|
+
logger.warning("纳米搜索。不登录可以使用。但无头模式要指定`user_data_dir`才能正常工作")
|
|
83
|
+
|
|
81
84
|
if not create:
|
|
82
85
|
if not page.url.startswith(_PAGE1_):
|
|
83
86
|
create = True
|
|
@@ -102,6 +105,7 @@ async def chat(page: Page,
|
|
|
102
105
|
textbox = page.get_by_role("textbox", name=name)
|
|
103
106
|
await textbox.fill(prompt)
|
|
104
107
|
await textbox.press("Enter")
|
|
108
|
+
# await page.screenshot(path="n.png")
|
|
105
109
|
await on_response(await response_info.value)
|
|
106
110
|
|
|
107
111
|
return G.get_text()
|
|
@@ -4,10 +4,11 @@
|
|
|
4
4
|
import json
|
|
5
5
|
import re
|
|
6
6
|
|
|
7
|
+
from loguru import logger
|
|
7
8
|
from playwright.async_api import Page
|
|
8
9
|
|
|
9
10
|
import mcp_query_table
|
|
10
|
-
from mcp_query_table.tool import split_images, GlobalVars
|
|
11
|
+
from mcp_query_table.utils import split_images, GlobalVars
|
|
11
12
|
|
|
12
13
|
_PAGE0_ = "https://yuanbao.tencent.com/"
|
|
13
14
|
_PAGE1_ = "https://yuanbao.tencent.com/api/chat"
|
|
@@ -69,6 +70,8 @@ async def chat(page: Page,
|
|
|
69
70
|
create: bool,
|
|
70
71
|
files: list[str]
|
|
71
72
|
) -> str:
|
|
73
|
+
logger.info("腾讯元宝。登录才可以使用。无头模式时要指定`user_data_dir`才能正常工作")
|
|
74
|
+
|
|
72
75
|
if not page.url.startswith(_PAGE0_):
|
|
73
76
|
create = True
|
|
74
77
|
|
mcp_query_table/server.py
CHANGED
|
@@ -14,9 +14,13 @@ class QueryServer:
|
|
|
14
14
|
self.format: str = "markdown"
|
|
15
15
|
self.browser = None
|
|
16
16
|
|
|
17
|
-
def start(self, format, endpoint, executable_path):
|
|
17
|
+
def start(self, format, endpoint, executable_path, user_data_dir):
|
|
18
18
|
self.format: str = format
|
|
19
|
-
self.browser = BrowserManager(endpoint=endpoint,
|
|
19
|
+
self.browser = BrowserManager(endpoint=endpoint,
|
|
20
|
+
executable_path=executable_path,
|
|
21
|
+
user_data_dir=user_data_dir,
|
|
22
|
+
devtools=False,
|
|
23
|
+
headless=True)
|
|
20
24
|
|
|
21
25
|
async def query(self, query_input: str, query_type: QueryType, max_page: int, site: Site):
|
|
22
26
|
page = await self.browser.get_page()
|
|
@@ -54,7 +58,8 @@ async def query(
|
|
|
54
58
|
return await qsv.query(query_input, query_type, max_page, site)
|
|
55
59
|
|
|
56
60
|
|
|
57
|
-
|
|
61
|
+
# chat功能不通过mcp暴露,因为在Cline等客户端中本就有LLM功能,反而导致返回的数据没有正确提交
|
|
62
|
+
# @mcp.tool(description="大语言模型对话")
|
|
58
63
|
async def chat(
|
|
59
64
|
prompt: Annotated[str, Field(description="提示词。如:`9.9大还是9.11大?`")],
|
|
60
65
|
create: Annotated[bool, Field(default=False, description="是否创建新对话")],
|
|
@@ -65,12 +70,15 @@ async def chat(
|
|
|
65
70
|
return await qsv.chat(prompt, create, files, provider)
|
|
66
71
|
|
|
67
72
|
|
|
68
|
-
def serve(format, endpoint, executable_path, transport, host, port):
|
|
69
|
-
qsv.start(format, endpoint, executable_path)
|
|
70
|
-
logger.info(f"{
|
|
71
|
-
logger.info(f"{
|
|
73
|
+
def serve(format, endpoint, executable_path, user_data_dir, transport, host, port):
|
|
74
|
+
qsv.start(format, endpoint, executable_path, user_data_dir)
|
|
75
|
+
logger.info(f"{endpoint=}")
|
|
76
|
+
logger.info(f"{executable_path=}")
|
|
77
|
+
logger.info(f"{user_data_dir=}")
|
|
72
78
|
if transport == 'sse':
|
|
73
|
-
logger.info(f"{
|
|
79
|
+
logger.info(f"{transport=},{format=},{host=},{port=}")
|
|
80
|
+
else:
|
|
81
|
+
logger.info(f"{transport=},{format=}")
|
|
74
82
|
|
|
75
83
|
mcp.settings.host = host
|
|
76
84
|
mcp.settings.port = port
|
mcp_query_table/sites/iwencai.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
"""
|
|
2
|
-
|
|
2
|
+
同花顺问财
|
|
3
3
|
https://www.iwencai.com/
|
|
4
4
|
|
|
5
5
|
1. 一定要保证浏览器宽度>768,防止界面变成适应手机
|
|
@@ -10,8 +10,10 @@ import re
|
|
|
10
10
|
import pandas as pd
|
|
11
11
|
from loguru import logger
|
|
12
12
|
from playwright.async_api import Page
|
|
13
|
+
from playwright_stealth import stealth_async
|
|
13
14
|
|
|
14
15
|
from mcp_query_table.enums import QueryType
|
|
16
|
+
from mcp_query_table.utils import FixedConfig
|
|
15
17
|
|
|
16
18
|
# 初次查询页面
|
|
17
19
|
_PAGE1_ = 'https://www.iwencai.com/customized/chart/get-robot-data'
|
|
@@ -140,7 +142,6 @@ json_data['answer']['components'][0]['data']['meta']['extra']['row_count']
|
|
|
140
142
|
|
|
141
143
|
async def on_response(response):
|
|
142
144
|
if response.url == _PAGE1_:
|
|
143
|
-
# TODO 不支持headless模式,需要以后解决
|
|
144
145
|
P.update(*get_robot_data(await response.json()))
|
|
145
146
|
if response.url == _PAGE2_:
|
|
146
147
|
P.update(*getDataList(await response.json()))
|
|
@@ -153,6 +154,8 @@ async def query(page: Page,
|
|
|
153
154
|
querytype = _querytype_.get(type_, None)
|
|
154
155
|
assert querytype is not None, f"不支持的类型:{type_}"
|
|
155
156
|
|
|
157
|
+
await stealth_async(page, FixedConfig())
|
|
158
|
+
|
|
156
159
|
await page.route(re.compile(r'.*\.(?:jpg|jpeg|png|gif|webp)(?:$|\?)'), lambda route: route.abort())
|
|
157
160
|
|
|
158
161
|
P.reset()
|
mcp_query_table/tool.py
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
|
+
import getpass
|
|
1
2
|
import subprocess
|
|
2
3
|
import sys
|
|
3
4
|
import time
|
|
4
5
|
from pathlib import Path
|
|
5
|
-
from typing import Optional
|
|
6
|
+
from typing import Optional
|
|
6
7
|
from urllib.parse import urlparse
|
|
7
8
|
|
|
8
9
|
import pandas as pd
|
|
@@ -26,6 +27,7 @@ def create_detached_process(command):
|
|
|
26
27
|
kwargs.update({
|
|
27
28
|
'start_new_session': True # 创建新的会话
|
|
28
29
|
})
|
|
30
|
+
logger.info(f"Popen: {command}")
|
|
29
31
|
return subprocess.Popen(command, **kwargs)
|
|
30
32
|
|
|
31
33
|
|
|
@@ -59,6 +61,21 @@ def get_executable_path(executable_path) -> Optional[str]:
|
|
|
59
61
|
return None
|
|
60
62
|
|
|
61
63
|
|
|
64
|
+
def get_user_data_dir(user_data_dir) -> Optional[str]:
|
|
65
|
+
"""获取浏览器可用户目录"""
|
|
66
|
+
browsers = {
|
|
67
|
+
"default": user_data_dir,
|
|
68
|
+
"chrome.exe": rf'C:\Users\{getpass.getuser()}\AppData\Local\Google\Chrome\User Data\Default',
|
|
69
|
+
"msedge.exe": rf"C:\Users\{getpass.getuser()}\AppData\Local\Microsoft\Edge\User Data\Default",
|
|
70
|
+
}
|
|
71
|
+
for k, v in browsers.items():
|
|
72
|
+
if v is None:
|
|
73
|
+
continue
|
|
74
|
+
if Path(v).exists():
|
|
75
|
+
return v
|
|
76
|
+
return None
|
|
77
|
+
|
|
78
|
+
|
|
62
79
|
class BrowserManager:
|
|
63
80
|
async def __aenter__(self):
|
|
64
81
|
return self
|
|
@@ -67,24 +84,36 @@ class BrowserManager:
|
|
|
67
84
|
await self.cleanup()
|
|
68
85
|
|
|
69
86
|
def __init__(self,
|
|
70
|
-
endpoint: Optional[str]
|
|
87
|
+
endpoint: Optional[str],
|
|
71
88
|
executable_path: Optional[str] = None,
|
|
72
|
-
|
|
89
|
+
devtools: bool = False,
|
|
90
|
+
headless: bool = True,
|
|
91
|
+
user_data_dir: Optional[str] = None):
|
|
73
92
|
"""
|
|
74
93
|
|
|
75
94
|
Parameters
|
|
76
95
|
----------
|
|
77
|
-
endpoint:str
|
|
78
|
-
浏览器CDP地址/WS
|
|
96
|
+
endpoint:str or None
|
|
97
|
+
浏览器CDP地址/WS地址。
|
|
98
|
+
如果为None,则直接启动浏览器实例。可用无头模式。建议指定用户数据目录,否则可能无法使用某些需要登录的网站
|
|
79
99
|
executable_path:str
|
|
80
100
|
浏览器可执行文件路径。推荐使用chrome,因为Microsoft Edge必须在任务管理器中完全退出才能启动调试端口
|
|
81
|
-
|
|
101
|
+
devtools:bool
|
|
82
102
|
是否显示开发者工具
|
|
103
|
+
headless:bool
|
|
104
|
+
是否无头模式启动浏览器
|
|
105
|
+
user_data_dir:str
|
|
106
|
+
浏览器用户数据目录。无头模式。强烈建议指定用户数据目录,否则可能无法使用某些需要登录的网站
|
|
83
107
|
|
|
84
108
|
"""
|
|
85
|
-
|
|
109
|
+
if devtools:
|
|
110
|
+
headless = False
|
|
111
|
+
|
|
112
|
+
self.endpoint = endpoint
|
|
86
113
|
self.executable_path = executable_path
|
|
87
|
-
self.
|
|
114
|
+
self.devtools = devtools
|
|
115
|
+
self.headless = headless
|
|
116
|
+
self.user_data_dir = user_data_dir
|
|
88
117
|
|
|
89
118
|
self.playwright: Optional[Playwright] = None
|
|
90
119
|
self.browser = None
|
|
@@ -102,8 +131,9 @@ class BrowserManager:
|
|
|
102
131
|
"""连接本地浏览器"""
|
|
103
132
|
port = urlparse(self.endpoint).port
|
|
104
133
|
executable_path = get_executable_path(self.executable_path)
|
|
134
|
+
name = Path(executable_path).name
|
|
105
135
|
command = [executable_path, f'--remote-debugging-port={port}', '--start-maximized']
|
|
106
|
-
if self.
|
|
136
|
+
if self.devtools:
|
|
107
137
|
command.append('--auto-open-devtools-for-tabs')
|
|
108
138
|
|
|
109
139
|
for i in range(2):
|
|
@@ -113,13 +143,12 @@ class BrowserManager:
|
|
|
113
143
|
break
|
|
114
144
|
except:
|
|
115
145
|
if i == 0:
|
|
116
|
-
logger.info(f"start browser:{command}")
|
|
117
146
|
create_detached_process(command)
|
|
118
|
-
time.sleep(
|
|
147
|
+
time.sleep(5)
|
|
119
148
|
continue
|
|
120
149
|
if i == 1:
|
|
121
150
|
raise ConnectionError(
|
|
122
|
-
f"已提前打开了浏览器,但未开启远程调试端口?请关闭浏览器全部进程后重试 `taskkill /f /im {
|
|
151
|
+
f"已提前打开了浏览器,但未开启远程调试端口?请关闭浏览器全部进程后重试 `taskkill /f /im {name}`")
|
|
123
152
|
|
|
124
153
|
async def _connect_to_remote(self) -> None:
|
|
125
154
|
"""连接远程浏览器"""
|
|
@@ -133,6 +162,26 @@ class BrowserManager:
|
|
|
133
162
|
except:
|
|
134
163
|
raise ConnectionError(f"连接远程浏览器失败,请检查CDP/WS地址和端口是否正确。{self.endpoint}")
|
|
135
164
|
|
|
165
|
+
async def _connect_to_launch(self) -> None:
|
|
166
|
+
logger.info("executable_path={}", self.executable_path)
|
|
167
|
+
if self.user_data_dir:
|
|
168
|
+
logger.info("user_data_dir={}", self.user_data_dir)
|
|
169
|
+
try:
|
|
170
|
+
self.context = await self.playwright.chromium.launch_persistent_context(
|
|
171
|
+
user_data_dir=self.user_data_dir,
|
|
172
|
+
executable_path=self.executable_path,
|
|
173
|
+
headless=self.headless,
|
|
174
|
+
devtools=self.devtools,
|
|
175
|
+
timeout=10000, slow_mo=1000)
|
|
176
|
+
except:
|
|
177
|
+
raise ConnectionError(f"launch失败,可能已经有浏览器已经打开了数据目录。{self.user_data_dir}")
|
|
178
|
+
else:
|
|
179
|
+
logger.warning("未指定浏览器用户数据目录,部分需要的网站可能无法使用")
|
|
180
|
+
self.browser = await self.playwright.chromium.launch(
|
|
181
|
+
executable_path=self.executable_path,
|
|
182
|
+
headless=self.headless,
|
|
183
|
+
devtools=self.devtools)
|
|
184
|
+
|
|
136
185
|
async def _launch(self) -> None:
|
|
137
186
|
"""启动浏览器,并连接CDP协议
|
|
138
187
|
|
|
@@ -142,16 +191,20 @@ class BrowserManager:
|
|
|
142
191
|
|
|
143
192
|
"""
|
|
144
193
|
self.playwright = await async_playwright().start()
|
|
145
|
-
|
|
146
|
-
|
|
194
|
+
if self.endpoint is None:
|
|
195
|
+
await self._connect_to_launch()
|
|
196
|
+
elif is_local_url(self.endpoint) and is_cdp_url(self.endpoint):
|
|
147
197
|
await self._connect_to_local()
|
|
148
198
|
else:
|
|
149
199
|
await self._connect_to_remote()
|
|
150
200
|
|
|
151
|
-
if
|
|
201
|
+
if self.browser is None:
|
|
202
|
+
pass
|
|
203
|
+
elif len(self.browser.contexts) == 0:
|
|
152
204
|
self.context = await self.browser.new_context()
|
|
153
205
|
else:
|
|
154
206
|
self.context = self.browser.contexts[0]
|
|
207
|
+
|
|
155
208
|
# 复用打开的page
|
|
156
209
|
for page in self.context.pages:
|
|
157
210
|
# 防止开发者工具被使用
|
|
@@ -165,15 +218,10 @@ class BrowserManager:
|
|
|
165
218
|
continue
|
|
166
219
|
self.pages.append(page)
|
|
167
220
|
|
|
168
|
-
async def _try_launch(self) -> None:
|
|
169
|
-
if self.browser is None:
|
|
170
|
-
await self._launch()
|
|
171
|
-
if not self.browser.is_connected():
|
|
172
|
-
await self._launch()
|
|
173
|
-
|
|
174
221
|
async def get_page(self) -> Page:
|
|
175
222
|
"""获取可用Page。无空闲标签时会打开新标签"""
|
|
176
|
-
|
|
223
|
+
if self.context is None:
|
|
224
|
+
await self._launch()
|
|
177
225
|
|
|
178
226
|
# 反复取第一个tab
|
|
179
227
|
while len(self.pages) > 0:
|
|
@@ -193,19 +241,6 @@ class BrowserManager:
|
|
|
193
241
|
self.pages.append(page)
|
|
194
242
|
|
|
195
243
|
|
|
196
|
-
class GlobalVars:
|
|
197
|
-
"""全局变量"""
|
|
198
|
-
|
|
199
|
-
def __init__(self):
|
|
200
|
-
self.text = ""
|
|
201
|
-
|
|
202
|
-
def set_text(self, text):
|
|
203
|
-
self.text = text
|
|
204
|
-
|
|
205
|
-
def get_text(self):
|
|
206
|
-
return self.text
|
|
207
|
-
|
|
208
|
-
|
|
209
244
|
async def query(
|
|
210
245
|
page: Page,
|
|
211
246
|
query_input: str = "收盘价>100元",
|
|
@@ -289,22 +324,3 @@ async def chat(
|
|
|
289
324
|
return await chat(page, prompt, create, files)
|
|
290
325
|
|
|
291
326
|
raise ValueError(f"未支持的提供商:{provider}")
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
def is_image(path: str) -> bool:
|
|
295
|
-
"""判断是否是图片文件"""
|
|
296
|
-
img_ext = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp']
|
|
297
|
-
ext = Path(path).suffix.lower()
|
|
298
|
-
return ext in img_ext
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
def split_images(files: List[str]) -> Tuple[List[str], List[str]]:
|
|
302
|
-
"""图片列表分成两部分"""
|
|
303
|
-
imgs = []
|
|
304
|
-
docs = []
|
|
305
|
-
for f in files:
|
|
306
|
-
if is_image(f):
|
|
307
|
-
imgs.append(f)
|
|
308
|
-
else:
|
|
309
|
-
docs.append(f)
|
|
310
|
-
return imgs, docs
|
mcp_query_table/utils.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import random
|
|
2
|
+
import string
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import List, Tuple
|
|
5
|
+
|
|
6
|
+
from playwright_stealth import StealthConfig
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def is_image(path: str) -> bool:
|
|
10
|
+
"""判断是否是图片文件"""
|
|
11
|
+
img_ext = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp']
|
|
12
|
+
ext = Path(path).suffix.lower()
|
|
13
|
+
return ext in img_ext
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def split_images(files: List[str]) -> Tuple[List[str], List[str]]:
|
|
17
|
+
"""图片列表分成两部分"""
|
|
18
|
+
imgs = []
|
|
19
|
+
docs = []
|
|
20
|
+
for f in files:
|
|
21
|
+
if is_image(f):
|
|
22
|
+
imgs.append(f)
|
|
23
|
+
else:
|
|
24
|
+
docs.append(f)
|
|
25
|
+
return imgs, docs
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class GlobalVars:
|
|
29
|
+
"""全局变量"""
|
|
30
|
+
|
|
31
|
+
def __init__(self):
|
|
32
|
+
self.text = ""
|
|
33
|
+
|
|
34
|
+
def set_text(self, text):
|
|
35
|
+
self.text = text
|
|
36
|
+
|
|
37
|
+
def get_text(self):
|
|
38
|
+
return self.text
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# https://github.com/AtuboDad/playwright_stealth/issues/31#issuecomment-2342541305
|
|
42
|
+
class FixedConfig(StealthConfig):
|
|
43
|
+
|
|
44
|
+
@property
|
|
45
|
+
def enabled_scripts(self):
|
|
46
|
+
key = "".join(random.choices(string.ascii_letters, k=10))
|
|
47
|
+
for script in super().enabled_scripts:
|
|
48
|
+
if "const opts" in script:
|
|
49
|
+
yield script.replace("const opts", f"window.{key}")
|
|
50
|
+
continue
|
|
51
|
+
yield script.replace("opts", f"window.{key}")
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mcp_query_table
|
|
3
|
-
Version: 0.3.6
|
|
3
|
+
Version: 0.3.8
|
|
4
4
|
Summary: query table from website, support MCP
|
|
5
5
|
Author-email: wukan <wu-kan@163.com>
|
|
6
6
|
License: MIT License
|
|
@@ -24,24 +24,25 @@ License: MIT License
|
|
|
24
24
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
25
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
26
|
SOFTWARE.
|
|
27
|
-
|
|
28
|
-
Keywords:
|
|
27
|
+
License-File: LICENSE
|
|
28
|
+
Keywords: eastmoney,iwencai,mcp,playwright,table,tdx
|
|
29
29
|
Classifier: Development Status :: 4 - Beta
|
|
30
30
|
Classifier: Programming Language :: Python
|
|
31
31
|
Requires-Python: >=3.10
|
|
32
|
-
Description-Content-Type: text/markdown
|
|
33
|
-
License-File: LICENSE
|
|
34
|
-
Requires-Dist: pandas
|
|
35
32
|
Requires-Dist: loguru
|
|
36
|
-
Requires-Dist: playwright
|
|
37
33
|
Requires-Dist: mcp
|
|
38
|
-
|
|
34
|
+
Requires-Dist: pandas
|
|
35
|
+
Requires-Dist: playwright
|
|
36
|
+
Requires-Dist: playwright-stealth
|
|
37
|
+
Requires-Dist: setuptools
|
|
38
|
+
Requires-Dist: tabulate
|
|
39
|
+
Description-Content-Type: text/markdown
|
|
39
40
|
|
|
40
41
|
# mcp_query_table
|
|
41
42
|
|
|
42
43
|
1. 基于`playwright`实现的财经网页表格爬虫,支持`Model Context Protocol (MCP) `。目前可查询来源为
|
|
43
44
|
|
|
44
|
-
- [
|
|
45
|
+
- [同花顺问财](http://iwencai.com/)
|
|
45
46
|
- [通达信问小达](https://wenda.tdx.com.cn/)
|
|
46
47
|
- [东方财富条件选股](https://xuangu.eastmoney.com/)
|
|
47
48
|
|
|
@@ -70,7 +71,7 @@ from mcp_query_table import *
|
|
|
70
71
|
|
|
71
72
|
|
|
72
73
|
async def main() -> None:
|
|
73
|
-
async with BrowserManager(endpoint="http://127.0.0.1:9222", executable_path=None,
|
|
74
|
+
async with BrowserManager(endpoint="http://127.0.0.1:9222", executable_path=None, devtools=True) as bm:
|
|
74
75
|
# 问财需要保证浏览器宽度>768,防止界面变成适应手机
|
|
75
76
|
page = await bm.get_page()
|
|
76
77
|
df = await query(page, '收益最好的200只ETF', query_type=QueryType.ETF, max_page=1, site=Site.THS)
|
|
@@ -128,19 +129,21 @@ if __name__ == '__main__':
|
|
|
128
129
|
|
|
129
130
|
后期会根据不同的网站改版情况,使用更适合的方法。
|
|
130
131
|
|
|
131
|
-
##
|
|
132
|
+
## 无头模式
|
|
132
133
|
|
|
133
|
-
|
|
134
|
+
无头模式运行速度更快,但部分网站需要提前登录,所以,无头模式一定要指定`user_data_dir`,否则会出现需要登录的情况。
|
|
134
135
|
|
|
135
|
-
|
|
136
|
-
|
|
136
|
+
- `endpoint=None`时,`headless=True`可无头启动新浏览器实例。指定`executable_path`和`user_data_dir`,才能确保无头模式下正常运行。
|
|
137
|
+
- `endpoint`以`http://`开头,连接`CDP`模式启动的有头浏览器,参数必有`--remote-debugging-port`。`executable_path`为本地浏览器路径。
|
|
138
|
+
- `endpoint`以`ws://`开头,连接远程`Playwright Server`。也是无头模式,但无法指定`user_data_dir`,所以使用受限
|
|
139
|
+
- 参考:https://playwright.dev/python/docs/docker#running-the-playwright-server
|
|
137
140
|
|
|
138
|
-
|
|
141
|
+
## MCP支持
|
|
139
142
|
|
|
140
|
-
-
|
|
141
|
-
- ws方式:服务器上`docker run -p 3000:3000 --rm --init -it --workdir /home/pwuser --user pwuser mcr.microsoft.com/playwright:v1.51.0-noble /bin/sh -c "npx -y playwright@1.51.0 run-server --port 3000 --host 0.0.0.0"`
|
|
143
|
+
确保可以在控制台中执行`python -m mcp_query_table -h`。如果不能,可能要先`pip install mcp_query_table`
|
|
142
144
|
|
|
143
|
-
|
|
145
|
+
在`Cline`中可以配置如下。其中`command`是`python`的绝对路径,`timeout`是超时时间,单位为秒。 在各`AI`
|
|
146
|
+
平台中由于返回时间常需1分钟以上,所以需要设置大的超时时间。
|
|
144
147
|
|
|
145
148
|
### STDIO方式
|
|
146
149
|
|
|
@@ -170,7 +173,7 @@ if __name__ == '__main__':
|
|
|
170
173
|
先在控制台中执行如下命令,启动`MCP`服务
|
|
171
174
|
|
|
172
175
|
```commandline
|
|
173
|
-
python -m mcp_query_table --format markdown --transport sse --port 8000
|
|
176
|
+
python -m mcp_query_table --format markdown --transport sse --port 8000 --endpoint http://127.0.0.1:9222
|
|
174
177
|
```
|
|
175
178
|
|
|
176
179
|
然后就可以连接到`MCP`服务了
|
|
@@ -189,7 +192,7 @@ python -m mcp_query_table --format markdown --transport sse --port 8000
|
|
|
189
192
|
## 使用`MCP Inspector`进行调试
|
|
190
193
|
|
|
191
194
|
```commandline
|
|
192
|
-
npx @modelcontextprotocol/inspector python -m mcp_query_table --format markdown
|
|
195
|
+
npx @modelcontextprotocol/inspector python -m mcp_query_table --format markdown --endpoint http://127.0.0.1:9222
|
|
193
196
|
```
|
|
194
197
|
|
|
195
198
|
打开浏览器并翻页是一个比较耗时的操作,会导致`MCP Inspector`页面超时,可以`http://localhost:5173/?timeout=300000`
|
|
@@ -219,6 +222,5 @@ npx @modelcontextprotocol/inspector python -m mcp_query_table --format markdown
|
|
|
219
222
|

|
|
220
223
|
|
|
221
224
|
## 参考
|
|
222
|
-
|
|
223
|
-
- [Playwright](https://playwright.dev/python/docs/intro)
|
|
224
225
|
- [Selenium webdriver无法附加到edge实例,edge的--remote-debugging-port选项无效](https://blog.csdn.net/qq_30576521/article/details/142370538)
|
|
226
|
+
- https://github.com/AtuboDad/playwright_stealth/issues/31
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
mcp_query_table/__init__.py,sha256=K-0DU2hpeRvM9ZAuky0aWZtJLuOgKg7ZRd-pL9noc0o,330
|
|
2
|
+
mcp_query_table/__main__.py,sha256=oePptyDeLtOHcR0XZxx-O12hO6LSe6cplb4gaJBG4rI,1453
|
|
3
|
+
mcp_query_table/_version.py,sha256=7dTW0A5-FkrEuNOotvR8oW59M2lvIwYouVqfJzvXpKk,22
|
|
4
|
+
mcp_query_table/enums.py,sha256=7bu0m0zJBIfiS-eHGURw1ZHWNXgsq6gH1SztUhCgF-Y,678
|
|
5
|
+
mcp_query_table/server.py,sha256=D2-7ZmutijphasbLMosg9P5EOhJTB4RvC9-zmvCvc5k,3749
|
|
6
|
+
mcp_query_table/tool.py,sha256=PpsDFwaKQ9NsfqSMCvWm1p0MSyxT1Uwpve198TOP8FQ,11157
|
|
7
|
+
mcp_query_table/utils.py,sha256=VjKYLRPEa-W3qAOaNSJa9GnezrAAYykn4XY-R4P5NJg,1264
|
|
8
|
+
mcp_query_table/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
+
mcp_query_table/providers/baidu.py,sha256=S75D2zbpqG1r4Rxz7pJf5u2ZHNLO8nqV-LPEjlACtHg,3390
|
|
10
|
+
mcp_query_table/providers/n.py,sha256=SLalpwHSdkXNoMCLQEx1TEFlo50dS7I9JNli5jz8w6k,3202
|
|
11
|
+
mcp_query_table/providers/yuanbao.py,sha256=1wRMy7Z2JraM3MrgLDSdyg-EqX-D26ysx7CW1GFtVho,3292
|
|
12
|
+
mcp_query_table/sites/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
13
|
+
mcp_query_table/sites/eastmoney.py,sha256=LImjpYVuM5YnXwnNzB2hkKfHofocZZScetGqMOCHZpk,4477
|
|
14
|
+
mcp_query_table/sites/iwencai.py,sha256=43sBrVCXgiIybv25lEwTS6dlR2jXXceSBVGCOb03woE,5194
|
|
15
|
+
mcp_query_table/sites/tdx.py,sha256=P-GNFUsS5_INy3sicaZbUBdudgQBZuhu_QyVvyw4yDg,4126
|
|
16
|
+
mcp_query_table-0.3.8.dist-info/METADATA,sha256=4nnj_OrLYG0SMPlvSn9TpNI5IOx1DNsTfsWt22sVdqs,9372
|
|
17
|
+
mcp_query_table-0.3.8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
18
|
+
mcp_query_table-0.3.8.dist-info/licenses/LICENSE,sha256=rbvv_CTd7biGwT21tvhgQ2zkbPFXOoON7WFQWEdElBA,1063
|
|
19
|
+
mcp_query_table-0.3.8.dist-info/RECORD,,
|
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
mcp_query_table/__init__.py,sha256=NmnAOKcJjQoeTJMbIY79-JsNNdj-PJpnZ-x8MPjpE4o,272
|
|
2
|
-
mcp_query_table/__main__.py,sha256=a_CiwS8y0R_8XDDfOeUMyIBmDr1rdAGBgVG8pfzoNJw,1181
|
|
3
|
-
mcp_query_table/_version.py,sha256=W_9dCm49nLvZulVAvvsafxLJjVBSKDBHz9K7szFZllo,22
|
|
4
|
-
mcp_query_table/enums.py,sha256=Hen0X1f2of69f08epun6HvYIgbpw_rf8BvoRQ184kS4,679
|
|
5
|
-
mcp_query_table/server.py,sha256=bAqS3At2T8TlnEUtgScFc0q-UapHsVe12n1k9uw9mVQ,3303
|
|
6
|
-
mcp_query_table/tool.py,sha256=i1Wrwj038V5EGPcYC-qzfOlY31IIUvz_mugzKgi0XkU,9681
|
|
7
|
-
mcp_query_table/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
|
-
mcp_query_table/providers/baidu.py,sha256=q0vZxl_UTO8v4LbznwT2b1hM-00jswDR8CqB3tc-X9U,3389
|
|
9
|
-
mcp_query_table/providers/n.py,sha256=J0Xo-BlXveIQdDbwKkuEP4I1mfAr8i1rviD0UTztI9Y,3009
|
|
10
|
-
mcp_query_table/providers/yuanbao.py,sha256=4DzoLfgg7dSP1BhiS_tKyTQ0byc1zHRuZQjyq-2pZfQ,3148
|
|
11
|
-
mcp_query_table/sites/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
12
|
-
mcp_query_table/sites/eastmoney.py,sha256=LImjpYVuM5YnXwnNzB2hkKfHofocZZScetGqMOCHZpk,4477
|
|
13
|
-
mcp_query_table/sites/iwencai.py,sha256=YDGiV9wMvWxmLM3dvm4ccZxBhDOW8PdaPm9toQN0ue4,5119
|
|
14
|
-
mcp_query_table/sites/tdx.py,sha256=P-GNFUsS5_INy3sicaZbUBdudgQBZuhu_QyVvyw4yDg,4126
|
|
15
|
-
mcp_query_table-0.3.6.dist-info/licenses/LICENSE,sha256=rbvv_CTd7biGwT21tvhgQ2zkbPFXOoON7WFQWEdElBA,1063
|
|
16
|
-
mcp_query_table-0.3.6.dist-info/METADATA,sha256=nubh5fDdQXsGu7E0wP_EOKNeDZ7uaYzlFV5byH8T0Vo,9040
|
|
17
|
-
mcp_query_table-0.3.6.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
|
18
|
-
mcp_query_table-0.3.6.dist-info/top_level.txt,sha256=5M_8dkO1USOX7_EWbWS6O_TEsZ5yo-AodFNKeUEgvEQ,16
|
|
19
|
-
mcp_query_table-0.3.6.dist-info/RECORD,,
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
mcp_query_table
|
|
File without changes
|