mcp-query-table 0.3.7__tar.gz → 0.3.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mcp_query_table-0.3.9/.gitignore +174 -0
- {mcp_query_table-0.3.7 → mcp_query_table-0.3.9}/PKG-INFO +9 -10
- {mcp_query_table-0.3.7 → mcp_query_table-0.3.9}/mcp_query_table/__main__.py +1 -1
- mcp_query_table-0.3.9/mcp_query_table/_version.py +1 -0
- {mcp_query_table-0.3.7 → mcp_query_table-0.3.9}/mcp_query_table/server.py +2 -1
- {mcp_query_table-0.3.7 → mcp_query_table-0.3.9}/mcp_query_table/sites/iwencai.py +0 -4
- {mcp_query_table-0.3.7 → mcp_query_table-0.3.9}/mcp_query_table/tool.py +10 -5
- {mcp_query_table-0.3.7 → mcp_query_table-0.3.9}/mcp_query_table/utils.py +0 -17
- {mcp_query_table-0.3.7 → mcp_query_table-0.3.9}/pyproject.toml +13 -13
- mcp_query_table-0.3.7/mcp_query_table/_version.py +0 -1
- mcp_query_table-0.3.7/mcp_query_table.egg-info/PKG-INFO +0 -226
- mcp_query_table-0.3.7/mcp_query_table.egg-info/SOURCES.txt +0 -23
- mcp_query_table-0.3.7/mcp_query_table.egg-info/dependency_links.txt +0 -1
- mcp_query_table-0.3.7/mcp_query_table.egg-info/requires.txt +0 -5
- mcp_query_table-0.3.7/mcp_query_table.egg-info/top_level.txt +0 -1
- mcp_query_table-0.3.7/setup.cfg +0 -4
- {mcp_query_table-0.3.7 → mcp_query_table-0.3.9}/LICENSE +0 -0
- {mcp_query_table-0.3.7 → mcp_query_table-0.3.9}/README.md +0 -0
- {mcp_query_table-0.3.7 → mcp_query_table-0.3.9}/mcp_query_table/__init__.py +0 -0
- {mcp_query_table-0.3.7 → mcp_query_table-0.3.9}/mcp_query_table/enums.py +0 -0
- {mcp_query_table-0.3.7 → mcp_query_table-0.3.9}/mcp_query_table/providers/__init__.py +0 -0
- {mcp_query_table-0.3.7 → mcp_query_table-0.3.9}/mcp_query_table/providers/baidu.py +0 -0
- {mcp_query_table-0.3.7 → mcp_query_table-0.3.9}/mcp_query_table/providers/n.py +0 -0
- {mcp_query_table-0.3.7 → mcp_query_table-0.3.9}/mcp_query_table/providers/yuanbao.py +0 -0
- {mcp_query_table-0.3.7 → mcp_query_table-0.3.9}/mcp_query_table/sites/__init__.py +0 -0
- {mcp_query_table-0.3.7 → mcp_query_table-0.3.9}/mcp_query_table/sites/eastmoney.py +0 -0
- {mcp_query_table-0.3.7 → mcp_query_table-0.3.9}/mcp_query_table/sites/tdx.py +0 -0
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
|
|
6
|
+
# C extensions
|
|
7
|
+
*.so
|
|
8
|
+
|
|
9
|
+
# Distribution / packaging
|
|
10
|
+
.Python
|
|
11
|
+
build/
|
|
12
|
+
develop-eggs/
|
|
13
|
+
dist/
|
|
14
|
+
downloads/
|
|
15
|
+
eggs/
|
|
16
|
+
.eggs/
|
|
17
|
+
lib/
|
|
18
|
+
lib64/
|
|
19
|
+
parts/
|
|
20
|
+
sdist/
|
|
21
|
+
var/
|
|
22
|
+
wheels/
|
|
23
|
+
share/python-wheels/
|
|
24
|
+
*.egg-info/
|
|
25
|
+
.installed.cfg
|
|
26
|
+
*.egg
|
|
27
|
+
MANIFEST
|
|
28
|
+
|
|
29
|
+
# PyInstaller
|
|
30
|
+
# Usually these files are written by a python script from a template
|
|
31
|
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
|
32
|
+
*.manifest
|
|
33
|
+
*.spec
|
|
34
|
+
|
|
35
|
+
# Installer logs
|
|
36
|
+
pip-log.txt
|
|
37
|
+
pip-delete-this-directory.txt
|
|
38
|
+
|
|
39
|
+
# Unit test / coverage reports
|
|
40
|
+
htmlcov/
|
|
41
|
+
.tox/
|
|
42
|
+
.nox/
|
|
43
|
+
.coverage
|
|
44
|
+
.coverage.*
|
|
45
|
+
.cache
|
|
46
|
+
nosetests.xml
|
|
47
|
+
coverage.xml
|
|
48
|
+
*.cover
|
|
49
|
+
*.py,cover
|
|
50
|
+
.hypothesis/
|
|
51
|
+
.pytest_cache/
|
|
52
|
+
cover/
|
|
53
|
+
|
|
54
|
+
# Translations
|
|
55
|
+
*.mo
|
|
56
|
+
*.pot
|
|
57
|
+
|
|
58
|
+
# Django stuff:
|
|
59
|
+
*.log
|
|
60
|
+
local_settings.py
|
|
61
|
+
db.sqlite3
|
|
62
|
+
db.sqlite3-journal
|
|
63
|
+
|
|
64
|
+
# Flask stuff:
|
|
65
|
+
instance/
|
|
66
|
+
.webassets-cache
|
|
67
|
+
|
|
68
|
+
# Scrapy stuff:
|
|
69
|
+
.scrapy
|
|
70
|
+
|
|
71
|
+
# Sphinx documentation
|
|
72
|
+
docs/_build/
|
|
73
|
+
|
|
74
|
+
# PyBuilder
|
|
75
|
+
.pybuilder/
|
|
76
|
+
target/
|
|
77
|
+
|
|
78
|
+
# Jupyter Notebook
|
|
79
|
+
.ipynb_checkpoints
|
|
80
|
+
|
|
81
|
+
# IPython
|
|
82
|
+
profile_default/
|
|
83
|
+
ipython_config.py
|
|
84
|
+
|
|
85
|
+
# pyenv
|
|
86
|
+
# For a library or package, you might want to ignore these files since the code is
|
|
87
|
+
# intended to run in multiple environments; otherwise, check them in:
|
|
88
|
+
# .python-version
|
|
89
|
+
|
|
90
|
+
# pipenv
|
|
91
|
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
|
92
|
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
|
93
|
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
|
94
|
+
# install all needed dependencies.
|
|
95
|
+
#Pipfile.lock
|
|
96
|
+
|
|
97
|
+
# UV
|
|
98
|
+
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
|
99
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
100
|
+
# commonly ignored for libraries.
|
|
101
|
+
#uv.lock
|
|
102
|
+
|
|
103
|
+
# poetry
|
|
104
|
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
|
105
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
106
|
+
# commonly ignored for libraries.
|
|
107
|
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
|
108
|
+
#poetry.lock
|
|
109
|
+
|
|
110
|
+
# pdm
|
|
111
|
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
|
112
|
+
#pdm.lock
|
|
113
|
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
|
114
|
+
# in version control.
|
|
115
|
+
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
|
|
116
|
+
.pdm.toml
|
|
117
|
+
.pdm-python
|
|
118
|
+
.pdm-build/
|
|
119
|
+
|
|
120
|
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
|
121
|
+
__pypackages__/
|
|
122
|
+
|
|
123
|
+
# Celery stuff
|
|
124
|
+
celerybeat-schedule
|
|
125
|
+
celerybeat.pid
|
|
126
|
+
|
|
127
|
+
# SageMath parsed files
|
|
128
|
+
*.sage.py
|
|
129
|
+
|
|
130
|
+
# Environments
|
|
131
|
+
.env
|
|
132
|
+
.venv
|
|
133
|
+
env/
|
|
134
|
+
venv/
|
|
135
|
+
ENV/
|
|
136
|
+
env.bak/
|
|
137
|
+
venv.bak/
|
|
138
|
+
|
|
139
|
+
# Spyder project settings
|
|
140
|
+
.spyderproject
|
|
141
|
+
.spyproject
|
|
142
|
+
|
|
143
|
+
# Rope project settings
|
|
144
|
+
.ropeproject
|
|
145
|
+
|
|
146
|
+
# mkdocs documentation
|
|
147
|
+
/site
|
|
148
|
+
|
|
149
|
+
# mypy
|
|
150
|
+
.mypy_cache/
|
|
151
|
+
.dmypy.json
|
|
152
|
+
dmypy.json
|
|
153
|
+
|
|
154
|
+
# Pyre type checker
|
|
155
|
+
.pyre/
|
|
156
|
+
|
|
157
|
+
# pytype static type analyzer
|
|
158
|
+
.pytype/
|
|
159
|
+
|
|
160
|
+
# Cython debug symbols
|
|
161
|
+
cython_debug/
|
|
162
|
+
|
|
163
|
+
# PyCharm
|
|
164
|
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
|
165
|
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
|
166
|
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
|
167
|
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
|
168
|
+
#.idea/
|
|
169
|
+
|
|
170
|
+
# Ruff stuff:
|
|
171
|
+
.ruff_cache/
|
|
172
|
+
|
|
173
|
+
# PyPI configuration file
|
|
174
|
+
.pypirc
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mcp_query_table
|
|
3
|
-
Version: 0.3.7
|
|
3
|
+
Version: 0.3.9
|
|
4
4
|
Summary: query table from website, support MCP
|
|
5
5
|
Author-email: wukan <wu-kan@163.com>
|
|
6
6
|
License: MIT License
|
|
@@ -24,19 +24,18 @@ License: MIT License
|
|
|
24
24
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
25
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
26
|
SOFTWARE.
|
|
27
|
-
|
|
28
|
-
Keywords: playwright,mcp,table,iwencai,tdx,eastmoney
|
|
27
|
+
License-File: LICENSE
|
|
28
|
+
Keywords: eastmoney,iwencai,mcp,playwright,table,tdx
|
|
29
29
|
Classifier: Development Status :: 4 - Beta
|
|
30
30
|
Classifier: Programming Language :: Python
|
|
31
31
|
Requires-Python: >=3.10
|
|
32
|
-
Description-Content-Type: text/markdown
|
|
33
|
-
License-File: LICENSE
|
|
34
|
-
Requires-Dist: pandas
|
|
35
32
|
Requires-Dist: loguru
|
|
36
|
-
Requires-Dist: playwright
|
|
37
|
-
Requires-Dist: playwright-stealth
|
|
38
33
|
Requires-Dist: mcp
|
|
39
|
-
|
|
34
|
+
Requires-Dist: pandas
|
|
35
|
+
Requires-Dist: playwright
|
|
36
|
+
Requires-Dist: playwright-stealth>=2.0.0
|
|
37
|
+
Requires-Dist: tabulate
|
|
38
|
+
Description-Content-Type: text/markdown
|
|
40
39
|
|
|
41
40
|
# mcp_query_table
|
|
42
41
|
|
|
@@ -223,4 +222,4 @@ npx @modelcontextprotocol/inspector python -m mcp_query_table --format markdown
|
|
|
223
222
|
|
|
224
223
|
## 参考
|
|
225
224
|
- [Selenium webdriver无法附加到edge实例,edge的--remote-debugging-port选项无效](https://blog.csdn.net/qq_30576521/article/details/142370538)
|
|
226
|
-
- https://github.com/AtuboDad/playwright_stealth/issues/31
|
|
225
|
+
- https://github.com/AtuboDad/playwright_stealth/issues/31
|
|
@@ -17,7 +17,7 @@ def main():
|
|
|
17
17
|
parser.add_argument("--executable_path", type=str, help="浏览器路径",
|
|
18
18
|
nargs="?", default=r'C:\Program Files\Google\Chrome\Application\chrome.exe')
|
|
19
19
|
parser.add_argument("--user_data_dir", type=str, help="浏览器用户数据目录",
|
|
20
|
-
nargs="?", default=rf'C:\Users\{getpass.getuser()}\AppData\Local\Google\Chrome\User Data')
|
|
20
|
+
nargs="?", default=rf'C:\Users\{getpass.getuser()}\AppData\Local\Google\Chrome\User Data\Default')
|
|
21
21
|
parser.add_argument("--transport", type=str, help="传输类型",
|
|
22
22
|
default='stdio', choices=['stdio', 'sse'])
|
|
23
23
|
parser.add_argument("--host", type=str, help="MCP服务端绑定地址",
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.3.9"
|
|
@@ -58,7 +58,8 @@ async def query(
|
|
|
58
58
|
return await qsv.query(query_input, query_type, max_page, site)
|
|
59
59
|
|
|
60
60
|
|
|
61
|
-
|
|
61
|
+
# chat功能不通过mcp暴露,因为在Cline等客户端中本就有LLM功能,反而导致返回的数据没有正确提交
|
|
62
|
+
# @mcp.tool(description="大语言模型对话")
|
|
62
63
|
async def chat(
|
|
63
64
|
prompt: Annotated[str, Field(description="提示词。如:`9.9大还是9.11大?`")],
|
|
64
65
|
create: Annotated[bool, Field(default=False, description="是否创建新对话")],
|
|
@@ -10,10 +10,8 @@ import re
|
|
|
10
10
|
import pandas as pd
|
|
11
11
|
from loguru import logger
|
|
12
12
|
from playwright.async_api import Page
|
|
13
|
-
from playwright_stealth import stealth_async
|
|
14
13
|
|
|
15
14
|
from mcp_query_table.enums import QueryType
|
|
16
|
-
from mcp_query_table.utils import FixedConfig
|
|
17
15
|
|
|
18
16
|
# 初次查询页面
|
|
19
17
|
_PAGE1_ = 'https://www.iwencai.com/customized/chart/get-robot-data'
|
|
@@ -154,8 +152,6 @@ async def query(page: Page,
|
|
|
154
152
|
querytype = _querytype_.get(type_, None)
|
|
155
153
|
assert querytype is not None, f"不支持的类型:{type_}"
|
|
156
154
|
|
|
157
|
-
await stealth_async(page, FixedConfig())
|
|
158
|
-
|
|
159
155
|
await page.route(re.compile(r'.*\.(?:jpg|jpeg|png|gif|webp)(?:$|\?)'), lambda route: route.abort())
|
|
160
156
|
|
|
161
157
|
P.reset()
|
|
@@ -4,11 +4,12 @@ import sys
|
|
|
4
4
|
import time
|
|
5
5
|
from pathlib import Path
|
|
6
6
|
from typing import Optional
|
|
7
|
-
from urllib.parse import urlparse
|
|
7
|
+
from urllib.parse import urlparse, quote
|
|
8
8
|
|
|
9
9
|
import pandas as pd
|
|
10
10
|
from loguru import logger
|
|
11
11
|
from playwright.async_api import async_playwright, Playwright, Page
|
|
12
|
+
from playwright_stealth import Stealth
|
|
12
13
|
|
|
13
14
|
from mcp_query_table.enums import QueryType, Site, Provider
|
|
14
15
|
|
|
@@ -65,8 +66,8 @@ def get_user_data_dir(user_data_dir) -> Optional[str]:
|
|
|
65
66
|
"""获取浏览器可用户目录"""
|
|
66
67
|
browsers = {
|
|
67
68
|
"default": user_data_dir,
|
|
68
|
-
"chrome.exe": rf'C:\Users\{getpass.getuser()}\AppData\Local\Google\Chrome\User Data',
|
|
69
|
-
"msedge.exe": rf"C:\Users\{getpass.getuser()}\AppData\Local\Microsoft\Edge\User Data",
|
|
69
|
+
"chrome.exe": rf'C:\Users\{getpass.getuser()}\AppData\Local\Google\Chrome\User Data\Default',
|
|
70
|
+
"msedge.exe": rf"C:\Users\{getpass.getuser()}\AppData\Local\Microsoft\Edge\User Data\Default",
|
|
70
71
|
}
|
|
71
72
|
for k, v in browsers.items():
|
|
72
73
|
if v is None:
|
|
@@ -144,7 +145,7 @@ class BrowserManager:
|
|
|
144
145
|
except:
|
|
145
146
|
if i == 0:
|
|
146
147
|
create_detached_process(command)
|
|
147
|
-
time.sleep(
|
|
148
|
+
time.sleep(5)
|
|
148
149
|
continue
|
|
149
150
|
if i == 1:
|
|
150
151
|
raise ConnectionError(
|
|
@@ -171,7 +172,8 @@ class BrowserManager:
|
|
|
171
172
|
user_data_dir=self.user_data_dir,
|
|
172
173
|
executable_path=self.executable_path,
|
|
173
174
|
headless=self.headless,
|
|
174
|
-
devtools=self.devtools
|
|
175
|
+
devtools=self.devtools,
|
|
176
|
+
timeout=10000, slow_mo=1000)
|
|
175
177
|
except:
|
|
176
178
|
raise ConnectionError(f"launch失败,可能已经有浏览器已经打开了数据目录。{self.user_data_dir}")
|
|
177
179
|
else:
|
|
@@ -203,6 +205,8 @@ class BrowserManager:
|
|
|
203
205
|
self.context = await self.browser.new_context()
|
|
204
206
|
else:
|
|
205
207
|
self.context = self.browser.contexts[0]
|
|
208
|
+
# 爱问财,无头模式,需要使用 stealth 插件
|
|
209
|
+
await Stealth().apply_stealth_async(self.context)
|
|
206
210
|
|
|
207
211
|
# 复用打开的page
|
|
208
212
|
for page in self.context.pages:
|
|
@@ -267,6 +271,7 @@ async def query(
|
|
|
267
271
|
查询结果
|
|
268
272
|
|
|
269
273
|
"""
|
|
274
|
+
query_input = quote(query_input.strip(), safe='')
|
|
270
275
|
|
|
271
276
|
if site == Site.EastMoney:
|
|
272
277
|
from mcp_query_table.sites.eastmoney import query
|
|
@@ -1,10 +1,6 @@
|
|
|
1
|
-
import random
|
|
2
|
-
import string
|
|
3
1
|
from pathlib import Path
|
|
4
2
|
from typing import List, Tuple
|
|
5
3
|
|
|
6
|
-
from playwright_stealth import StealthConfig
|
|
7
|
-
|
|
8
4
|
|
|
9
5
|
def is_image(path: str) -> bool:
|
|
10
6
|
"""判断是否是图片文件"""
|
|
@@ -36,16 +32,3 @@ class GlobalVars:
|
|
|
36
32
|
|
|
37
33
|
def get_text(self):
|
|
38
34
|
return self.text
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
# https://github.com/AtuboDad/playwright_stealth/issues/31#issuecomment-2342541305
|
|
42
|
-
class FixedConfig(StealthConfig):
|
|
43
|
-
|
|
44
|
-
@property
|
|
45
|
-
def enabled_scripts(self):
|
|
46
|
-
key = "".join(random.choices(string.ascii_letters, k=10))
|
|
47
|
-
for script in super().enabled_scripts:
|
|
48
|
-
if "const opts" in script:
|
|
49
|
-
yield script.replace("const opts", f"window.{key}")
|
|
50
|
-
continue
|
|
51
|
-
yield script.replace("opts", f"window.{key}")
|
|
@@ -1,7 +1,3 @@
|
|
|
1
|
-
[build-system]
|
|
2
|
-
requires = ["setuptools"]
|
|
3
|
-
build-backend = "setuptools.build_meta"
|
|
4
|
-
|
|
5
1
|
[project]
|
|
6
2
|
name = "mcp_query_table"
|
|
7
3
|
authors = [
|
|
@@ -20,18 +16,22 @@ dependencies = [
|
|
|
20
16
|
"pandas",
|
|
21
17
|
"loguru",
|
|
22
18
|
"playwright",
|
|
23
|
-
"playwright-stealth",
|
|
19
|
+
"playwright-stealth>=2.0.0", # https://github.com/Mattwmaster58/playwright_stealth
|
|
24
20
|
"mcp",
|
|
21
|
+
"tabulate"
|
|
25
22
|
]
|
|
26
23
|
dynamic = ["version"]
|
|
27
24
|
|
|
25
|
+
[build-system]
|
|
26
|
+
requires = ["hatchling"]
|
|
27
|
+
build-backend = "hatchling.build"
|
|
28
28
|
|
|
29
|
-
[tool.
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
29
|
+
[tool.hatch.version]
|
|
30
|
+
path = "mcp_query_table/_version.py"
|
|
31
|
+
|
|
32
|
+
[tool.hatch.build.targets.wheel]
|
|
33
|
+
packages = ["mcp_query_table"]
|
|
34
|
+
include-package-data = true
|
|
35
35
|
|
|
36
|
-
[tool.
|
|
37
|
-
|
|
36
|
+
[tool.hatch.build.targets.sdist]
|
|
37
|
+
include = ["mcp_query_table*"]
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "0.3.7"
|
|
@@ -1,226 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: mcp_query_table
|
|
3
|
-
Version: 0.3.7
|
|
4
|
-
Summary: query table from website, support MCP
|
|
5
|
-
Author-email: wukan <wu-kan@163.com>
|
|
6
|
-
License: MIT License
|
|
7
|
-
|
|
8
|
-
Copyright (c) 2025 伍侃
|
|
9
|
-
|
|
10
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
-
of this software and associated documentation files (the "Software"), to deal
|
|
12
|
-
in the Software without restriction, including without limitation the rights
|
|
13
|
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
14
|
-
copies of the Software, and to permit persons to whom the Software is
|
|
15
|
-
furnished to do so, subject to the following conditions:
|
|
16
|
-
|
|
17
|
-
The above copyright notice and this permission notice shall be included in all
|
|
18
|
-
copies or substantial portions of the Software.
|
|
19
|
-
|
|
20
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
|
-
SOFTWARE.
|
|
27
|
-
|
|
28
|
-
Keywords: playwright,mcp,table,iwencai,tdx,eastmoney
|
|
29
|
-
Classifier: Development Status :: 4 - Beta
|
|
30
|
-
Classifier: Programming Language :: Python
|
|
31
|
-
Requires-Python: >=3.10
|
|
32
|
-
Description-Content-Type: text/markdown
|
|
33
|
-
License-File: LICENSE
|
|
34
|
-
Requires-Dist: pandas
|
|
35
|
-
Requires-Dist: loguru
|
|
36
|
-
Requires-Dist: playwright
|
|
37
|
-
Requires-Dist: playwright-stealth
|
|
38
|
-
Requires-Dist: mcp
|
|
39
|
-
Dynamic: license-file
|
|
40
|
-
|
|
41
|
-
# mcp_query_table
|
|
42
|
-
|
|
43
|
-
1. 基于`playwright`实现的财经网页表格爬虫,支持`Model Context Protocol (MCP) `。目前可查询来源为
|
|
44
|
-
|
|
45
|
-
- [同花顺问财](http://iwencai.com/)
|
|
46
|
-
- [通达信问小达](https://wenda.tdx.com.cn/)
|
|
47
|
-
- [东方财富条件选股](https://xuangu.eastmoney.com/)
|
|
48
|
-
|
|
49
|
-
实盘时,如果某网站宕机或改版,可以立即切换到其他网站。(注意:不同网站的表格结构不同,需要提前做适配)
|
|
50
|
-
|
|
51
|
-
2. 基于`playwright`实现的大语言模型调用爬虫。目前可用来源为
|
|
52
|
-
- [纳米搜索](https://www.n.cn/)
|
|
53
|
-
- [腾讯元宝](https://yuanbao.tencent.com/)
|
|
54
|
-
- [百度AI搜索](https://chat.baidu.com/)
|
|
55
|
-
|
|
56
|
-
`RooCode`提供了`Human Reply`功能。但发现`纳米搜索`网页版复制时格式破坏,所以研发了此功能
|
|
57
|
-
|
|
58
|
-
## 安装
|
|
59
|
-
|
|
60
|
-
```commandline
|
|
61
|
-
pip install -i https://pypi.org/simple --upgrade mcp_query_table
|
|
62
|
-
pip install -i https://pypi.tuna.tsinghua.edu.cn/simple --upgrade mcp_query_table
|
|
63
|
-
```
|
|
64
|
-
|
|
65
|
-
## 使用
|
|
66
|
-
|
|
67
|
-
```python
|
|
68
|
-
import asyncio
|
|
69
|
-
|
|
70
|
-
from mcp_query_table import *
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
async def main() -> None:
|
|
74
|
-
async with BrowserManager(endpoint="http://127.0.0.1:9222", executable_path=None, devtools=True) as bm:
|
|
75
|
-
# 问财需要保证浏览器宽度>768,防止界面变成适应手机
|
|
76
|
-
page = await bm.get_page()
|
|
77
|
-
df = await query(page, '收益最好的200只ETF', query_type=QueryType.ETF, max_page=1, site=Site.THS)
|
|
78
|
-
print(df.to_markdown())
|
|
79
|
-
df = await query(page, '年初至今收益率前50', query_type=QueryType.Fund, max_page=1, site=Site.TDX)
|
|
80
|
-
print(df.to_csv())
|
|
81
|
-
df = await query(page, '流通市值前10的行业板块', query_type=QueryType.Index, max_page=1, site=Site.TDX)
|
|
82
|
-
print(df.to_csv())
|
|
83
|
-
# TODO 东财翻页要提前登录
|
|
84
|
-
df = await query(page, '今日涨幅前5的概念板块;', query_type=QueryType.Board, max_page=3, site=Site.EastMoney)
|
|
85
|
-
print(df)
|
|
86
|
-
|
|
87
|
-
output = await chat(page, "1+2等于多少?", provider=Provider.YuanBao)
|
|
88
|
-
print(output)
|
|
89
|
-
output = await chat(page, "3+4等于多少?", provider=Provider.YuanBao, create=True)
|
|
90
|
-
print(output)
|
|
91
|
-
|
|
92
|
-
print('done')
|
|
93
|
-
bm.release_page(page)
|
|
94
|
-
await page.wait_for_timeout(2000)
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
if __name__ == '__main__':
|
|
98
|
-
asyncio.run(main())
|
|
99
|
-
|
|
100
|
-
```
|
|
101
|
-
|
|
102
|
-
## 注意事项
|
|
103
|
-
|
|
104
|
-
1. 浏览器最好是`Chrome`。如一定要使用`Edge`,除了关闭`Edge`所有窗口外,还要在任务管理器关闭`Microsoft Edge`
|
|
105
|
-
的所有进程,即`taskkill /f /im msedge.exe`
|
|
106
|
-
2. 浏览器要保证窗口宽度,防止部分网站自动适配成手机版,导致表格查询失败
|
|
107
|
-
3. 如有网站账号,请提前登录。此工具无自动登录功能
|
|
108
|
-
4. 不同网站的表格结构不同,同条件返回股票数量也不同。需要查询后做适配
|
|
109
|
-
|
|
110
|
-
## 工作原理
|
|
111
|
-
|
|
112
|
-
不同于`requests`,`playwright`是基于浏览器的,模拟用户在浏览器中的操作。
|
|
113
|
-
|
|
114
|
-
1. 不需要解决登录问题
|
|
115
|
-
2. 不需要解决请求构造、响应解析
|
|
116
|
-
3. 可以直接获取表格数据,所见即所得
|
|
117
|
-
4. 运行速度慢于`requests`,但开发效率高
|
|
118
|
-
|
|
119
|
-
数据的获取有:
|
|
120
|
-
|
|
121
|
-
1. 直接解析HTML表格
|
|
122
|
-
1. 数字文本化了,不利于后期研究
|
|
123
|
-
2. 适用性最强
|
|
124
|
-
2. 截获请求,获取返回的`json`数据
|
|
125
|
-
1. 类似于`requests`,需要做响应解析
|
|
126
|
-
2. 灵活性差点,网站改版后,需要重新做适配
|
|
127
|
-
|
|
128
|
-
此项目采用的是模拟点击浏览器来发送请求,使用截获响应并解析的方法来获取数据。
|
|
129
|
-
|
|
130
|
-
后期会根据不同的网站改版情况,使用更适合的方法。
|
|
131
|
-
|
|
132
|
-
## 无头模式
|
|
133
|
-
|
|
134
|
-
无头模式运行速度更快,但部分网站需要提前登录,所以,无头模式一定要指定`user_data_dir`,否则会出现需要登录的情况。
|
|
135
|
-
|
|
136
|
-
- `endpoint=None`时,`headless=True`可无头启动新浏览器实例。指定`executable_path`和`user_data_dir`,才能确保无头模式下正常运行。
|
|
137
|
-
- `endpoint`以`http://`开头,连接`CDP`模式启动的有头浏览器,参数必有`--remote-debugging-port`。`executable_path`为本地浏览器路径。
|
|
138
|
-
- `endpoint`以`ws://`开头,连接远程`Playwright Server`。也是无头模式,但无法指定`user_data_dir`,所以使用受限
|
|
139
|
-
- 参考:https://playwright.dev/python/docs/docker#running-the-playwright-server
|
|
140
|
-
|
|
141
|
-
## MCP支持
|
|
142
|
-
|
|
143
|
-
确保可以在控制台中执行`python -m mcp_query_table -h`。如果不能,可能要先`pip install mcp_query_table`
|
|
144
|
-
|
|
145
|
-
在`Cline`中可以配置如下。其中`command`是`python`的绝对路径,`timeout`是超时时间,单位为秒。 在各`AI`
|
|
146
|
-
平台中由于返回时间常需1分钟以上,所以需要设置大的超时时间。
|
|
147
|
-
|
|
148
|
-
### STDIO方式
|
|
149
|
-
|
|
150
|
-
```json
|
|
151
|
-
{
|
|
152
|
-
"mcpServers": {
|
|
153
|
-
"mcp_query_table": {
|
|
154
|
-
"timeout": 300,
|
|
155
|
-
"command": "D:\\Users\\Kan\\miniconda3\\envs\\py312\\python.exe",
|
|
156
|
-
"args": [
|
|
157
|
-
"-m",
|
|
158
|
-
"mcp_query_table",
|
|
159
|
-
"--format",
|
|
160
|
-
"markdown",
|
|
161
|
-
"--endpoint",
|
|
162
|
-
"http://127.0.0.1:9222",
|
|
163
|
-
"--executable_path",
|
|
164
|
-
"C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe"
|
|
165
|
-
]
|
|
166
|
-
}
|
|
167
|
-
}
|
|
168
|
-
}
|
|
169
|
-
```
|
|
170
|
-
|
|
171
|
-
### SSE方式
|
|
172
|
-
|
|
173
|
-
先在控制台中执行如下命令,启动`MCP`服务
|
|
174
|
-
|
|
175
|
-
```commandline
|
|
176
|
-
python -m mcp_query_table --format markdown --transport sse --port 8000 --endpoint http://127.0.0.1:9222
|
|
177
|
-
```
|
|
178
|
-
|
|
179
|
-
然后就可以连接到`MCP`服务了
|
|
180
|
-
|
|
181
|
-
```json
|
|
182
|
-
{
|
|
183
|
-
"mcpServers": {
|
|
184
|
-
"mcp_query_table": {
|
|
185
|
-
"timeout": 300,
|
|
186
|
-
"url": "http://127.0.0.1:8000/sse"
|
|
187
|
-
}
|
|
188
|
-
}
|
|
189
|
-
}
|
|
190
|
-
```
|
|
191
|
-
|
|
192
|
-
## 使用`MCP Inspector`进行调试
|
|
193
|
-
|
|
194
|
-
```commandline
|
|
195
|
-
npx @modelcontextprotocol/inspector python -m mcp_query_table --format markdown --endpoint http://127.0.0.1:9222
|
|
196
|
-
```
|
|
197
|
-
|
|
198
|
-
打开浏览器并翻页是一个比较耗时的操作,会导致`MCP Inspector`页面超时,可以`http://localhost:5173/?timeout=300000`
|
|
199
|
-
表示超时时间为300秒
|
|
200
|
-
|
|
201
|
-
第一次尝试编写`MCP`项目,可能会有各种问题,欢迎大家交流。
|
|
202
|
-
|
|
203
|
-
## `MCP`使用技巧
|
|
204
|
-
|
|
205
|
-
1. 2024年涨幅最大的100只股票按2024年12月31日总市值排名。三个网站的结果都不一样
|
|
206
|
-
- 同花顺:显示了2201只股票。前5个是工商银行、农业银行、中国移动、中国石油、建设银行
|
|
207
|
-
- 通达信:显示了100只股票,前5个是寒武纪、正丹股份,汇金科技、万丰奥威、艾融软件
|
|
208
|
-
- 东方财富:显示了100只股票,前5个是海光信息、寒武纪、光启技术、润泽科技、新易盛
|
|
209
|
-
|
|
210
|
-
2. 大语言模型对问题拆分能力弱,所以要能合理的提问,保证查询条件不会被改动。以下推荐第2、3种
|
|
211
|
-
- 2024年涨幅最大的100只股票按2024年12月31日总市值排名
|
|
212
|
-
> 大语言模型非常有可能拆分这句,导致一步查询被分成了多步查询
|
|
213
|
-
- 向东方财富查询“2024年涨幅最大的100只股票按2024年12月31日总市值排名”
|
|
214
|
-
> 用引号括起来,避免被拆分
|
|
215
|
-
- 向东方财富板块查询 “去年涨的最差的行业板块”,再查询此板块中去年涨的最好的5只股票
|
|
216
|
-
> 分成两步查询,先查询板块,再查询股票。但最好不要全自动,因为第一步的结果它不理解“今日涨幅”和“区间涨幅”,需要交互修正
|
|
217
|
-
|
|
218
|
-
## 支持`Streamlit`
|
|
219
|
-
|
|
220
|
-
实现在同一页面中查询金融数据,并手工输入到`AI`中进行深度分析。参考`streamlit`目录下的`README.md`文件。
|
|
221
|
-
|
|
222
|
-

|
|
223
|
-
|
|
224
|
-
## 参考
|
|
225
|
-
- [Selenium webdriver无法附加到edge实例,edge的--remote-debugging-port选项无效](https://blog.csdn.net/qq_30576521/article/details/142370538)
|
|
226
|
-
- https://github.com/AtuboDad/playwright_stealth/issues/31
|
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
LICENSE
|
|
2
|
-
README.md
|
|
3
|
-
pyproject.toml
|
|
4
|
-
mcp_query_table/__init__.py
|
|
5
|
-
mcp_query_table/__main__.py
|
|
6
|
-
mcp_query_table/_version.py
|
|
7
|
-
mcp_query_table/enums.py
|
|
8
|
-
mcp_query_table/server.py
|
|
9
|
-
mcp_query_table/tool.py
|
|
10
|
-
mcp_query_table/utils.py
|
|
11
|
-
mcp_query_table.egg-info/PKG-INFO
|
|
12
|
-
mcp_query_table.egg-info/SOURCES.txt
|
|
13
|
-
mcp_query_table.egg-info/dependency_links.txt
|
|
14
|
-
mcp_query_table.egg-info/requires.txt
|
|
15
|
-
mcp_query_table.egg-info/top_level.txt
|
|
16
|
-
mcp_query_table/providers/__init__.py
|
|
17
|
-
mcp_query_table/providers/baidu.py
|
|
18
|
-
mcp_query_table/providers/n.py
|
|
19
|
-
mcp_query_table/providers/yuanbao.py
|
|
20
|
-
mcp_query_table/sites/__init__.py
|
|
21
|
-
mcp_query_table/sites/eastmoney.py
|
|
22
|
-
mcp_query_table/sites/iwencai.py
|
|
23
|
-
mcp_query_table/sites/tdx.py
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
mcp_query_table
|
mcp_query_table-0.3.7/setup.cfg
DELETED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|