mcp-query-table 0.3.6__tar.gz → 0.3.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. mcp_query_table-0.3.8/.gitignore +174 -0
  2. {mcp_query_table-0.3.6 → mcp_query_table-0.3.8}/PKG-INFO +24 -22
  3. {mcp_query_table-0.3.6 → mcp_query_table-0.3.8}/README.md +16 -15
  4. {mcp_query_table-0.3.6 → mcp_query_table-0.3.8}/mcp_query_table/__init__.py +4 -0
  5. {mcp_query_table-0.3.6 → mcp_query_table-0.3.8}/mcp_query_table/__main__.py +9 -5
  6. mcp_query_table-0.3.8/mcp_query_table/_version.py +1 -0
  7. {mcp_query_table-0.3.6 → mcp_query_table-0.3.8}/mcp_query_table/enums.py +1 -1
  8. {mcp_query_table-0.3.6 → mcp_query_table-0.3.8}/mcp_query_table/providers/baidu.py +1 -1
  9. {mcp_query_table-0.3.6 → mcp_query_table-0.3.8}/mcp_query_table/providers/n.py +5 -1
  10. {mcp_query_table-0.3.6 → mcp_query_table-0.3.8}/mcp_query_table/providers/yuanbao.py +4 -1
  11. {mcp_query_table-0.3.6 → mcp_query_table-0.3.8}/mcp_query_table/server.py +16 -8
  12. {mcp_query_table-0.3.6 → mcp_query_table-0.3.8}/mcp_query_table/sites/iwencai.py +5 -2
  13. {mcp_query_table-0.3.6 → mcp_query_table-0.3.8}/mcp_query_table/tool.py +70 -54
  14. mcp_query_table-0.3.8/mcp_query_table/utils.py +51 -0
  15. {mcp_query_table-0.3.6 → mcp_query_table-0.3.8}/pyproject.toml +14 -12
  16. mcp_query_table-0.3.6/mcp_query_table/_version.py +0 -1
  17. mcp_query_table-0.3.6/mcp_query_table.egg-info/PKG-INFO +0 -224
  18. mcp_query_table-0.3.6/mcp_query_table.egg-info/SOURCES.txt +0 -22
  19. mcp_query_table-0.3.6/mcp_query_table.egg-info/dependency_links.txt +0 -1
  20. mcp_query_table-0.3.6/mcp_query_table.egg-info/requires.txt +0 -4
  21. mcp_query_table-0.3.6/mcp_query_table.egg-info/top_level.txt +0 -1
  22. mcp_query_table-0.3.6/setup.cfg +0 -4
  23. {mcp_query_table-0.3.6 → mcp_query_table-0.3.8}/LICENSE +0 -0
  24. {mcp_query_table-0.3.6 → mcp_query_table-0.3.8}/mcp_query_table/providers/__init__.py +0 -0
  25. {mcp_query_table-0.3.6 → mcp_query_table-0.3.8}/mcp_query_table/sites/__init__.py +0 -0
  26. {mcp_query_table-0.3.6 → mcp_query_table-0.3.8}/mcp_query_table/sites/eastmoney.py +0 -0
  27. {mcp_query_table-0.3.6 → mcp_query_table-0.3.8}/mcp_query_table/sites/tdx.py +0 -0
@@ -0,0 +1,174 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ #uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ #poetry.lock
109
+
110
+ # pdm
111
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112
+ #pdm.lock
113
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
114
+ # in version control.
115
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
116
+ .pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
121
+ __pypackages__/
122
+
123
+ # Celery stuff
124
+ celerybeat-schedule
125
+ celerybeat.pid
126
+
127
+ # SageMath parsed files
128
+ *.sage.py
129
+
130
+ # Environments
131
+ .env
132
+ .venv
133
+ env/
134
+ venv/
135
+ ENV/
136
+ env.bak/
137
+ venv.bak/
138
+
139
+ # Spyder project settings
140
+ .spyderproject
141
+ .spyproject
142
+
143
+ # Rope project settings
144
+ .ropeproject
145
+
146
+ # mkdocs documentation
147
+ /site
148
+
149
+ # mypy
150
+ .mypy_cache/
151
+ .dmypy.json
152
+ dmypy.json
153
+
154
+ # Pyre type checker
155
+ .pyre/
156
+
157
+ # pytype static type analyzer
158
+ .pytype/
159
+
160
+ # Cython debug symbols
161
+ cython_debug/
162
+
163
+ # PyCharm
164
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
165
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
166
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
167
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
168
+ #.idea/
169
+
170
+ # Ruff stuff:
171
+ .ruff_cache/
172
+
173
+ # PyPI configuration file
174
+ .pypirc
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mcp_query_table
3
- Version: 0.3.6
3
+ Version: 0.3.8
4
4
  Summary: query table from website, support MCP
5
5
  Author-email: wukan <wu-kan@163.com>
6
6
  License: MIT License
@@ -24,24 +24,25 @@ License: MIT License
24
24
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
25
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
26
  SOFTWARE.
27
-
28
- Keywords: playwright,mcp,table,iwencai,tdx,eastmoney
27
+ License-File: LICENSE
28
+ Keywords: eastmoney,iwencai,mcp,playwright,table,tdx
29
29
  Classifier: Development Status :: 4 - Beta
30
30
  Classifier: Programming Language :: Python
31
31
  Requires-Python: >=3.10
32
- Description-Content-Type: text/markdown
33
- License-File: LICENSE
34
- Requires-Dist: pandas
35
32
  Requires-Dist: loguru
36
- Requires-Dist: playwright
37
33
  Requires-Dist: mcp
38
- Dynamic: license-file
34
+ Requires-Dist: pandas
35
+ Requires-Dist: playwright
36
+ Requires-Dist: playwright-stealth
37
+ Requires-Dist: setuptools
38
+ Requires-Dist: tabulate
39
+ Description-Content-Type: text/markdown
39
40
 
40
41
  # mcp_query_table
41
42
 
42
43
  1. 基于`playwright`实现的财经网页表格爬虫,支持`Model Context Protocol (MCP) `。目前可查询来源为
43
44
 
44
- - [同花顺i问财](http://iwencai.com/)
45
+ - [同花顺问财](http://iwencai.com/)
45
46
  - [通达信问小达](https://wenda.tdx.com.cn/)
46
47
  - [东方财富条件选股](https://xuangu.eastmoney.com/)
47
48
 
@@ -70,7 +71,7 @@ from mcp_query_table import *
70
71
 
71
72
 
72
73
  async def main() -> None:
73
- async with BrowserManager(endpoint="http://127.0.0.1:9222", executable_path=None, debug=True) as bm:
74
+ async with BrowserManager(endpoint="http://127.0.0.1:9222", executable_path=None, devtools=True) as bm:
74
75
  # 问财需要保证浏览器宽度>768,防止界面变成适应手机
75
76
  page = await bm.get_page()
76
77
  df = await query(page, '收益最好的200只ETF', query_type=QueryType.ETF, max_page=1, site=Site.THS)
@@ -128,19 +129,21 @@ if __name__ == '__main__':
128
129
 
129
130
  后期会根据不同的网站改版情况,使用更适合的方法。
130
131
 
131
- ## MCP支持
132
+ ## 无头模式
132
133
 
133
- 确保可以在控制台中执行`python -m mcp_query_table -h`。如果不能,可能要先`pip install mcp_query_table`
134
+ 无头模式运行速度更快,但部分网站需要提前登录,所以,无头模式一定要指定`user_data_dir`,否则会出现需要登录的情况。
134
135
 
135
- 在`Cline`中可以配置如下。其中`command`是`python`的绝对路径,`executable_path`是`Chrome`的绝对路径,`timeout`是超时时间,单位为秒。
136
- 在各`AI`平台中由于返回时间常需1分钟以上,所以需要设置大的超时时间。
136
+ - `endpoint=None`时,`headless=True`可无头启动新浏览器实例。指定`executable_path`和`user_data_dir`,才能确保无头模式下正常运行。
137
+ - `endpoint`以`http://`开头,连接`CDP`模式启动的有头浏览器,参数必有`--remote-debugging-port`。`executable_path`为本地浏览器路径。
138
+ - `endpoint`以`ws://`开头,连接远程`Playwright Server`。也是无头模式,但无法指定`user_data_dir`,所以使用受限
139
+ - 参考:https://playwright.dev/python/docs/docker#running-the-playwright-server
137
140
 
138
- `endpoint`支持两方式,一种是`cdp_endpoint`方式,一种是`ws_endpoint`方式。
141
+ ## MCP支持
139
142
 
140
- - cdp方式:通过启动时加参数`--remote-debugging-port=9222`来启动浏览器
141
- - ws方式:服务器上`docker run -p 3000:3000 --rm --init -it --workdir /home/pwuser --user pwuser mcr.microsoft.com/playwright:v1.51.0-noble /bin/sh -c "npx -y playwright@1.51.0 run-server --port 3000 --host 0.0.0.0"`
143
+ 确保可以在控制台中执行`python -m mcp_query_table -h`。如果不能,可能要先`pip install mcp_query_table`
142
144
 
143
- 参考:https://playwright.dev/python/docs/docker#remote-connection
145
+ 在`Cline`中可以配置如下。其中`command`是`python`的绝对路径,`timeout`是超时时间,单位为秒。 在各`AI`
146
+ 平台中由于返回时间常需1分钟以上,所以需要设置大的超时时间。
144
147
 
145
148
  ### STDIO方式
146
149
 
@@ -170,7 +173,7 @@ if __name__ == '__main__':
170
173
  先在控制台中执行如下命令,启动`MCP`服务
171
174
 
172
175
  ```commandline
173
- python -m mcp_query_table --format markdown --transport sse --port 8000
176
+ python -m mcp_query_table --format markdown --transport sse --port 8000 --endpoint http://127.0.0.1:9222
174
177
  ```
175
178
 
176
179
  然后就可以连接到`MCP`服务了
@@ -189,7 +192,7 @@ python -m mcp_query_table --format markdown --transport sse --port 8000
189
192
  ## 使用`MCP Inspector`进行调试
190
193
 
191
194
  ```commandline
192
- npx @modelcontextprotocol/inspector python -m mcp_query_table --format markdown
195
+ npx @modelcontextprotocol/inspector python -m mcp_query_table --format markdown --endpoint http://127.0.0.1:9222
193
196
  ```
194
197
 
195
198
  打开浏览器并翻页是一个比较耗时的操作,会导致`MCP Inspector`页面超时,可以`http://localhost:5173/?timeout=300000`
@@ -219,6 +222,5 @@ npx @modelcontextprotocol/inspector python -m mcp_query_table --format markdown
219
222
  ![streamlit](docs/img/streamlit.png)
220
223
 
221
224
  ## 参考
222
-
223
- - [Playwright](https://playwright.dev/python/docs/intro)
224
225
  - [Selenium webdriver无法附加到edge实例,edge的--remote-debugging-port选项无效](https://blog.csdn.net/qq_30576521/article/details/142370538)
226
+ - https://github.com/AtuboDad/playwright_stealth/issues/31
@@ -2,7 +2,7 @@
2
2
 
3
3
  1. 基于`playwright`实现的财经网页表格爬虫,支持`Model Context Protocol (MCP) `。目前可查询来源为
4
4
 
5
- - [同花顺i问财](http://iwencai.com/)
5
+ - [同花顺问财](http://iwencai.com/)
6
6
  - [通达信问小达](https://wenda.tdx.com.cn/)
7
7
  - [东方财富条件选股](https://xuangu.eastmoney.com/)
8
8
 
@@ -31,7 +31,7 @@ from mcp_query_table import *
31
31
 
32
32
 
33
33
  async def main() -> None:
34
- async with BrowserManager(endpoint="http://127.0.0.1:9222", executable_path=None, debug=True) as bm:
34
+ async with BrowserManager(endpoint="http://127.0.0.1:9222", executable_path=None, devtools=True) as bm:
35
35
  # 问财需要保证浏览器宽度>768,防止界面变成适应手机
36
36
  page = await bm.get_page()
37
37
  df = await query(page, '收益最好的200只ETF', query_type=QueryType.ETF, max_page=1, site=Site.THS)
@@ -89,19 +89,21 @@ if __name__ == '__main__':
89
89
 
90
90
  后期会根据不同的网站改版情况,使用更适合的方法。
91
91
 
92
- ## MCP支持
92
+ ## 无头模式
93
93
 
94
- 确保可以在控制台中执行`python -m mcp_query_table -h`。如果不能,可能要先`pip install mcp_query_table`
94
+ 无头模式运行速度更快,但部分网站需要提前登录,所以,无头模式一定要指定`user_data_dir`,否则会出现需要登录的情况。
95
95
 
96
- 在`Cline`中可以配置如下。其中`command`是`python`的绝对路径,`executable_path`是`Chrome`的绝对路径,`timeout`是超时时间,单位为秒。
97
- 在各`AI`平台中由于返回时间常需1分钟以上,所以需要设置大的超时时间。
96
+ - `endpoint=None`时,`headless=True`可无头启动新浏览器实例。指定`executable_path`和`user_data_dir`,才能确保无头模式下正常运行。
97
+ - `endpoint`以`http://`开头,连接`CDP`模式启动的有头浏览器,参数必有`--remote-debugging-port`。`executable_path`为本地浏览器路径。
98
+ - `endpoint`以`ws://`开头,连接远程`Playwright Server`。也是无头模式,但无法指定`user_data_dir`,所以使用受限
99
+ - 参考:https://playwright.dev/python/docs/docker#running-the-playwright-server
98
100
 
99
- `endpoint`支持两方式,一种是`cdp_endpoint`方式,一种是`ws_endpoint`方式。
101
+ ## MCP支持
100
102
 
101
- - cdp方式:通过启动时加参数`--remote-debugging-port=9222`来启动浏览器
102
- - ws方式:服务器上`docker run -p 3000:3000 --rm --init -it --workdir /home/pwuser --user pwuser mcr.microsoft.com/playwright:v1.51.0-noble /bin/sh -c "npx -y playwright@1.51.0 run-server --port 3000 --host 0.0.0.0"`
103
+ 确保可以在控制台中执行`python -m mcp_query_table -h`。如果不能,可能要先`pip install mcp_query_table`
103
104
 
104
- 参考:https://playwright.dev/python/docs/docker#remote-connection
105
+ 在`Cline`中可以配置如下。其中`command`是`python`的绝对路径,`timeout`是超时时间,单位为秒。 在各`AI`
106
+ 平台中由于返回时间常需1分钟以上,所以需要设置大的超时时间。
105
107
 
106
108
  ### STDIO方式
107
109
 
@@ -131,7 +133,7 @@ if __name__ == '__main__':
131
133
  先在控制台中执行如下命令,启动`MCP`服务
132
134
 
133
135
  ```commandline
134
- python -m mcp_query_table --format markdown --transport sse --port 8000
136
+ python -m mcp_query_table --format markdown --transport sse --port 8000 --endpoint http://127.0.0.1:9222
135
137
  ```
136
138
 
137
139
  然后就可以连接到`MCP`服务了
@@ -150,7 +152,7 @@ python -m mcp_query_table --format markdown --transport sse --port 8000
150
152
  ## 使用`MCP Inspector`进行调试
151
153
 
152
154
  ```commandline
153
- npx @modelcontextprotocol/inspector python -m mcp_query_table --format markdown
155
+ npx @modelcontextprotocol/inspector python -m mcp_query_table --format markdown --endpoint http://127.0.0.1:9222
154
156
  ```
155
157
 
156
158
  打开浏览器并翻页是一个比较耗时的操作,会导致`MCP Inspector`页面超时,可以`http://localhost:5173/?timeout=300000`
@@ -180,6 +182,5 @@ npx @modelcontextprotocol/inspector python -m mcp_query_table --format markdown
180
182
  ![streamlit](docs/img/streamlit.png)
181
183
 
182
184
  ## 参考
183
-
184
- - [Playwright](https://playwright.dev/python/docs/intro)
185
- - [Selenium webdriver无法附加到edge实例,edge的--remote-debugging-port选项无效](https://blog.csdn.net/qq_30576521/article/details/142370538)
185
+ - [Selenium webdriver无法附加到edge实例,edge的--remote-debugging-port选项无效](https://blog.csdn.net/qq_30576521/article/details/142370538)
186
+ - https://github.com/AtuboDad/playwright_stealth/issues/31
@@ -5,3 +5,7 @@ from .tool import BrowserManager, query, chat
5
5
 
6
6
  TIMEOUT = 1000 * 60 * 3 # 3分钟,在抓取EventStream数据时等待数据返回,防止外层30秒超时
7
7
  TIMEOUT_60 = 1000 * 60 # 1分钟
8
+
9
+ # TODO 临时测试
10
+ # TIMEOUT = None
11
+ # TIMEOUT_60 = None
@@ -1,3 +1,5 @@
1
+ import getpass
2
+
1
3
  from mcp_query_table.server import serve
2
4
 
3
5
 
@@ -11,10 +13,11 @@ def main():
11
13
  parser.add_argument("--format", type=str, help="输出格式",
12
14
  default='markdown', choices=['markdown', 'csv', 'json'])
13
15
  parser.add_argument("--endpoint", type=str, help="浏览器CDP地址/WS地址",
14
- default="http://127.0.0.1:9222")
15
- parser.add_argument("--executable_path", type=str, help="浏览器类型",
16
- default=r'C:\Program Files\Google\Chrome\Application\chrome.exe')
17
-
16
+ nargs="?", default=r'http://127.0.0.1:9222')
17
+ parser.add_argument("--executable_path", type=str, help="浏览器路径",
18
+ nargs="?", default=r'C:\Program Files\Google\Chrome\Application\chrome.exe')
19
+ parser.add_argument("--user_data_dir", type=str, help="浏览器用户数据目录",
20
+ nargs="?", default=rf'C:\Users\{getpass.getuser()}\AppData\Local\Google\Chrome\User Data')
18
21
  parser.add_argument("--transport", type=str, help="传输类型",
19
22
  default='stdio', choices=['stdio', 'sse'])
20
23
  parser.add_argument("--host", type=str, help="MCP服务端绑定地址",
@@ -22,7 +25,8 @@ def main():
22
25
  parser.add_argument("--port", type=int, help="MCP服务端绑定端口",
23
26
  default='8000')
24
27
  args = parser.parse_args()
25
- serve(args.format, args.endpoint, args.executable_path,
28
+ serve(args.format, args.endpoint,
29
+ args.executable_path, args.user_data_dir,
26
30
  args.transport, args.host, args.port)
27
31
 
28
32
 
@@ -0,0 +1 @@
1
+ __version__ = "0.3.8"
@@ -18,7 +18,7 @@ class Site(Enum):
18
18
  """站点"""
19
19
  EastMoney = '东方财富' # 东方财富 条件选股
20
20
  TDX = '通达信' # 通达信 问小达
21
- THS = '同花顺' # 同花顺 i问财
21
+ THS = '同花顺' # 同花顺 问财
22
22
 
23
23
 
24
24
  class Provider(Enum):
@@ -8,7 +8,7 @@ import json
8
8
  from playwright.async_api import Page
9
9
 
10
10
  import mcp_query_table
11
- from mcp_query_table.tool import GlobalVars, split_images
11
+ from mcp_query_table.utils import split_images, GlobalVars
12
12
 
13
13
  _PAGE0_ = "https://chat.baidu.com/search"
14
14
  _PAGE1_ = "https://chat.baidu.com/aichat/api/conversation"
@@ -3,10 +3,11 @@
3
3
  """
4
4
  import json
5
5
 
6
+ from loguru import logger
6
7
  from playwright.async_api import Page
7
8
 
8
9
  import mcp_query_table
9
- from mcp_query_table.tool import GlobalVars, is_image
10
+ from mcp_query_table.utils import is_image, GlobalVars
10
11
 
11
12
  _PAGE0_ = "https://www.n.cn"
12
13
  _PAGE1_ = "https://www.n.cn/search"
@@ -78,6 +79,8 @@ async def chat(page: Page,
78
79
  str
79
80
  回答
80
81
  """
82
+ logger.warning("纳米搜索。不登录可以使用。但无头模式要指定`user_data_dir`才能正常工作")
83
+
81
84
  if not create:
82
85
  if not page.url.startswith(_PAGE1_):
83
86
  create = True
@@ -102,6 +105,7 @@ async def chat(page: Page,
102
105
  textbox = page.get_by_role("textbox", name=name)
103
106
  await textbox.fill(prompt)
104
107
  await textbox.press("Enter")
108
+ # await page.screenshot(path="n.png")
105
109
  await on_response(await response_info.value)
106
110
 
107
111
  return G.get_text()
@@ -4,10 +4,11 @@
4
4
  import json
5
5
  import re
6
6
 
7
+ from loguru import logger
7
8
  from playwright.async_api import Page
8
9
 
9
10
  import mcp_query_table
10
- from mcp_query_table.tool import GlobalVars, split_images
11
+ from mcp_query_table.utils import split_images, GlobalVars
11
12
 
12
13
  _PAGE0_ = "https://yuanbao.tencent.com/"
13
14
  _PAGE1_ = "https://yuanbao.tencent.com/api/chat"
@@ -69,6 +70,8 @@ async def chat(page: Page,
69
70
  create: bool,
70
71
  files: list[str]
71
72
  ) -> str:
73
+ logger.info("腾讯元宝。登录才可以使用。无头模式时要指定`user_data_dir`才能正常工作")
74
+
72
75
  if not page.url.startswith(_PAGE0_):
73
76
  create = True
74
77
 
@@ -14,9 +14,13 @@ class QueryServer:
14
14
  self.format: str = "markdown"
15
15
  self.browser = None
16
16
 
17
- def start(self, format, endpoint, executable_path):
17
+ def start(self, format, endpoint, executable_path, user_data_dir):
18
18
  self.format: str = format
19
- self.browser = BrowserManager(endpoint=endpoint, executable_path=executable_path, debug=False)
19
+ self.browser = BrowserManager(endpoint=endpoint,
20
+ executable_path=executable_path,
21
+ user_data_dir=user_data_dir,
22
+ devtools=False,
23
+ headless=True)
20
24
 
21
25
  async def query(self, query_input: str, query_type: QueryType, max_page: int, site: Site):
22
26
  page = await self.browser.get_page()
@@ -54,7 +58,8 @@ async def query(
54
58
  return await qsv.query(query_input, query_type, max_page, site)
55
59
 
56
60
 
57
- @mcp.tool(description="大语言模型对话")
61
+ # chat功能不通过mcp暴露,因为在Cline等客户端中本就有LLM功能,反而导致返回的数据没有正确提交
62
+ # @mcp.tool(description="大语言模型对话")
58
63
  async def chat(
59
64
  prompt: Annotated[str, Field(description="提示词。如:`9.9大还是9.11大?`")],
60
65
  create: Annotated[bool, Field(default=False, description="是否创建新对话")],
@@ -65,12 +70,15 @@ async def chat(
65
70
  return await qsv.chat(prompt, create, files, provider)
66
71
 
67
72
 
68
- def serve(format, endpoint, executable_path, transport, host, port):
69
- qsv.start(format, endpoint, executable_path)
70
- logger.info(f"{format=},{transport=}")
71
- logger.info(f"{endpoint=},{executable_path=}")
73
+ def serve(format, endpoint, executable_path, user_data_dir, transport, host, port):
74
+ qsv.start(format, endpoint, executable_path, user_data_dir)
75
+ logger.info(f"{endpoint=}")
76
+ logger.info(f"{executable_path=}")
77
+ logger.info(f"{user_data_dir=}")
72
78
  if transport == 'sse':
73
- logger.info(f"{host=},{port=}", transport, host, port)
79
+ logger.info(f"{transport=},{format=},{host=},{port=}")
80
+ else:
81
+ logger.info(f"{transport=},{format=}")
74
82
 
75
83
  mcp.settings.host = host
76
84
  mcp.settings.port = port
@@ -1,5 +1,5 @@
1
1
  """
2
- 同花顺 i问财
2
+ 同花顺问财
3
3
  https://www.iwencai.com/
4
4
 
5
5
  1. 一定要保证浏览器宽度>768,防止界面变成适应手机
@@ -10,8 +10,10 @@ import re
10
10
  import pandas as pd
11
11
  from loguru import logger
12
12
  from playwright.async_api import Page
13
+ from playwright_stealth import stealth_async
13
14
 
14
15
  from mcp_query_table.enums import QueryType
16
+ from mcp_query_table.utils import FixedConfig
15
17
 
16
18
  # 初次查询页面
17
19
  _PAGE1_ = 'https://www.iwencai.com/customized/chart/get-robot-data'
@@ -140,7 +142,6 @@ json_data['answer']['components'][0]['data']['meta']['extra']['row_count']
140
142
 
141
143
  async def on_response(response):
142
144
  if response.url == _PAGE1_:
143
- # TODO 不支持headless模式,需要以后解决
144
145
  P.update(*get_robot_data(await response.json()))
145
146
  if response.url == _PAGE2_:
146
147
  P.update(*getDataList(await response.json()))
@@ -153,6 +154,8 @@ async def query(page: Page,
153
154
  querytype = _querytype_.get(type_, None)
154
155
  assert querytype is not None, f"不支持的类型:{type_}"
155
156
 
157
+ await stealth_async(page, FixedConfig())
158
+
156
159
  await page.route(re.compile(r'.*\.(?:jpg|jpeg|png|gif|webp)(?:$|\?)'), lambda route: route.abort())
157
160
 
158
161
  P.reset()
@@ -1,8 +1,9 @@
1
+ import getpass
1
2
  import subprocess
2
3
  import sys
3
4
  import time
4
5
  from pathlib import Path
5
- from typing import Optional, List, Tuple
6
+ from typing import Optional
6
7
  from urllib.parse import urlparse
7
8
 
8
9
  import pandas as pd
@@ -26,6 +27,7 @@ def create_detached_process(command):
26
27
  kwargs.update({
27
28
  'start_new_session': True # 创建新的会话
28
29
  })
30
+ logger.info(f"Popen: {command}")
29
31
  return subprocess.Popen(command, **kwargs)
30
32
 
31
33
 
@@ -59,6 +61,21 @@ def get_executable_path(executable_path) -> Optional[str]:
59
61
  return None
60
62
 
61
63
 
64
+ def get_user_data_dir(user_data_dir) -> Optional[str]:
65
+ """获取浏览器可用户目录"""
66
+ browsers = {
67
+ "default": user_data_dir,
68
+ "chrome.exe": rf'C:\Users\{getpass.getuser()}\AppData\Local\Google\Chrome\User Data\Default',
69
+ "msedge.exe": rf"C:\Users\{getpass.getuser()}\AppData\Local\Microsoft\Edge\User Data\Default",
70
+ }
71
+ for k, v in browsers.items():
72
+ if v is None:
73
+ continue
74
+ if Path(v).exists():
75
+ return v
76
+ return None
77
+
78
+
62
79
  class BrowserManager:
63
80
  async def __aenter__(self):
64
81
  return self
@@ -67,24 +84,36 @@ class BrowserManager:
67
84
  await self.cleanup()
68
85
 
69
86
  def __init__(self,
70
- endpoint: Optional[str] = None,
87
+ endpoint: Optional[str],
71
88
  executable_path: Optional[str] = None,
72
- debug: bool = False):
89
+ devtools: bool = False,
90
+ headless: bool = True,
91
+ user_data_dir: Optional[str] = None):
73
92
  """
74
93
 
75
94
  Parameters
76
95
  ----------
77
- endpoint:str
78
- 浏览器CDP地址/WS地址
96
+ endpoint:str or None
97
+ 浏览器CDP地址/WS地址。
98
+ 如果为None,则直接启动浏览器实例。可用无头模式。建议指定用户数据目录,否则可能无法使用某些需要登录的网站
79
99
  executable_path:str
80
100
  浏览器可执行文件路径。推荐使用chrome,因为Microsoft Edge必须在任务管理器中完全退出才能启动调试端口
81
- debug:bool
101
+ devtools:bool
82
102
  是否显示开发者工具
103
+ headless:bool
104
+ 是否无头模式启动浏览器
105
+ user_data_dir:str
106
+ 浏览器用户数据目录。无头模式。强烈建议指定用户数据目录,否则可能无法使用某些需要登录的网站
83
107
 
84
108
  """
85
- self.endpoint = endpoint or 'http://127.0.0.1:9222'
109
+ if devtools:
110
+ headless = False
111
+
112
+ self.endpoint = endpoint
86
113
  self.executable_path = executable_path
87
- self.debug = debug
114
+ self.devtools = devtools
115
+ self.headless = headless
116
+ self.user_data_dir = user_data_dir
88
117
 
89
118
  self.playwright: Optional[Playwright] = None
90
119
  self.browser = None
@@ -102,8 +131,9 @@ class BrowserManager:
102
131
  """连接本地浏览器"""
103
132
  port = urlparse(self.endpoint).port
104
133
  executable_path = get_executable_path(self.executable_path)
134
+ name = Path(executable_path).name
105
135
  command = [executable_path, f'--remote-debugging-port={port}', '--start-maximized']
106
- if self.debug:
136
+ if self.devtools:
107
137
  command.append('--auto-open-devtools-for-tabs')
108
138
 
109
139
  for i in range(2):
@@ -113,13 +143,12 @@ class BrowserManager:
113
143
  break
114
144
  except:
115
145
  if i == 0:
116
- logger.info(f"start browser:{command}")
117
146
  create_detached_process(command)
118
- time.sleep(3)
147
+ time.sleep(5)
119
148
  continue
120
149
  if i == 1:
121
150
  raise ConnectionError(
122
- f"已提前打开了浏览器,但未开启远程调试端口?请关闭浏览器全部进程后重试 `taskkill /f /im {Path(executable_path).name}`")
151
+ f"已提前打开了浏览器,但未开启远程调试端口?请关闭浏览器全部进程后重试 `taskkill /f /im {name}`")
123
152
 
124
153
  async def _connect_to_remote(self) -> None:
125
154
  """连接远程浏览器"""
@@ -133,6 +162,26 @@ class BrowserManager:
133
162
  except:
134
163
  raise ConnectionError(f"连接远程浏览器失败,请检查CDP/WS地址和端口是否正确。{self.endpoint}")
135
164
 
165
+ async def _connect_to_launch(self) -> None:
166
+ logger.info("executable_path={}", self.executable_path)
167
+ if self.user_data_dir:
168
+ logger.info("user_data_dir={}", self.user_data_dir)
169
+ try:
170
+ self.context = await self.playwright.chromium.launch_persistent_context(
171
+ user_data_dir=self.user_data_dir,
172
+ executable_path=self.executable_path,
173
+ headless=self.headless,
174
+ devtools=self.devtools,
175
+ timeout=10000, slow_mo=1000)
176
+ except:
177
+ raise ConnectionError(f"launch失败,可能已经有浏览器已经打开了数据目录。{self.user_data_dir}")
178
+ else:
179
+ logger.warning("未指定浏览器用户数据目录,部分需要的网站可能无法使用")
180
+ self.browser = await self.playwright.chromium.launch(
181
+ executable_path=self.executable_path,
182
+ headless=self.headless,
183
+ devtools=self.devtools)
184
+
136
185
  async def _launch(self) -> None:
137
186
  """启动浏览器,并连接CDP协议
138
187
 
@@ -142,16 +191,20 @@ class BrowserManager:
142
191
 
143
192
  """
144
193
  self.playwright = await async_playwright().start()
145
-
146
- if is_local_url(self.endpoint) and is_cdp_url(self.endpoint):
194
+ if self.endpoint is None:
195
+ await self._connect_to_launch()
196
+ elif is_local_url(self.endpoint) and is_cdp_url(self.endpoint):
147
197
  await self._connect_to_local()
148
198
  else:
149
199
  await self._connect_to_remote()
150
200
 
151
- if len(self.browser.contexts) == 0:
201
+ if self.browser is None:
202
+ pass
203
+ elif len(self.browser.contexts) == 0:
152
204
  self.context = await self.browser.new_context()
153
205
  else:
154
206
  self.context = self.browser.contexts[0]
207
+
155
208
  # 复用打开的page
156
209
  for page in self.context.pages:
157
210
  # 防止开发者工具被使用
@@ -165,15 +218,10 @@ class BrowserManager:
165
218
  continue
166
219
  self.pages.append(page)
167
220
 
168
- async def _try_launch(self) -> None:
169
- if self.browser is None:
170
- await self._launch()
171
- if not self.browser.is_connected():
172
- await self._launch()
173
-
174
221
  async def get_page(self) -> Page:
175
222
  """获取可用Page。无空闲标签时会打开新标签"""
176
- await self._try_launch()
223
+ if self.context is None:
224
+ await self._launch()
177
225
 
178
226
  # 反复取第一个tab
179
227
  while len(self.pages) > 0:
@@ -193,19 +241,6 @@ class BrowserManager:
193
241
  self.pages.append(page)
194
242
 
195
243
 
196
- class GlobalVars:
197
- """全局变量"""
198
-
199
- def __init__(self):
200
- self.text = ""
201
-
202
- def set_text(self, text):
203
- self.text = text
204
-
205
- def get_text(self):
206
- return self.text
207
-
208
-
209
244
  async def query(
210
245
  page: Page,
211
246
  query_input: str = "收盘价>100元",
@@ -289,22 +324,3 @@ async def chat(
289
324
  return await chat(page, prompt, create, files)
290
325
 
291
326
  raise ValueError(f"未支持的提供商:{provider}")
292
-
293
-
294
- def is_image(path: str) -> bool:
295
- """判断是否是图片文件"""
296
- img_ext = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp']
297
- ext = Path(path).suffix.lower()
298
- return ext in img_ext
299
-
300
-
301
- def split_images(files: List[str]) -> Tuple[List[str], List[str]]:
302
- """图片列表分成两部分"""
303
- imgs = []
304
- docs = []
305
- for f in files:
306
- if is_image(f):
307
- imgs.append(f)
308
- else:
309
- docs.append(f)
310
- return imgs, docs
@@ -0,0 +1,51 @@
1
+ import random
2
+ import string
3
+ from pathlib import Path
4
+ from typing import List, Tuple
5
+
6
+ from playwright_stealth import StealthConfig
7
+
8
+
9
+ def is_image(path: str) -> bool:
10
+ """判断是否是图片文件"""
11
+ img_ext = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp']
12
+ ext = Path(path).suffix.lower()
13
+ return ext in img_ext
14
+
15
+
16
+ def split_images(files: List[str]) -> Tuple[List[str], List[str]]:
17
+ """图片列表分成两部分"""
18
+ imgs = []
19
+ docs = []
20
+ for f in files:
21
+ if is_image(f):
22
+ imgs.append(f)
23
+ else:
24
+ docs.append(f)
25
+ return imgs, docs
26
+
27
+
28
+ class GlobalVars:
29
+ """全局变量"""
30
+
31
+ def __init__(self):
32
+ self.text = ""
33
+
34
+ def set_text(self, text):
35
+ self.text = text
36
+
37
+ def get_text(self):
38
+ return self.text
39
+
40
+
41
+ # https://github.com/AtuboDad/playwright_stealth/issues/31#issuecomment-2342541305
42
+ class FixedConfig(StealthConfig):
43
+
44
+ @property
45
+ def enabled_scripts(self):
46
+ key = "".join(random.choices(string.ascii_letters, k=10))
47
+ for script in super().enabled_scripts:
48
+ if "const opts" in script:
49
+ yield script.replace("const opts", f"window.{key}")
50
+ continue
51
+ yield script.replace("opts", f"window.{key}")
@@ -1,7 +1,3 @@
1
- [build-system]
2
- requires = ["setuptools"]
3
- build-backend = "setuptools.build_meta"
4
-
5
1
  [project]
6
2
  name = "mcp_query_table"
7
3
  authors = [
@@ -20,17 +16,23 @@ dependencies = [
20
16
  "pandas",
21
17
  "loguru",
22
18
  "playwright",
19
+ "playwright-stealth",
23
20
  "mcp",
21
+ "setuptools", # playwright-stealth中要使用pkg_resources,而pkg_resources在setuptools中
22
+ "tabulate"
24
23
  ]
25
24
  dynamic = ["version"]
26
25
 
26
+ [build-system]
27
+ requires = ["hatchling"]
28
+ build-backend = "hatchling.build"
27
29
 
28
- [tool.setuptools]
29
- packages = [
30
- "mcp_query_table",
31
- "mcp_query_table.sites",
32
- "mcp_query_table.providers",
33
- ]
30
+ [tool.hatch.version]
31
+ path = "mcp_query_table/_version.py"
32
+
33
+ [tool.hatch.build.targets.wheel]
34
+ packages = ["mcp_query_table"]
35
+ include-package-data = true
34
36
 
35
- [tool.setuptools.dynamic]
36
- version = { attr = "mcp_query_table._version.__version__" }
37
+ [tool.hatch.build.targets.sdist]
38
+ include = ["mcp_query_table*"]
@@ -1 +0,0 @@
1
- __version__ = "0.3.6"
@@ -1,224 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: mcp_query_table
3
- Version: 0.3.6
4
- Summary: query table from website, support MCP
5
- Author-email: wukan <wu-kan@163.com>
6
- License: MIT License
7
-
8
- Copyright (c) 2025 伍侃
9
-
10
- Permission is hereby granted, free of charge, to any person obtaining a copy
11
- of this software and associated documentation files (the "Software"), to deal
12
- in the Software without restriction, including without limitation the rights
13
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
- copies of the Software, and to permit persons to whom the Software is
15
- furnished to do so, subject to the following conditions:
16
-
17
- The above copyright notice and this permission notice shall be included in all
18
- copies or substantial portions of the Software.
19
-
20
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
- SOFTWARE.
27
-
28
- Keywords: playwright,mcp,table,iwencai,tdx,eastmoney
29
- Classifier: Development Status :: 4 - Beta
30
- Classifier: Programming Language :: Python
31
- Requires-Python: >=3.10
32
- Description-Content-Type: text/markdown
33
- License-File: LICENSE
34
- Requires-Dist: pandas
35
- Requires-Dist: loguru
36
- Requires-Dist: playwright
37
- Requires-Dist: mcp
38
- Dynamic: license-file
39
-
40
- # mcp_query_table
41
-
42
- 1. 基于`playwright`实现的财经网页表格爬虫,支持`Model Context Protocol (MCP)`。目前可查询来源为
43
-
44
- - [同花顺i问财](http://iwencai.com/)
45
- - [通达信问小达](https://wenda.tdx.com.cn/)
46
- - [东方财富条件选股](https://xuangu.eastmoney.com/)
47
-
48
- 实盘时,如果某网站宕机或改版,可以立即切换到其他网站。(注意:不同网站的表格结构不同,需要提前做适配)
49
-
50
- 2. 基于`playwright`实现的大语言模型调用爬虫。目前可用来源为
51
- - [纳米搜索](https://www.n.cn/)
52
- - [腾讯元宝](https://yuanbao.tencent.com/)
53
- - [百度AI搜索](https://chat.baidu.com/)
54
-
55
- `RooCode`提供了`Human Reply`功能。但发现`纳米搜索`网页版复制时格式破坏,所以研发了此功能
56
-
57
- ## 安装
58
-
59
- ```commandline
60
- pip install -i https://pypi.org/simple --upgrade mcp_query_table
61
- pip install -i https://pypi.tuna.tsinghua.edu.cn/simple --upgrade mcp_query_table
62
- ```
63
-
64
- ## 使用
65
-
66
- ```python
67
- import asyncio
68
-
69
- from mcp_query_table import *
70
-
71
-
72
- async def main() -> None:
73
- async with BrowserManager(endpoint="http://127.0.0.1:9222", executable_path=None, debug=True) as bm:
74
- # 问财需要保证浏览器宽度>768,防止界面变成适应手机
75
- page = await bm.get_page()
76
- df = await query(page, '收益最好的200只ETF', query_type=QueryType.ETF, max_page=1, site=Site.THS)
77
- print(df.to_markdown())
78
- df = await query(page, '年初至今收益率前50', query_type=QueryType.Fund, max_page=1, site=Site.TDX)
79
- print(df.to_csv())
80
- df = await query(page, '流通市值前10的行业板块', query_type=QueryType.Index, max_page=1, site=Site.TDX)
81
- print(df.to_csv())
82
- # TODO 东财翻页要提前登录
83
- df = await query(page, '今日涨幅前5的概念板块;', query_type=QueryType.Board, max_page=3, site=Site.EastMoney)
84
- print(df)
85
-
86
- output = await chat(page, "1+2等于多少?", provider=Provider.YuanBao)
87
- print(output)
88
- output = await chat(page, "3+4等于多少?", provider=Provider.YuanBao, create=True)
89
- print(output)
90
-
91
- print('done')
92
- bm.release_page(page)
93
- await page.wait_for_timeout(2000)
94
-
95
-
96
- if __name__ == '__main__':
97
- asyncio.run(main())
98
-
99
- ```
100
-
101
- ## 注意事项
102
-
103
- 1. 浏览器最好是`Chrome`。如一定要使用`Edge`,除了关闭`Edge`所有窗口外,还要在任务管理器关闭`Microsoft Edge`
104
- 的所有进程,即`taskkill /f /im msedge.exe`
105
- 2. 浏览器要保证窗口宽度,防止部分网站自动适配成手机版,导致表格查询失败
106
- 3. 如有网站账号,请提前登录。此工具无自动登录功能
107
- 4. 不同网站的表格结构不同,同条件返回股票数量也不同。需要查询后做适配
108
-
109
- ## 工作原理
110
-
111
- 不同于`requests`,`playwright`是基于浏览器的,模拟用户在浏览器中的操作。
112
-
113
- 1. 不需要解决登录问题
114
- 2. 不需要解决请求构造、响应解析
115
- 3. 可以直接获取表格数据,所见即所得
116
- 4. 运行速度慢于`requests`,但开发效率高
117
-
118
- 数据的获取有:
119
-
120
- 1. 直接解析HTML表格
121
- 1. 数字文本化了,不利于后期研究
122
- 2. 适用性最强
123
- 2. 截获请求,获取返回的`json`数据
124
- 1. 类似于`requests`,需要做响应解析
125
- 2. 灵活性差点,网站改版后,需要重新做适配
126
-
127
- 此项目采用的是模拟点击浏览器来发送请求,使用截获响应并解析的方法来获取数据。
128
-
129
- 后期会根据不同的网站改版情况,使用更适合的方法。
130
-
131
- ## MCP支持
132
-
133
- 确保可以在控制台中执行`python -m mcp_query_table -h`。如果不能,可能要先`pip install mcp_query_table`
134
-
135
- 在`Cline`中可以配置如下。其中`command`是`python`的绝对路径,`executable_path`是`Chrome`的绝对路径,`timeout`是超时时间,单位为秒。
136
- 在各`AI`平台中由于返回时间常需1分钟以上,所以需要设置大的超时时间。
137
-
138
- `endpoint`支持两种方式:一种是`cdp_endpoint`方式,另一种是`ws_endpoint`方式。
139
-
140
- - cdp方式:通过启动时加参数`--remote-debugging-port=9222`来启动浏览器
141
- - ws方式:服务器上`docker run -p 3000:3000 --rm --init -it --workdir /home/pwuser --user pwuser mcr.microsoft.com/playwright:v1.51.0-noble /bin/sh -c "npx -y playwright@1.51.0 run-server --port 3000 --host 0.0.0.0"`
142
-
143
- 参考:https://playwright.dev/python/docs/docker#remote-connection
144
-
145
- ### STDIO方式
146
-
147
- ```json
148
- {
149
- "mcpServers": {
150
- "mcp_query_table": {
151
- "timeout": 300,
152
- "command": "D:\\Users\\Kan\\miniconda3\\envs\\py312\\python.exe",
153
- "args": [
154
- "-m",
155
- "mcp_query_table",
156
- "--format",
157
- "markdown",
158
- "--endpoint",
159
- "http://127.0.0.1:9222",
160
- "--executable_path",
161
- "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe"
162
- ]
163
- }
164
- }
165
- }
166
- ```
167
-
168
- ### SSE方式
169
-
170
- 先在控制台中执行如下命令,启动`MCP`服务
171
-
172
- ```commandline
173
- python -m mcp_query_table --format markdown --transport sse --port 8000
174
- ```
175
-
176
- 然后就可以连接到`MCP`服务了
177
-
178
- ```json
179
- {
180
- "mcpServers": {
181
- "mcp_query_table": {
182
- "timeout": 300,
183
- "url": "http://127.0.0.1:8000/sse"
184
- }
185
- }
186
- }
187
- ```
188
-
189
- ## 使用`MCP Inspector`进行调试
190
-
191
- ```commandline
192
- npx @modelcontextprotocol/inspector python -m mcp_query_table --format markdown
193
- ```
194
-
195
- 打开浏览器并翻页是一个比较耗时的操作,会导致`MCP Inspector`页面超时,可以`http://localhost:5173/?timeout=300000`
196
- 表示超时时间为300秒
197
-
198
- 第一次尝试编写`MCP`项目,可能会有各种问题,欢迎大家交流。
199
-
200
- ## `MCP`使用技巧
201
-
202
- 1. 2024年涨幅最大的100只股票按2024年12月31日总市值排名。三个网站的结果都不一样
203
- - 同花顺:显示了2201只股票。前5个是工商银行、农业银行、中国移动、中国石油、建设银行
204
- - 通达信:显示了100只股票,前5个是寒武纪、正丹股份、汇金科技、万丰奥威、艾融软件
205
- - 东方财富:显示了100只股票,前5个是海光信息、寒武纪、光启技术、润泽科技、新易盛
206
-
207
- 2. 大语言模型对问题拆分能力弱,所以要能合理的提问,保证查询条件不会被改动。以下推荐第2、3种
208
- - 2024年涨幅最大的100只股票按2024年12月31日总市值排名
209
- > 大语言模型非常有可能拆分这句,导致一步查询被分成了多步查询
210
- - 向东方财富查询“2024年涨幅最大的100只股票按2024年12月31日总市值排名”
211
- > 用引号括起来,避免被拆分
212
- - 向东方财富板块查询 “去年涨的最差的行业板块”,再查询此板块中去年涨的最好的5只股票
213
- > 分成两步查询,先查询板块,再查询股票。但最好不要全自动,因为第一步的结果它不理解“今日涨幅”和“区间涨幅”,需要交互修正
214
-
215
- ## 支持`Streamlit`
216
-
217
- 实现在同一页面中查询金融数据,并手工输入到`AI`中进行深度分析。参考`streamlit`目录下的`README.md`文件。
218
-
219
- ![streamlit](docs/img/streamlit.png)
220
-
221
- ## 参考
222
-
223
- - [Playwright](https://playwright.dev/python/docs/intro)
224
- - [Selenium webdriver无法附加到edge实例,edge的--remote-debugging-port选项无效](https://blog.csdn.net/qq_30576521/article/details/142370538)
@@ -1,22 +0,0 @@
1
- LICENSE
2
- README.md
3
- pyproject.toml
4
- mcp_query_table/__init__.py
5
- mcp_query_table/__main__.py
6
- mcp_query_table/_version.py
7
- mcp_query_table/enums.py
8
- mcp_query_table/server.py
9
- mcp_query_table/tool.py
10
- mcp_query_table.egg-info/PKG-INFO
11
- mcp_query_table.egg-info/SOURCES.txt
12
- mcp_query_table.egg-info/dependency_links.txt
13
- mcp_query_table.egg-info/requires.txt
14
- mcp_query_table.egg-info/top_level.txt
15
- mcp_query_table/providers/__init__.py
16
- mcp_query_table/providers/baidu.py
17
- mcp_query_table/providers/n.py
18
- mcp_query_table/providers/yuanbao.py
19
- mcp_query_table/sites/__init__.py
20
- mcp_query_table/sites/eastmoney.py
21
- mcp_query_table/sites/iwencai.py
22
- mcp_query_table/sites/tdx.py
@@ -1,4 +0,0 @@
1
- pandas
2
- loguru
3
- playwright
4
- mcp
@@ -1 +0,0 @@
1
- mcp_query_table
@@ -1,4 +0,0 @@
1
- [egg_info]
2
- tag_build =
3
- tag_date = 0
4
-
File without changes