mcp-query-table 0.2.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,4 @@
1
+ from ._version import __version__
2
+
3
+ from .enums import QueryType, Site
4
+ from .tool import BrowserManager, query
@@ -0,0 +1,29 @@
1
+ from mcp_query_table.server import serve
2
+
3
+
4
def main():
    """Parse the command-line options and start the MCP query-table server.

    All parsed values are forwarded positionally to ``serve``.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="query table from website",
    )

    # Output and browser options
    parser.add_argument("--format", type=str, help="输出格式",
                        default='markdown', choices=['markdown', 'csv', 'json'])
    parser.add_argument("--cdp_port", type=int, help="浏览器远程调试端口",
                        default=9222)
    # The value is an executable path, so the help text says "path" rather than "type".
    parser.add_argument("--browser_path", type=str, help="浏览器可执行文件路径",
                        default=r'C:\Program Files\Google\Chrome\Application\chrome.exe')

    # MCP transport options
    parser.add_argument("--transport", type=str, help="传输类型",
                        default='stdio', choices=['stdio', 'sse'])
    parser.add_argument("--mcp_host", type=str, help="MCP服务端地址",
                        default='0.0.0.0')
    # Use an int default that matches ``type=int`` instead of relying on argparse
    # converting a string default.
    parser.add_argument("--mcp_port", type=int, help="MCP服务端端口",
                        default=8000)
    args = parser.parse_args()
    serve(args.format, args.cdp_port, args.browser_path, args.transport, args.mcp_host, args.mcp_port)
26
+
27
+
28
+ if __name__ == "__main__":
29
+ main()
@@ -0,0 +1 @@
1
+ __version__ = "0.2.6"
@@ -0,0 +1,21 @@
1
+ from enum import Enum
2
+
3
+
4
class QueryType(Enum):
    """Kind of instrument or content a query targets.

    Each value is the Chinese label of the category.
    """
    CNStock = 'A股'  # mainland China A-shares
    HKStock = '港股'  # Hong Kong stocks
    USStock = '美股'  # US stocks
    Index = '指数'  # indices
    Fund = '基金'  # funds
    ETF = 'ETF'
    ConBond = '可转债'  # convertible bonds
    Board = '板块'  # boards / sectors
    Info = '资讯'  # news / information
15
+
16
+
17
class Site(Enum):
    """Website that a query is sent to.

    Each value is the Chinese name of the site.
    """
    EastMoney = '东方财富'  # EastMoney stock screener
    TDX = '通达信'  # TDX "Wen Xiaoda" assistant
    THS = '同花顺'  # THS (Tonghuashun) iWencai
@@ -0,0 +1,56 @@
1
+ from typing import Annotated, Optional
2
+
3
+ from loguru import logger
4
+ from mcp.server.fastmcp import FastMCP
5
+ from pydantic import Field
6
+
7
+ from mcp_query_table import QueryType, Site, query as query_table_query
8
+ from mcp_query_table.tool import BrowserManager
9
+
10
+
11
class QueryServer:
    """Run table queries through a shared ``BrowserManager`` and serialize the result."""

    def __init__(self, format: str = 'markdown', port: int = 9222, browser_path: Optional[str] = None) -> None:
        """Store the output format and build the browser manager.

        Parameters
        ----------
        format
            Output format: ``'markdown'``, ``'csv'`` or ``'json'``.
        port
            Remote debugging port passed to ``BrowserManager``.
        browser_path
            Browser executable path passed to ``BrowserManager``.
        """
        self.format: str = format
        self.browser = BrowserManager(port=port, browser_path=browser_path, debug=False)

    async def query(self, query_input: str, query_type: QueryType, max_page: int, site: Site):
        """Execute one query and return the table as text in ``self.format``.

        Raises
        ------
        ValueError
            If ``self.format`` is not one of the supported formats.
        """
        page = await self.browser.get_page()
        try:
            df = await query_table_query(page, query_input, query_type, max_page, site)
        finally:
            # Return the page to the pool even when the query fails; otherwise every
            # failed call leaves a tab behind and get_page() keeps opening new ones.
            self.browser.release_page(page)

        if self.format == 'csv':
            return df.to_csv()
        if self.format == 'markdown':
            return df.to_markdown()
        if self.format == 'json':
            return df.to_json(force_ascii=False, indent=2)
        # Previously an unknown format fell through and returned None silently.
        raise ValueError(f"不支持的输出格式:{self.format}")
27
+
28
+
29
+ # !!!log_level这一句非常重要,否则Cline/MCP Server/Tools工作不正常
30
+ mcp = FastMCP("query_table_mcp", log_level="ERROR")
31
+ qsv = QueryServer()
32
+
33
+
34
@mcp.tool(description="查询金融表格数据")
async def query(
        query_input: Annotated[
            str, Field(description="查询条件。支持复杂查询,如:`2024年涨幅最大的100只股票按市值排名`")],
        query_type: Annotated[QueryType, Field(default=QueryType.CNStock,
                                               description="查询类型。支持`A股`、`指数`、`基金`、`港股`、`美股`等")],
        max_page: Annotated[int, Field(default=1, ge=1, le=10, description="最大页数。只查第一页即可")],
        site: Annotated[Site, Field(default=Site.THS, description="站点。支持`东方财富`、`通达信`、`同花顺`")]
) -> str:
    """MCP tool entry point: delegate the query to the module-level ``qsv`` server."""
    rendered = await qsv.query(query_input, query_type, max_page, site)
    return rendered
44
+
45
+
46
def serve(format, cdp_port, browser_path, transport, mcp_host, mcp_port):
    """Configure the module-level query server and run the MCP server.

    Parameters
    ----------
    format
        Output format used by ``qsv.query``.
    cdp_port
        Browser remote debugging port.
    browser_path
        Browser executable path.
    transport
        Transport name passed to ``mcp.run``.
    mcp_host, mcp_port
        Host and port stored into ``mcp.settings``.
    """
    qsv.format = format
    qsv.port = cdp_port
    qsv.browser_path = browser_path
    # ``qsv`` built its BrowserManager with default arguments at import time, so the two
    # attribute assignments above never reached it. Rebuild the manager so the requested
    # port and browser path are actually used.
    qsv.browser = BrowserManager(port=cdp_port, browser_path=browser_path, debug=False)
    logger.info("serve:{},{},{},{}", qsv.format, qsv.port, qsv.browser_path, transport)
    if transport == 'sse':
        logger.info("mcp:{},{}:{}", transport, mcp_host, mcp_port)

    mcp.settings.host = mcp_host
    mcp.settings.port = mcp_port
    mcp.run(transport=transport)
File without changes
@@ -0,0 +1,144 @@
1
+ """
2
+ 东方财富 条件选股
3
+ https://xuangu.eastmoney.com/
4
+
5
+ 1. 部分数据中包含中文单位,如万亿等,导致无法转换为数字,如VOLUME
6
+ 2. 东财翻页需要提前手工登录
7
+ 3. 东财翻页是页面已经翻了,然后等数据来更新,懒加载
8
+ """
9
+ import re
10
+
11
+ import pandas as pd
12
+ from loguru import logger
13
+ from playwright.async_api import Page
14
+
15
+ from mcp_query_table.enums import QueryType
16
+
17
+ # 查询结果
18
+ # 'https://np-pick-b.eastmoney.com/api/smart-tag/stock/v3/pw/search-code'
19
+ # 'https://np-pick-b.eastmoney.com/api/smart-tag/fund/v3/pw/search-code'
20
+ # 'https://np-pick-b.eastmoney.com/api/smart-tag/hk/v3/pw/search-code'
21
+ # 'https://np-pick-b.eastmoney.com/api/smart-tag/cb/v3/pw/search-code'
22
+ # 'https://np-pick-b.eastmoney.com/api/smart-tag/etf/v3/pw/search-code'
23
+ # 'https://np-pick-b.eastmoney.com/api/smart-tag/bkc/v3/pw/search-code'
24
+ # 'https://np-tjxg-b.eastmoney.com/api/smart-tag/bkc/v3/pw/search-code'
25
+ _PAGE1_ = 'https://*.eastmoney.com/api/smart-tag/*/v3/pw/search-code'
26
+
27
+ _type_ = {
28
+ QueryType.CNStock: 'stock',
29
+ QueryType.Fund: 'fund',
30
+ QueryType.HKStock: 'hk',
31
+ QueryType.ConBond: 'cb',
32
+ QueryType.ETF: 'etf',
33
+ QueryType.Board: 'bk', # 比较坑,bkc和bkc的区别
34
+ }
35
+
36
+
37
def convert_type(type):
    """Map a column ``dataType`` name to a Python type.

    A name that is not recognized is returned unchanged.
    """
    known = (
        ('Double', float),
        ('String', str),
        ('Long', int),
        ('Boolean', bool),
        ('INT', int),  # TODO: does not seem to have occurred so far
    )
    for name, py_type in known:
        if type == name:
            return py_type
    return type
49
+
50
+
51
class Pagination:
    """Accumulate paged search results and convert them to a DataFrame."""

    def __init__(self):
        self.datas = {}  # page number -> list of rows for that page
        self.pageNo = 1
        self.pageSize = 100
        self.total = 1024
        self.columns = []

    def reset(self):
        """Discard the rows collected so far; the paging counters are kept."""
        self.datas = {}

    def update(self, pageNo, pageSize, total, columns, dataList):
        """Record the latest page and its paging metadata."""
        self.pageNo = pageNo
        self.pageSize = pageSize
        self.total = total
        self.columns = columns
        self.datas[self.pageNo] = dataList

    def has_next(self, max_page):
        """Return True when more rows remain and the page limit is not reached."""
        more_rows = self.pageNo * self.pageSize < self.total
        below_limit = self.pageNo < max_page
        # Logical ``and`` instead of bitwise ``&`` on two booleans.
        return more_rows and below_limit

    def current(self):
        """Return the number of the most recently recorded page."""
        return self.pageNo

    def get_list(self):
        """Return all collected rows as one list, in the order pages were stored."""
        rows = []
        for page_rows in self.datas.values():
            rows.extend(page_rows)
        return rows

    def get_dataframe(self):
        """Build a DataFrame from the rows, cast columns, and rename keys to titles."""
        titles = {x['key']: x['title'] for x in self.columns}
        dtypes = {x['key']: convert_type(x['dataType']) for x in self.columns}

        df = pd.DataFrame(self.get_list())
        for k, v in dtypes.items():
            if k not in df.columns:
                # Empty result set, or a declared column missing from the rows:
                # nothing to cast, and indexing would raise KeyError.
                continue
            if k == 'SERIAL':
                df[k] = df[k].astype(int)
                continue
            if isinstance(v, str):
                logger.info("未识别的数据类型 {}:{}", k, v)
                continue
            try:
                df[k] = df[k].astype(v)
            except (ValueError, TypeError):
                # e.g. values carrying Chinese unit suffixes, or None entries
                logger.info("转换失败 {}:{}", k, v)

        return df.rename(columns=titles)
102
+
103
+
104
+ P = Pagination()
105
+
106
+
107
def search_code(json_data):
    """Extract ``(total, columns, dataList)`` from a search-code response payload."""
    result = json_data['data']['result']
    return result['total'], result['columns'], result['dataList']
112
+
113
+
114
async def on_response(response):
    """Feed one search-code response into the module-level pagination state.

    The page number and size are read from the request's JSON post body; the totals,
    columns and rows come from the response body.
    """
    body = response.request.post_data_json
    page_no, page_size = body['pageNo'], body['pageSize']
    total, columns, rows = search_code(await response.json())
    P.update(page_no, page_size, total, columns, rows)
119
+
120
+
121
async def query(page: Page,
                q: str = "收盘价>100元",
                type_: QueryType = QueryType.CNStock,
                max_page: int = 5) -> pd.DataFrame:
    """Run an EastMoney screener query and collect up to ``max_page`` result pages.

    Parameters
    ----------
    page
        Playwright page used to open the result URL.
    q
        Query text placed into the result URL.
    type_
        Query type; must be a key of ``_type_``. The previous default was the string
        ``'stock'``, which is not a key and therefore always failed.
    max_page
        Maximum number of pages to fetch.

    Raises
    ------
    ValueError
        If ``type_`` is not supported by this site.
    """
    site_type = _type_.get(type_, None)
    if site_type is None:
        # Raise explicitly: an ``assert`` is stripped under ``python -O``.
        raise ValueError(f"不支持的类型:{type_}")

    # Abort image requests
    await page.route(re.compile(r'.*\.(?:jpg|jpeg|png|gif|webp)(?:$|\?)'), lambda route: route.abort())

    P.reset()
    async with page.expect_response(_PAGE1_) as response_info:
        # Original author's note: no need to handle encoding of the query text here
        await page.goto(f"https://xuangu.eastmoney.com/Result?q={q}&type={site_type}", wait_until="load")
    await on_response(await response_info.value)

    while P.has_next(max_page):
        logger.info("当前页为:{}, 点击`下一页`", P.current())

        # Waiting for the response around the click handles the lazily loaded data
        async with page.expect_response(_PAGE1_) as response_info:
            await page.get_by_role("button", name="下一页").click()
        await on_response(await response_info.value)

    return P.get_dataframe()
@@ -0,0 +1,171 @@
1
+ """
2
+ 同花顺 i问财
3
+ https://www.iwencai.com/
4
+
5
+ 1. 一定要保证浏览器宽度>768,防止界面变成适应手机
6
+
7
+ """
8
+ import re
9
+
10
+ import pandas as pd
11
+ from loguru import logger
12
+ from playwright.async_api import Page
13
+
14
+ from mcp_query_table.enums import QueryType
15
+
16
+ # 初次查询页面
17
+ _PAGE1_ = 'https://www.iwencai.com/customized/chart/get-robot-data'
18
+ # 翻页
19
+ _PAGE2_ = 'https://www.iwencai.com/gateway/urp/v7/landing/getDataList'
20
+
21
+ _querytype_ = {
22
+ QueryType.CNStock: 'stock',
23
+ QueryType.Index: 'zhishu',
24
+ QueryType.Fund: 'fund',
25
+ QueryType.HKStock: 'hkstock',
26
+ QueryType.USStock: 'usstock',
27
+ '新三板': 'threeboard',
28
+ QueryType.ConBond: 'conbond',
29
+ '保险': 'insurance',
30
+ '期货': 'futures',
31
+ '理财': 'lccp',
32
+ '外汇': 'foreign_exchange',
33
+ '宏观': 'macro',
34
+ #
35
+ QueryType.ETF: 'fund', # 查ETF定位到基金
36
+ }
37
+
38
+
39
def convert_type(type):
    """Map a column ``type`` name to a Python type.

    A name that is not recognized is returned unchanged.
    """
    known = (
        ('LONG', int),
        ('DOUBLE', float),
        ('STR', str),
        ('INT', int),  # TODO: does not seem to have occurred so far
    )
    for name, py_type in known:
        if type == name:
            return py_type
    return type
49
+
50
+
51
class Pagination:
    """Accumulate paged query results and convert them to a DataFrame."""

    def __init__(self):
        self.datas = {}  # page number -> list of rows for that page
        self.limit = 100
        self.page = 1
        self.row_count = 1024
        self.columns = []

    def reset(self):
        """Discard the rows collected so far; the paging counters are kept."""
        self.datas = {}

    def update(self, datas, columns, page, limit, row_count):
        """Record the latest page and its paging metadata."""
        self.datas[page] = datas
        self.columns = columns
        self.limit = limit
        self.page = page
        self.row_count = row_count

    def has_next(self, max_page):
        """Return True when more rows remain and the page limit is not reached."""
        more_rows = self.page * self.limit < self.row_count
        below_limit = self.page < max_page
        # Logical ``and`` instead of bitwise ``&`` on two booleans.
        return more_rows and below_limit

    def current(self):
        """Return the number of the most recently recorded page."""
        return self.page

    def get_list(self):
        """Return all collected rows as one list, in the order pages were stored."""
        rows = []
        for page_rows in self.datas.values():
            rows.extend(page_rows)
        return rows

    def get_dataframe(self):
        """Build a DataFrame from the rows, cast columns, and rename keys to index names."""
        names = {x['key']: x['index_name'] for x in self.columns}
        dtypes = {x['key']: convert_type(x['type']) for x in self.columns}

        df = pd.DataFrame(self.get_list())
        for k, v in dtypes.items():
            if k not in df.columns:
                # Empty result set, or a declared column missing from the rows:
                # nothing to cast, and indexing would raise KeyError.
                continue
            if isinstance(v, str):
                logger.info("未识别的数据类型 {}:{}", k, v)
                continue
            try:
                df[k] = df[k].astype(v)
            except (ValueError, TypeError):
                # TypeError covers values such as None that cannot be cast at all
                logger.info("转换失败 {}:{}", k, v)

        return df.rename(columns=names)
98
+
99
+
100
+ P = Pagination()
101
+
102
+
103
def get_robot_data(json_data):
    """Extract ``(datas, columns, page, limit, row_count)`` from a first-page response.

    The table sits at
    ``json_data['data']['answer'][0]['txt'][0]['content']['components'][0]['data']``;
    ``page`` and ``limit`` are read from its ``meta`` and ``row_count`` from
    ``meta['extra']``.
    """
    table = json_data['data']['answer'][0]['txt'][0]['content']['components'][0]['data']
    meta = table['meta']

    return (
        table['datas'],
        table['columns'],
        meta['page'],
        meta['limit'],
        meta['extra']['row_count'],
    )
120
+
121
+
122
def getDataList(json_data):
    """Extract ``(datas, columns, page, limit, row_count)`` from a paging response.

    The table sits at ``json_data['answer']['components'][0]['data']``. ``page`` and
    ``limit`` are converted with ``int`` before being returned.
    """
    table = json_data['answer']['components'][0]['data']
    meta = table['meta']

    datas, columns = table['datas'], table['columns']
    page, limit, row_count = meta['page'], meta['limit'], meta['extra']['row_count']

    return datas, columns, int(page), int(limit), row_count
139
+
140
+
141
async def on_response(response):
    """Feed a first-page or paging response into the module-level pagination state.

    Responses whose URL equals neither ``_PAGE1_`` nor ``_PAGE2_`` are ignored.
    """
    url = response.url
    if url == _PAGE1_:
        parse = get_robot_data
    elif url == _PAGE2_:
        parse = getDataList
    else:
        return
    P.update(*parse(await response.json()))
146
+
147
+
148
async def query(page: Page,
                w: str = "收盘价>1000元",
                type_: QueryType = QueryType.CNStock,
                max_page: int = 5) -> pd.DataFrame:
    """Run an iWencai query and collect up to ``max_page`` result pages.

    Parameters
    ----------
    page
        Playwright page used to open the result URL.
    w
        Query text placed into the result URL.
    type_
        Query type; must be a key of ``_querytype_``. The previous default was the
        string ``'stock'``, which is not a key and therefore always failed.
    max_page
        Maximum number of pages to fetch.

    Raises
    ------
    ValueError
        If ``type_`` is not supported by this site.
    """
    querytype = _querytype_.get(type_, None)
    if querytype is None:
        # Raise explicitly: an ``assert`` is stripped under ``python -O``.
        raise ValueError(f"不支持的类型:{type_}")

    # Abort image requests
    await page.route(re.compile(r'.*\.(?:jpg|jpeg|png|gif|webp)(?:$|\?)'), lambda route: route.abort())

    P.reset()
    # page.viewport_size returns None here (original author's note).
    # A width <= 768 is treated as a phone layout, > 768 as desktop.
    await page.set_viewport_size({"width": 1280, "height": 800})
    async with page.expect_response(_PAGE1_) as response_info:
        await page.goto(f"https://www.iwencai.com/unifiedwap/result?w={w}&querytype={querytype}", wait_until="load")
    await on_response(await response_info.value)

    while P.has_next(max_page):
        logger.info("当前页为:{}, 点击`下页`", P.current())
        async with page.expect_response(_PAGE2_) as response_info:
            await page.get_by_text("下页").click()
        await on_response(await response_info.value)

    return P.get_dataframe()
@@ -0,0 +1,150 @@
1
+ """
2
+ 通达信 小达
3
+ https://wenda.tdx.com.cn/
4
+ """
5
+ import math
6
+ import re
7
+
8
+ import pandas as pd
9
+ from loguru import logger
10
+ from playwright.async_api import Page
11
+
12
+ from mcp_query_table.enums import QueryType
13
+
14
+ # 查询结果
15
+ _PAGE1_ = 'https://wenda.tdx.com.cn/TQL?Entry=NLPSE.NLPQuery'
16
+ # 代码数量
17
+ _PAGE2_ = 'https://wenda.tdx.com.cn/TQL?Entry=JNLPSE.getAllCode'
18
+
19
# Maps QueryType members to the ``queryType`` URL parameter of the TDX site.
_queryType_ = {
    QueryType.CNStock: 'AG',
    QueryType.Fund: 'JJ',
    QueryType.Index: 'ZS',
    QueryType.Info: 'ZX',
    # Boards are queried through the index tab. The previous value 'ZX' was the
    # Info code and contradicted this note. NOTE(review): confirm against the site.
    QueryType.Board: 'ZS',
}
26
+
27
+
28
def convert_type(type):
    """Map a TDX column type code to a Python type.

    A code that is not recognized is returned unchanged.
    """
    text_codes = ('', '0|0|0')
    float_codes = ('2|0|0', '0|9|1', '1|9|1', '2|9|1')

    if any(type == code for code in text_codes):
        return str
    if any(type == code for code in float_codes):
        return float
    return type
42
+
43
+
44
class Pagination:
    """Accumulate paged TDX query results and convert them to a DataFrame."""

    def __init__(self):
        self.datas = {}  # last row number of a batch -> rows of that batch
        self.last_count = 1
        self.limit = 100
        self.row_count = 1024
        self.dtypes = []
        self.columns = []

    def reset(self):
        """Discard the rows collected so far; the counters are kept."""
        self.datas = {}

    def update_row_count(self, row_count):
        """Record the total number of matching rows."""
        self.row_count = row_count

    def update_last_count(self, limit, last_count, columns, dtypes, datas):
        """Record the latest batch of rows and its metadata."""
        self.limit = limit
        self.last_count = last_count
        self.columns = columns
        self.dtypes = dtypes
        self.datas[last_count] = datas

    def has_next(self, max_page):
        """Return True when more rows remain and the page limit is not reached."""
        page = math.ceil(self.last_count / self.limit)
        more_rows = self.last_count < self.row_count
        below_limit = page < max_page
        # Logical ``and`` instead of bitwise ``&`` on two booleans.
        return more_rows and below_limit

    def current(self):
        """Return the most recently recorded ``last_count``."""
        return self.last_count

    def get_list(self):
        """Return all collected rows as one list, in the order batches were stored."""
        rows = []
        for batch in self.datas.values():
            rows.extend(batch)
        return rows

    def get_dataframe(self):
        """Build a DataFrame from the rows and cast each column to its declared type."""
        dtypes = [convert_type(x) for x in self.dtypes]
        df = pd.DataFrame(self.get_list(), columns=self.columns)
        # zip pairs names with types and stops at the shorter list, so surplus type
        # codes no longer raise IndexError.
        for k, v in zip(self.columns, dtypes):
            if k == 'POS':
                df[k] = df[k].astype(int)
                continue
            if isinstance(v, str):
                logger.info("未识别的数据类型 {}:{}", k, v)
                continue
            try:
                df[k] = df[k].astype(v)
            except (ValueError, TypeError):
                # TypeError covers values such as None that cannot be cast at all
                logger.info("转换失败 {}:{}", k, v)
        return df
97
+
98
+
99
+ P = Pagination()
100
+
101
+
102
def NLPQuery(json_data):
    """Split an NLPQuery response into ``(limit, last_count, columns, dtypes, datas)``.

    ``limit`` and ``last_count`` come from positions 2 and 4 of the first element
    (``last_count`` is converted with ``int``); elements 1 and 2 are the column names
    and type codes, and everything from element 3 on is row data.
    """
    header = json_data[0]
    limit, last_count = header[2], int(header[4])
    columns, dtypes = json_data[1], json_data[2]

    return limit, last_count, columns, dtypes, json_data[3:]
110
+
111
+
112
def getAllCode(json_data):
    """Return the row count stored at position 2 of the response's first element."""
    return json_data[0][2]
116
+
117
+
118
async def on_response1(response):
    """Store a query-result response in the pagination state; ignore other URLs."""
    if not response.url.startswith(_PAGE1_):
        return
    parsed = NLPQuery(await response.json())
    P.update_last_count(*parsed)
121
+
122
+
123
async def on_response2(response):
    """Store the total row count from a getAllCode response; ignore other URLs."""
    if not response.url.startswith(_PAGE2_):
        return
    total = getAllCode(await response.json())
    P.update_row_count(total)
126
+
127
+
128
async def query(page: Page,
                message: str = "收盘价>100元",
                type_: QueryType = QueryType.CNStock,
                max_page: int = 5) -> pd.DataFrame:
    """Run a TDX query and collect up to ``max_page`` result pages.

    Parameters
    ----------
    page
        Playwright page used to open the result URL.
    message
        Query text placed into the result URL.
    type_
        Query type; must be a key of ``_queryType_``. The previous default was the
        string ``'AG'``, which is not a key and therefore always failed.
    max_page
        Maximum number of pages to fetch.

    Raises
    ------
    ValueError
        If ``type_`` is not supported by this site.
    """
    queryType = _queryType_.get(type_, None)
    if queryType is None:
        # Raise explicitly: an ``assert`` is stripped under ``python -O``.
        raise ValueError(f"不支持的类型:{type_}")

    # Abort image requests
    await page.route(re.compile(r'.*\.(?:jpg|jpeg|png|gif|webp)(?:$|\?)'), lambda route: route.abort())
    page.on("response", on_response2)
    try:
        P.reset()
        async with page.expect_response(lambda response: response.url.startswith(_PAGE1_)) as response_info:
            await page.goto(f"https://wenda.tdx.com.cn/site/wenda/stock_index.html?message={message}&queryType={queryType}",
                            wait_until="load")
        await on_response1(await response_info.value)

        while P.has_next(max_page):
            logger.info("当前序号为:{}, 点击`下一页`", P.current())
            async with page.expect_response(lambda response: response.url.startswith(_PAGE1_)) as response_info:
                await page.get_by_role("button", name="下一页").click()
            await on_response1(await response_info.value)
    finally:
        # Without this, every call adds another copy of the listener to the page.
        page.remove_listener("response", on_response2)

    return P.get_dataframe()
@@ -0,0 +1,165 @@
1
+ import subprocess
2
+ import time
3
+ from pathlib import Path
4
+ from typing import Optional
5
+
6
+ import pandas as pd
7
+ from loguru import logger
8
+ from playwright.async_api import async_playwright, Playwright, Page
9
+
10
+ from mcp_query_table.enums import QueryType, Site
11
+
12
+
13
class BrowserManager:
    """Own one CDP-connected browser and hand out reusable pages.

    Usable as an async context manager: leaving the ``async with`` block calls
    ``cleanup()``.
    """

    async def __aenter__(self):
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        await self.cleanup()

    def __init__(self, port: int = 9222, browser_path: Optional[str] = None, debug: bool = False):
        """

        Parameters
        ----------
        port:int
            Remote debugging port of the browser.
        browser_path
            Path of the browser executable. Chrome is recommended, because Microsoft Edge
            has to be fully terminated in the task manager before it can open the
            debugging port.
        debug:bool
            Whether to open the developer tools.

        Raises
        ------
        ValueError
            If neither the given path nor the Edge fallback path exists.

        """
        if browser_path is None:
            browser_path = r'C:\Program Files\Google\Chrome\Application\chrome.exe'
        if not Path(browser_path).exists():
            # Fall back to Edge; it must be fully terminated before the debugging port works
            browser_path = r'C:\Program Files (x86)\Microsoft\Edge\Application\msedge.exe'
        if not Path(browser_path).exists():
            raise ValueError("未找到浏览器可执行文件")

        self.port = port
        self.browser_path = browser_path
        self.debug = debug

        self.playwright: Optional[Playwright] = None
        self.browser = None
        self.context = None
        # Pool of idle pages
        self.pages = []

    async def cleanup(self):
        """Close the browser connection and stop Playwright, then forget both."""
        if self.browser:
            await self.browser.close()
        if self.playwright:
            await self.playwright.stop()
        # Drop the dead handles so a later get_page() starts from a clean state.
        self.browser = None
        self.context = None
        self.playwright = None
        self.pages = []

    async def _launch(self) -> None:
        """Connect over CDP, starting the browser first when nothing is listening.

        References
        ----------
        https://blog.csdn.net/qq_30576521/article/details/142370538

        """
        import asyncio  # local import: only needed for the non-blocking wait below

        # On a relaunch, stop the previous driver and drop pages pooled from the old
        # connection instead of leaking them.
        if self.playwright is not None:
            await self.playwright.stop()
        self.pages = []

        self.playwright = await async_playwright().start()
        endpoint = f"http://127.0.0.1:{self.port}"

        try:
            # Try to attach to an already running browser
            self.browser = await self.playwright.chromium.connect_over_cdp(endpoint,
                                                                           slow_mo=1000,
                                                                           timeout=5000)
        except Exception:
            # ``except Exception`` rather than a bare ``except`` so cancellation and
            # KeyboardInterrupt do not end up starting a browser.
            args = [self.browser_path, f"--remote-debugging-port={self.port}", "--start-maximized"]
            if self.debug:
                args.append("--auto-open-devtools-for-tabs")
            logger.info(f"start browser:{subprocess.list2cmdline(args)}")
            # Argument list without a shell; the browser keeps running after we exit.
            subprocess.Popen(args)
            # Give the browser time to start without blocking the event loop.
            await asyncio.sleep(3)

            try:
                self.browser = await self.playwright.chromium.connect_over_cdp(endpoint,
                                                                               slow_mo=1000,
                                                                               timeout=5000)
            except Exception:
                logger.warning("是否提前打开了浏览器,但未开启远程调试端口?请关闭浏览器全部进程后重试")
                raise

        self.context = self.browser.contexts[0]
        # Reuse pages that are already open
        for page in self.context.pages:
            # Never hand out the developer-tools page
            if page.url.startswith("devtools://"):
                continue
            self.pages.append(page)

    async def _try_launch(self) -> None:
        """Launch or reconnect when there is no live browser connection."""
        if self.browser is None or not self.browser.is_connected():
            await self._launch()

    async def get_page(self) -> Page:
        """Return a usable Page; open a new tab when no idle page is left."""
        await self._try_launch()

        # Take pages from the pool until an open one is found
        while len(self.pages) > 0:
            page = self.pages.pop()
            if page.is_closed():
                continue
            return page

        # Pool exhausted: open a new page
        return await self.context.new_page()

    def release_page(self, page) -> None:
        """Return a finished Page to the pool; otherwise get_page keeps opening new tabs."""
        if page.is_closed():
            return
        # Put it back
        self.pages.append(page)
125
+
126
+
127
async def query(
        page: Page,
        query_input: str = "收盘价>100元",
        query_type: QueryType = QueryType.CNStock,
        max_page: int = 5,
        site: Site = Site.THS) -> pd.DataFrame:
    """Query a table from the selected site.

    Parameters
    ----------
    page : playwright.async_api.Page
        Page used to run the query.
    query_input : str, optional
        Query condition, by default "收盘价>100元"
    query_type : QueryType, optional
        Query type, by default QueryType.CNStock
    max_page : int, optional
        Maximum number of pages, by default 5
    site : Site, optional
        Site to query, by default Site.THS

    Returns
    -------
    pd.DataFrame
        Query result.

    Raises
    ------
    ValueError
        If ``site`` is not one of the supported sites.

    """
    # Only the module of the selected site is imported.
    if site == Site.EastMoney:
        from mcp_query_table.sites.eastmoney import query as site_query
    elif site == Site.THS:
        from mcp_query_table.sites.iwencai import query as site_query
    elif site == Site.TDX:
        from mcp_query_table.sites.tdx import query as site_query
    else:
        raise ValueError(f"未支持的站点:{site}")

    return await site_query(page, query_input, query_type, max_page)
@@ -0,0 +1,183 @@
1
+ Metadata-Version: 2.4
2
+ Name: mcp_query_table
3
+ Version: 0.2.6
4
+ Summary: query table from website, support MCP
5
+ Author-email: wukan <wu-kan@163.com>
6
+ License: MIT License
7
+
8
+ Copyright (c) 2025 伍侃
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Keywords: playwright,mcp,table,iwencai,tdx,eastmoney
29
+ Classifier: Development Status :: 4 - Beta
30
+ Classifier: Programming Language :: Python
31
+ Requires-Python: >=3.10
32
+ Description-Content-Type: text/markdown
33
+ License-File: LICENSE
34
+ Requires-Dist: pandas
35
+ Requires-Dist: loguru
36
+ Requires-Dist: playwright
37
+ Requires-Dist: mcp
38
+ Dynamic: license-file
39
+
40
+ # mcp_query_table
41
+
42
+ 基于`playwright`实现的财经网页表格爬虫,支持`Model Context Protocol (MCP)`。目前可查询的来源为:
43
+
44
+ - [同花顺i问财](http://iwencai.com/)
45
+ - [通达信问小达](https://wenda.tdx.com.cn/)
46
+ - [东方财富条件选股](https://xuangu.eastmoney.com/)
47
+
48
+ 实盘时,如果某网站宕机或改版,可以立即切换到其他网站。(注意:不同网站的表格结构不同,需要提前做适配)
49
+
50
+ ## 安装
51
+
52
+ ```commandline
53
+ pip install -i https://pypi.org/simple --upgrade mcp_query_table
54
+ pip install -i https://pypi.tuna.tsinghua.edu.cn/simple --upgrade mcp_query_table
55
+ ```
56
+
57
+ ## 使用
58
+
59
+ ```python
60
+ import asyncio
61
+
62
+ from mcp_query_table import *
63
+
64
+
65
+ async def main() -> None:
66
+ async with BrowserManager(port=9222, browser_path=None, debug=True) as bm:
67
+ # 问财需要保证浏览器宽度>768,防止界面变成适应手机
68
+ page = await bm.get_page()
69
+ df = await query(page, '收益最好的200只ETF', query_type=QueryType.ETF, max_page=1, site=Site.THS)
70
+ print(df.to_markdown())
71
+ df = await query(page, '年初至今收益率前50', query_type=QueryType.Fund, max_page=1, site=Site.TDX)
72
+ print(df.to_csv())
73
+ df = await query(page, '流通市值前10的行业板块', query_type=QueryType.Index, max_page=1, site=Site.TDX)
74
+ print(df.to_csv())
75
+ # TODO 东财翻页要提前登录
76
+ df = await query(page, '今日涨幅前5的概念板块;', query_type=QueryType.Board, max_page=3, site=Site.EastMoney)
77
+ print(df)
78
+ bm.release_page(page)
79
+ print('done')
80
+ await page.wait_for_timeout(2000)
81
+
82
+
83
+ if __name__ == '__main__':
84
+ asyncio.run(main())
85
+
86
+ ```
87
+
88
+ ## 注意事项
89
+
90
+ 1. 浏览器最好是`Chrome`。如一定要使用`Edge`,除了关闭`Edge`所有窗口外,还要在任务管理器关闭`Microsoft Edge`
91
+ 的所有进程,即`taskkill /f /im msedge.exe`
92
+ 2. 浏览器要保证窗口宽度,防止部分网站自动适配成手机版,导致表格查询失败
93
+ 3. 如有网站账号,请提前登录。此工具无自动登录功能
94
+ 4. 不同网站的表格结构不同,同条件返回股票数量也不同。需要查询后做适配
95
+
96
+ ## 工作原理
97
+
98
+ 不同于`requests`,`playwright`是基于浏览器的,模拟用户在浏览器中的操作。
99
+
100
+ 1. 不需要解决登录问题
101
+ 2. 不需要解决请求构造、响应解析
102
+ 3. 可以直接获取表格数据,所见即所得
103
+ 4. 运行速度慢于`requests`,但开发效率高
104
+
105
+ 数据的获取有:
106
+
107
+ 1. 直接解析HTML表格
108
+ 1. 数字文本化了,不利于后期研究
109
+ 2. 适用性最强
110
+ 2. 截获请求,获取返回的`json`数据
111
+ 1. 类似于`requests`,需要做响应解析
112
+ 2. 灵活性差点,网站改版后,需要重新做适配
113
+
114
+ 此项目采用的是模拟点击浏览器来发送请求,使用截获响应并解析的方法来获取数据。
115
+
116
+ 后期会根据不同的网站改版情况,使用更适合的方法。
117
+
118
+ ## MCP支持
119
+
120
+ 确保可以在控制台中执行`python -m mcp_query_table -h`。如果不能,可能要先`pip install mcp_query_table`
121
+
122
+ 在`Cline`中可以配置如下。其中`command`是`python`的绝对路径,`browser_path`是`Chrome`的绝对路径。
123
+
124
+ ### STDIO方式
125
+
126
+ ```json
127
+ {
128
+ "mcpServers": {
129
+ "mcp_query_table": {
130
+ "command": "D:\\Users\\Kan\\miniconda3\\envs\\py312\\python.exe",
131
+ "args": [
132
+ "-m",
133
+ "mcp_query_table",
134
+ "--format",
135
+ "markdown",
136
+ "--browser_path",
137
+ "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe"
138
+ ]
139
+ }
140
+ }
141
+ }
142
+ ```
143
+
144
+ ### SSE方式
145
+
146
+ 先在控制台中执行如下命令,启动`MCP`服务
147
+
148
+ ```commandline
149
+ python -m mcp_query_table --format markdown --browser_path "C:\Program Files\Google\Chrome\Application\chrome.exe" --transport sse --mcp_port 8000
150
+ ```
151
+
152
+ 然后就可以连接到`MCP`服务了
153
+ http://localhost:8000/sse
154
+
155
+ ## 使用`MCP Inspector`进行调试
156
+
157
+ ```commandline
158
+ npx @modelcontextprotocol/inspector python -m mcp_query_table --format markdown
159
+ ```
160
+
161
+ 打开浏览器并翻页是一个比较耗时的操作,会导致`MCP Inspector`页面超时。可以改用`http://localhost:5173/?timeout=60000`访问,表示超时时间为60秒
162
+
163
+ 第一次尝试编写`MCP`项目,可能会有各种问题,欢迎大家交流。
164
+
165
+ ## `MCP`使用技巧
166
+
167
+ 1. 2024年涨幅最大的100只股票按2024年12月31日总市值排名。三个网站的结果都不一样
168
+ - 同花顺:显示了2201只股票。前5个是工商银行、农业银行、中国移动、中国石油、建设银行
169
+ - 通达信:显示了100只股票,前5个是寒武纪、正丹股份、汇金科技、万丰奥威、艾融软件
170
+ - 东方财富:显示了100只股票,前5个是海光信息、寒武纪、光启技术、润泽科技、新易盛
171
+
172
+ 2. 大语言模型对问题拆分能力弱,所以要能合理的提问,保证查询条件不会被改动。以下推荐第2、3种
173
+ - 2024年涨幅最大的100只股票按2024年12月31日总市值排名
174
+ > 大语言模型非常有可能拆分这句,导致一步查询被分成了多步查询
175
+ - 向东方财富查询“2024年涨幅最大的100只股票按2024年12月31日总市值排名”
176
+ > 用引号括起来,避免被拆分
177
+ - 向东方财富板块查询 “去年涨的最差的行业板块”,再查询此板块中去年涨的最好的5只股票
178
+ > 分成两步查询,先查询板块,再查询股票。但最好不要全自动,因为第一步的结果它不理解“今日涨幅”和“区间涨幅”,需要交互修正
179
+
180
+ ## 参考
181
+
182
+ - [Playwright](https://playwright.dev/python/docs/intro)
183
+ - [Selenium webdriver无法附加到edge实例,edge的--remote-debugging-port选项无效](https://blog.csdn.net/qq_30576521/article/details/142370538)
@@ -0,0 +1,15 @@
1
+ mcp_query_table/__init__.py,sha256=nHQOsX_pO9mwyq4a77irNpgvIXmc8NArQnj5-nEMW6k,110
2
+ mcp_query_table/__main__.py,sha256=SmOaVfIDX4dcF-SV_xcn-Sfn3io-XunsdwKPnGfzwAU,1150
3
+ mcp_query_table/_version.py,sha256=Oz5HbwHMyE87nmwV80AZzpkJPf-wBg7eDuJr_BXZkhU,22
4
+ mcp_query_table/enums.py,sha256=gVioM6lyI6lGfBBS9KY2IxCh4QKdLVAqXAgo19aAg6U,446
5
+ mcp_query_table/server.py,sha256=Hi9qUMH9vvhLwcgsnCGORjQB9Ac_N-SM0a-hcC2QB8w,2375
6
+ mcp_query_table/tool.py,sha256=_PjyjsgjDvwAGKVFv4W-Olim_qB7jGpg4CZxnsXzNDk,5641
7
+ mcp_query_table/sites/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
+ mcp_query_table/sites/eastmoney.py,sha256=LImjpYVuM5YnXwnNzB2hkKfHofocZZScetGqMOCHZpk,4477
9
+ mcp_query_table/sites/iwencai.py,sha256=g56pj3pbxu4mXLNnaaS3Hdx-DvEy_9OBrQJe26z4z08,5059
10
+ mcp_query_table/sites/tdx.py,sha256=RIUQaB7Tn4AVyWaevk9SzTKIDwVO2f9erIlI-adXPLY,4126
11
+ mcp_query_table-0.2.6.dist-info/licenses/LICENSE,sha256=rbvv_CTd7biGwT21tvhgQ2zkbPFXOoON7WFQWEdElBA,1063
12
+ mcp_query_table-0.2.6.dist-info/METADATA,sha256=gmVXsQZi17_AAJi9StVSPpIpcrrdNwj51OOVFdne7Ww,7490
13
+ mcp_query_table-0.2.6.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
14
+ mcp_query_table-0.2.6.dist-info/top_level.txt,sha256=5M_8dkO1USOX7_EWbWS6O_TEsZ5yo-AodFNKeUEgvEQ,16
15
+ mcp_query_table-0.2.6.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (78.1.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 伍侃
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ mcp_query_table