mm-qa-mcp 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,6 @@
1
+ """
2
+ coding:utf-8
3
+ @Software: PyCharm
4
+ @Time: 2025/4/27 18:18
5
+ @Author: xingyun
6
+ """
@@ -0,0 +1,298 @@
1
+ """
2
+ coding:utf-8
3
+ @Software: PyCharm
4
+ @Time: 2025/4/27 18:19
5
+ @Author: xingyun
6
+ """
7
+ import json
8
+ import logging
9
+ import requests
10
+ import os
11
+
12
+ from minimax_qa_mcp.utils.utils import Utils
13
+
14
+ # Configure logging for this module
15
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
16
+ logger = logging.getLogger('知识库检索')
17
+
18
+ # ===== Weaviate connection settings =====
19
+ os.environ["WEAVIATE_GRPC_ENABLED"] = "False" # Disable gRPC; use the HTTP protocol instead
20
+ HTTP_HOST = Utils.get_conf("weaviate_url", "url")
21
+ HTTP_PORT = Utils.get_conf("weaviate_url", "port")
22
+ COLLECTION_NAME = "BusinessDocs_Auto"
23
+
24
+
25
class GetWeaviateInfo:
    """Semantic search over a Weaviate collection, with an optional model summary.

    The instance connects to the server described by the module-level
    ``HTTP_HOST`` / ``HTTP_PORT`` settings using the Weaviate 3.x client API,
    searches ``COLLECTION_NAME`` for documents near the user's question and,
    when requested, posts the hits to the configured model API for a summary.
    """

    def __init__(self, input_question, is_need_module: bool = False):
        """Store the query settings and connect to Weaviate immediately.

        :param input_question: the user's question, used as the search concept
        :param is_need_module: whether ``get_knowledge`` should also ask the
            model API to summarise the search hits
        :raises Exception: whatever the Weaviate client raises while connecting
        """
        self.input_question = input_question
        self.is_need_module = is_need_module
        # Model API configuration
        self.api_url = Utils.get_conf("generator_case_conf", "module_api_url")
        self.timeout = 120  # seconds allowed for one model API request

        # The client is created right away; only the ``weaviate`` import is deferred.
        self.client = None
        self._init_weaviate_client()

    def _init_weaviate_client(self):
        """Create the Weaviate 3.x client and store it on ``self.client``.

        ``weaviate`` is imported inside the method; the original author notes
        this is done to avoid a circular import.

        :raises Exception: re-raises any error hit while importing or connecting
        """
        try:
            import weaviate
            from urllib.parse import urlparse

            http_host_value = HTTP_HOST
            if http_host_value and http_host_value.startswith(('http://', 'https://')):
                # The setting already carries a scheme: keep only the host name.
                # ``hostname`` (unlike ``netloc``) drops an embedded port, so the
                # URL built below never ends up as "host:port:PORT".
                parsed_url = urlparse(http_host_value)
                host_only = parsed_url.hostname or parsed_url.netloc
                if ":" in host_only and not host_only.startswith("["):
                    # IPv6 literals need their brackets back inside a URL.
                    host_only = f"[{host_only}]"
                http_host_value = host_only
                logger.info(f"从URL '{HTTP_HOST}'中提取主机部分: '{http_host_value}'")

            logger.info(f"尝试连接到Weaviate服务器: {http_host_value}:{HTTP_PORT}")

            # NOTE(review): the scheme is always forced to http, even when the
            # configured URL says https - confirm this is intended.
            self.client = weaviate.Client(f"http://{http_host_value}:{HTTP_PORT}")

            # Report the real readiness state instead of assuming success.
            if self.client.is_ready():
                logger.info("Weaviate客户端初始化成功")
            else:
                logger.warning("Weaviate客户端已创建,但服务尚未就绪")
        except Exception as e:
            logger.error(f"Weaviate客户端初始化失败: {e}")
            raise

    def get_info(self):
        """Return basic server information.

        :return: dict with ``version``, ``schema`` and ``status``; on failure
            the dict carries placeholder values plus an ``error`` message
        """
        try:
            meta = self.client.get_meta()
            return {
                "version": meta.get("version", "未知"),
                "schema": self.client.schema.get(),
                "status": "已连接" if self.client.is_ready() else "未连接",
            }
        except Exception as e:
            logger.error(f"获取Weaviate信息失败: {e}")
            return {
                "version": "未知",
                "schema": {},
                "status": "错误",
                "error": str(e),
            }

    def search_documents(self, limit=5):
        """Run a near-text search for ``self.input_question``.

        Args:
            limit: maximum number of hits to return

        Returns:
            A list of dicts, one per hit, holding the requested properties.
            When the server reports a certainty it is exposed as
            ``similarity`` and the raw ``_additional`` entry is removed.
            Any failure is logged and yields an empty list.
        """
        try:
            # Time fields are deliberately left out of the requested properties.
            properties = [
                "title", "summary", "content", "category", "doc_id", "file_path", "doc_type",
                "submitter", "business_tags",
            ]

            results = (
                self.client.query
                .get(COLLECTION_NAME, properties)
                .with_limit(limit)
                .with_near_text({"concepts": [self.input_question]})
                .with_additional(["certainty"])
                .do()
            )

            # Surface server-side query errors instead of failing later on a
            # null payload with an unrelated message.
            if "errors" in results:
                logger.error(f"搜索失败: {results['errors']}")

            data = ((results.get("data") or {}).get("Get") or {}).get(COLLECTION_NAME) or []
            logger.info(f"搜索到 {len(data)} 个结果")

            processed_results = []
            for item in data:
                result = dict(item)

                # Promote the certainty score to a top-level "similarity" field.
                additional = result.get("_additional")
                if isinstance(additional, dict) and "certainty" in additional:
                    result["similarity"] = additional["certainty"]
                    del result["_additional"]

                processed_results.append(result)

            return processed_results

        except Exception as e:
            logger.error(f"搜索失败: {e}")
            return []

    @staticmethod
    def _build_prompt(content_list):
        """Build the single-line summarisation prompt sent to the model."""
        parts = ["请对以下文档内容进行总结,提取关键信息:\n\n"]
        for i, doc in enumerate(content_list):
            # A property can be present but null, so fall back to "" before slicing.
            content = doc.get('content') or ''
            parts.append(f"文档{i + 1}:{doc.get('title', 'N/A')}\n")
            parts.append(f"内容:{content[:1000]}...\n\n")
        parts.append("请提供一个简洁的总结,包含这些文档的核心要点。")
        prompt = "".join(parts)

        # Turn escaped double quotes into single quotes and flatten newlines.
        clean_params = prompt.replace('\\"', "'")
        return clean_params.replace("\n", " ").strip()

    @staticmethod
    def _extract_model_text(resp_json, raw_text):
        """Pull the answer text out of a decoded model API response.

        Returns ``raw_text`` when there is no ``response`` field, the joined
        text items when the nested JSON carries a ``content`` list, the
        stringified nested object otherwise, and the undecoded ``response``
        value when the nested JSON cannot be processed.
        """
        if 'response' not in resp_json:
            return raw_text

        # The "response" field is itself a JSON document encoded as a string.
        try:
            model_response = json.loads(resp_json['response'])

            if 'content' in model_response and isinstance(model_response['content'], list):
                return "".join(
                    item.get('text', '')
                    for item in model_response['content']
                    if item.get('type') == 'text'
                )
            return str(model_response)
        except Exception as e:
            logger.error(f"解析二层JSON失败: {e}")
            return resp_json['response']

    def call_model(self, content_list):
        """Ask the model API to summarise the given documents.

        A single request is sent with ``self.timeout`` seconds allowed; there
        is no retry.

        Args:
            content_list: search hits (dicts with ``title`` / ``content``)

        Returns:
            The extracted summary text, the raw response body when it cannot
            be decoded as JSON, or ``None`` on a non-200 status or a request
            error.
        """
        payload = {
            "scene": "qa_agent",
            "params": {
                "user_content": self._build_prompt(content_list)
            }
        }

        logger.info("==== 发送请求调用模型 ======")

        try:
            # NOTE(review): verify=False disables TLS certificate checks;
            # confirm this is acceptable for the target endpoint.
            response = requests.post(
                self.api_url,
                json=payload,
                headers={'Content-Type': 'application/json'},
                verify=False,
                timeout=self.timeout
            )

            logger.info(f"API响应状态码: {response.status_code}")
            logger.info(f"API响应内容: {response.text}")

            if response.status_code != 200:
                logger.error(f"API请求失败,状态码: {response.status_code}")
                return None

            try:
                return self._extract_model_text(response.json(), response.text)
            except Exception as e:
                logger.error(f"解析JSON失败: {e}")
                return response.text

        except requests.RequestException as e:
            logger.error(f"请求异常: {e}")
            return None

    def get_knowledge(self, limit=5):
        """Search the knowledge base and optionally add a model summary.

        Args:
            limit: maximum number of search hits

        Returns:
            dict with ``query``, ``result_count`` and ``results``; plus
            ``model_summary`` when a summary was requested and produced, or
            ``model_summary_error`` when the summary call raised.
        """
        search_results = self.search_documents(limit=limit)

        result = {
            "query": self.input_question,
            "result_count": len(search_results),
            "results": search_results
        }

        # Only call the model when asked to and when there is something to summarise.
        if self.is_need_module and search_results:
            try:
                summary = self.call_model(search_results)
                if summary:
                    result["model_summary"] = summary
            except Exception as e:
                logger.error(f"调用模型总结失败: {e}")
                result["model_summary_error"] = str(e)

        return result

    def __del__(self):
        """Destructor kept as an intentional no-op.

        The original author notes the 3.x client may have no explicit
        ``close`` method, so nothing is released here.
        """
268
+
269
+
270
if __name__ == "__main__":
    # Manual smoke test: connect, print the server info, then run one
    # knowledge-base query with the model summary enabled.
    print("开始测试GetWeaviateInfo...")

    try:
        # Construction also opens the Weaviate connection.
        print("实例化GetWeaviateInfo...")
        retriever = GetWeaviateInfo("海螺视频 图生视频 测试case", is_need_module=True)
        print("已成功实例化GetWeaviateInfo")

        # Basic server information
        print("测试获取Weaviate基本信息...")
        server_info = retriever.get_info()
        print(f"Weaviate版本: {server_info.get('version')}")
        print(f"Weaviate状态: {server_info.get('status')}")

        # Knowledge-base search
        print("\n测试知识库搜索...")
        knowledge = retriever.get_knowledge(limit=3)
        print(f"查询: {knowledge.get('query')}")
        print(f"结果数量: {knowledge.get('result_count')}")

        # One line per hit: title and similarity score
        for rank, hit in enumerate(knowledge.get('results', []), start=1):
            print(f"结果 {rank}: {hit.get('title')} (相似度: {hit.get('similarity', 0):.3f})")

        print("\n完整结果:")
        print(knowledge)
    except Exception as e:
        print(f"测试过程中发生错误: {e}")
File without changes
@@ -0,0 +1,104 @@
1
+ import requests
2
+ from datetime import datetime, timedelta, timezone
3
+ from minimax_qa_mcp.utils.logger import logger
4
+ from minimax_qa_mcp.utils.utils import Utils
5
+
6
+
7
class GetFromGrafana:
    """Fetch log entries for a scene through the configured Grafana log endpoint."""

    # Seconds to wait for the log endpoint before giving up.
    _REQUEST_TIMEOUT = 60

    def __init__(self, scene, psm="", from_time=None, to_time=None):
        """Load the scene's settings and fix the query time window.

        :param scene: scene name; settings are read from ``{scene}_business_info``
        :param psm: service name; dots are replaced with dashes
        :param from_time: window start, passed through unchanged when given
        :param to_time: window end, passed through unchanged when given

        When both ``from_time`` and ``to_time`` are ``None`` the window
        defaults to the last 24 hours, expressed in UTC+8.
        """
        self.cluster = Utils.get_conf(f"{scene}_business_info", "grafana_cluster")
        self.name_space = Utils.get_conf(f"{scene}_business_info", "grafana_name_space")
        # Tolerate psm=None so an explicit null does not crash on .replace().
        self.psm = (psm or "").replace('.', '-')
        self.url = Utils.get_conf('common', 'grafana_url')
        self.from_time, self.to_time = self._resolve_time_range(from_time, to_time)

    @staticmethod
    def _resolve_time_range(from_time, to_time):
        """Return ``(from_time, to_time)``, defaulting to the last day in UTC+8."""
        if from_time is None and to_time is None:
            # timezone() only accepts offsets strictly below 24h, so the one-day
            # shift must be subtracted from "now" rather than used as an offset.
            now = datetime.now(timezone(timedelta(hours=8)))
            return (now - timedelta(days=1)).isoformat(), now.isoformat()
        return from_time, to_time

    def _build_query(self, msgs):
        """Assemble the query string from namespace, psm and message filters."""
        clauses = [f"_namespace_:\"{self.name_space}\" "]
        if self.psm:
            clauses.append(f"and app:\"{self.psm}\" ")
        clauses.extend(f"and msg:\"{msg}\" " for msg in (msgs or []))
        return "".join(clauses)

    def post_grafana(self, msgs: list):
        """Query the log endpoint for entries matching every message in ``msgs``.

        :param msgs: message fragments that must all match; may be empty
        :return: the ``data.items`` list of the response, or ``[]`` when the
            request fails, times out or returns a non-200 status
        """
        data = {
            "from": self.from_time,
            "to": self.to_time,
            "query": self._build_query(msgs),
            "limit": 20,
            "topic_name": f"_mlogs_{self.cluster}/{self.name_space}"
        }
        logger.info(f"grafana的入参为:{data}")
        try:
            grafana_resp = requests.post(self.url, json=data, timeout=self._REQUEST_TIMEOUT)
            if grafana_resp.status_code == 200:
                return grafana_resp.json()['data']['items']
            logger.error(
                f'get grafana resp error, psm is:{self.psm}, method is: {msgs}, '
                f'status code is: {grafana_resp.status_code}'
            )
        except Exception as e:
            logger.error(f'get grafana resp error, psm is:{self.psm}, method is: {msgs}, error is: {e}')
        # Every failure path yields an empty list so callers can always iterate.
        return []
49
+
50
+
51
class GetApiFromGrafana:
    """Look up a service's API methods and their QPS through the swing service.

    Every public method reports failure by returning a one-element list that
    holds the caught exception.
    """

    # Seconds to wait for the swing service before giving up.
    _REQUEST_TIMEOUT = 60

    def __init__(self, scene, psm):
        """Store the scene / service name and read the swing base URL from config."""
        self.psm = psm
        self.scene = scene
        self.url = Utils.get_conf('common', 'swing_url')
        logger.info(f"GetApiFromGrafana init psm:{psm},scene:{scene}")

    def get_method_list(self):
        """Fetch the API definitions registered for ``self.psm``.

        Dashes in the psm are turned back into dots before querying.

        :return: the ``data.apis`` value of the response, ``[]`` on a non-200
            status, or ``[exception]`` when the request or decoding fails
        """
        try:
            res = requests.get(
                url=self.url + "/swing/api/fetch_api_by_psm",
                # Let requests encode the query string.
                params={"psm": str(self.psm.replace("-", "."))},
                timeout=self._REQUEST_TIMEOUT,
            )
            if res.status_code == 200:
                return res.json()["data"]["apis"]
            logger.error(f"get_method_list error: status code {res.status_code}")
            return []
        except Exception as e:
            logger.error(f"get_method_list error: {e}")
            return [e]

    def get_top_qps(self):
        """Fetch the top-QPS table for ``self.scene``.

        :return: the entry for ``self.psm`` when a psm is set; otherwise the
            per-psm tables merged into one ``{method: qps}`` dict; or
            ``[exception]`` on failure
        """
        try:
            res = requests.get(
                url=self.url + "/swing/api/get_top_qps",
                params={"scene": str(self.scene)},
                timeout=self._REQUEST_TIMEOUT,
            )
            res_data = res.json()["data"]
            if not self.psm:
                return {key: value for psm in res_data for key, value in res_data[psm].items()}
            return res_data[str(self.psm)]
        except Exception as e:
            logger.error(f"get_top_qps error: {e}")
            return [e]

    @staticmethod
    def _attach_qps(psm_method_list, qps_by_method):
        """Keep the methods that have a QPS entry and store it under ``"qps"``."""
        res_list = []
        for method in psm_method_list:
            name = method["method"]
            if name in qps_by_method:
                method["qps"] = qps_by_method[name]
                res_list.append(method)
        return res_list

    def get_need_method(self):
        """Return the methods worth covering, each paired with its QPS.

        For an "rpc" scene with a psm set, the psm's API definitions are
        returned with ``qps`` attached, limited to methods present in the QPS
        table. Otherwise a ``{"method", "qps"}`` dict is returned per QPS entry.

        :return: list of method dicts, or ``[exception]`` on failure
        """
        try:
            qps_method_list = self.get_top_qps()
            if not isinstance(qps_method_list, dict):
                # get_top_qps signals failure with a list; keep reporting an error.
                raise TypeError(f"unexpected QPS payload: {qps_method_list!r}")

            # An explicit psm on an rpc scene means the caller wants the
            # interface definitions, not just the method names.
            if "rpc" in self.scene and self.psm:
                return self._attach_qps(self.get_method_list(), qps_method_list)
            return [{"method": key, "qps": value} for key, value in qps_method_list.items()]

        except Exception as e:
            logger.error(f"get_need_method error: {e}")
            return [e]
98
+
99
+
100
if __name__ == '__main__':
    # Manual smoke test against the swing service. Other invocations used
    # during development:
    #   GetFromGrafana("xingye_test").post_grafana()
    #   GetApiFromGrafana("hailuo_video_us_http", "").get_need_method()
    print("test")
    api_source = GetApiFromGrafana("xingye_prod", "weaver-account-account")
    print(api_source.get_need_method())
@@ -0,0 +1,6 @@
1
+ """
2
+ coding:utf-8
3
+ @Software: PyCharm
4
+ @Time: 2025/3/20 17:33
5
+ @Author: xingyun
6
+ """