union-app-chat-stream 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108) hide show
  1. package/.gitignore +16 -0
  2. package/PROJECT_OVERVIEW.md +187 -0
  3. package/app/.env +63 -0
  4. package/app/.env.dev +63 -0
  5. package/app/.env.prod.bj11 +63 -0
  6. package/app/.env.prod.sh20 +63 -0
  7. package/app/.env.prod.sz31 +63 -0
  8. package/app/.env.test.bj12 +63 -0
  9. package/app/__init__.py +42 -0
  10. package/app/__pycache__/__init__.cpython-312.pyc +0 -0
  11. package/app/__pycache__/authenticated_user.cpython-312.pyc +0 -0
  12. package/app/__pycache__/extensions.cpython-312.pyc +0 -0
  13. package/app/__pycache__/wsgi.cpython-312.pyc +0 -0
  14. package/app/authenticated_user.py +77 -0
  15. package/app/config/__pycache__/config_loader.cpython-312.pyc +0 -0
  16. package/app/config/__pycache__/env_config.cpython-312.pyc +0 -0
  17. package/app/config/__pycache__/logger_config.cpython-312.pyc +0 -0
  18. package/app/config/env_config.py +96 -0
  19. package/app/config/logger_config.py +46 -0
  20. package/app/manager/__init__.py +4 -0
  21. package/app/manager/__pycache__/__init__.cpython-312.pyc +0 -0
  22. package/app/manager/__pycache__/chatstream_manager.cpython-312.pyc +0 -0
  23. package/app/manager/__pycache__/prompts.cpython-312.pyc +0 -0
  24. package/app/manager/__pycache__/runtime_manager.cpython-312.pyc +0 -0
  25. package/app/manager/__pycache__/toolcall_manager.cpython-312.pyc +0 -0
  26. package/app/manager/chatstream_manager.py +90 -0
  27. package/app/manager/prompts.py +62 -0
  28. package/app/manager/runtime_manager.py +552 -0
  29. package/app/models/__pycache__/schemas.cpython-312.pyc +0 -0
  30. package/app/models/schemas.py +30 -0
  31. package/app/service/__init__.py +4 -0
  32. package/app/service/__pycache__/__init__.cpython-312.pyc +0 -0
  33. package/app/service/__pycache__/chat_service.cpython-312.pyc +0 -0
  34. package/app/service/__pycache__/llm_service.cpython-312.pyc +0 -0
  35. package/app/service/__pycache__/rag_service.cpython-312.pyc +0 -0
  36. package/app/service/__pycache__/tool_call_service.cpython-312.pyc +0 -0
  37. package/app/service/__pycache__/union_service.cpython-312.pyc +0 -0
  38. package/app/service/chat_service.py +228 -0
  39. package/app/service/llm_service.py +214 -0
  40. package/app/service/rag_service.py +866 -0
  41. package/app/service/union_service.py +201 -0
  42. package/app/utils/__init__.py +5 -0
  43. package/app/utils/__pycache__/__init__.cpython-312.pyc +0 -0
  44. package/app/utils/__pycache__/common_utils.cpython-312.pyc +0 -0
  45. package/app/utils/__pycache__/debug_context.cpython-312.pyc +0 -0
  46. package/app/utils/__pycache__/function_utils.cpython-312.pyc +0 -0
  47. package/app/utils/__pycache__/jwt_utils.cpython-312.pyc +0 -0
  48. package/app/utils/common_utils.py +169 -0
  49. package/app/utils/debug_context.py +16 -0
  50. package/app/utils/function_utils.py +274 -0
  51. package/app/utils/jwt_utils.py +39 -0
  52. package/app/views/__init__.py +6 -0
  53. package/app/views/__pycache__/__init__.cpython-312.pyc +0 -0
  54. package/app/views/__pycache__/view_chatstream.cpython-312.pyc +0 -0
  55. package/app/views/__pycache__/view_healthcheck.cpython-312.pyc +0 -0
  56. package/app/views/__pycache__/view_runtime.cpython-312.pyc +0 -0
  57. package/app/views/view_chatstream.py +53 -0
  58. package/app/views/view_healthcheck.py +14 -0
  59. package/app/views/view_runtime.py +72 -0
  60. package/app/wsgi.py +37 -0
  61. package/ci.yml +14 -0
  62. package/deploy/autoconf/templates/env.j2 +25 -0
  63. package/deploy/autoconf.yml +15 -0
  64. package/deploy/scripts/healthcheck.sh +0 -0
  65. package/deploy/scripts/requirements.txt +53 -0
  66. package/deploy/scripts/start.sh +75 -0
  67. package/deploy/scripts/stop.sh +31 -0
  68. package/knowledge/.gitkeep +0 -0
  69. package/knowledge/000001-biz-offline-85b99bd43b-v1.md +88 -0
  70. package/knowledge/000002-biz-offline-717e8d823e-v1.md +90 -0
  71. package/knowledge/000003-biz-offline-c963227cc8-v1.md +84 -0
  72. package/knowledge/000004-biz-offline-2a5868e7da-v1.md +92 -0
  73. package/knowledge/000005-biz-offline-f9d9cf1a88-v1.md +79 -0
  74. package/knowledge/000006-biz-offline-c4fa2df3bd-v1.md +77 -0
  75. package/knowledge/000007-biz-offline-78304b70ca-v1.md +76 -0
  76. package/knowledge/000008-biz-offline-987ae67b35-v1.md +75 -0
  77. package/knowledge/000009-biz-offline-4d656bcea3-v1.md +85 -0
  78. package/knowledge/000010-sop-offline-a9e1050719-v1.md +100 -0
  79. package/knowledge/000011-biz-offline-5de0624891-v1.md +86 -0
  80. package/knowledge/000012-biz-offline-7dfacccba3-v1.md +82 -0
  81. package/knowledge/000013-biz-offline-5e1d29d2ed-v1.md +81 -0
  82. package/knowledge/000014-biz-offline-1d0ed8b841-v1.md +68 -0
  83. package/knowledge/000015-biz-offline-8a1376ee3e-v1.md +78 -0
  84. package/knowledge/000016-biz-offline-c8bfc2aa08-v1.md +99 -0
  85. package/knowledge/000017-biz-offline-9dffb28032-v1.md +88 -0
  86. package/knowledge/000018-biz-offline-f935bc9a6a-v1.md +80 -0
  87. package/knowledge/000019-biz-offline-858b3ecd89-v1.md +86 -0
  88. package/knowledge/000020-biz-offline-65cb5c4f40-v1.md +113 -0
  89. package/knowledge/000021-biz-offline-1bf211639c-v1.md +148 -0
  90. package/knowledge/000022-biz-offline-8c5a637879-v1.md +140 -0
  91. package/knowledge/000023-biz-offline-fe872b8712-v1.md +188 -0
  92. package/knowledge/000024-biz-offline-a85010c500-v1.md +133 -0
  93. package/knowledge/000025-biz-offline-8af58a3638-v1.md +136 -0
  94. package/knowledge/000026-biz-offline-6754102e93-v1.md +142 -0
  95. package/knowledge/000027-biz-offline-ea2e5ca5f9-v1.md +150 -0
  96. package/knowledge/000028-scenario-offline-dab45cebb4-v1.md +136 -0
  97. package/knowledge/000029-scenario-offline-5b8ae5ea9f-v1.md +143 -0
  98. package/knowledge/000030-scenario-offline-9a82d42f3f-v1.md +136 -0
  99. package/knowledge/000031-scenario-offline-cc2edc0197-v1.md +122 -0
  100. package/knowledge/000032-scenario-offline-e5f6e5cbfa-v1.md +122 -0
  101. package/knowledge/000033-scenario-offline-e1955849aa-v1.md +135 -0
  102. package/knowledge/000034-scenario-offline-3a13d49a3a-v1.md +138 -0
  103. package/knowledge/000035-scenario-offline-fd5560211f-v1.md +147 -0
  104. package/knowledge/000036-scenario-offline-function-call-mock-v1.md +134 -0
  105. package/package.json +18 -0
  106. package/requirements.txt +53 -0
  107. package/tools/prompts.yaml +10 -0
  108. package/tools/tool_definitions.yaml +303 -0
@@ -0,0 +1,228 @@
1
+ from pathlib import Path
2
+ from typing import Dict, Generator, List
3
+
4
+ import yaml
5
+
6
+ from zai import ZhipuAiClient
7
+
8
+ from app.models.schemas import ChatResponse
9
+ from app.utils.function_utils import ToolContext, call_function
10
+ from loguru import logger
11
+
12
+
13
+ def _preview(text: str, limit: int = 300) -> str:
14
+ return str(text).replace("\n", " ")[:limit]
15
+
16
+
17
def _load_prompts() -> Dict[str, str]:
    """Load prompt texts from ``tools/prompts.yaml``.

    The file is looked up under the third ancestor directory of this module
    (``Path(__file__).resolve().parents[2]``).

    Returns:
        The parsed YAML mapping, or an empty dict when the file does not exist,
        is empty, or its top-level value is not a mapping.
    """
    prompts_file = Path(__file__).resolve().parents[2] / "tools" / "prompts.yaml"
    if not prompts_file.exists():
        return {}

    with prompts_file.open("r", encoding="utf-8") as handle:
        parsed = yaml.safe_load(handle)

    # An empty document parses to None; anything that is not a mapping is ignored.
    if not parsed or not isinstance(parsed, dict):
        return {}
    return parsed
24
+
25
+
26
# Prompts are loaded once at import time; the mapping may be empty.
_PROMPTS = _load_prompts()

# Use the built-in routing prompt when the key is missing OR when the YAML
# value is not a string (an empty `tool_routing_prompt:` entry parses as None).
# Calling .strip() on such a value would otherwise fail at import time.
_tool_routing_prompt = _PROMPTS.get("tool_routing_prompt")
if not isinstance(_tool_routing_prompt, str):
    _tool_routing_prompt = "请根据用户问题和可用 tools 选择合适工具;不要调用未出现在 tools 列表中的函数。"
TOOL_ROUTING_PROMPT = _tool_routing_prompt.strip()
32
+
33
class ChatService:
    """Chat service layer.

    Combines streaming LLM generation with tool calling and an optional
    keyword-based business filter applied to incoming questions.
    """

    def __init__(self, config, rag_service, union_service):
        """Read filter and LLM settings from ``config`` and build the LLM client.

        Args:
            config: Object indexed by setting name (``config["LLM_KEY"]`` etc.).
            rag_service: Handed to tool calls through ``ToolContext``.
            union_service: Handed to tool calls through ``ToolContext``.
        """
        self._config = config

        # ---- Filter settings ----
        self._filter_enabled = config["FILTER_ENABLED"]
        self._allowed_keywords = config["FILTER_ALLOWED_KEYWORDS"]
        self._rejection_message = config["FILTER_REJECTION_MESSAGE"]

        # ---- LLM settings ----
        self._client = ZhipuAiClient(
            api_key=config["LLM_KEY"],
            base_url=config["LLM_URL"],
        )
        self._model = config["LLM_MODEL"]
        self._max_tokens = config["LLM_MAX_TOKENS"]
        self._temperature = config["LLM_TEMPERATURE"]
        self._top_p = config["LLM_TOP_P"]
        self._system_prompt = config["SYSTEM_PROMPT"]
        self._tools_max_rounds = config["TOOLS_MAX_ROUNDS"]
        self._rag = rag_service
        self._union_service = union_service

    # ========== Filtering ==========

    def _check_question_valid(self, question: str) -> bool:
        """Return whether ``question`` passes the keyword filter.

        The question passes when filtering is disabled, when no keywords are
        configured, or when it contains at least one allowed keyword
        (case-insensitive substring match). A ``None``/empty question contains
        no keyword and is therefore rejected while the filter is active.
        """
        if not self._filter_enabled or not self._allowed_keywords:
            return True
        # Normalise None / non-string input instead of raising AttributeError.
        lower_question = str(question or "").lower()
        return any(k.lower() in lower_question for k in self._allowed_keywords)

    # ========== LLM ==========

    def _build_messages(
        self,
        history: List[Dict[str, str]],
        user_question: str,
    ) -> List[Dict[str, str]]:
        """Build the message list: optional system prompt, history, then the user turn.

        Args:
            history: Earlier messages, appended unchanged; ``None`` is treated
                as an empty history.
            user_question: Content of the final ``user`` message.
        """
        messages = []
        if self._system_prompt:
            messages.append({"role": "system", "content": self._system_prompt})
        messages.extend(history or [])
        messages.append({"role": "user", "content": user_question})
        return messages

    def _build_tool_messages(self, history: List[Dict[str, str]], user_question: str) -> List[Dict[str, str]]:
        """Build the message list and insert the tool-routing system prompt.

        The routing prompt goes right after a leading system message when there
        is one, otherwise at the very beginning.
        """
        messages = self._build_messages(history, user_question)
        insert_at = 1 if messages and messages[0].get("role") == "system" else 0
        messages.insert(insert_at, {"role": "system", "content": TOOL_ROUTING_PROMPT})
        return messages

    def tool_call_stream(
        self,
        conversation_id: str,
        question: str,
        tools,
        history: List[Dict[str, str]],
        jsessionid: str,
    ) -> Generator[ChatResponse, None, None]:
        """Stream a conversation turn with interleaved reasoning and tool calls.

        Each round requests a streamed completion (``stream=True`` and
        ``tool_stream=True``), yielding reasoning and content fragments as they
        arrive while tool-call deltas are accumulated. When the round produced
        tool calls, each is executed via ``call_function``, its result is
        yielded and appended to the messages, and another round starts. The
        loop ends when a round produces no tool calls or after
        ``TOOLS_MAX_ROUNDS`` rounds.

        Args:
            conversation_id: Identifier echoed in every yielded event and log line.
            question: The user's question.
            tools: Tool definitions passed to the model unchanged.
            history: Earlier conversation messages.
            jsessionid: Passed to tool calls through ``ToolContext``.

        Yields:
            ``ChatResponse`` events: a single ``finish_reason="rejected"`` event
            when the question fails the filter; otherwise reasoning / content /
            tool_call / tool_result events followed by ``finish_reason="stop"``,
            or a ``finish_reason="error"`` event if anything raises.
        """
        if not self._check_question_valid(question):
            logger.info(f"问题未通过业务过滤。conversation_id={conversation_id} question={_preview(question, 120)}")
            yield ChatResponse(
                conversationId=conversation_id,
                content=self._rejection_message,
                finish_reason="rejected",
            )
            return

        def content_event(content: str) -> ChatResponse:
            return ChatResponse(conversationId=conversation_id, content=content)

        def reasoning_event(reasoning_content: str) -> ChatResponse:
            return ChatResponse(conversationId=conversation_id, reasoning_content=reasoning_content)

        def tool_call_event(tool_call: str) -> ChatResponse:
            return ChatResponse(conversationId=conversation_id, tool_call=tool_call)

        def tool_result_event(tool_result: str) -> ChatResponse:
            return ChatResponse(conversationId=conversation_id, tool_result=tool_result)

        try:
            messages = self._build_tool_messages(history, question)
            max_rounds = self._tools_max_rounds
            final_answer = ""

            logger.info(f"开始模型流式调用。conversation_id={conversation_id} model={self._model} question={_preview(question, 120)}")
            for round_idx in range(max_rounds):
                response = self._client.chat.completions.create(
                    model=self._model,
                    messages=messages,
                    tools=tools,
                    tool_choice="auto",
                    stream=True,
                    tool_stream=True,
                    thinking={"type": "enabled", "clear_thinking": False},
                    max_tokens=self._max_tokens,
                    temperature=self._temperature,
                    top_p=self._top_p,
                )

                content_parts: List[str] = []
                reasoning_parts: List[str] = []
                tool_calls_map: Dict[int, Dict] = {}

                for chunk in response:
                    # Chunks without choices carry nothing to forward.
                    if not chunk.choices:
                        continue
                    delta = chunk.choices[0].delta

                    reasoning = getattr(delta, "reasoning_content", None)
                    if reasoning:
                        reasoning_parts.append(reasoning)
                        yield reasoning_event(reasoning)

                    content = getattr(delta, "content", None)
                    if content:
                        content_parts.append(content)
                        yield content_event(content)

                    for tc in getattr(delta, "tool_calls", None) or []:
                        self._merge_tool_call_delta(tool_calls_map, tc)

                current_content = "".join(content_parts)
                current_reasoning = "".join(reasoning_parts)

                logger.info(f"模型流式返回完成。conversation_id={conversation_id} round={round_idx + 1} content_chars={len(current_content)} reasoning_chars={len(current_reasoning)} tool_calls={len(tool_calls_map)} content_preview={_preview(current_content)}")

                # No tool calls in this round: the streamed content is the final answer.
                if not tool_calls_map:
                    final_answer = current_content
                    break

                # Echo the assistant turn (with its tool calls) back to the model
                # so that the tool results appended below can be matched by id.
                assistant_tool_calls = [tool_calls_map[i] for i in sorted(tool_calls_map)]
                messages.append({
                    "role": "assistant",
                    "content": current_content or None,
                    "reasoning_content": current_reasoning,
                    "tool_calls": assistant_tool_calls,
                })

                for tc in assistant_tool_calls:
                    name = tc["function"]["name"]
                    args = tc["function"]["arguments"]
                    logger.info(f"执行工具调用。conversation_id={conversation_id} tool={name} args={_preview(args, 200)}")
                    yield tool_call_event(f"\n[调用工具: {name}({args})]\n")

                    tool_context = ToolContext(
                        union_service=self._union_service,
                        rag_service=self._rag,
                        jsessionid=jsessionid,
                    )
                    result = call_function(name, args, tool_context)
                    logger.info(f"工具调用完成。conversation_id={conversation_id} tool={name} result_preview={_preview(result, 300)}")
                    yield tool_result_event(result)

                    messages.append({
                        "role": "tool",
                        "content": result,
                        "tool_call_id": tc["id"],
                    })
            else:
                # Reached only when the loop was never broken out of: every
                # round ended with tool calls, so no final answer was produced.
                logger.warning(f"工具调用轮次已达上限,未获得最终回答。conversation_id={conversation_id} max_rounds={max_rounds}")

            logger.info(f"对话完成。conversation_id={conversation_id} final_answer_chars={len(final_answer)} final_answer_preview={_preview(final_answer)}")
            yield ChatResponse(conversationId=conversation_id, finish_reason="stop")

        except Exception as e:
            # Top-level boundary of the stream: log with traceback and surface
            # the failure to the consumer as a terminal error event.
            logger.exception(f"模型调用异常。conversation_id={conversation_id} question={_preview(question, 120)}")
            yield ChatResponse(
                conversationId=conversation_id,
                content=f"[错误] 模型调用异常: {str(e)}",
                finish_reason="error",
            )

    @staticmethod
    def _merge_tool_call_delta(tool_calls_map: Dict[int, Dict], tc) -> None:
        """Merge one streamed tool-call delta into the accumulator, keyed by ``tc.index``.

        A non-empty ``tc.id`` overwrites the stored id; non-empty function
        ``name`` and ``arguments`` fragments are appended to what was already
        accumulated for that index.
        """
        slot = tool_calls_map.setdefault(tc.index, {
            "id": "",
            "type": "function",
            "function": {"name": "", "arguments": ""},
        })
        if tc.id:
            slot["id"] = tc.id
        fn = getattr(tc, "function", None)
        if fn is not None:
            if getattr(fn, "name", None):
                slot["function"]["name"] += fn.name
            if getattr(fn, "arguments", None):
                slot["function"]["arguments"] += fn.arguments
@@ -0,0 +1,214 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import httpx
5
+ import json
6
+ from typing import Optional, Generator, Dict, Any, List
7
+ from openai import OpenAI
8
+ from loguru import logger
9
+
10
+ from app.utils import common_utils
11
+
12
+
13
class LLMService:
    """Thin wrapper around an OpenAI-compatible chat-completions client.

    The instance starts without a client; ``initialize(config)`` must be
    called before ``execute_llm`` / ``execute_llm_stream``.
    """

    # Defaults applied when the caller does not pass a value.
    DEFAULT_TEMPERATURE = 0.7
    DEFAULT_MAX_TOKENS = 30000
    STREAM_MAX_TOKENS = 30000
    JSON_RESPONSE_FORMAT = {"type": "json_object"}

    def __init__(self):
        """Create an uninitialised service (no client, no model name)."""
        self._client: Optional[OpenAI] = None
        self._model_name: Optional[str] = None

    def initialize(self, config) -> None:
        """Create the LLM client from ``config``; a no-op if already initialised.

        Args:
            config: Object indexed by ``"LLM_URL"``, ``"LLM_KEY"`` and ``"LLM_MODEL"``.

        Raises:
            RuntimeError: If the URL or key is empty, or client creation fails.
        """
        if self._client is not None:
            logger.debug("LLM客户端已经初始化,跳过重复初始化")
            return

        llm_url = config["LLM_URL"]
        llm_key = config["LLM_KEY"]
        self._model_name = config["LLM_MODEL"]

        if not llm_url:
            error_msg = "LLM_URL 配置为空,请检查环境变量配置"
            logger.error(error_msg)
            raise RuntimeError(error_msg)

        if not llm_key:
            error_msg = "LLM_KEY 配置为空,请检查环境变量配置"
            logger.error(error_msg)
            raise RuntimeError(error_msg)

        try:
            self._client = self._create_client(llm_url, llm_key)
            logger.info(f"LLM客户端初始化成功: {llm_url}")
        except Exception as e:
            error_msg = f"LLM客户端初始化失败: {str(e)}"
            logger.error(error_msg)
            raise RuntimeError(error_msg) from e

    @property
    def model_name(self) -> Optional[str]:
        """Model name read from config, or ``None`` before ``initialize()``."""
        return self._model_name

    def _create_client(self, llm_url: str, llm_key: str) -> OpenAI:
        """Build the OpenAI client for the given endpoint and key."""
        # NOTE(review): verify=False turns off TLS certificate verification for
        # every request made by this client. Kept as-is because deployments may
        # rely on it (e.g. internal endpoints with self-signed certificates);
        # confirm and prefer a trusted CA bundle where possible.
        return OpenAI(
            base_url=llm_url,
            api_key=llm_key,
            http_client=httpx.Client(verify=False)
        )

    def _ensure_client(self) -> None:
        """Fail fast when the client has not been created yet.

        Raises:
            RuntimeError: If ``initialize()`` has not been called successfully.
        """
        if self._client is None:
            raise RuntimeError("LLM客户端未初始化,请先调用 initialize() 方法")

    def _sampling_params(
        self,
        temperature: Optional[float],
        max_tokens: Optional[int],
        default_max_tokens: int
    ) -> Dict[str, Any]:
        """Resolve ``temperature`` / ``max_tokens`` against the class defaults.

        ``temperature`` falls back to ``DEFAULT_TEMPERATURE`` only when it is
        ``None``, so an explicit ``0`` / ``0.0`` is honoured. ``max_tokens``
        falls back to ``default_max_tokens`` when it is ``None`` or ``0``.
        """
        return {
            "temperature": self.DEFAULT_TEMPERATURE if temperature is None else temperature,
            "max_tokens": max_tokens or default_max_tokens,
        }

    def _build_messages(
        self,
        system_prompt: str,
        user_prompt: Optional[str] = None,
        query_text: Optional[str] = None,
        data_json: Optional[str] = None
    ) -> List[Dict[str, str]]:
        """Build the chat message list.

        Args:
            system_prompt: Content of the system message.
            user_prompt: Optional user template; ``{query_text}`` and
                ``{data_json}`` placeholders are replaced when the matching
                argument is not ``None``.
            query_text: Optional query text; used as the user message on its
                own when no template is given.
            data_json: Optional JSON string substituted into the template.

        Returns:
            The system message followed by exactly one user message. When
            neither a template nor a query is given, the user message repeats
            the system prompt.
        """
        messages = [{"role": "system", "content": system_prompt}]

        if user_prompt:
            # Substitute the template placeholders with the actual values.
            content = user_prompt
            if query_text is not None:
                content = content.replace("{query_text}", str(query_text))
            if data_json is not None:
                content = content.replace("{data_json}", str(data_json))
            messages.append({"role": "user", "content": content})
        elif query_text:
            messages.append({"role": "user", "content": query_text})
        else:
            messages.append({"role": "user", "content": system_prompt})
        return messages

    def execute_llm(
        self,
        system_prompt: str,
        user_prompt: Optional[str] = None,
        query_text: Optional[str] = None,
        data_json: Optional[str] = None,
        temperature: Optional[float] = None,
        max_tokens: Optional[int] = None,
        use_json_format: bool = False
    ) -> Optional[Dict[str, Any] | str]:
        """Run a non-streaming completion.

        Args:
            system_prompt: System prompt.
            user_prompt: Optional user prompt template.
            query_text: Optional query text.
            data_json: Optional JSON string for the template.
            temperature: Sampling temperature; ``None`` means DEFAULT_TEMPERATURE.
            max_tokens: Token limit; ``None`` means DEFAULT_MAX_TOKENS.
            use_json_format: Request a JSON-object response and parse it.

        Returns:
            The parsed JSON value when ``use_json_format`` is set, otherwise the
            response text after ``common_utils.remove_think_tag``. ``None`` when
            the request or the parsing fails.

        Raises:
            RuntimeError: If the client has not been initialised.
        """
        self._ensure_client()

        try:
            messages = self._build_messages(system_prompt, user_prompt, query_text, data_json)

            request_params = {
                "model": self.model_name,
                "messages": messages,
                **self._sampling_params(temperature, max_tokens, self.DEFAULT_MAX_TOKENS),
            }

            if use_json_format:
                request_params["response_format"] = self.JSON_RESPONSE_FORMAT

            llm_response = self._client.chat.completions.create(**request_params)
            content = common_utils.remove_think_tag(llm_response.choices[0].message.content)

            if use_json_format:
                result = json.loads(content)
                logger.debug("LLM JSON响应成功")
                return result
            else:
                logger.debug("LLM文本响应成功")
                return content
        except Exception as e:
            logger.error(f"LLM调用失败: {e.args}")
            return None

    def execute_llm_stream(
        self,
        system_prompt: str,
        user_prompt: Optional[str] = None,
        query_text: Optional[str] = None,
        data_json: Optional[str] = None,
        temperature: Optional[float] = None,
        max_tokens: Optional[int] = None
    ) -> Generator[str, None, None]:
        """Run a streaming completion.

        Args:
            system_prompt: System prompt.
            user_prompt: Optional user prompt template.
            query_text: Optional query text.
            data_json: Optional JSON string for the template.
            temperature: Sampling temperature; ``None`` means DEFAULT_TEMPERATURE.
            max_tokens: Token limit; ``None`` means STREAM_MAX_TOKENS.

        Yields:
            Non-empty text fragments: each chunk's ``content`` or, when that is
            empty, its ``reasoning_content``. On failure a single
            ``"[ERROR] ..."`` string is yielded instead of raising.

        Raises:
            RuntimeError: If the client has not been initialised.
        """
        self._ensure_client()
        logger.debug("开始LLM流式调用")

        try:
            messages = self._build_messages(system_prompt, user_prompt, query_text, data_json)
            # Truncate the rendered text; slicing the list itself would keep
            # every message and log the full prompt.
            logger.info(f"LLM请求: {str(messages)[:2000]}...")
            llm_stream_response = self._client.chat.completions.create(
                model=self.model_name,
                messages=messages,
                stream=True,
                **self._sampling_params(temperature, max_tokens, self.STREAM_MAX_TOKENS),
            )

            for chunk in llm_stream_response:
                # Skip chunks with no choices instead of raising IndexError.
                if not chunk.choices:
                    continue
                delta = chunk.choices[0].delta
                content = getattr(delta, 'content', None) or getattr(delta, 'reasoning_content', None) or ''
                if content:
                    logger.info(f"LLM流式响应: {content[:50]}...")
                    yield content

        except Exception as e:
            error_msg = f"[ERROR] llm 调用失败:{str(e)}"
            logger.error(error_msg)
            yield error_msg
211
+
212
+
213
+ # Module-level singleton instance; initialize(config) must be called on it before use.
214
+ llm_service = LLMService()