npm - union-app-chat-stream - Versions diffs - 1.0.3 - Mend

union-app-chat-stream 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (108) hide show

package/.gitignore +16 -0
package/PROJECT_OVERVIEW.md +187 -0
package/app/.env +63 -0
package/app/.env.dev +63 -0
package/app/.env.prod.bj11 +63 -0
package/app/.env.prod.sh20 +63 -0
package/app/.env.prod.sz31 +63 -0
package/app/.env.test.bj12 +63 -0
package/app/__init__.py +42 -0
package/app/__pycache__/__init__.cpython-312.pyc +0 -0
package/app/__pycache__/authenticated_user.cpython-312.pyc +0 -0
package/app/__pycache__/extensions.cpython-312.pyc +0 -0
package/app/__pycache__/wsgi.cpython-312.pyc +0 -0
package/app/authenticated_user.py +77 -0
package/app/config/__pycache__/config_loader.cpython-312.pyc +0 -0
package/app/config/__pycache__/env_config.cpython-312.pyc +0 -0
package/app/config/__pycache__/logger_config.cpython-312.pyc +0 -0
package/app/config/env_config.py +96 -0
package/app/config/logger_config.py +46 -0
package/app/manager/__init__.py +4 -0
package/app/manager/__pycache__/__init__.cpython-312.pyc +0 -0
package/app/manager/__pycache__/chatstream_manager.cpython-312.pyc +0 -0
package/app/manager/__pycache__/prompts.cpython-312.pyc +0 -0
package/app/manager/__pycache__/runtime_manager.cpython-312.pyc +0 -0
package/app/manager/__pycache__/toolcall_manager.cpython-312.pyc +0 -0
package/app/manager/chatstream_manager.py +90 -0
package/app/manager/prompts.py +62 -0
package/app/manager/runtime_manager.py +552 -0
package/app/models/__pycache__/schemas.cpython-312.pyc +0 -0
package/app/models/schemas.py +30 -0
package/app/service/__init__.py +4 -0
package/app/service/__pycache__/__init__.cpython-312.pyc +0 -0
package/app/service/__pycache__/chat_service.cpython-312.pyc +0 -0
package/app/service/__pycache__/llm_service.cpython-312.pyc +0 -0
package/app/service/__pycache__/rag_service.cpython-312.pyc +0 -0
package/app/service/__pycache__/tool_call_service.cpython-312.pyc +0 -0
package/app/service/__pycache__/union_service.cpython-312.pyc +0 -0
package/app/service/chat_service.py +228 -0
package/app/service/llm_service.py +214 -0
package/app/service/rag_service.py +866 -0
package/app/service/union_service.py +201 -0
package/app/utils/__init__.py +5 -0
package/app/utils/__pycache__/__init__.cpython-312.pyc +0 -0
package/app/utils/__pycache__/common_utils.cpython-312.pyc +0 -0
package/app/utils/__pycache__/debug_context.cpython-312.pyc +0 -0
package/app/utils/__pycache__/function_utils.cpython-312.pyc +0 -0
package/app/utils/__pycache__/jwt_utils.cpython-312.pyc +0 -0
package/app/utils/common_utils.py +169 -0
package/app/utils/debug_context.py +16 -0
package/app/utils/function_utils.py +274 -0
package/app/utils/jwt_utils.py +39 -0
package/app/views/__init__.py +6 -0
package/app/views/__pycache__/__init__.cpython-312.pyc +0 -0
package/app/views/__pycache__/view_chatstream.cpython-312.pyc +0 -0
package/app/views/__pycache__/view_healthcheck.cpython-312.pyc +0 -0
package/app/views/__pycache__/view_runtime.cpython-312.pyc +0 -0
package/app/views/view_chatstream.py +53 -0
package/app/views/view_healthcheck.py +14 -0
package/app/views/view_runtime.py +72 -0
package/app/wsgi.py +37 -0
package/ci.yml +14 -0
package/deploy/autoconf/templates/env.j2 +25 -0
package/deploy/autoconf.yml +15 -0
package/deploy/scripts/healthcheck.sh +0 -0
package/deploy/scripts/requirements.txt +53 -0
package/deploy/scripts/start.sh +75 -0
package/deploy/scripts/stop.sh +31 -0
package/knowledge/.gitkeep +0 -0
package/knowledge/000001-biz-offline-85b99bd43b-v1.md +88 -0
package/knowledge/000002-biz-offline-717e8d823e-v1.md +90 -0
package/knowledge/000003-biz-offline-c963227cc8-v1.md +84 -0
package/knowledge/000004-biz-offline-2a5868e7da-v1.md +92 -0
package/knowledge/000005-biz-offline-f9d9cf1a88-v1.md +79 -0
package/knowledge/000006-biz-offline-c4fa2df3bd-v1.md +77 -0
package/knowledge/000007-biz-offline-78304b70ca-v1.md +76 -0
package/knowledge/000008-biz-offline-987ae67b35-v1.md +75 -0
package/knowledge/000009-biz-offline-4d656bcea3-v1.md +85 -0
package/knowledge/000010-sop-offline-a9e1050719-v1.md +100 -0
package/knowledge/000011-biz-offline-5de0624891-v1.md +86 -0
package/knowledge/000012-biz-offline-7dfacccba3-v1.md +82 -0
package/knowledge/000013-biz-offline-5e1d29d2ed-v1.md +81 -0
package/knowledge/000014-biz-offline-1d0ed8b841-v1.md +68 -0
package/knowledge/000015-biz-offline-8a1376ee3e-v1.md +78 -0
package/knowledge/000016-biz-offline-c8bfc2aa08-v1.md +99 -0
package/knowledge/000017-biz-offline-9dffb28032-v1.md +88 -0
package/knowledge/000018-biz-offline-f935bc9a6a-v1.md +80 -0
package/knowledge/000019-biz-offline-858b3ecd89-v1.md +86 -0
package/knowledge/000020-biz-offline-65cb5c4f40-v1.md +113 -0
package/knowledge/000021-biz-offline-1bf211639c-v1.md +148 -0
package/knowledge/000022-biz-offline-8c5a637879-v1.md +140 -0
package/knowledge/000023-biz-offline-fe872b8712-v1.md +188 -0
package/knowledge/000024-biz-offline-a85010c500-v1.md +133 -0
package/knowledge/000025-biz-offline-8af58a3638-v1.md +136 -0
package/knowledge/000026-biz-offline-6754102e93-v1.md +142 -0
package/knowledge/000027-biz-offline-ea2e5ca5f9-v1.md +150 -0
package/knowledge/000028-scenario-offline-dab45cebb4-v1.md +136 -0
package/knowledge/000029-scenario-offline-5b8ae5ea9f-v1.md +143 -0
package/knowledge/000030-scenario-offline-9a82d42f3f-v1.md +136 -0
package/knowledge/000031-scenario-offline-cc2edc0197-v1.md +122 -0
package/knowledge/000032-scenario-offline-e5f6e5cbfa-v1.md +122 -0
package/knowledge/000033-scenario-offline-e1955849aa-v1.md +135 -0
package/knowledge/000034-scenario-offline-3a13d49a3a-v1.md +138 -0
package/knowledge/000035-scenario-offline-fd5560211f-v1.md +147 -0
package/knowledge/000036-scenario-offline-function-call-mock-v1.md +134 -0
package/package.json +18 -0
package/requirements.txt +53 -0
package/tools/prompts.yaml +10 -0
package/tools/tool_definitions.yaml +303 -0

package/app/service/chat_service.py ADDED Viewed

@@ -0,0 +1,228 @@
+from pathlib import Path
+from typing import Dict, Generator, List
+import yaml
+from zai import ZhipuAiClient
+from app.models.schemas import ChatResponse
+from app.utils.function_utils import ToolContext, call_function
+from loguru import logger
+def _preview(text: str, limit: int = 300) -> str:
+    return str(text).replace("\n", " ")[:limit]
+def _load_prompts() -> Dict[str, str]:
+    path = Path(__file__).resolve().parents[2] / "tools" / "prompts.yaml"
+    if not path.exists():
+        return {}
+    with path.open("r", encoding="utf-8") as f:
+        data = yaml.safe_load(f) or {}
+    return data if isinstance(data, dict) else {}
+_PROMPTS = _load_prompts()
+TOOL_ROUTING_PROMPT = _PROMPTS.get(
+    "tool_routing_prompt",
+    "请根据用户问题和可用 tools 选择合适工具；不要调用未出现在 tools 列表中的函数。",
+).strip()
+class ChatService:
+    """
+    Chat service layer.
+    Combines streaming LLM calls that support tool calling with a keyword-based business filter.
+    """
+    def __init__(self, config, rag_service, union_service):
+        """Read the filter and LLM settings from ``config`` and build the LLM client.
+
+        Args:
+            config: Object indexed with the ``FILTER_*``, ``LLM_*``, ``SYSTEM_PROMPT``
+                and ``TOOLS_MAX_ROUNDS`` keys read below.
+            rag_service: Stored and later handed to ``ToolContext`` for tool execution.
+            union_service: Stored and later handed to ``ToolContext`` for tool execution.
+        """
+        self._config = config
+        # ---- Filter configuration ----
+        self._filter_enabled = config["FILTER_ENABLED"]
+        self._allowed_keywords = config["FILTER_ALLOWED_KEYWORDS"]
+        self._rejection_message = config["FILTER_REJECTION_MESSAGE"]
+        # ---- LLM configuration ----
+        self._client = ZhipuAiClient(
+            api_key=config["LLM_KEY"],
+            base_url=config["LLM_URL"],
+        )
+        self._model = config["LLM_MODEL"]
+        self._max_tokens = config["LLM_MAX_TOKENS"]
+        self._temperature = config["LLM_TEMPERATURE"]
+        self._top_p = config["LLM_TOP_P"]
+        self._system_prompt = config["SYSTEM_PROMPT"]
+        self._tools_max_rounds = config["TOOLS_MAX_ROUNDS"]
+        self._rag = rag_service
+        self._union_service = union_service
+    # ========== Filtering ==========
+    def _check_question_valid(self, question: str) -> bool:
+        """Return whether ``question`` passes the keyword filter.
+
+        Always True when filtering is disabled or no keywords are configured;
+        otherwise True only if at least one allowed keyword occurs in the
+        question as a case-insensitive substring.
+        """
+        if not self._filter_enabled or not self._allowed_keywords:
+            return True
+        lower_question = question.lower()
+        return any(k.lower() in lower_question for k in self._allowed_keywords)
+    # ========== LLM ==========
+    def _build_messages(
+        self,
+        history: List[Dict[str, str]],
+        user_question: str,
+    ) -> List[Dict[str, str]]:
+        """Build the message list: system prompt (if set), then history, then the user question."""
+        messages = []
+        if self._system_prompt:
+            messages.append({"role": "system", "content": self._system_prompt})
+        messages.extend(history)
+        messages.append({"role": "user", "content": user_question})
+        return messages
+    def _build_tool_messages(self, history: List[Dict[str, str]], user_question: str) -> List[Dict[str, str]]:
+        """Build the message list and add ``TOOL_ROUTING_PROMPT`` as an extra system message.
+
+        The routing prompt goes right after the leading system message when
+        there is one, otherwise at the very front.
+        """
+        messages = self._build_messages(history, user_question)
+        insert_at = 1 if messages and messages[0].get("role") == "system" else 0
+        messages.insert(insert_at, {"role": "system", "content": TOOL_ROUTING_PROMPT})
+        return messages
+    def tool_call_stream(
+        self,
+        conversation_id: str,
+        question: str,
+        tools,
+        history: List[Dict[str, str]],
+        jsessionid: str,
+    ) -> Generator[ChatResponse, None, None]:
+        """
+        Streaming chat with tool calling (supports interleaved thinking and tool calls).
+        - stream=True + tool_stream=True: the model returns reasoning, answer content and tool calls together in the streamed output
+        - Tool results are fed back to the model and streaming continues, looping until the model stops calling tools or the maximum number of rounds is reached
+
+        Args:
+            conversation_id: Echoed on every yielded ``ChatResponse`` and in log lines.
+            question: User question; checked by the keyword filter before any model call.
+            tools: Passed through unchanged as the ``tools`` argument of the completion request.
+            history: Earlier messages placed between the system prompts and the question.
+            jsessionid: Handed to ``ToolContext`` for tool execution.
+
+        Yields:
+            ``ChatResponse`` events carrying ``reasoning_content``, ``content``,
+            ``tool_call`` or ``tool_result``, followed by one final event whose
+            ``finish_reason`` is ``"stop"``. A rejected question yields a single
+            ``"rejected"`` event; any exception yields a final ``"error"`` event.
+        """
+        if not self._check_question_valid(question):
+            logger.info(f"问题未通过业务过滤。conversation_id={conversation_id} question={_preview(question, 120)}")
+            yield ChatResponse(
+                conversationId=conversation_id,
+                content=self._rejection_message,
+                finish_reason="rejected",
+            )
+            return
+        # Small factories so that every event carries the conversation id.
+        def content_event(content: str) -> ChatResponse:
+            return ChatResponse(conversationId=conversation_id, content=content)
+        def reasoning_event(reasoning_content: str) -> ChatResponse:
+            return ChatResponse(conversationId=conversation_id, reasoning_content=reasoning_content)
+        def tool_call_event(tool_call: str) -> ChatResponse:
+            return ChatResponse(conversationId=conversation_id, tool_call=tool_call)
+        def tool_result_event(tool_result: str) -> ChatResponse:
+            return ChatResponse(conversationId=conversation_id, tool_result=tool_result)
+        try:
+            messages = self._build_tool_messages(history, question)
+            max_rounds = self._tools_max_rounds
+            final_answer = ""
+            logger.info(f"开始模型流式调用。conversation_id={conversation_id} model={self._model} question={_preview(question, 120)}")
+            # One iteration = one model call plus execution of the tools it requested.
+            for round_idx in range(max_rounds):
+                response = self._client.chat.completions.create(
+                    model=self._model,
+                    messages=messages,
+                    tools=tools,
+                    tool_choice="auto",
+                    stream=True,
+                    tool_stream=True,
+                    thinking={"type": "enabled", "clear_thinking": False},
+                    max_tokens=self._max_tokens,
+                    temperature=self._temperature,
+                    top_p=self._top_p,
+                )
+                current_content = ""
+                current_reasoning = ""
+                reasoning_len = 0
+                # index -> tool call accumulated from the fragments seen so far in this round.
+                tool_calls_map: Dict[int, Dict] = {}
+                for chunk in response:
+                    # Chunks without choices carry nothing to forward.
+                    if not chunk.choices:
+                        continue
+                    delta = chunk.choices[0].delta
+                    # Reasoning and content fragments are forwarded as soon as they arrive.
+                    reasoning = getattr(delta, "reasoning_content", None)
+                    if reasoning:
+                        current_reasoning += reasoning
+                        reasoning_len += len(reasoning)
+                        yield reasoning_event(reasoning)
+                    content = getattr(delta, "content", None)
+                    if content:
+                        current_content += content
+                        yield content_event(content)
+                    for tc in getattr(delta, "tool_calls", None) or []:
+                        self._merge_tool_call_delta(tool_calls_map, tc)
+                logger.info(f"模型流式返回完成。conversation_id={conversation_id} round={round_idx + 1} content_chars={len(current_content)} reasoning_chars={reasoning_len} tool_calls={len(tool_calls_map)} content_preview={_preview(current_content)}")
+                # No tool call requested: this round's content is the final answer.
+                if not tool_calls_map:
+                    final_answer = current_content
+                    break
+                assistant_tool_calls = [tool_calls_map[i] for i in sorted(tool_calls_map)]
+                # Record the assistant turn (with its tool calls) before appending the tool results.
+                # Empty content is sent as None.
+                messages.append({
+                    "role": "assistant",
+                    "content": current_content or None,
+                    "reasoning_content": current_reasoning,
+                    "tool_calls": assistant_tool_calls,
+                })
+                for tc in assistant_tool_calls:
+                    name = tc["function"]["name"]
+                    args = tc["function"]["arguments"]
+                    logger.info(f"执行工具调用。conversation_id={conversation_id} tool={name} args={_preview(args, 200)}")
+                    yield tool_call_event(f"\n[调用工具: {name}({args})]\n")
+                    tool_context = ToolContext(
+                        union_service=self._union_service,
+                        rag_service=self._rag,
+                        jsessionid=jsessionid,
+                    )
+                    result = call_function(name, args, tool_context)
+                    logger.info(f"工具调用完成。conversation_id={conversation_id} tool={name} result_preview={_preview(result, 300)}")
+                    yield tool_result_event(result)
+                    # The result is linked to its request through tool_call_id.
+                    messages.append({
+                        "role": "tool",
+                        "content": result,
+                        "tool_call_id": tc["id"],
+                    })
+            # Reached both when the model answered without tool calls and when max_rounds
+            # was used up; in the latter case final_answer is still "" and "stop" is emitted anyway.
+            logger.info(f"对话完成。conversation_id={conversation_id} final_answer_chars={len(final_answer)} final_answer_preview={_preview(final_answer)}")
+            yield ChatResponse(conversationId=conversation_id, finish_reason="stop")
+        except Exception as e:
+            # Covers failures of the model call as well as of tool execution.
+            # NOTE(review): the raw exception text is sent to the consumer in `content` - confirm this is intended.
+            logger.exception(f"模型调用异常。conversation_id={conversation_id} question={_preview(question, 120)}")
+            yield ChatResponse(
+                conversationId=conversation_id,
+                content=f"[错误] 模型调用异常: {str(e)}",
+                finish_reason="error",
+            )
+    @staticmethod
+    def _merge_tool_call_delta(tool_calls_map: Dict[int, Dict], tc) -> None:
+        """Merge a single streamed tool_call delta into the accumulator dict, keyed by its index."""
+        slot = tool_calls_map.setdefault(tc.index, {
+            "id": "",
+            "type": "function",
+            "function": {"name": "", "arguments": ""},
+        })
+        # A non-empty id replaces the stored one; name and arguments fragments are appended.
+        if tc.id:
+            slot["id"] = tc.id
+        fn = getattr(tc, "function", None)
+        if fn is not None:
+            if getattr(fn, "name", None):
+                slot["function"]["name"] += fn.name
+            if getattr(fn, "arguments", None):
+                slot["function"]["arguments"] += fn.arguments

package/app/service/llm_service.py ADDED Viewed

@@ -0,0 +1,214 @@
+from __future__ import annotations
+import os
+import httpx
+import json
+from typing import Optional, Generator, Dict, Any, List
+from openai import OpenAI
+from loguru import logger
+from app.utils import common_utils
+class LLMService:
+    # 常量定义
+    DEFAULT_TEMPERATURE = 0.7
+    DEFAULT_MAX_TOKENS = 30000
+    STREAM_MAX_TOKENS = 30000
+    JSON_RESPONSE_FORMAT = {"type": "json_object"}
+    def __init__(self):
+        """初始化LLM服务实例"""
+        self._client: Optional[OpenAI] = None
+        self._model_name: Optional[str] = None
+    def initialize(self, config) -> None:
+        """初始化LLM客户端"""
+        if self._client is not None:
+            logger.debug("LLM客户端已经初始化，跳过重复初始化")
+            return
+        llm_url = config["LLM_URL"]
+        llm_key = config["LLM_KEY"]
+        self._model_name = config["LLM_MODEL"]
+        if not llm_url:
+            error_msg = "LLM_URL 配置为空，请检查环境变量配置"
+            logger.error(error_msg)
+            raise RuntimeError(error_msg)
+        if not llm_key:
+            error_msg = "LLM_KEY 配置为空，请检查环境变量配置"
+            logger.error(error_msg)
+            raise RuntimeError(error_msg)
+        try:
+            self._client = self._create_client(llm_url, llm_key)
+            logger.info(f"LLM客户端初始化成功: {llm_url}")
+        except Exception as e:
+            error_msg = f"LLM客户端初始化失败: {str(e)}"
+            logger.error(error_msg)
+            raise RuntimeError(error_msg) from e
+    @property
+    def model_name(self) -> str:
+        """获取模型名称"""
+        return self._model_name
+    def _create_client(self, llm_url: str, llm_key: str) -> OpenAI:
+        return OpenAI(
+            base_url=llm_url,
+            api_key=llm_key,
+            http_client=httpx.Client(verify=False)
+        )
+    def _ensure_client(self) -> None:
+        """
+        确保客户端已初始化
+        Raises:
+            RuntimeError: 当客户端未初始化时
+        """
+        if self._client is None:
+            raise RuntimeError("LLM客户端未初始化，请先调用 initialize() 方法")
+    def _build_messages(
+        self,
+        system_prompt: str,
+        user_prompt: Optional[str] = None,
+        query_text: Optional[str] = None,
+        data_json: Optional[str] = None
+    ) -> List[Dict[str, str]]:
+        """
+        构建LLM消息列表
+        Args:
+            system_prompt: 系统提示词
+            user_prompt: 用户提示词模板（可选）
+            query_text: 查询文本（可选）
+            data_json: 数据JSON字符串（可选）
+        Returns:
+            消息列表
+        """
+        messages = [{"role": "system", "content": system_prompt}]
+        if user_prompt:
+            # 将用户模板中的占位符替换为实际值
+            content = user_prompt
+            if query_text is not None:
+                content = content.replace("{query_text}", str(query_text))
+            if data_json is not None:
+                content = content.replace("{data_json}", str(data_json))
+            messages.append({"role": "user", "content": content})
+        elif query_text:
+            messages.append({"role": "user", "content": query_text})
+        else:
+            messages.append({"role": "user", "content": system_prompt})
+        return messages
+    def execute_llm(
+        self,
+        system_prompt: str,
+        user_prompt: Optional[str] = None,
+        query_text: Optional[str] = None,
+        data_json: Optional[str] = None,
+        temperature: Optional[float] = None,
+        max_tokens: Optional[int] = None,
+        use_json_format: bool = False
+    ) -> Optional[Dict[str, Any] | str]:
+        """
+        通用非流式LLM执行方法
+        Args:
+            system_prompt: 系统提示词
+            user_prompt: 用户提示词模板（可选）
+            query_text: 查询文本（可选）
+            data_json: 数据JSON字符串（可选）
+            temperature: 温度参数，默认为 DEFAULT_TEMPERATURE
+            max_tokens: 最大token数，默认为 DEFAULT_MAX_TOKENS
+            use_json_format: 是否使用JSON格式响应
+        Returns:
+            解析后的结果字典或原始字符串，失败返回None
+        """
+        self._ensure_client()
+        try:
+            messages = self._build_messages(system_prompt, user_prompt, query_text, data_json)
+            request_params = {
+                "model": self.model_name,
+                "messages": messages,
+                "temperature": temperature or self.DEFAULT_TEMPERATURE,
+                "max_tokens": max_tokens or self.DEFAULT_MAX_TOKENS
+            }
+            if use_json_format:
+                request_params["response_format"] = self.JSON_RESPONSE_FORMAT
+            llm_response = self._client.chat.completions.create(**request_params)
+            content = common_utils.remove_think_tag(llm_response.choices[0].message.content)
+            if use_json_format:
+                result = json.loads(content)
+                logger.debug(f"LLM JSON响应成功")
+                return result
+            else:
+                logger.debug("LLM文本响应成功")
+                return content
+        except Exception as e:
+            logger.error(f"LLM调用失败: {e.args}")
+            return None
+    def execute_llm_stream(
+        self,
+        system_prompt: str,
+        user_prompt: Optional[str] = None,
+        query_text: Optional[str] = None,
+        data_json: Optional[str] = None,
+        temperature: Optional[float] = None,
+        max_tokens: Optional[int] = None
+    ) -> Generator[str, None, None]:
+        """
+        通用流式LLM执行方法
+        Args:
+            system_prompt: 系统提示词
+            user_prompt: 用户提示词模板（可选）
+            query_text: 查询文本（可选）
+            data_json: 数据JSON字符串（可选）
+            temperature: 温度参数，默认为 DEFAULT_TEMPERATURE
+            max_tokens: 最大token数，默认为 STREAM_MAX_TOKENS
+        Yields:
+            LLM流式响应内容片段
+        """
+        self._ensure_client()
+        logger.debug("开始LLM流式调用")
+        try:
+            messages = self._build_messages(system_prompt, user_prompt, query_text, data_json)
+            logger.info(f"LLM请求: {messages[:2000]}...")
+            llm_stream_response = self._client.chat.completions.create(
+                model=self.model_name,
+                messages=messages,
+                temperature=temperature or self.DEFAULT_TEMPERATURE,
+                stream=True,
+                max_tokens=max_tokens or self.STREAM_MAX_TOKENS
+            )
+            for chunk in llm_stream_response:
+                content = getattr(chunk.choices[0].delta, 'content', None) or getattr(chunk.choices[0].delta, 'reasoning_content', None) or ''
+                if content:
+                    logger.info(f"LLM流式响应: {content[:50]}...")
+                    yield content
+        except Exception as e:
+            error_msg = f"[ERROR] llm 调用失败：{str(e)}"
+            logger.error(error_msg)
+            yield error_msg
+# Global singleton instance, created at import time; it has no client until initialize() is called.
+llm_service = LLMService()