flexllm 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. flexllm/__init__.py +224 -0
  2. flexllm/__main__.py +1096 -0
  3. flexllm/async_api/__init__.py +9 -0
  4. flexllm/async_api/concurrent_call.py +100 -0
  5. flexllm/async_api/concurrent_executor.py +1036 -0
  6. flexllm/async_api/core.py +373 -0
  7. flexllm/async_api/interface.py +12 -0
  8. flexllm/async_api/progress.py +277 -0
  9. flexllm/base_client.py +988 -0
  10. flexllm/batch_tools/__init__.py +16 -0
  11. flexllm/batch_tools/folder_processor.py +317 -0
  12. flexllm/batch_tools/table_processor.py +363 -0
  13. flexllm/cache/__init__.py +10 -0
  14. flexllm/cache/response_cache.py +293 -0
  15. flexllm/chain_of_thought_client.py +1120 -0
  16. flexllm/claudeclient.py +402 -0
  17. flexllm/client_pool.py +698 -0
  18. flexllm/geminiclient.py +563 -0
  19. flexllm/llm_client.py +523 -0
  20. flexllm/llm_parser.py +60 -0
  21. flexllm/mllm_client.py +559 -0
  22. flexllm/msg_processors/__init__.py +174 -0
  23. flexllm/msg_processors/image_processor.py +729 -0
  24. flexllm/msg_processors/image_processor_helper.py +485 -0
  25. flexllm/msg_processors/messages_processor.py +341 -0
  26. flexllm/msg_processors/unified_processor.py +1404 -0
  27. flexllm/openaiclient.py +256 -0
  28. flexllm/pricing/__init__.py +104 -0
  29. flexllm/pricing/data.json +1201 -0
  30. flexllm/pricing/updater.py +223 -0
  31. flexllm/provider_router.py +213 -0
  32. flexllm/token_counter.py +270 -0
  33. flexllm/utils/__init__.py +1 -0
  34. flexllm/utils/core.py +41 -0
  35. flexllm-0.3.3.dist-info/METADATA +573 -0
  36. flexllm-0.3.3.dist-info/RECORD +39 -0
  37. flexllm-0.3.3.dist-info/WHEEL +4 -0
  38. flexllm-0.3.3.dist-info/entry_points.txt +3 -0
  39. flexllm-0.3.3.dist-info/licenses/LICENSE +201 -0
flexllm/openaiclient.py
@@ -0,0 +1,256 @@
+ """
+ OpenAI-compatible API client
+
+ Supports OpenAI, vLLM, Qwen (Tongyi Qianwen), DeepSeek, and other services compatible with the OpenAI API.
+ """
+
+ from typing import TYPE_CHECKING, List, Optional, Union
+
+ from loguru import logger
+
+ from .base_client import LLMClientBase
+ from .cache import ResponseCacheConfig
+
+ if TYPE_CHECKING:
+     from .async_api.interface import RequestResult
+
+
+ class OpenAIClient(LLMClientBase):
+     """
+     OpenAI-compatible API client
+
+     Supports OpenAI, vLLM, Ollama, DeepSeek, and other services compatible with the OpenAI API.
+
+     Example:
+         >>> client = OpenAIClient(
+         ...     base_url="https://api.openai.com/v1",
+         ...     api_key="your-key",
+         ...     model="gpt-4",
+         ... )
+         >>> result = await client.chat_completions(messages)
+
+     Example (Ollama/vLLM local models):
+         >>> client = OpenAIClient(
+         ...     base_url="http://localhost:11434/v1",  # Ollama
+         ...     model="qwen3:4b",
+         ... )
+
+     Example (thinking parameter - unified reasoning control):
+         >>> # Disable thinking (fast responses)
+         >>> result = client.chat_completions_sync(
+         ...     messages=[{"role": "user", "content": "1+1=?"}],
+         ...     thinking=False,
+         ... )
+         >>> # Enable thinking and retrieve the reasoning content
+         >>> result = client.chat_completions_sync(
+         ...     messages=[{"role": "user", "content": "1+1=?"}],
+         ...     thinking=True,
+         ...     return_raw=True,
+         ... )
+         >>> parsed = OpenAIClient.parse_thoughts(result.data)
+         >>> print("Thought:", parsed["thought"])
+         >>> print("Answer:", parsed["answer"])
+
+     thinking parameter values:
+         - False: disable thinking (Ollama: think=False; vLLM/Qwen3: /no_think)
+         - True: enable thinking (Ollama: think=True)
+         - None: use the model's default behavior
+     """
+
+     def __init__(
+         self,
+         base_url: str,
+         api_key: str = "EMPTY",
+         model: Optional[str] = None,
+         concurrency_limit: int = 10,
+         max_qps: int = 1000,
+         timeout: int = 100,
+         retry_times: int = 3,
+         retry_delay: float = 0.55,
+         cache_image: bool = False,
+         cache_dir: str = "image_cache",
+         cache: Optional[ResponseCacheConfig] = None,
+         **kwargs,
+     ):
+         super().__init__(
+             base_url=base_url,
+             api_key=api_key,
+             model=model,
+             concurrency_limit=concurrency_limit,
+             max_qps=max_qps,
+             timeout=timeout,
+             retry_times=retry_times,
+             retry_delay=retry_delay,
+             cache_image=cache_image,
+             cache_dir=cache_dir,
+             cache=cache,
+             **kwargs,
+         )
+         self._headers = {
+             "Content-Type": "application/json",
+             "Authorization": f"Bearer {api_key}",
+         }
+
+     # ========== Core methods required by the base class ==========
+
+     def _get_url(self, model: str, stream: bool = False) -> str:
+         return f"{self._base_url}/chat/completions"
+
+     def _get_headers(self) -> dict:
+         return self._headers
+
+     def _build_request_body(
+         self,
+         messages: List[dict],
+         model: str,
+         stream: bool = False,
+         max_tokens: Optional[int] = None,
+         thinking: Union[bool, None] = None,
+         **kwargs,
+     ) -> dict:
+         """
+         Build the request body.
+
+         Args:
+             thinking: unified reasoning-control parameter
+                 - False: disable thinking (Ollama: think=False; vLLM/Qwen3: /no_think)
+                 - True: enable thinking (Ollama: think=True)
+                 - None: use the model's default behavior
+         """
+         processed_messages = messages
+
+         # When thinking is disabled, append the /no_think tag (vLLM/Qwen3 format)
+         if thinking is False:
+             processed_messages = [m.copy() for m in messages]
+             for i in range(len(processed_messages) - 1, -1, -1):
+                 if processed_messages[i].get("role") == "user":
+                     content = processed_messages[i].get("content", "")
+                     if isinstance(content, str) and "/no_think" not in content:
+                         processed_messages[i]["content"] = content + " /no_think"
+                     break
+
+         body = {"messages": processed_messages, "model": model, "stream": stream}
+         if max_tokens is not None:
+             body["max_tokens"] = max_tokens
+
+         # Ollama format: the think parameter
+         if thinking is True:
+             body["think"] = True
+         elif thinking is False:
+             body["think"] = False
+
+         body.update(kwargs)
+         return body
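
To make the rewrite above concrete, here is a minimal standalone sketch of the thinking=False path, re-implemented outside the package for illustration (the messages and model name are made up):

# The last user message gets a " /no_think" suffix (the vLLM/Qwen3 signal),
# and the body also carries think=False (the Ollama signal).
messages = [
    {"role": "system", "content": "You are terse."},
    {"role": "user", "content": "1+1=?"},
]
processed = [m.copy() for m in messages]
for i in range(len(processed) - 1, -1, -1):
    if processed[i].get("role") == "user":
        content = processed[i].get("content", "")
        if isinstance(content, str) and "/no_think" not in content:
            processed[i]["content"] = content + " /no_think"
        break

body = {"messages": processed, "model": "qwen3:4b", "stream": False, "think": False}
print(body["messages"][-1]["content"])  # 1+1=? /no_think

Note that only the last user message is tagged: the backward loop breaks at the first user role it finds.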
+
+     def _extract_content(self, response_data: dict) -> Optional[str]:
+         try:
+             return response_data["choices"][0]["message"]["content"]
+         except (KeyError, IndexError) as e:
+             logger.warning(f"Failed to extract content: {e}")
+             return None
+
+     def _extract_stream_content(self, data: dict) -> Optional[str]:
+         try:
+             if "choices" in data and len(data["choices"]) > 0:
+                 return data["choices"][0].get("delta", {}).get("content")
+         except Exception:
+             pass
+         return None
+
+     def _extract_tool_calls(self, response_data: dict):
+         """Extract tool_calls in the OpenAI format."""
+         from .base_client import ToolCall
+
+         if not response_data:
+             return None
+
+         try:
+             message = response_data["choices"][0]["message"]
+             tool_calls_data = message.get("tool_calls")
+             if not tool_calls_data:
+                 return None
+             return [
+                 ToolCall(
+                     id=tc["id"],
+                     type=tc["type"],
+                     function=tc["function"],
+                 )
+                 for tc in tool_calls_data
+             ]
+         except (KeyError, IndexError):
+             return None
+
+     @staticmethod
+     def parse_thoughts(response_data: dict) -> dict:
+         """
+         Parse the reasoning content and the final answer out of a response.
+
+         Two formats are supported:
+         1. Dedicated reasoning field (Ollama DeepSeek-R1/Qwen3, etc.)
+         2. Inline tag format (vLLM Qwen3, etc.): <think>...</think> tags
+
+         Args:
+             response_data: raw response data (obtained with return_raw=True)
+
+         Returns:
+             dict: {
+                 "thought": str,  # reasoning process (may be empty)
+                 "answer": str,   # final answer
+             }
+
+         Example:
+             >>> result = client.chat_completions_sync(
+             ...     messages=[...],
+             ...     thinking=True,
+             ...     return_raw=True,
+             ... )
+             >>> parsed = OpenAIClient.parse_thoughts(result.data)
+             >>> print("Thought:", parsed["thought"])
+             >>> print("Answer:", parsed["answer"])
+         """
+         import re
+
+         try:
+             message = response_data.get("choices", [{}])[0].get("message", {})
+             content = message.get("content", "")
+             reasoning = message.get("reasoning", "")
+
+             # If a reasoning field is present, use it directly
+             if reasoning:
+                 return {
+                     "thought": reasoning,
+                     "answer": content,
+                 }
+
+             # Otherwise, try to parse inline <think>...</think> tags (Qwen3 format)
+             think_match = re.search(r"<think>(.*?)</think>", content, re.DOTALL)
+             if think_match:
+                 thought = think_match.group(1).strip()
+                 # The answer is the content with the <think> block removed
+                 answer = re.sub(r"<think>.*?</think>\s*", "", content, flags=re.DOTALL).strip()
+                 return {
+                     "thought": thought,
+                     "answer": answer,
+                 }
+
+             # No reasoning content
+             return {
+                 "thought": "",
+                 "answer": content,
+             }
+         except Exception as e:
+             logger.warning(f"Failed to parse thoughts: {e}")
+             return {"thought": "", "answer": ""}
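
Because parse_thoughts is a static method over plain dicts, it can be exercised without a live server. A minimal sketch, assuming the package is installed so that OpenAIClient imports from flexllm.openaiclient; both mock responses are fabricated for illustration:

from flexllm.openaiclient import OpenAIClient

# Format 1: dedicated "reasoning" field (Ollama DeepSeek-R1/Qwen3 style)
resp = {"choices": [{"message": {"content": "2", "reasoning": "1 + 1 = 2."}}]}
print(OpenAIClient.parse_thoughts(resp))
# {'thought': '1 + 1 = 2.', 'answer': '2'}

# Format 2: inline <think> tags (vLLM Qwen3 style)
resp = {"choices": [{"message": {"content": "<think>1 + 1 = 2.</think>\n2"}}]}
print(OpenAIClient.parse_thoughts(resp))
# {'thought': '1 + 1 = 2.', 'answer': '2'}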
+
+     # ========== OpenAI-specific methods ==========
+
+     def model_list(self) -> List[str]:
+         """List the models available on this endpoint."""
+         import requests
+
+         response = requests.get(
+             f"{self._base_url}/models",
+             headers={"Authorization": f"Bearer {self._api_key}"},
+         )
+         response.raise_for_status()
+         return [m["id"] for m in response.json()["data"]]
flexllm/pricing/__init__.py
@@ -0,0 +1,104 @@
+ """
+ Model pricing module
+
+ Loads model pricing data and provides cost estimation.
+ Pricing data is stored in data.json and can be refreshed via `flexllm pricing --update`.
+ """
+
+ import json
+ from pathlib import Path
+ from typing import Dict, Optional
+
+ # Path to the pricing file
+ PRICING_FILE = Path(__file__).parent / "data.json"
+
+ # Cached pricing data
+ _pricing_cache: Optional[Dict[str, Dict[str, float]]] = None
+
+
+ def _load_pricing() -> Dict[str, Dict[str, float]]:
+     """
+     Load pricing data from data.json.
+
+     Returns:
+         {model_name: {"input": price_per_token, "output": price_per_token}}
+     """
+     if not PRICING_FILE.exists():
+         return {}
+
+     try:
+         with open(PRICING_FILE, "r", encoding="utf-8") as f:
+             data = json.load(f)
+
+         models = data.get("models", {})
+         # Convert $ per 1M tokens to $ per token
+         return {
+             name: {"input": p["input"] / 1e6, "output": p["output"] / 1e6}
+             for name, p in models.items()
+         }
+     except (json.JSONDecodeError, KeyError, TypeError):
+         return {}
+
+
+ def get_pricing() -> Dict[str, Dict[str, float]]:
+     """Return pricing data (cached after the first load)."""
+     global _pricing_cache
+     if _pricing_cache is None:
+         _pricing_cache = _load_pricing()
+     return _pricing_cache
+
+
+ def reload_pricing():
+     """Reload pricing data (to refresh after an update)."""
+     global _pricing_cache
+     _pricing_cache = _load_pricing()
+
+
+ def get_model_pricing(model: str) -> Optional[Dict[str, float]]:
+     """
+     Get the pricing for a given model.
+
+     Args:
+         model: model name (fuzzy matching supported)
+
+     Returns:
+         {"input": price_per_token, "output": price_per_token}, or None
+     """
+     pricing = get_pricing()
+
+     # Exact match
+     if model in pricing:
+         return pricing[model]
+
+     # Fuzzy match: check whether the model name is contained in a pricing key
+     model_lower = model.lower()
+     for key in pricing:
+         if model_lower in key.lower():
+             return pricing[key]
+
+     return None
+
+
+ def estimate_cost(
+     input_tokens: int,
+     output_tokens: int = 0,
+     model: str = "gpt-4o",
+ ) -> float:
+     """
+     Estimate the cost of an API call.
+
+     Args:
+         input_tokens: number of input tokens
+         output_tokens: number of output tokens
+         model: model name
+
+     Returns:
+         Estimated cost in USD.
+     """
+     pricing = get_model_pricing(model)
+
+     if not pricing:
+         # Fall back to gpt-4o-mini pricing
+         pricing = get_model_pricing("gpt-4o-mini") or {"input": 0.15e-6, "output": 0.6e-6}
+
+     return input_tokens * pricing["input"] + output_tokens * pricing["output"]
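
A quick sanity check of the arithmetic, assuming a model name that matches nothing in data.json so the fallback applies; note the fallback first consults data.json for gpt-4o-mini, so with a populated pricing file the per-token prices come from that file rather than the hard-coded values below:

from flexllm.pricing import estimate_cost

# Under the hard-coded fallback ($0.15 per 1M input, $0.60 per 1M output):
#   1_000_000 * 0.15e-6 + 500_000 * 0.6e-6 = 0.15 + 0.30 = 0.45
cost = estimate_cost(input_tokens=1_000_000, output_tokens=500_000, model="no-such-model")
print(f"${cost:.2f}")  # $0.45 under the fallback prices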