flexllm 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. flexllm/__init__.py +224 -0
  2. flexllm/__main__.py +1096 -0
  3. flexllm/async_api/__init__.py +9 -0
  4. flexllm/async_api/concurrent_call.py +100 -0
  5. flexllm/async_api/concurrent_executor.py +1036 -0
  6. flexllm/async_api/core.py +373 -0
  7. flexllm/async_api/interface.py +12 -0
  8. flexllm/async_api/progress.py +277 -0
  9. flexllm/base_client.py +988 -0
  10. flexllm/batch_tools/__init__.py +16 -0
  11. flexllm/batch_tools/folder_processor.py +317 -0
  12. flexllm/batch_tools/table_processor.py +363 -0
  13. flexllm/cache/__init__.py +10 -0
  14. flexllm/cache/response_cache.py +293 -0
  15. flexllm/chain_of_thought_client.py +1120 -0
  16. flexllm/claudeclient.py +402 -0
  17. flexllm/client_pool.py +698 -0
  18. flexllm/geminiclient.py +563 -0
  19. flexllm/llm_client.py +523 -0
  20. flexllm/llm_parser.py +60 -0
  21. flexllm/mllm_client.py +559 -0
  22. flexllm/msg_processors/__init__.py +174 -0
  23. flexllm/msg_processors/image_processor.py +729 -0
  24. flexllm/msg_processors/image_processor_helper.py +485 -0
  25. flexllm/msg_processors/messages_processor.py +341 -0
  26. flexllm/msg_processors/unified_processor.py +1404 -0
  27. flexllm/openaiclient.py +256 -0
  28. flexllm/pricing/__init__.py +104 -0
  29. flexllm/pricing/data.json +1201 -0
  30. flexllm/pricing/updater.py +223 -0
  31. flexllm/provider_router.py +213 -0
  32. flexllm/token_counter.py +270 -0
  33. flexllm/utils/__init__.py +1 -0
  34. flexllm/utils/core.py +41 -0
  35. flexllm-0.3.3.dist-info/METADATA +573 -0
  36. flexllm-0.3.3.dist-info/RECORD +39 -0
  37. flexllm-0.3.3.dist-info/WHEEL +4 -0
  38. flexllm-0.3.3.dist-info/entry_points.txt +3 -0
  39. flexllm-0.3.3.dist-info/licenses/LICENSE +201 -0
flexllm/openaiclient.py
@@ -0,0 +1,256 @@
+ """
+ OpenAI-compatible API client
+
+ Supports OpenAI, vLLM, Qwen (Tongyi Qianwen), DeepSeek, and other services compatible with the OpenAI API.
+ """
+
+ from typing import TYPE_CHECKING, List, Optional, Union
+
+ from loguru import logger
+
+ from .base_client import LLMClientBase
+ from .cache import ResponseCacheConfig
+
+ if TYPE_CHECKING:
+     from .async_api.interface import RequestResult
+
+
+ class OpenAIClient(LLMClientBase):
+     """
+     OpenAI-compatible API client
+
+     Supports OpenAI, vLLM, Ollama, DeepSeek, and other services compatible with the OpenAI API.
+
+     Example:
+         >>> client = OpenAIClient(
+         ...     base_url="https://api.openai.com/v1",
+         ...     api_key="your-key",
+         ...     model="gpt-4",
+         ... )
+         >>> result = await client.chat_completions(messages)
+
+     Example (Ollama/vLLM local models):
+         >>> client = OpenAIClient(
+         ...     base_url="http://localhost:11434/v1",  # Ollama
+         ...     model="qwen3:4b",
+         ... )
+
+     Example (thinking parameter - unified reasoning control):
+         >>> # Disable thinking (fast responses)
+         >>> result = client.chat_completions_sync(
+         ...     messages=[{"role": "user", "content": "1+1=?"}],
+         ...     thinking=False,
+         ... )
+         >>> # Enable thinking and retrieve the reasoning content
+         >>> result = client.chat_completions_sync(
+         ...     messages=[{"role": "user", "content": "1+1=?"}],
+         ...     thinking=True,
+         ...     return_raw=True,
+         ... )
+         >>> parsed = OpenAIClient.parse_thoughts(result.data)
+         >>> print("Thought:", parsed["thought"])
+         >>> print("Answer:", parsed["answer"])
+
+     thinking parameter values:
+         - False: disable thinking (Ollama: think=False; vLLM/Qwen3: /no_think)
+         - True: enable thinking (Ollama: think=True)
+         - None: use the model's default behavior
+     """
+
+     def __init__(
+         self,
+         base_url: str,
+         api_key: str = "EMPTY",
+         model: Optional[str] = None,
+         concurrency_limit: int = 10,
+         max_qps: int = 1000,
+         timeout: int = 100,
+         retry_times: int = 3,
+         retry_delay: float = 0.55,
+         cache_image: bool = False,
+         cache_dir: str = "image_cache",
+         cache: Optional[ResponseCacheConfig] = None,
+         **kwargs,
+     ):
+         super().__init__(
+             base_url=base_url,
+             api_key=api_key,
+             model=model,
+             concurrency_limit=concurrency_limit,
+             max_qps=max_qps,
+             timeout=timeout,
+             retry_times=retry_times,
+             retry_delay=retry_delay,
+             cache_image=cache_image,
+             cache_dir=cache_dir,
+             cache=cache,
+             **kwargs,
+         )
+         self._headers = {
+             "Content-Type": "application/json",
+             "Authorization": f"Bearer {api_key}",
+         }
+
+     # ========== Core methods required by the base class ==========
+
+     def _get_url(self, model: str, stream: bool = False) -> str:
+         return f"{self._base_url}/chat/completions"
+
+     def _get_headers(self) -> dict:
+         return self._headers
+
+     def _build_request_body(
+         self,
+         messages: List[dict],
+         model: str,
+         stream: bool = False,
+         max_tokens: Optional[int] = None,
+         thinking: Union[bool, None] = None,
+         **kwargs,
+     ) -> dict:
+         """
+         Build the request body.
+
+         Args:
+             thinking: unified reasoning-control parameter
+                 - False: disable thinking (Ollama: think=False; vLLM/Qwen3: /no_think)
+                 - True: enable thinking (Ollama: think=True)
+                 - None: use the model's default behavior
+         """
+         processed_messages = messages
+
+         # When thinking is disabled, append the /no_think tag (vLLM/Qwen3 format)
+         if thinking is False:
+             processed_messages = [m.copy() for m in messages]
+             for i in range(len(processed_messages) - 1, -1, -1):
+                 if processed_messages[i].get("role") == "user":
+                     content = processed_messages[i].get("content", "")
+                     if isinstance(content, str) and "/no_think" not in content:
+                         processed_messages[i]["content"] = content + " /no_think"
+                     break
+
+         body = {"messages": processed_messages, "model": model, "stream": stream}
+         if max_tokens is not None:
+             body["max_tokens"] = max_tokens
+
+         # Ollama format: the think parameter
+         if thinking is True:
+             body["think"] = True
+         elif thinking is False:
+             body["think"] = False
+
+         body.update(kwargs)
+         return body
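
To make the rewrite above concrete, here is a minimal standalone sketch of the thinking=False path, re-implemented outside the package for illustration (the messages and model name are made up):

# The last user message gets a " /no_think" suffix (the vLLM/Qwen3 signal),
# and the body also carries think=False (the Ollama signal).
messages = [
    {"role": "system", "content": "You are terse."},
    {"role": "user", "content": "1+1=?"},
]
processed = [m.copy() for m in messages]
for i in range(len(processed) - 1, -1, -1):
    if processed[i].get("role") == "user":
        content = processed[i].get("content", "")
        if isinstance(content, str) and "/no_think" not in content:
            processed[i]["content"] = content + " /no_think"
        break

body = {"messages": processed, "model": "qwen3:4b", "stream": False, "think": False}
print(body["messages"][-1]["content"])  # 1+1=? /no_think

Note that only the last user message is tagged: the backward loop breaks at the first user role it finds.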
+
+     def _extract_content(self, response_data: dict) -> Optional[str]:
+         try:
+             return response_data["choices"][0]["message"]["content"]
+         except (KeyError, IndexError) as e:
+             logger.warning(f"Failed to extract content: {e}")
+             return None
+
+     def _extract_stream_content(self, data: dict) -> Optional[str]:
+         try:
+             if "choices" in data and len(data["choices"]) > 0:
+                 return data["choices"][0].get("delta", {}).get("content")
+         except Exception:
+             pass
+         return None
+
+     def _extract_tool_calls(self, response_data: dict):
+         """Extract tool_calls in the OpenAI format."""
+         from .base_client import ToolCall
+
+         if not response_data:
+             return None
+
+         try:
+             message = response_data["choices"][0]["message"]
+             tool_calls_data = message.get("tool_calls")
+             if not tool_calls_data:
+                 return None
+             return [
+                 ToolCall(
+                     id=tc["id"],
+                     type=tc["type"],
+                     function=tc["function"],
+                 )
+                 for tc in tool_calls_data
+             ]
+         except (KeyError, IndexError):
+             return None
+
+     @staticmethod
+     def parse_thoughts(response_data: dict) -> dict:
+         """
+         Parse the reasoning content and the final answer out of a response.
+
+         Two formats are supported:
+         1. Dedicated reasoning field (Ollama DeepSeek-R1/Qwen3, etc.)
+         2. Inline tag format (vLLM Qwen3, etc.): <think>...</think> tags
+
+         Args:
+             response_data: raw response data (obtained with return_raw=True)
+
+         Returns:
+             dict: {
+                 "thought": str,  # reasoning process (may be empty)
+                 "answer": str,   # final answer
+             }
+
+         Example:
+             >>> result = client.chat_completions_sync(
+             ...     messages=[...],
+             ...     thinking=True,
+             ...     return_raw=True,
+             ... )
+             >>> parsed = OpenAIClient.parse_thoughts(result.data)
+             >>> print("Thought:", parsed["thought"])
+             >>> print("Answer:", parsed["answer"])
+         """
+         import re
+
+         try:
+             message = response_data.get("choices", [{}])[0].get("message", {})
+             content = message.get("content", "")
+             reasoning = message.get("reasoning", "")
+
+             # If a reasoning field is present, use it directly
+             if reasoning:
+                 return {
+                     "thought": reasoning,
+                     "answer": content,
+                 }
+
+             # Otherwise, try to parse inline <think>...</think> tags (Qwen3 format)
+             think_match = re.search(r"<think>(.*?)</think>", content, re.DOTALL)
+             if think_match:
+                 thought = think_match.group(1).strip()
+                 # The answer is the content with the <think> block removed
+                 answer = re.sub(r"<think>.*?</think>\s*", "", content, flags=re.DOTALL).strip()
+                 return {
+                     "thought": thought,
+                     "answer": answer,
+                 }
+
+             # No reasoning content
+             return {
+                 "thought": "",
+                 "answer": content,
+             }
+         except Exception as e:
+             logger.warning(f"Failed to parse thoughts: {e}")
+             return {"thought": "", "answer": ""}
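
Because parse_thoughts is a static method over plain dicts, it can be exercised without a live server. A minimal sketch, assuming the package is installed so that OpenAIClient imports from flexllm.openaiclient; both mock responses are fabricated for illustration:

from flexllm.openaiclient import OpenAIClient

# Format 1: dedicated "reasoning" field (Ollama DeepSeek-R1/Qwen3 style)
resp = {"choices": [{"message": {"content": "2", "reasoning": "1 + 1 = 2."}}]}
print(OpenAIClient.parse_thoughts(resp))
# {'thought': '1 + 1 = 2.', 'answer': '2'}

# Format 2: inline <think> tags (vLLM Qwen3 style)
resp = {"choices": [{"message": {"content": "<think>1 + 1 = 2.</think>\n2"}}]}
print(OpenAIClient.parse_thoughts(resp))
# {'thought': '1 + 1 = 2.', 'answer': '2'}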
+
+     # ========== OpenAI-specific methods ==========
+
+     def model_list(self) -> List[str]:
+         """List the models available on this endpoint."""
+         import requests
+
+         response = requests.get(
+             f"{self._base_url}/models",
+             headers={"Authorization": f"Bearer {self._api_key}"},
+         )
+         response.raise_for_status()
+         return [m["id"] for m in response.json()["data"]]
flexllm/pricing/__init__.py
@@ -0,0 +1,104 @@
+ """
+ Model pricing module
+
+ Loads model pricing data and provides cost estimation.
+ Pricing data is stored in data.json and can be refreshed via `flexllm pricing --update`.
+ """
+
+ import json
+ from pathlib import Path
+ from typing import Dict, Optional
+
+ # Path to the pricing file
+ PRICING_FILE = Path(__file__).parent / "data.json"
+
+ # Cached pricing data
+ _pricing_cache: Optional[Dict[str, Dict[str, float]]] = None
+
+
+ def _load_pricing() -> Dict[str, Dict[str, float]]:
+     """
+     Load pricing data from data.json.
+
+     Returns:
+         {model_name: {"input": price_per_token, "output": price_per_token}}
+     """
+     if not PRICING_FILE.exists():
+         return {}
+
+     try:
+         with open(PRICING_FILE, "r", encoding="utf-8") as f:
+             data = json.load(f)
+
+         models = data.get("models", {})
+         # Convert $ per 1M tokens to $ per token
+         return {
+             name: {"input": p["input"] / 1e6, "output": p["output"] / 1e6}
+             for name, p in models.items()
+         }
+     except (json.JSONDecodeError, KeyError, TypeError):
+         return {}
+
+
+ def get_pricing() -> Dict[str, Dict[str, float]]:
+     """Return pricing data (cached after the first load)."""
+     global _pricing_cache
+     if _pricing_cache is None:
+         _pricing_cache = _load_pricing()
+     return _pricing_cache
+
+
+ def reload_pricing():
+     """Reload pricing data (to refresh after an update)."""
+     global _pricing_cache
+     _pricing_cache = _load_pricing()
+
+
+ def get_model_pricing(model: str) -> Optional[Dict[str, float]]:
+     """
+     Get the pricing for a given model.
+
+     Args:
+         model: model name (fuzzy matching supported)
+
+     Returns:
+         {"input": price_per_token, "output": price_per_token}, or None
+     """
+     pricing = get_pricing()
+
+     # Exact match
+     if model in pricing:
+         return pricing[model]
+
+     # Fuzzy match: check whether the model name is contained in a pricing key
+     model_lower = model.lower()
+     for key in pricing:
+         if model_lower in key.lower():
+             return pricing[key]
+
+     return None
+
+
+ def estimate_cost(
+     input_tokens: int,
+     output_tokens: int = 0,
+     model: str = "gpt-4o",
+ ) -> float:
+     """
+     Estimate the cost of an API call.
+
+     Args:
+         input_tokens: number of input tokens
+         output_tokens: number of output tokens
+         model: model name
+
+     Returns:
+         Estimated cost in USD.
+     """
+     pricing = get_model_pricing(model)
+
+     if not pricing:
+         # Fall back to gpt-4o-mini pricing
+         pricing = get_model_pricing("gpt-4o-mini") or {"input": 0.15e-6, "output": 0.6e-6}
+
+     return input_tokens * pricing["input"] + output_tokens * pricing["output"]
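
A quick sanity check of the arithmetic, assuming a model name that matches nothing in data.json so the fallback applies; note the fallback first consults data.json for gpt-4o-mini, so with a populated pricing file the per-token prices come from that file rather than the hard-coded values below:

from flexllm.pricing import estimate_cost

# Under the hard-coded fallback ($0.15 per 1M input, $0.60 per 1M output):
#   1_000_000 * 0.15e-6 + 500_000 * 0.6e-6 = 0.15 + 0.30 = 0.45
cost = estimate_cost(input_tokens=1_000_000, output_tokens=500_000, model="no-such-model")
print(f"${cost:.2f}")  # $0.45 under the fallback prices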