flexllm-0.3.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. flexllm/__init__.py +224 -0
  2. flexllm/__main__.py +1096 -0
  3. flexllm/async_api/__init__.py +9 -0
  4. flexllm/async_api/concurrent_call.py +100 -0
  5. flexllm/async_api/concurrent_executor.py +1036 -0
  6. flexllm/async_api/core.py +373 -0
  7. flexllm/async_api/interface.py +12 -0
  8. flexllm/async_api/progress.py +277 -0
  9. flexllm/base_client.py +988 -0
  10. flexllm/batch_tools/__init__.py +16 -0
  11. flexllm/batch_tools/folder_processor.py +317 -0
  12. flexllm/batch_tools/table_processor.py +363 -0
  13. flexllm/cache/__init__.py +10 -0
  14. flexllm/cache/response_cache.py +293 -0
  15. flexllm/chain_of_thought_client.py +1120 -0
  16. flexllm/claudeclient.py +402 -0
  17. flexllm/client_pool.py +698 -0
  18. flexllm/geminiclient.py +563 -0
  19. flexllm/llm_client.py +523 -0
  20. flexllm/llm_parser.py +60 -0
  21. flexllm/mllm_client.py +559 -0
  22. flexllm/msg_processors/__init__.py +174 -0
  23. flexllm/msg_processors/image_processor.py +729 -0
  24. flexllm/msg_processors/image_processor_helper.py +485 -0
  25. flexllm/msg_processors/messages_processor.py +341 -0
  26. flexllm/msg_processors/unified_processor.py +1404 -0
  27. flexllm/openaiclient.py +256 -0
  28. flexllm/pricing/__init__.py +104 -0
  29. flexllm/pricing/data.json +1201 -0
  30. flexllm/pricing/updater.py +223 -0
  31. flexllm/provider_router.py +213 -0
  32. flexllm/token_counter.py +270 -0
  33. flexllm/utils/__init__.py +1 -0
  34. flexllm/utils/core.py +41 -0
  35. flexllm-0.3.3.dist-info/METADATA +573 -0
  36. flexllm-0.3.3.dist-info/RECORD +39 -0
  37. flexllm-0.3.3.dist-info/WHEEL +4 -0
  38. flexllm-0.3.3.dist-info/entry_points.txt +3 -0
  39. flexllm-0.3.3.dist-info/licenses/LICENSE +201 -0
flexllm/claudeclient.py
@@ -0,0 +1,402 @@
+"""
+Anthropic Claude API Client
+
+Supports the Claude model family (claude-3-opus, claude-3-sonnet, claude-3-haiku, etc.)
+"""
+
+import json
+import re
+from typing import List, Optional, Union
+
+import aiohttp
+from loguru import logger
+
+from .base_client import LLMClientBase, ToolCall
+from .cache import ResponseCacheConfig
+
+
+class ClaudeClient(LLMClientBase):
+    """
+    Anthropic Claude API client
+
+    Example:
+        >>> client = ClaudeClient(
+        ...     api_key="your-anthropic-key",
+        ...     model="claude-3-5-sonnet-20241022",
+        ... )
+        >>> result = await client.chat_completions(messages)
+
+    Example (thinking parameter - extended thinking mode):
+        >>> # Enable extended thinking
+        >>> result = client.chat_completions_sync(
+        ...     messages=[{"role": "user", "content": "A complex reasoning problem"}],
+        ...     thinking=True,
+        ...     return_raw=True,
+        ... )
+        >>> parsed = ClaudeClient.parse_thoughts(result.data)
+        >>> print("Thought:", parsed["thought"])
+        >>> print("Answer:", parsed["answer"])
+
+    thinking parameter values:
+        - False: disable extended thinking
+        - True: enable extended thinking (default budget_tokens=10000)
+        - int: enable extended thinking with the given budget_tokens
+        - None: use the model's default behavior
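+
+    Example (thinking with an explicit token budget; the int is passed
+    through as budget_tokens, and 20000 is purely illustrative):
+        >>> result = client.chat_completions_sync(
+        ...     messages=[{"role": "user", "content": "A complex reasoning problem"}],
+        ...     thinking=20000,
+        ...     return_raw=True,
+        ... )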
+    """
+
+    DEFAULT_BASE_URL = "https://api.anthropic.com/v1"
+    DEFAULT_API_VERSION = "2023-06-01"
+
+    def __init__(
+        self,
+        api_key: str,
+        model: Optional[str] = None,
+        base_url: Optional[str] = None,
+        api_version: Optional[str] = None,
+        concurrency_limit: int = 10,
+        max_qps: int = 60,
+        timeout: int = 120,
+        retry_times: int = 3,
+        retry_delay: float = 1.0,
+        cache_image: bool = False,
+        cache_dir: str = "image_cache",
+        cache: Optional[ResponseCacheConfig] = None,
+        **kwargs,
+    ):
+        self._api_version = api_version or self.DEFAULT_API_VERSION
+
+        super().__init__(
+            base_url=base_url or self.DEFAULT_BASE_URL,
+            api_key=api_key,
+            model=model,
+            concurrency_limit=concurrency_limit,
+            max_qps=max_qps,
+            timeout=timeout,
+            retry_times=retry_times,
+            retry_delay=retry_delay,
+            cache_image=cache_image,
+            cache_dir=cache_dir,
+            cache=cache,
+            **kwargs,
+        )
+
+    # ========== Core base-class method implementations ==========
+
+    def _get_url(self, model: str, stream: bool = False) -> str:
+        return f"{self._base_url}/messages"
+
+    def _get_headers(self) -> dict:
+        return {
+            "Content-Type": "application/json",
+            "x-api-key": self._api_key,
+            "anthropic-version": self._api_version,
+        }
+
+    def _build_request_body(
+        self,
+        messages: List[dict],
+        model: str,
+        stream: bool = False,
+        max_tokens: int = 4096,  # required by the Claude API
+        temperature: Optional[float] = None,
+        top_p: Optional[float] = None,
+        top_k: Optional[int] = None,
+        thinking: Union[bool, int, None] = None,
+        **kwargs,
+    ) -> dict:
+        """
+        Build the Claude API request body
+
+        Args:
+            thinking: extended-thinking control parameter
+                - False: disable extended thinking
+                - True: enable extended thinking (default budget_tokens=10000)
+                - int: enable extended thinking with the given budget_tokens
+                - None: use the model's default behavior
+        """
+        # Separate out system messages
+        system_content = None
+        user_messages = []
+
+        for msg in messages:
+            if msg.get("role") == "system":
+                # Merge multiple system messages
+                content = msg.get("content", "")
+                if isinstance(content, list):
+                    content = " ".join(
+                        p.get("text", "") for p in content if p.get("type") == "text"
+                    )
+                system_content = (
+                    (system_content + "\n" + content) if system_content else content
+                )
+            else:
+                user_messages.append(self._convert_message(msg))
+
+        body = {
+            "model": model,
+            "max_tokens": max_tokens,
+            "messages": user_messages,
+        }
+
+        if system_content:
+            body["system"] = system_content
+        if stream:
+            body["stream"] = True
+        if temperature is not None:
+            body["temperature"] = temperature
+        if top_p is not None:
+            body["top_p"] = top_p
+        if top_k is not None:
+            body["top_k"] = top_k
+
+        # Claude extended thinking mode
+        if thinking is True:
+            body["thinking"] = {"type": "enabled", "budget_tokens": 10000}
+        elif isinstance(thinking, int) and thinking > 0:
+            body["thinking"] = {"type": "enabled", "budget_tokens": thinking}
+        elif thinking is False:
+            body["thinking"] = {"type": "disabled"}
+
+        # Pass through any remaining parameters (e.g. tools)
+        body.update(kwargs)
+        return body
+
+    def _convert_message(self, msg: dict) -> dict:
+        """Convert message format (handles multimodal content)"""
+        role = msg.get("role", "user")
+        content = msg.get("content", "")
+
+        # Claude format: role can only be "user" or "assistant"
+        claude_role = "assistant" if role == "assistant" else "user"
+
+        # Handle multimodal content
+        if isinstance(content, list):
+            claude_content = []
+            for item in content:
+                if isinstance(item, str):
+                    claude_content.append({"type": "text", "text": item})
+                elif isinstance(item, dict):
+                    item_type = item.get("type", "text")
+                    if item_type == "text":
+                        claude_content.append(
+                            {"type": "text", "text": item.get("text", "")}
+                        )
+                    elif item_type == "image_url":
+                        # Convert the OpenAI image format to the Claude format
+                        url = item.get("image_url", {}).get("url", "")
+                        if url.startswith("data:"):
+                            # base64 data URI
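+                            # e.g. "data:image/png;base64,iVBORw0..." parses to
+                            # media_type="image/png" plus the raw base64 payload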
+                            match = re.match(r"data:([^;]+);base64,(.+)", url)
+                            if match:
+                                claude_content.append(
+                                    {
+                                        "type": "image",
+                                        "source": {
+                                            "type": "base64",
+                                            "media_type": match.group(1),
+                                            "data": match.group(2),
+                                        },
+                                    }
+                                )
+                        else:
+                            # plain URL
+                            claude_content.append(
+                                {
+                                    "type": "image",
+                                    "source": {
+                                        "type": "url",
+                                        "url": url,
+                                    },
+                                }
+                            )
+            return {"role": claude_role, "content": claude_content}
+
+        return {"role": claude_role, "content": content}
+
+    def _extract_content(self, response_data: dict) -> Optional[str]:
+        """Extract the text content from a Claude response"""
+        try:
+            content_blocks = response_data.get("content", [])
+            texts = []
+            for block in content_blocks:
+                if block.get("type") == "text":
+                    texts.append(block.get("text", ""))
+            return "".join(texts) if texts else None
+        except Exception as e:
+            logger.warning(f"Failed to extract content: {e}")
+            return None
+
+    def _extract_usage(self, response_data: dict) -> Optional[dict]:
+        """Extract Claude usage info and convert it to the unified format"""
+        if not response_data:
+            return None
+        usage = response_data.get("usage")
+        if not usage:
+            return None
+        return {
+            "prompt_tokens": usage.get("input_tokens", 0),
+            "completion_tokens": usage.get("output_tokens", 0),
+            "total_tokens": usage.get("input_tokens", 0) + usage.get("output_tokens", 0),
+        }
+
+    def _extract_tool_calls(self, response_data: dict) -> Optional[List[ToolCall]]:
+        """Extract Claude tool_use blocks"""
+        try:
+            content_blocks = response_data.get("content", [])
+            tool_calls = []
+            for block in content_blocks:
+                if block.get("type") == "tool_use":
+                    tool_calls.append(
+                        ToolCall(
+                            id=block.get("id", ""),
+                            type="function",
+                            function={
+                                "name": block.get("name", ""),
+                                "arguments": json.dumps(block.get("input", {})),
+                            },
+                        )
+                    )
+            return tool_calls if tool_calls else None
+        except Exception:
+            return None
+
+    # ========== Streaming responses ==========
+
+    def _extract_stream_content(self, data: dict) -> Optional[str]:
+        """Extract content from a Claude streaming event"""
+        # Claude stream format: event: content_block_delta, data: {"delta": {"text": "..."}}
+        if data.get("type") == "content_block_delta":
+            delta = data.get("delta", {})
+            if delta.get("type") == "text_delta":
+                return delta.get("text")
+        return None
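+
+    # For orientation, Anthropic's SSE stream emits events in roughly this
+    # order (per the public docs): message_start -> content_block_start ->
+    # content_block_delta (repeated) -> content_block_stop -> message_delta ->
+    # message_stop. The handler below only consumes the events it needs.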
+
+    async def chat_completions_stream(
+        self,
+        messages: List[dict],
+        model: Optional[str] = None,
+        return_usage: bool = False,
+        preprocess_msg: bool = False,
+        url: Optional[str] = None,
+        timeout: Optional[int] = None,
+        **kwargs,
+    ):
+        """Claude streaming chat completion"""
+        effective_model = self._get_effective_model(model)
+        messages = await self._preprocess_messages(messages, preprocess_msg)
+
+        body = self._build_request_body(
+            messages, effective_model, stream=True, **kwargs
+        )
+        effective_url = url or self._get_url(effective_model, stream=True)
+        headers = self._get_headers()
+
+        effective_timeout = timeout if timeout is not None else self._timeout
+        aio_timeout = aiohttp.ClientTimeout(total=effective_timeout)
+
+        async with aiohttp.ClientSession(trust_env=True) as session:
+            async with session.post(
+                effective_url, json=body, headers=headers, timeout=aio_timeout
+            ) as response:
+                if response.status != 200:
+                    error_text = await response.text()
+                    raise Exception(f"HTTP {response.status}: {error_text}")
+
+                usage_data = None
+                async for line in response.content:
+                    line = line.decode("utf-8").strip()
+                    if line.startswith("data: "):
+                        data_str = line[6:]
+                        # Defensive: Claude streams end with message_stop, not [DONE]
+                        if data_str == "[DONE]":
+                            break
+                        try:
+                            data = json.loads(data_str)
+
+                            # Extract content
+                            content = self._extract_stream_content(data)
+                            if content:
+                                if return_usage:
+                                    yield {"type": "content", "content": content}
+                                else:
+                                    yield content
+
+                            # Check message_delta for usage (cumulative output
+                            # tokens; message_delta usually omits input_tokens,
+                            # so keep any prompt_tokens seen in message_start)
+                            if data.get("type") == "message_delta":
+                                usage = data.get("usage")
+                                if usage:
+                                    prompt_tokens = usage.get(
+                                        "input_tokens",
+                                        usage_data["prompt_tokens"] if usage_data else 0,
+                                    )
+                                    completion_tokens = usage.get("output_tokens", 0)
+                                    usage_data = {
+                                        "prompt_tokens": prompt_tokens,
+                                        "completion_tokens": completion_tokens,
+                                        "total_tokens": prompt_tokens + completion_tokens,
+                                    }
+
+                            # Check message_start for usage (input tokens)
+                            if data.get("type") == "message_start":
+                                msg_usage = data.get("message", {}).get("usage", {})
+                                if msg_usage:
+                                    usage_data = {
+                                        "prompt_tokens": msg_usage.get("input_tokens", 0),
+                                        "completion_tokens": 0,
+                                        "total_tokens": msg_usage.get("input_tokens", 0),
+                                    }
+
+                        except json.JSONDecodeError:
+                            continue
+
+        # Finally, yield the usage
+        if return_usage and usage_data:
+            yield {"type": "usage", "usage": usage_data}
+
+    @staticmethod
+    def parse_thoughts(response_data: dict) -> dict:
+        """
+        Parse the thinking content and the answer from a response
+
+        When thinking=True is used, this method can be used to parse the response.
+
+        Args:
+            response_data: raw response data (obtained via return_raw=True)
+
+        Returns:
+            dict: {
+                "thought": str,  # thinking process (may be empty)
+                "answer": str,   # final answer
+            }
+        """
+        try:
+            content_blocks = response_data.get("content", [])
+            thoughts = []
+            answers = []
+
+            for block in content_blocks:
+                block_type = block.get("type", "")
+                if block_type == "thinking":
+                    thoughts.append(block.get("thinking", ""))
+                elif block_type == "text":
+                    answers.append(block.get("text", ""))
+
+            return {
+                "thought": "\n".join(thoughts),
+                "answer": "".join(answers),
+            }
+        except Exception as e:
+            logger.warning(f"Failed to parse thoughts: {e}")
+            return {"thought": "", "answer": ""}
+
+    # ========== Claude-specific methods ==========
+
+    def model_list(self) -> List[str]:
+        """Return a static list of Claude models"""
+        return [
+            "claude-sonnet-4-20250514",
+            "claude-3-5-sonnet-20241022",
+            "claude-3-5-haiku-20241022",
+            "claude-3-opus-20240229",
+            "claude-3-sonnet-20240229",
+            "claude-3-haiku-20240307",
+        ]