flexllm-0.3.3-py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- flexllm/__init__.py +224 -0
- flexllm/__main__.py +1096 -0
- flexllm/async_api/__init__.py +9 -0
- flexllm/async_api/concurrent_call.py +100 -0
- flexllm/async_api/concurrent_executor.py +1036 -0
- flexllm/async_api/core.py +373 -0
- flexllm/async_api/interface.py +12 -0
- flexllm/async_api/progress.py +277 -0
- flexllm/base_client.py +988 -0
- flexllm/batch_tools/__init__.py +16 -0
- flexllm/batch_tools/folder_processor.py +317 -0
- flexllm/batch_tools/table_processor.py +363 -0
- flexllm/cache/__init__.py +10 -0
- flexllm/cache/response_cache.py +293 -0
- flexllm/chain_of_thought_client.py +1120 -0
- flexllm/claudeclient.py +402 -0
- flexllm/client_pool.py +698 -0
- flexllm/geminiclient.py +563 -0
- flexllm/llm_client.py +523 -0
- flexllm/llm_parser.py +60 -0
- flexllm/mllm_client.py +559 -0
- flexllm/msg_processors/__init__.py +174 -0
- flexllm/msg_processors/image_processor.py +729 -0
- flexllm/msg_processors/image_processor_helper.py +485 -0
- flexllm/msg_processors/messages_processor.py +341 -0
- flexllm/msg_processors/unified_processor.py +1404 -0
- flexllm/openaiclient.py +256 -0
- flexllm/pricing/__init__.py +104 -0
- flexllm/pricing/data.json +1201 -0
- flexllm/pricing/updater.py +223 -0
- flexllm/provider_router.py +213 -0
- flexllm/token_counter.py +270 -0
- flexllm/utils/__init__.py +1 -0
- flexllm/utils/core.py +41 -0
- flexllm-0.3.3.dist-info/METADATA +573 -0
- flexllm-0.3.3.dist-info/RECORD +39 -0
- flexllm-0.3.3.dist-info/WHEEL +4 -0
- flexllm-0.3.3.dist-info/entry_points.txt +3 -0
- flexllm-0.3.3.dist-info/licenses/LICENSE +201 -0
flexllm/__init__.py
ADDED
@@ -0,0 +1,224 @@
"""
flexllm - High-performance LLM client

Batch processing, caching, and checkpoint recovery for LLM APIs.

Example:
    # =====================================================
    # 1. LLMClient - Unified Client (Recommended)
    # =====================================================
    from flexllm import LLMClient

    # Auto-detect the provider (inferred from base_url)
    client = LLMClient(
        base_url="https://api.openai.com/v1",  # or a vLLM/Ollama/DeepSeek endpoint
        api_key="your-key",
        model="gpt-4",
        concurrency_limit=10,
        retry_times=3,
    )

    # Synchronous call (simple scenarios)
    result = client.chat_completions_sync(
        messages=[{"role": "user", "content": "Hello!"}]
    )

    # Async batch calls + checkpoint recovery
    results = await client.chat_completions_batch(
        messages_list,
        show_progress=True,
        output_jsonl="results.jsonl",  # incremental writes; resumes automatically after interruption
    )

    # Streaming output
    async for chunk in client.chat_completions_stream(messages):
        print(chunk, end="", flush=True)

    # Using Gemini
    gemini_client = LLMClient(
        provider="gemini",
        api_key="your-google-key",
        model="gemini-2.5-flash",
    )

    # =====================================================
    # 2. OpenAIClient - OpenAI-compatible APIs (vLLM, Ollama, etc.)
    # =====================================================
    from flexllm import OpenAIClient, ResponseCacheConfig

    client = OpenAIClient(
        base_url="https://api.example.com/v1",
        api_key="your-key",
        model="qwen-vl-plus",
        concurrency_limit=10,                     # max concurrency
        max_qps=50,                               # QPS limit
        retry_times=3,                            # automatic retries
        cache=ResponseCacheConfig(enabled=True),  # enable response caching (default 1-hour TTL)
    )

    # Single call
    result = await client.chat_completions(messages)

    # Batch calls + checkpoint recovery (resumes from cache/file after interruption)
    results = await client.chat_completions_batch(
        messages_list,
        show_progress=True,
        output_jsonl="results.jsonl",  # incremental file writes (checkpoint recovery)
        flush_interval=1.0,            # flush to disk every second
    )

    # Streaming output
    async for chunk in client.chat_completions_stream(messages):
        print(chunk, end="", flush=True)

    # =====================================================
    # 3. GeminiClient - Google Gemini (Developer API / Vertex AI)
    # =====================================================
    from flexllm import GeminiClient

    # Gemini Developer API
    gemini = GeminiClient(
        api_key="your-google-api-key",
        model="gemini-2.5-flash",
        concurrency_limit=10,
    )
    result = await gemini.chat_completions(messages)

    # Vertex AI mode
    gemini_vertex = GeminiClient(
        project_id="your-project-id",
        location="us-central1",
        model="gemini-2.5-flash",
        use_vertex_ai=True,
    )

    # Gemini thinking mode
    result = await gemini.chat_completions(
        messages,
        thinking="high",  # False, True, "minimal", "low", "medium", "high"
    )

    # =====================================================
    # 4. Multi-endpoint load balancing and failover (recommended)
    # =====================================================
    from flexllm import LLMClientPool

    # Create a client pool (round-robin + failover)
    pool = LLMClientPool(
        endpoints=[
            {"base_url": "http://host1:8000/v1", "api_key": "key1", "model": "qwen"},
            {"base_url": "http://host2:8000/v1", "api_key": "key2", "model": "qwen"},
        ],
        load_balance="round_robin",  # round_robin, weighted, random, fallback
        fallback=True,               # automatically switch to another endpoint on failure
    )

    # Same interface as LLMClient
    result = await pool.chat_completions(messages)
    results = await pool.chat_completions_batch(messages_list)

    # Batch calls can be distributed across endpoints and processed in parallel
    results = await pool.chat_completions_batch(messages_list, distribute=True)

    # =====================================================
    # 5. Low-level provider router (advanced usage)
    # =====================================================
    from flexllm import ProviderRouter, ProviderConfig, create_router_from_urls

    # Quick setup (round-robin over multiple URLs)
    router = create_router_from_urls(
        urls=["http://host1:8000/v1", "http://host2:8000/v1"],
        api_key="EMPTY",
        strategy="round_robin",
    )

    # Get the next available provider
    provider = router.get_next()
    client = OpenAIClient(base_url=provider.base_url, api_key=provider.api_key)

    # Report success/failure to update provider state (automatic fallback)
    router.mark_success(provider)  # or router.mark_failed(provider)

    # =====================================================
    # 6. Response cache configuration
    # =====================================================
    from flexllm import ResponseCacheConfig

    cache = ResponseCacheConfig()                        # disabled by default
    cache = ResponseCacheConfig(enabled=True)            # enabled (default 1-day TTL)
    cache = ResponseCacheConfig(enabled=True, ttl=0)     # enabled (never expires)
    cache = ResponseCacheConfig(enabled=False)           # explicitly disabled
    cache = ResponseCacheConfig(enabled=True, ttl=3600)  # custom TTL in seconds
"""

__version__ = "0.3.3"

# Multimodal model features
from .mllm_client import MllmClient
from .batch_tools import MllmFolderProcessor, MllmTableProcessor

# Core LLM features
from .base_client import LLMClientBase, ChatCompletionResult, BatchResultItem, ToolCall
from .openaiclient import OpenAIClient
from .geminiclient import GeminiClient
from .claudeclient import ClaudeClient
from .llm_client import LLMClient
from .llm_parser import *

# Token counting and cost estimation
from .token_counter import (
    count_tokens,
    count_messages_tokens,
    estimate_cost,
    estimate_batch_cost,
    messages_hash,
    MODEL_PRICING,
)

# Response cache
from .cache import ResponseCache, ResponseCacheConfig

# Provider routing
from .provider_router import ProviderRouter, ProviderConfig, create_router_from_urls

# Client pool
from .client_pool import LLMClientPool, EndpointConfig

# Chain of Thought
from .chain_of_thought_client import ChainOfThoughtClient, Step

__all__ = [
    # Clients
    'LLMClientBase',
    'MllmClient',
    'MllmTableProcessor',
    'MllmFolderProcessor',
    'OpenAIClient',
    'GeminiClient',
    'ClaudeClient',
    'LLMClient',
    # Result types
    'ChatCompletionResult',
    'BatchResultItem',
    'ToolCall',
    # Token counting
    'count_tokens',
    'count_messages_tokens',
    'estimate_cost',
    'estimate_batch_cost',
    'messages_hash',
    'MODEL_PRICING',
    # Cache
    'ResponseCache',
    'ResponseCacheConfig',
    # Provider routing
    'ProviderRouter',
    'ProviderConfig',
    'create_router_from_urls',
    # Client pool
    'LLMClientPool',
    'EndpointConfig',
    # Chain of Thought
    'ChainOfThoughtClient',
    'Step',
]
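For reference, a minimal end-to-end sketch assembled only from the patterns shown in the module docstring above: a batch run with incremental writes to a JSONL checkpoint file. The endpoint, API key, model name, and prompt contents are placeholders, and the main() wrapper is illustrative rather than part of the package.

import asyncio

from flexllm import LLMClient


async def main() -> None:
    # Placeholder endpoint, key, and model; substitute real values.
    client = LLMClient(
        base_url="https://api.openai.com/v1",
        api_key="your-key",
        model="gpt-4",
        concurrency_limit=10,
        retry_times=3,
    )

    # A batch of single-turn conversations; the contents are illustrative.
    messages_list = [
        [{"role": "user", "content": f"Summarize item {i} in one sentence."}]
        for i in range(100)
    ]

    # Per the docstring, results are appended to results.jsonl incrementally,
    # so re-running the script resumes from where it was interrupted.
    results = await client.chat_completions_batch(
        messages_list,
        show_progress=True,
        output_jsonl="results.jsonl",
    )
    print(f"Finished {len(results)} requests")


if __name__ == "__main__":
    asyncio.run(main())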