flexllm 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. flexllm/__init__.py +224 -0
  2. flexllm/__main__.py +1096 -0
  3. flexllm/async_api/__init__.py +9 -0
  4. flexllm/async_api/concurrent_call.py +100 -0
  5. flexllm/async_api/concurrent_executor.py +1036 -0
  6. flexllm/async_api/core.py +373 -0
  7. flexllm/async_api/interface.py +12 -0
  8. flexllm/async_api/progress.py +277 -0
  9. flexllm/base_client.py +988 -0
  10. flexllm/batch_tools/__init__.py +16 -0
  11. flexllm/batch_tools/folder_processor.py +317 -0
  12. flexllm/batch_tools/table_processor.py +363 -0
  13. flexllm/cache/__init__.py +10 -0
  14. flexllm/cache/response_cache.py +293 -0
  15. flexllm/chain_of_thought_client.py +1120 -0
  16. flexllm/claudeclient.py +402 -0
  17. flexllm/client_pool.py +698 -0
  18. flexllm/geminiclient.py +563 -0
  19. flexllm/llm_client.py +523 -0
  20. flexllm/llm_parser.py +60 -0
  21. flexllm/mllm_client.py +559 -0
  22. flexllm/msg_processors/__init__.py +174 -0
  23. flexllm/msg_processors/image_processor.py +729 -0
  24. flexllm/msg_processors/image_processor_helper.py +485 -0
  25. flexllm/msg_processors/messages_processor.py +341 -0
  26. flexllm/msg_processors/unified_processor.py +1404 -0
  27. flexllm/openaiclient.py +256 -0
  28. flexllm/pricing/__init__.py +104 -0
  29. flexllm/pricing/data.json +1201 -0
  30. flexllm/pricing/updater.py +223 -0
  31. flexllm/provider_router.py +213 -0
  32. flexllm/token_counter.py +270 -0
  33. flexllm/utils/__init__.py +1 -0
  34. flexllm/utils/core.py +41 -0
  35. flexllm-0.3.3.dist-info/METADATA +573 -0
  36. flexllm-0.3.3.dist-info/RECORD +39 -0
  37. flexllm-0.3.3.dist-info/WHEEL +4 -0
  38. flexllm-0.3.3.dist-info/entry_points.txt +3 -0
  39. flexllm-0.3.3.dist-info/licenses/LICENSE +201 -0
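Since the __init__.py below sets __version__ = "0.3.3", a quick post-install sanity check is possible. This is a minimal sketch; it assumes only that the wheel installs under the import name flexllm:

    import flexllm

    # The wheel's __init__.py (shown below) sets __version__ = "0.3.3"
    print(flexllm.__version__)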
flexllm/__init__.py ADDED
@@ -0,0 +1,224 @@
+ """
+ flexllm - High-performance LLM client
+
+ Batch processing, caching, and checkpoint recovery for LLM APIs.
+
+ Example:
+     # =====================================================
+     # 1. LLMClient - Unified client (recommended)
+     # =====================================================
+     from flexllm import LLMClient
+
+     # Auto-detects the provider (inferred from base_url)
+     client = LLMClient(
+         base_url="https://api.openai.com/v1",  # or a vLLM/Ollama/DeepSeek address
+         api_key="your-key",
+         model="gpt-4",
+         concurrency_limit=10,
+         retry_times=3,
+     )
+
+     # Synchronous call (simple scenarios)
+     result = client.chat_completions_sync(
+         messages=[{"role": "user", "content": "Hello!"}]
+     )
+
+     # Async batch calls + checkpoint recovery
+     results = await client.chat_completions_batch(
+         messages_list,
+         show_progress=True,
+         output_jsonl="results.jsonl",  # written incrementally; resumes automatically after an interruption
+     )
+
+     # Streaming output
+     async for chunk in client.chat_completions_stream(messages):
+         print(chunk, end="", flush=True)
+
+     # Using Gemini
+     gemini_client = LLMClient(
+         provider="gemini",
+         api_key="your-google-key",
+         model="gemini-2.5-flash",
+     )
+
+     # =====================================================
+     # 2. OpenAIClient - OpenAI-compatible APIs (vLLM, Ollama, etc.)
+     # =====================================================
+     from flexllm import OpenAIClient, ResponseCacheConfig
+
+     client = OpenAIClient(
+         base_url="https://api.example.com/v1",
+         api_key="your-key",
+         model="qwen-vl-plus",
+         concurrency_limit=10,  # number of concurrent requests
+         max_qps=50,  # QPS limit
+         retry_times=3,  # automatic retries
+         cache=ResponseCacheConfig(enabled=True),  # enable response caching (default TTL: 1 hour)
+     )
+
+     # Single call
+     result = await client.chat_completions(messages)
+
+     # Batch calls + checkpoint recovery (resumes from cache/file after an interruption)
+     results = await client.chat_completions_batch(
+         messages_list,
+         show_progress=True,
+         output_jsonl="results.jsonl",  # incremental writes to file (checkpoint recovery)
+         flush_interval=1.0,  # flush to disk every second
+     )
+
+     # Streaming output
+     async for chunk in client.chat_completions_stream(messages):
+         print(chunk, end="", flush=True)
+
+     # =====================================================
+     # 3. GeminiClient - Google Gemini (Developer API / Vertex AI)
+     # =====================================================
+     from flexllm import GeminiClient
+
+     # Gemini Developer API
+     gemini = GeminiClient(
+         api_key="your-google-api-key",
+         model="gemini-2.5-flash",
+         concurrency_limit=10,
+     )
+     result = await gemini.chat_completions(messages)
+
+     # Vertex AI mode
+     gemini_vertex = GeminiClient(
+         project_id="your-project-id",
+         location="us-central1",
+         model="gemini-2.5-flash",
+         use_vertex_ai=True,
+     )
+
+     # Gemini thinking mode
+     result = await gemini.chat_completions(
+         messages,
+         thinking="high",  # False, True, "minimal", "low", "medium", "high"
+     )
+
+     # =====================================================
+     # 4. Multi-endpoint load balancing and failover (recommended)
+     # =====================================================
+     from flexllm import LLMClientPool
+
+     # Create a client pool (round-robin + failover)
+     pool = LLMClientPool(
+         endpoints=[
+             {"base_url": "http://host1:8000/v1", "api_key": "key1", "model": "qwen"},
+             {"base_url": "http://host2:8000/v1", "api_key": "key2", "model": "qwen"},
+         ],
+         load_balance="round_robin",  # round_robin, weighted, random, fallback
+         fallback=True,  # automatically fail over to another endpoint on errors
+     )
+
+     # The interface is identical to LLMClient
+     result = await pool.chat_completions(messages)
+     results = await pool.chat_completions_batch(messages_list)
+
+     # Batch calls can be distributed across endpoints and processed in parallel
+     results = await pool.chat_completions_batch(messages_list, distribute=True)
+
+     # =====================================================
+     # 5. Low-level provider router (advanced usage)
+     # =====================================================
+     from flexllm import ProviderRouter, ProviderConfig, create_router_from_urls
+
+     # Quick setup (round-robin over multiple URLs)
+     router = create_router_from_urls(
+         urls=["http://host1:8000/v1", "http://host2:8000/v1"],
+         api_key="EMPTY",
+         strategy="round_robin",
+     )
+
+     # Get the next available provider
+     provider = router.get_next()
+     client = OpenAIClient(base_url=provider.base_url, api_key=provider.api_key)
+
+     # Update provider state on request success/failure (automatic fallback)
+     router.mark_success(provider)  # or router.mark_failed(provider)
+
+     # =====================================================
+     # 6. Response cache configuration
+     # =====================================================
+     from flexllm import ResponseCacheConfig
+
+     cache = ResponseCacheConfig()  # disabled by default
+     cache = ResponseCacheConfig(enabled=True)  # enabled (default TTL: 1 day)
+     cache = ResponseCacheConfig(enabled=True, ttl=0)  # enabled (never expires)
+     cache = ResponseCacheConfig(enabled=False)  # explicitly disabled
+     cache = ResponseCacheConfig(enabled=True, ttl=3600)  # custom TTL (seconds)
+ """
+
+ __version__ = "0.3.3"
+
+ # Multimodal model features
+ from .mllm_client import MllmClient
+ from .batch_tools import MllmFolderProcessor, MllmTableProcessor
+
+ # Core LLM features
+ from .base_client import LLMClientBase, ChatCompletionResult, BatchResultItem, ToolCall
+ from .openaiclient import OpenAIClient
+ from .geminiclient import GeminiClient
+ from .claudeclient import ClaudeClient
+ from .llm_client import LLMClient
+ from .llm_parser import *
+
+ # Token counting and cost estimation
+ from .token_counter import (
+     count_tokens,
+     count_messages_tokens,
+     estimate_cost,
+     estimate_batch_cost,
+     messages_hash,
+     MODEL_PRICING,
+ )
+
+ # Response caching
+ from .cache import ResponseCache, ResponseCacheConfig
+
+ # Provider routing
+ from .provider_router import ProviderRouter, ProviderConfig, create_router_from_urls
+
+ # Client pool
+ from .client_pool import LLMClientPool, EndpointConfig
+
+ # Chain of Thought
+ from .chain_of_thought_client import ChainOfThoughtClient, Step
+
+ __all__ = [
+     # Clients
+     'LLMClientBase',
+     'MllmClient',
+     'MllmTableProcessor',
+     'MllmFolderProcessor',
+     'OpenAIClient',
+     'GeminiClient',
+     'ClaudeClient',
+     'LLMClient',
+     # Result types
+     'ChatCompletionResult',
+     'BatchResultItem',
+     'ToolCall',
+     # Token counting
+     'count_tokens',
+     'count_messages_tokens',
+     'estimate_cost',
+     'estimate_batch_cost',
+     'messages_hash',
+     'MODEL_PRICING',
+     # Caching
+     'ResponseCache',
+     'ResponseCacheConfig',
+     # Provider routing
+     'ProviderRouter',
+     'ProviderConfig',
+     'create_router_from_urls',
+     # Client pool
+     'LLMClientPool',
+     'EndpointConfig',
+     # Chain of Thought
+     'ChainOfThoughtClient',
+     'Step',
+ ]
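The docstring above documents batch calls with checkpoint recovery (sections 1 and 2). The following is a minimal end-to-end sketch built only from that documented API; the base_url, api_key, model, and prompts are placeholders, and the resume-on-rerun behavior is as the docstring describes, not independently verified:

    import asyncio
    from flexllm import LLMClient

    async def main():
        # Constructor arguments mirror section 1 of the docstring.
        client = LLMClient(
            base_url="https://api.openai.com/v1",  # placeholder endpoint
            api_key="your-key",                    # placeholder credential
            model="gpt-4",
            concurrency_limit=10,
            retry_times=3,
        )

        # One conversation (a list of messages) per batch item.
        messages_list = [
            [{"role": "user", "content": f"Summarize item {i}"}]
            for i in range(100)
        ]

        # output_jsonl is written incrementally, so rerunning this script
        # after an interruption should resume from the completed rows.
        return await client.chat_completions_batch(
            messages_list,
            show_progress=True,
            output_jsonl="results.jsonl",
        )

    results = asyncio.run(main())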
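Likewise for section 4's client pool, a sketch under the same assumptions (placeholder hosts, keys, and prompts; distribute=True and fallback=True behave as the docstring describes):

    import asyncio
    from flexllm import LLMClientPool

    async def main():
        # Two interchangeable endpoints serving the same model (placeholders).
        pool = LLMClientPool(
            endpoints=[
                {"base_url": "http://host1:8000/v1", "api_key": "key1", "model": "qwen"},
                {"base_url": "http://host2:8000/v1", "api_key": "key2", "model": "qwen"},
            ],
            load_balance="round_robin",
            fallback=True,  # retry failed requests on another endpoint
        )

        messages_list = [
            [{"role": "user", "content": f"Question {i}"}]
            for i in range(20)
        ]

        # distribute=True spreads the batch across endpoints in parallel.
        return await pool.chat_completions_batch(messages_list, distribute=True)

    results = asyncio.run(main())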