flexllm-0.3.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. flexllm/__init__.py +224 -0
  2. flexllm/__main__.py +1096 -0
  3. flexllm/async_api/__init__.py +9 -0
  4. flexllm/async_api/concurrent_call.py +100 -0
  5. flexllm/async_api/concurrent_executor.py +1036 -0
  6. flexllm/async_api/core.py +373 -0
  7. flexllm/async_api/interface.py +12 -0
  8. flexllm/async_api/progress.py +277 -0
  9. flexllm/base_client.py +988 -0
  10. flexllm/batch_tools/__init__.py +16 -0
  11. flexllm/batch_tools/folder_processor.py +317 -0
  12. flexllm/batch_tools/table_processor.py +363 -0
  13. flexllm/cache/__init__.py +10 -0
  14. flexllm/cache/response_cache.py +293 -0
  15. flexllm/chain_of_thought_client.py +1120 -0
  16. flexllm/claudeclient.py +402 -0
  17. flexllm/client_pool.py +698 -0
  18. flexllm/geminiclient.py +563 -0
  19. flexllm/llm_client.py +523 -0
  20. flexllm/llm_parser.py +60 -0
  21. flexllm/mllm_client.py +559 -0
  22. flexllm/msg_processors/__init__.py +174 -0
  23. flexllm/msg_processors/image_processor.py +729 -0
  24. flexllm/msg_processors/image_processor_helper.py +485 -0
  25. flexllm/msg_processors/messages_processor.py +341 -0
  26. flexllm/msg_processors/unified_processor.py +1404 -0
  27. flexllm/openaiclient.py +256 -0
  28. flexllm/pricing/__init__.py +104 -0
  29. flexllm/pricing/data.json +1201 -0
  30. flexllm/pricing/updater.py +223 -0
  31. flexllm/provider_router.py +213 -0
  32. flexllm/token_counter.py +270 -0
  33. flexllm/utils/__init__.py +1 -0
  34. flexllm/utils/core.py +41 -0
  35. flexllm-0.3.3.dist-info/METADATA +573 -0
  36. flexllm-0.3.3.dist-info/RECORD +39 -0
  37. flexllm-0.3.3.dist-info/WHEEL +4 -0
  38. flexllm-0.3.3.dist-info/entry_points.txt +3 -0
  39. flexllm-0.3.3.dist-info/licenses/LICENSE +201 -0
flexllm/mllm_client.py ADDED
@@ -0,0 +1,559 @@
+ #! /usr/bin/env python3
+ # -*- coding: utf-8 -*-
+
+ """
+ MLLM client
+ """
+
+ import asyncio
+ from typing import List, Callable, Optional, Any
+ from rich import print
+ from .openaiclient import OpenAIClient
+ from .cache import ResponseCacheConfig
+
+ from .msg_processors.unified_processor import (
+     batch_process_messages,
+     UnifiedProcessorConfig,
+     UnifiedImageProcessor,
+ )
+
+
+ from .msg_processors.image_processor import ImageCacheConfig
+ from abc import ABC, abstractmethod
+
+
+ class MllmClientBase(ABC):
+     """
+     Abstract base class for MLLM clients.
+     Defines the core interface that every MLLM client must implement.
+     """
+
+     @abstractmethod
+     async def call_llm(
+         self,
+         messages_list,
+         model=None,
+         temperature=0.1,
+         max_tokens=2000,
+         top_p=0.95,
+         safety={
+             "input_level": "none",
+             "input_image_level": "none",
+         },
+         **kwargs,
+     ):
+         """
+         Abstract method for calling the LLM.
+
+         Args:
+             messages_list: list of message lists
+             model: model name
+             temperature: sampling temperature
+             max_tokens: maximum number of generated tokens
+             top_p: top_p parameter
+             safety: safety levels
+             **kwargs: additional parameters
+
+         Returns:
+             response_list: list of responses
+         """
+         pass
+
+
+ class MllmClient(MllmClientBase):
+     """
+     MLLM client implementation.
+     """
+
+     def __init__(
+         self,
+         model: str,
+         base_url: str,
+         api_key="EMPTY",
+         concurrency_limit=10,
+         preprocess_concurrency=16,
+         max_qps=50,
+         timeout=60,
+         retry_times=3,
+         retry_delay=0.55,
+         processor_config: Optional[UnifiedProcessorConfig] = None,
+         cache: Optional[ResponseCacheConfig] = None,
+         **kwargs,
+     ):
+         """
+         Initialize the MLLM client.
+
+         Args:
+             model: model name
+             base_url: API base URL
+             api_key: API key
+             concurrency_limit: request concurrency limit
+             preprocess_concurrency: preprocessing concurrency
+             max_qps: maximum QPS
+             timeout: timeout in seconds
+             retry_times: number of retries
+             retry_delay: retry delay in seconds
+             processor_config: unified processor config; if None, the high-performance defaults are used
+             cache: response cache config, enabled by default (24-hour TTL)
+             **kwargs: additional parameters
+         """
+         self.client = OpenAIClient(
+             api_key=api_key,
+             base_url=base_url,
+             concurrency_limit=concurrency_limit,
+             max_qps=max_qps,
+             timeout=timeout,
+             retry_times=retry_times,
+             retry_delay=retry_delay,
+             cache=cache,
+             **kwargs,
+         )
+         self.model = model
+         self.preprocess_concurrency = preprocess_concurrency
+
+         # Build the processor config and a single held instance (key improvement)
+         self.processor_config = (
+             processor_config or UnifiedProcessorConfig.high_performance()
+         )
+         # Create and hold the processor instance so its caches stay warm
+         self.processor_instance = UnifiedImageProcessor(self.processor_config)
+
+         # Deferred import to avoid a circular reference
+         from .batch_tools import MllmFolderProcessor
+
+         self._table = None  # lazily initialized
+         self.folder = MllmFolderProcessor(self)
+
+     @property
+     def table(self):
+         """Table processor (requires pandas, lazily loaded)"""
+         if self._table is None:
+             try:
+                 from .batch_tools import MllmTableProcessor
+                 self._table = MllmTableProcessor(self)
+             except ImportError:
+                 raise ImportError(
+                     "Table processing requires pandas. Install it with: pip install pandas"
+                 )
+         return self._table
+
+     def call_llm_sync(
+         self,
+         messages_list,
+         model=None,
+         temperature=0.1,
+         max_tokens=2000,
+         top_p=0.95,
+         safety={
+             "input_level": "none",
+             "input_image_level": "none",
+         },
+         **kwargs,
+     ):
+         return asyncio.run(
+             self.call_llm(
+                 messages_list, model, temperature, max_tokens, top_p, safety, **kwargs
+             )
+         )
+
+     async def call_llm(
+         self,
+         messages_list,
+         model=None,
+         temperature=0.1,
+         max_tokens=2000,
+         top_p=0.95,
+         safety={
+             "input_level": "none",
+             "input_image_level": "none",
+         },
+         show_progress=True,
+         **kwargs,
+     ):
+         """
+         Call the LLM.
+
+         Args:
+             messages_list: list of message lists
+             model: model name; defaults to the model set at initialization
+             temperature: sampling temperature
+             max_tokens: maximum number of generated tokens
+             top_p: top_p parameter
+             safety: safety levels
+             show_progress: whether to show a progress bar and statistics for each step
+             **kwargs: additional parameters
+
+         Returns:
+             response_list: list of responses
+         """
+         if model is None:
+             model = self.model
+
+         # Preprocess with the held processor instance so its caches are reused
+         messages_list = await self._preprocess_messages_with_instance(
+             messages_list,
+             show_progress=show_progress,
+         )
+         # print(f"{messages_list[-1]=}")
+         response_list, _ = await self.client.chat_completions_batch(
+             messages_list=messages_list,
+             model=model,
+             temperature=temperature,
+             max_tokens=max_tokens,
+             top_p=top_p,
+             return_summary=True,
+             safety=safety,
+             show_progress=show_progress,
+             **kwargs,
+         )
+         return response_list
+
+     async def call_llm_stream(
+         self,
+         messages: list,
+         model=None,
+         temperature=0.1,
+         max_tokens=2000,
+         top_p=0.95,
+         safety={
+             "input_level": "none",
+             "input_image_level": "none",
+         },
+         **kwargs,
+     ):
+         """
+         Streaming LLM call - yields the response token by token; suited to a single conversation.
+
+         Args:
+             messages: a single message list (not a messages_list)
+             model: model name; defaults to the model set at initialization
+             temperature: sampling temperature
+             max_tokens: maximum number of generated tokens
+             top_p: top_p parameter
+             safety: safety levels
+             **kwargs: additional parameters
+
+         Yields:
+             str: streamed token fragments
+         """
+         if model is None:
+             model = self.model
+
+         # Preprocess the message (no progress bar, since there is only one message)
+         processed_messages = await self._preprocess_messages_with_instance(
+             [messages], show_progress=False
+         )
+
+         # Use OpenAIClient's streaming method
+         async for token in self.client.chat_completions_stream(
+             messages=processed_messages[0],
+             model=model,
+             temperature=temperature,
+             max_tokens=max_tokens,
+             top_p=top_p,
+             **kwargs,
+         ):
+             yield token
+
+     async def _preprocess_messages_with_instance(
+         self,
+         messages_list,
+         show_progress=True,
+     ):
+         """
+         Preprocess messages with the held processor instance.
+         This keeps its caches warm and avoids repeated initialization overhead.
+         """
+         from .msg_processors.unified_processor import process_content_recursive
+         import aiohttp
+         from copy import deepcopy
+         from tqdm.asyncio import tqdm
+         import asyncio
+
+         # Work on a copy so the original data is not modified
+         messages_list = deepcopy(messages_list)
+
+         # Create the progress bar
+         pbar = None
+         if show_progress:
+             try:
+                 pbar = tqdm(
+                     total=len(messages_list),
+                     desc="Processing images",
+                     unit=" items",
+                     ncols=100,
+                     miniters=1,
+                 )
+             except ImportError:
+                 pbar = None
+
+         try:
+             # Use an HTTP session together with the held processor instance
+             async with aiohttp.ClientSession() as session:
+                 # Semaphore to limit concurrency
+                 semaphore = asyncio.Semaphore(self.preprocess_concurrency)
+
+                 async def process_single_messages(messages):
+                     async with semaphore:
+                         for message in messages:
+                             await process_content_recursive(
+                                 message,
+                                 session,
+                                 self.processor_instance,  # use the held instance!
+                             )
+                         if pbar:
+                             pbar.update(1)
+                         return messages
+
+                 # Process all message groups concurrently
+                 tasks = [
+                     process_single_messages(messages) for messages in messages_list
+                 ]
+                 processed_messages_list = await asyncio.gather(*tasks)
+
+                 return processed_messages_list
+
+         finally:
+             if pbar:
+                 pbar.close()
+
+     async def call_llm_with_selection(
+         self,
+         messages_list,
+         n_predictions: int = 3,
+         selector_fn: Optional[Callable[[List[Any]], Any]] = None,
+         model=None,
+         temperature=0.1,
+         max_tokens=2000,
+         top_p=0.95,
+         safety={
+             "input_level": "none",
+             "input_image_level": "none",
+         },
+         show_progress=True,
+         **kwargs,
+     ):
+         """
+         Enhanced LLM call: runs n predictions per message and picks the best result with a selector function.
+
+         Args:
+             messages_list: list of message lists
+             n_predictions: number of predictions per message
+             selector_fn: selector function; receives the n responses and returns the chosen one.
+                 If None, the first response is returned by default.
+             model: model name; defaults to the model set at initialization
+             temperature: sampling temperature
+             max_tokens: maximum number of generated tokens
+             top_p: top_p parameter
+             safety: safety levels
+             show_progress: whether to show a progress bar
+             **kwargs: additional parameters
+
+         Returns:
+             response_list: list of selected responses
+         """
+         if model is None:
+             model = self.model
+
+         # Default selector (when none is provided): simply return the first response
+         if selector_fn is None:
+             selector_fn = lambda responses: responses[0]
+
+         # Create n copies of each message
+         expanded_messages_list = []
+         for messages in messages_list:
+             for _ in range(n_predictions):
+                 expanded_messages_list.append(messages)
+
+         # Call the model for all responses - using the held processor instance
+         messages_list = await self._preprocess_messages_with_instance(
+             expanded_messages_list,
+             show_progress=show_progress,
+         )
+         all_responses, _ = await self.client.chat_completions_batch(
+             messages_list=messages_list,
+             model=model,
+             temperature=temperature,
+             max_tokens=max_tokens,
+             top_p=top_p,
+             return_summary=True,
+             safety=safety,
+             show_progress=show_progress,
+             **kwargs,
+         )
+
+         # Regroup the responses and apply the selector function
+         selected_responses = []
+         for i in range(0, len(all_responses), n_predictions):
+             message_responses = all_responses[i : i + n_predictions]
+             # Print safely, avoiding response payloads that may contain base64 data
+             print(
+                 f"[cyan]Processing response group {i // n_predictions + 1} ({len(message_responses)} predictions)[/cyan]"
+             )
+             selected_response = selector_fn(message_responses)
+             selected_responses.append(selected_response)
+
+         return selected_responses
+
+     async def call_llm_nested(
+         self,
+         messages_list_list,
+         model=None,
+         temperature=0.1,
+         max_tokens=2000,
+         top_p=0.95,
+         safety={
+             "input_level": "none",
+             "input_image_level": "none",
+         },
+         **kwargs,
+     ):
+         """
+         Handle a nested messages_list_list structure.
+         Flattens messages_list_list into a messages_list, calls call_llm, then regroups the results into a response_list_list.
+         This improves overall call throughput.
+
+         Args:
+             messages_list_list: nested list of message lists
+             model: model name
+             temperature: sampling temperature
+             max_tokens: maximum number of generated tokens
+             top_p: top_p parameter
+             safety: safety levels
+             **kwargs: additional parameters
+
+         Returns:
+             response_list_list: nested list of response lists, matching the input structure
+         """
+         # Record each sublist's length so the results can be regrouped later
+         lengths = [len(messages_list) for messages_list in messages_list_list]
+
+         # Flatten messages_list_list
+         flattened_messages_list = []
+         for messages_list in messages_list_list:
+             flattened_messages_list.extend(messages_list)
+
+         # Call call_llm on the flattened list to get a flat response_list
+         flattened_response_list = await self.call_llm(
+             flattened_messages_list,
+             model=model,
+             temperature=temperature,
+             max_tokens=max_tokens,
+             top_p=top_p,
+             safety=safety,
+             **kwargs,
+         )
+
+         # Regroup the flat response_list into a response_list_list using the recorded lengths
+         response_list_list = []
+         start_idx = 0
+         for length in lengths:
+             response_list_list.append(
+                 flattened_response_list[start_idx : start_idx + length]
+             )
+             start_idx += length
+
+         return response_list_list
+
+     async def call_llm_nested_with_selection(
+         self,
+         messages_list_list,
+         n_predictions: int = 3,
+         selector_fn: Optional[Callable[[List[Any]], Any]] = None,
+         model=None,
+         temperature=0.1,
+         max_tokens=2000,
+         top_p=0.95,
+         safety={
+             "input_level": "none",
+             "input_image_level": "none",
+         },
+         **kwargs,
+     ):
+         """
+         Handle a nested messages_list_list structure, running multiple predictions and selection per message.
+
+         Args:
+             messages_list_list: nested list of message lists
+             n_predictions: number of predictions per message
+             selector_fn: selector function; receives the n responses and returns the chosen one
+             model: model name
+             temperature: sampling temperature
+             max_tokens: maximum number of generated tokens
+             top_p: top_p parameter
+             safety: safety levels
+             **kwargs: additional parameters
+
+         Returns:
+             response_list_list: nested list of response lists, matching the input structure
+         """
+         # Record each sublist's length so the results can be regrouped later
+         lengths = [len(messages_list) for messages_list in messages_list_list]
+
+         # Flatten messages_list_list
+         flattened_messages_list = []
+         for messages_list in messages_list_list:
+             flattened_messages_list.extend(messages_list)
+
+         # Call call_llm_with_selection on the flattened list to get a flat response_list
+         flattened_response_list = await self.call_llm_with_selection(
+             flattened_messages_list,
+             n_predictions=n_predictions,
+             selector_fn=selector_fn,
+             model=model,
+             temperature=temperature,
+             max_tokens=max_tokens,
+             top_p=top_p,
+             safety=safety,
+             **kwargs,
+         )
+
+         # Regroup the flat response_list into a response_list_list using the recorded lengths
+         response_list_list = []
+         start_idx = 0
+         for length in lengths:
+             response_list_list.append(
+                 flattened_response_list[start_idx : start_idx + length]
+             )
+             start_idx += length
+
+         return response_list_list
+
+     def cleanup(self):
+         """
+         Release resources: the processor instance and the client connection.
+         """
+         if hasattr(self, "processor_instance") and self.processor_instance:
+             self.processor_instance.cleanup()
+             self.processor_instance = None
+
+         # Clean up client resources (including the response cache)
+         if hasattr(self, "client") and self.client:
+             self.client.close()
+
+     def close(self):
+         """Close the client and release resources (alias, consistent with the LLMClientBase interface)"""
+         self.cleanup()
+
+     def __enter__(self):
+         return self
+
+     def __exit__(self, *args):
+         self.cleanup()
+
+     async def __aenter__(self):
+         return self
+
+     async def __aexit__(self, *args):
+         self.cleanup()
+
+     def __del__(self):
+         """
+         Destructor; makes sure resources are released.
+         """
+         try:
+             self.cleanup()
+         except Exception:
+             pass  # avoid raising exceptions from the destructor
+
+     # All table and dataframe related methods have moved to the TableProcessor class
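
For orientation, here is a minimal usage sketch of the MllmClient added in this file. It is not part of the package diff; the endpoint, model name, image URL, and the top-level import path are assumptions for illustration (if MllmClient is not re-exported from flexllm/__init__.py, import it from flexllm.mllm_client instead).

# Usage sketch (assumed OpenAI-compatible endpoint; model name and image URL are placeholders)
import asyncio
from flexllm import MllmClient  # assumed re-export; otherwise: from flexllm.mllm_client import MllmClient

async def main():
    client = MllmClient(
        model="qwen2-vl-7b-instruct",          # placeholder model name
        base_url="http://localhost:8000/v1",   # placeholder OpenAI-compatible endpoint
        api_key="EMPTY",
    )
    # One conversation per inner list; image URLs are fetched and encoded during preprocessing.
    messages_list = [
        [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Describe this image."},
                    {"type": "image_url", "image_url": {"url": "https://example.com/cat.jpg"}},
                ],
            }
        ]
    ]
    responses = await client.call_llm(messages_list, max_tokens=512)
    print(responses[0])
    client.close()

asyncio.run(main())

Synchronous callers can use call_llm_sync, which wraps the same batch call in asyncio.run.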