flexllm-0.3.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. flexllm/__init__.py +224 -0
  2. flexllm/__main__.py +1096 -0
  3. flexllm/async_api/__init__.py +9 -0
  4. flexllm/async_api/concurrent_call.py +100 -0
  5. flexllm/async_api/concurrent_executor.py +1036 -0
  6. flexllm/async_api/core.py +373 -0
  7. flexllm/async_api/interface.py +12 -0
  8. flexllm/async_api/progress.py +277 -0
  9. flexllm/base_client.py +988 -0
  10. flexllm/batch_tools/__init__.py +16 -0
  11. flexllm/batch_tools/folder_processor.py +317 -0
  12. flexllm/batch_tools/table_processor.py +363 -0
  13. flexllm/cache/__init__.py +10 -0
  14. flexllm/cache/response_cache.py +293 -0
  15. flexllm/chain_of_thought_client.py +1120 -0
  16. flexllm/claudeclient.py +402 -0
  17. flexllm/client_pool.py +698 -0
  18. flexllm/geminiclient.py +563 -0
  19. flexllm/llm_client.py +523 -0
  20. flexllm/llm_parser.py +60 -0
  21. flexllm/mllm_client.py +559 -0
  22. flexllm/msg_processors/__init__.py +174 -0
  23. flexllm/msg_processors/image_processor.py +729 -0
  24. flexllm/msg_processors/image_processor_helper.py +485 -0
  25. flexllm/msg_processors/messages_processor.py +341 -0
  26. flexllm/msg_processors/unified_processor.py +1404 -0
  27. flexllm/openaiclient.py +256 -0
  28. flexllm/pricing/__init__.py +104 -0
  29. flexllm/pricing/data.json +1201 -0
  30. flexllm/pricing/updater.py +223 -0
  31. flexllm/provider_router.py +213 -0
  32. flexllm/token_counter.py +270 -0
  33. flexllm/utils/__init__.py +1 -0
  34. flexllm/utils/core.py +41 -0
  35. flexllm-0.3.3.dist-info/METADATA +573 -0
  36. flexllm-0.3.3.dist-info/RECORD +39 -0
  37. flexllm-0.3.3.dist-info/WHEEL +4 -0
  38. flexllm-0.3.3.dist-info/entry_points.txt +3 -0
  39. flexllm-0.3.3.dist-info/licenses/LICENSE +201 -0
flexllm/msg_processors/unified_processor.py
@@ -0,0 +1,1404 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Unified image processor.
4
+ Combines local-file and URL handling to provide high-performance batch message preprocessing.
5
+ """
6
+
7
+ import asyncio
8
+ import aiohttp
9
+ import time
10
+ import os
11
+ import base64
12
+ import hashlib
13
+ import json
14
+ import sys
15
+ import io
16
+ import threading
17
+ from concurrent.futures import ThreadPoolExecutor, as_completed
18
+ from pathlib import Path
19
+ from typing import Optional, List, Dict, Any, Tuple, Union, Callable
20
+ from dataclasses import dataclass
21
+ from functools import lru_cache
22
+ from copy import deepcopy
23
+ from collections import defaultdict
24
+ import gc
25
+ import contextlib
26
+
27
+ import numpy as np
28
+ from PIL import Image
29
+ from loguru import logger
30
+
31
+ try:
32
+ import cv2
33
+ HAS_CV2 = True
34
+ except ImportError:
35
+ cv2 = None
36
+ HAS_CV2 = False
37
+
38
+ # 导入缓存配置
39
+ try:
40
+ from .image_processor import (
41
+ ImageCacheConfig,
42
+ DEFAULT_CACHE_DIR,
43
+ get_target_size,
44
+ LANCZOS,
45
+ )
46
+
47
+ HAS_IMAGE_PROCESSOR = True
48
+ except ImportError:
49
+ HAS_IMAGE_PROCESSOR = False
50
+ DEFAULT_CACHE_DIR = "cache"
51
+
52
+ try:
53
+ from tqdm.asyncio import tqdm
54
+
55
+ TQDM_AVAILABLE = True
56
+ except ImportError:
57
+ TQDM_AVAILABLE = False
58
+
59
+
60
+ @contextlib.contextmanager
61
+ def suppress_stdout():
62
+ """Context manager that suppresses stdout output."""
63
+ old_stdout = sys.stdout
64
+ try:
65
+ sys.stdout = io.StringIO()
66
+ yield
67
+ finally:
68
+ sys.stdout = old_stdout
69
+
70
+
71
+ @contextlib.contextmanager
72
+ def suppress_stderr():
73
+ """Context manager that suppresses stderr output."""
74
+ old_stderr = sys.stderr
75
+ try:
76
+ sys.stderr = io.StringIO()
77
+ yield
78
+ finally:
79
+ sys.stderr = old_stderr
80
+
81
+
82
+ @contextlib.contextmanager
83
+ def suppress_all_output():
84
+ """Context manager that suppresses all output."""
85
+ with suppress_stdout(), suppress_stderr():
86
+ yield
87
+
88
+
89
+ def safe_repr_source(source: str, max_length: int = 100) -> str:
90
+ """Safely represent an image source without dumping long base64 strings."""
91
+ if not source:
92
+ return "空源"
93
+
94
+ # 检查是否是base64数据URI
95
+ if source.startswith("data:image/") and ";base64," in source:
96
+ parts = source.split(";base64,", 1)
97
+ if len(parts) == 2:
98
+ mime_type = parts[0].replace("data:", "")
99
+ base64_data = parts[1]
100
+ return f"[{mime_type} base64数据 长度:{len(base64_data)}]"
101
+
102
+ # 检查是否是纯base64字符串(很长且只包含base64字符)
103
+ if len(source) > 100 and all(
104
+ c in "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="
105
+ for c in source
106
+ ):
107
+ return f"[base64数据 长度:{len(source)}]"
108
+
109
+ # 普通字符串,截断显示
110
+ if len(source) <= max_length:
111
+ return source
112
+ else:
113
+ return source[:max_length] + "..."
114
+
115
+
116
+ def safe_repr_error(error_msg: str, max_length: int = 200) -> str:
117
+ """Safely represent an error message without dumping long base64 strings."""
118
+ if not error_msg:
119
+ return error_msg
120
+
121
+ # 检查错误信息中是否包含data:image的base64数据
122
+ if "data:image/" in error_msg and ";base64," in error_msg:
123
+ import re
124
+
125
+ # 使用正则表达式替换base64数据URI
126
+ pattern = r"data:image/[^;]+;base64,[A-Za-z0-9+/]+=*"
127
+
128
+ def replace_base64(match):
129
+ full_uri = match.group(0)
130
+ parts = full_uri.split(";base64,", 1)
131
+ if len(parts) == 2:
132
+ mime_type = parts[0].replace("data:", "")
133
+ base64_data = parts[1]
134
+ return f"[{mime_type} base64数据 长度:{len(base64_data)}]"
135
+ return full_uri
136
+
137
+ error_msg = re.sub(pattern, replace_base64, error_msg)
138
+
139
+ # 截断过长的错误信息
140
+ if len(error_msg) <= max_length:
141
+ return error_msg
142
+ else:
143
+ return error_msg[:max_length] + "..."
144
+
145
+
146
+ @dataclass
147
+ class UnifiedProcessorConfig:
148
+ """Unified processor configuration."""
149
+
150
+ # 线程和并发配置
151
+ max_workers: int = 8
152
+ max_concurrent: int = 10
153
+ enable_multithreading: bool = True
154
+
155
+ # 图像质量配置
156
+ jpeg_quality: int = 95
157
+ png_compression: int = 1
158
+ webp_quality: int = 90
159
+
160
+ # 缓存配置
161
+ memory_cache_size_mb: int = 500
162
+ enable_disk_cache: bool = True
163
+ disk_cache_dir: str = DEFAULT_CACHE_DIR
164
+ force_refresh_disk_cache: bool = False
165
+ retry_failed_disk_cache: bool = False
166
+
167
+ # 性能配置
168
+ prefetch_size: int = 50
169
+ enable_simd: bool = True
170
+ suppress_opencv_output: bool = True
171
+
172
+ # 超时配置
173
+ single_file_timeout: float = 10.0
174
+ batch_timeout: float = 60.0
175
+ network_timeout: float = 15.0
176
+
177
+ @classmethod
178
+ def default(cls) -> "UnifiedProcessorConfig":
179
+ """Default configuration."""
180
+ return cls()
181
+
182
+ @classmethod
183
+ def high_performance(cls) -> "UnifiedProcessorConfig":
184
+ """High-performance configuration."""
185
+ return cls(
186
+ max_workers=16,
187
+ max_concurrent=32,
188
+ jpeg_quality=95,
189
+ png_compression=3,
190
+ memory_cache_size_mb=1000,
191
+ prefetch_size=100,
192
+ )
193
+
194
+ @classmethod
195
+ def memory_optimized(cls) -> "UnifiedProcessorConfig":
196
+ """Memory-optimized configuration."""
197
+ return cls(
198
+ max_workers=4,
199
+ max_concurrent=6,
200
+ jpeg_quality=80,
201
+ png_compression=6,
202
+ memory_cache_size_mb=200,
203
+ prefetch_size=20,
204
+ )
205
+
206
+ @classmethod
207
+ def from_image_cache_config(cls, cache_config: "ImageCacheConfig") -> "UnifiedProcessorConfig":
208
+ """Create a new config from a legacy ImageCacheConfig."""
209
+ return cls(
210
+ enable_disk_cache=cache_config.enabled,
211
+ disk_cache_dir=cache_config.cache_dir,
212
+ force_refresh_disk_cache=cache_config.force_refresh,
213
+ retry_failed_disk_cache=cache_config.retry_failed,
214
+ )
215
+
216
+ @classmethod
217
+ def auto_detect(cls) -> "UnifiedProcessorConfig":
218
+ """Adaptive configuration, auto-tuned to available system resources."""
219
+ try:
220
+ import psutil
221
+ import os
222
+
223
+ # 获取系统信息
224
+ cpu_count = os.cpu_count() or 4
225
+ memory_gb = psutil.virtual_memory().total / (1024**3)
226
+
227
+ # 根据CPU核心数调整worker数量
228
+ max_workers = max(4, min(cpu_count, 24))
229
+ max_concurrent = max(6, min(cpu_count * 2, 40))
230
+
231
+ # 根据内存大小调整缓存
232
+ if memory_gb >= 16:
233
+ # 16GB+: 高性能模式
234
+ cache_size = 1000
235
+ prefetch_size = 100
236
+ jpeg_quality = 95
237
+ elif memory_gb >= 8:
238
+ # 8-16GB: 平衡模式
239
+ cache_size = 500
240
+ prefetch_size = 50
241
+ jpeg_quality = 90
242
+ else:
243
+ # <8GB: 节省模式
244
+ cache_size = 200
245
+ prefetch_size = 20
246
+ jpeg_quality = 80
247
+
248
+ return cls(
249
+ max_workers=max_workers,
250
+ max_concurrent=max_concurrent,
251
+ memory_cache_size_mb=cache_size,
252
+ prefetch_size=prefetch_size,
253
+ jpeg_quality=jpeg_quality,
254
+ png_compression=3,
255
+ enable_disk_cache=True, # 默认启用磁盘缓存
256
+ )
257
+
258
+ except ImportError:
259
+ # 如果没有psutil,回退到默认配置
260
+ return cls.default()
261
+ except Exception:
262
+ # 其他异常,回退到默认配置
263
+ return cls.default()
264
+
265
+
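The preset constructors above are ordinary classmethods, so picking or adjusting a configuration is a one-liner. A minimal editorial sketch (not part of the packaged module; the import path assumes the file layout listed above):

from flexllm.msg_processors.unified_processor import UnifiedProcessorConfig

# Pick a preset, or auto-tune to the machine (falls back to default() when psutil is missing).
cfg = UnifiedProcessorConfig.auto_detect()
fast = UnifiedProcessorConfig.high_performance()

# Or override individual fields directly on the dataclass.
custom = UnifiedProcessorConfig(max_workers=12, jpeg_quality=85, enable_disk_cache=False)
print(cfg.max_workers, fast.max_concurrent, custom.jpeg_quality)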
266
+ class UnifiedMemoryCache:
267
+ """Unified thread-safe in-memory cache."""
268
+
269
+ def __init__(self, max_size_mb: int = 500):
270
+ self.max_size_bytes = max_size_mb * 1024 * 1024
271
+ self.cache = {}
272
+ self.access_times = {}
273
+ self.cache_sizes = {}
274
+ self.current_size = 0
275
+ self.lock = threading.RLock()
276
+ self.hit_count = 0
277
+ self.miss_count = 0
278
+
279
+ def _evict_lru(self):
280
+ """清理LRU项目"""
281
+ if not self.cache:
282
+ return
283
+
284
+ # 找到最少使用的项目
285
+ lru_key = min(self.access_times.keys(), key=lambda k: self.access_times[k])
286
+
287
+ # 移除LRU项目
288
+ if lru_key in self.cache:
289
+ self.cache.pop(lru_key)
290
+ self.access_times.pop(lru_key)
291
+ size = self.cache_sizes.pop(lru_key, 0)
292
+ self.current_size -= size
293
+
294
+ def _generate_cache_key(
295
+ self,
296
+ source: str,
297
+ max_width: Optional[int] = None,
298
+ max_height: Optional[int] = None,
299
+ max_pixels: Optional[int] = None,
300
+ **kwargs,
301
+ ) -> str:
302
+ """生成缓存键"""
303
+ try:
304
+ key_parts = [source]
305
+
306
+ # 添加处理参数
307
+ if max_width is not None:
308
+ key_parts.append(f"w:{max_width}")
309
+ if max_height is not None:
310
+ key_parts.append(f"h:{max_height}")
311
+ if max_pixels is not None:
312
+ key_parts.append(f"p:{max_pixels}")
313
+
314
+ # 对于本地文件,添加修改时间
315
+ if os.path.exists(source):
316
+ try:
317
+ mtime = os.path.getmtime(source)
318
+ key_parts.append(f"mtime:{mtime}")
319
+ except:
320
+ pass
321
+
322
+ # 添加其他参数
323
+ for key in sorted(kwargs.keys()):
324
+ if kwargs[key] is not None:
325
+ key_parts.append(f"{key}:{kwargs[key]}")
326
+
327
+ key_data = "|".join(key_parts)
328
+ return hashlib.md5(key_data.encode()).hexdigest()
329
+ except Exception:
330
+ return hashlib.md5(source.encode()).hexdigest()
331
+
332
+ def get(self, source: str, **kwargs) -> Optional[str]:
333
+ """获取缓存数据"""
334
+ cache_key = self._generate_cache_key(source, **kwargs)
335
+
336
+ with self.lock:
337
+ if cache_key in self.cache:
338
+ self.access_times[cache_key] = time.time()
339
+ self.hit_count += 1
340
+ return self.cache[cache_key]
341
+ else:
342
+ self.miss_count += 1
343
+ return None
344
+
345
+ def put(self, source: str, data: str, **kwargs):
346
+ """存储缓存数据"""
347
+ cache_key = self._generate_cache_key(source, **kwargs)
348
+
349
+ with self.lock:
350
+ try:
351
+ data_size = len(data.encode("utf-8"))
352
+
353
+ # 如果数据太大,不缓存
354
+ if data_size > self.max_size_bytes * 0.5:
355
+ return
356
+
357
+ # 清理空间
358
+ while (
359
+ self.current_size + data_size > self.max_size_bytes and self.cache
360
+ ):
361
+ self._evict_lru()
362
+
363
+ # 存储数据
364
+ self.cache[cache_key] = data
365
+ self.access_times[cache_key] = time.time()
366
+ self.cache_sizes[cache_key] = data_size
367
+ self.current_size += data_size
368
+ except Exception:
369
+ # 静默处理缓存错误
370
+ pass
371
+
372
+ def clear(self):
373
+ """清空缓存"""
374
+ with self.lock:
375
+ self.cache.clear()
376
+ self.access_times.clear()
377
+ self.cache_sizes.clear()
378
+ self.current_size = 0
379
+ gc.collect()
380
+
381
+ def get_stats(self) -> Dict[str, Any]:
382
+ """获取缓存统计"""
383
+ with self.lock:
384
+ total_requests = self.hit_count + self.miss_count
385
+ hit_rate = (
386
+ (self.hit_count / total_requests * 100) if total_requests > 0 else 0
387
+ )
388
+
389
+ return {
390
+ "cached_items": len(self.cache),
391
+ "current_size_mb": self.current_size / 1024 / 1024,
392
+ "max_size_mb": self.max_size_bytes / 1024 / 1024,
393
+ "usage_percent": (self.current_size / self.max_size_bytes * 100)
394
+ if self.max_size_bytes > 0
395
+ else 0,
396
+ "hit_count": self.hit_count,
397
+ "miss_count": self.miss_count,
398
+ "hit_rate_percent": hit_rate,
399
+ "total_requests": total_requests,
400
+ "avg_item_size_kb": (self.current_size / 1024 / len(self.cache))
401
+ if self.cache
402
+ else 0,
403
+ "cache_efficiency": "excellent"
404
+ if hit_rate > 80
405
+ else "good"
406
+ if hit_rate > 60
407
+ else "fair"
408
+ if hit_rate > 40
409
+ else "poor",
410
+ }
411
+
412
+
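Cache keys fold in the source plus the resize parameters (and the file mtime for local paths), so the same image requested at two sizes occupies two entries. A minimal editorial sketch of the get/put round trip (the URL and payload are placeholders):

from flexllm.msg_processors.unified_processor import UnifiedMemoryCache

cache = UnifiedMemoryCache(max_size_mb=16)
cache.put("https://example.com/cat.jpg", "data:image/jpeg;base64,....", max_width=800)

# Same source and parameters hit the same key; different parameters are a separate entry.
assert cache.get("https://example.com/cat.jpg", max_width=800) is not None
assert cache.get("https://example.com/cat.jpg", max_width=400) is None
print(cache.get_stats()["hit_rate_percent"])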
413
+ class UnifiedImageProcessor:
414
+ """Unified image processor supporting both local files and URLs."""
415
+
416
+ def __init__(self, config: Optional[UnifiedProcessorConfig] = None):
417
+ self.config = config or UnifiedProcessorConfig.default()
418
+ self.memory_cache = UnifiedMemoryCache(self.config.memory_cache_size_mb)
419
+
420
+ # 磁盘缓存配置
421
+ self.disk_cache_config = None
422
+ if self.config.enable_disk_cache and HAS_IMAGE_PROCESSOR:
423
+ self.disk_cache_config = ImageCacheConfig(
424
+ enabled=True,
425
+ cache_dir=self.config.disk_cache_dir,
426
+ force_refresh=self.config.force_refresh_disk_cache,
427
+ retry_failed=self.config.retry_failed_disk_cache
428
+ )
429
+
430
+ # 线程池和锁
431
+ self.executor = None
432
+ self.processing_locks: Dict[str, asyncio.Lock] = {}
433
+ self.lock = asyncio.Lock()
434
+ self._executor_initialized = False
435
+ self._init_lock = threading.Lock()
436
+
437
+ # 性能统计
438
+ self._total_processed = 0
439
+ self._total_processing_time = 0.0
440
+ self._start_time = time.time()
441
+
442
+ # 初始化OpenCV优化
443
+ self._init_opencv_optimizations()
444
+
445
+ def _init_opencv_optimizations(self):
446
+ """初始化OpenCV优化设置"""
447
+ if not HAS_CV2:
448
+ return
449
+ try:
450
+ with (
451
+ suppress_all_output()
452
+ if self.config.suppress_opencv_output
453
+ else contextlib.nullcontext()
454
+ ):
455
+ cv2.setUseOptimized(True)
456
+ cv2.setNumThreads(self.config.max_workers)
457
+ cv2.setLogLevel(cv2.LOG_LEVEL_ERROR)
458
+
459
+ if self.config.enable_simd and hasattr(cv2, "useOptimized"):
460
+ cv2.useOptimized()
461
+ except Exception:
462
+ pass
463
+
464
+ def _get_executor(self) -> ThreadPoolExecutor:
465
+ """获取线程池执行器(延迟初始化)"""
466
+ if not self._executor_initialized:
467
+ with self._init_lock:
468
+ if not self._executor_initialized:
469
+ self.executor = ThreadPoolExecutor(
470
+ max_workers=self.config.max_workers,
471
+ thread_name_prefix="unified_processor",
472
+ )
473
+ self._executor_initialized = True
474
+ return self.executor
475
+
476
+ async def _get_processing_lock(self, cache_key: str) -> asyncio.Lock:
477
+ """获取文件处理锁"""
478
+ async with self.lock:
479
+ if cache_key not in self.processing_locks:
480
+ self.processing_locks[cache_key] = asyncio.Lock()
481
+ return self.processing_locks[cache_key]
482
+
483
+ def _detect_image_format(self, file_path: str) -> str:
484
+ """检测图像格式"""
485
+ try:
486
+ ext = Path(file_path).suffix.lower()
487
+ format_map = {
488
+ ".jpg": "JPEG",
489
+ ".jpeg": "JPEG",
490
+ ".png": "PNG",
491
+ ".webp": "WEBP",
492
+ ".bmp": "BMP",
493
+ ".tiff": "TIFF",
494
+ ".tif": "TIFF",
495
+ }
496
+ return format_map.get(ext, "JPEG")
497
+ except Exception:
498
+ return "JPEG"
499
+
500
+ def _get_encode_params(self, format_type: str) -> List[int]:
501
+ """获取编码参数"""
502
+ if not HAS_CV2:
503
+ return []
504
+ try:
505
+ if format_type == "JPEG":
506
+ return [cv2.IMWRITE_JPEG_QUALITY, self.config.jpeg_quality]
507
+ elif format_type == "PNG":
508
+ return [cv2.IMWRITE_PNG_COMPRESSION, self.config.png_compression]
509
+ elif format_type == "WEBP":
510
+ return [cv2.IMWRITE_WEBP_QUALITY, self.config.webp_quality]
511
+ else:
512
+ return []
513
+ except Exception:
514
+ return []
515
+
516
+ def _calculate_target_size(
517
+ self,
518
+ original_width: int,
519
+ original_height: int,
520
+ max_width: Optional[int],
521
+ max_height: Optional[int],
522
+ max_pixels: Optional[int],
523
+ ) -> Tuple[int, int]:
524
+ """计算目标尺寸"""
525
+ try:
526
+ width, height = original_width, original_height
527
+
528
+ # 应用最大宽度/高度限制
529
+ if max_width and width > max_width:
530
+ height = int(height * max_width / width)
531
+ width = max_width
532
+
533
+ if max_height and height > max_height:
534
+ width = int(width * max_height / height)
535
+ height = max_height
536
+
537
+ # 应用最大像素限制
538
+ if max_pixels and (width * height > max_pixels):
539
+ ratio = (max_pixels / (width * height)) ** 0.5
540
+ width = int(width * ratio)
541
+ height = int(height * ratio)
542
+
543
+ return max(1, width), max(1, height)
544
+ except Exception:
545
+ return original_width, original_height
546
+
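A small worked example of the resize rules above (editorial sketch; it calls the private helper directly only to illustrate the arithmetic):

from flexllm.msg_processors.unified_processor import UnifiedImageProcessor

p = UnifiedImageProcessor()
# Width cap first: 4000x3000 under max_width=2000 keeps the aspect ratio.
assert p._calculate_target_size(4000, 3000, 2000, None, None) == (2000, 1500)
# Pixel cap: both sides scale by sqrt(max_pixels / (width * height)).
w, h = p._calculate_target_size(2000, 1500, None, None, 1_000_000)
print(w, h)  # roughly 1154 x 866, just under one megapixel
p.cleanup()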
547
+ def _process_local_file_sync(
548
+ self,
549
+ file_path: str,
550
+ max_width: Optional[int] = None,
551
+ max_height: Optional[int] = None,
552
+ max_pixels: Optional[int] = None,
553
+ return_with_mime: bool = True,
554
+ ) -> str:
555
+ """同步处理本地文件"""
556
+ if not HAS_CV2:
557
+ raise ImportError(
558
+ "图像处理功能需要安装 opencv-python。请运行: pip install flexllm[image]"
559
+ )
560
+ try:
561
+ with (
562
+ suppress_all_output()
563
+ if self.config.suppress_opencv_output
564
+ else contextlib.nullcontext()
565
+ ):
566
+ # 使用OpenCV读取图像
567
+ img = cv2.imread(file_path, cv2.IMREAD_COLOR)
568
+ if img is None:
569
+ raise ValueError(f"无法读取图像文件: {file_path}")
570
+
571
+ original_height, original_width = img.shape[:2]
572
+
573
+ # 计算目标尺寸
574
+ target_width, target_height = self._calculate_target_size(
575
+ original_width, original_height, max_width, max_height, max_pixels
576
+ )
577
+
578
+ # 如果需要调整大小
579
+ if target_width != original_width or target_height != original_height:
580
+ img = cv2.resize(
581
+ img,
582
+ (target_width, target_height),
583
+ interpolation=cv2.INTER_LANCZOS4,
584
+ )
585
+
586
+ # 检测原始格式并编码
587
+ format_type = self._detect_image_format(file_path)
588
+ encode_params = self._get_encode_params(format_type)
589
+
590
+ ext = f".{format_type.lower()}"
591
+ if format_type == "JPEG":
592
+ ext = ".jpg"
593
+
594
+ success, buffer = cv2.imencode(ext, img, encode_params)
595
+ if not success:
596
+ raise ValueError(f"图像编码失败: {file_path}")
597
+
598
+ # 转换为base64
599
+ base64_data = base64.b64encode(buffer.tobytes()).decode("utf-8")
600
+
601
+ # 添加MIME类型前缀
602
+ if return_with_mime:
603
+ mime_type = f"image/{format_type.lower()}"
604
+ result = f"data:{mime_type};base64,{base64_data}"
605
+ else:
606
+ result = base64_data
607
+
608
+ return result
609
+
610
+ except Exception as e:
611
+ raise ValueError(f"处理本地文件失败: {file_path}, 错误: {str(e)}")
612
+
613
+ async def _process_url_async(
614
+ self,
615
+ url: str,
616
+ session: aiohttp.ClientSession,
617
+ max_width: Optional[int] = None,
618
+ max_height: Optional[int] = None,
619
+ max_pixels: Optional[int] = None,
620
+ return_with_mime: bool = True,
621
+ ) -> str:
622
+ """异步处理URL"""
623
+ try:
624
+ # 如果有image_processor,使用它处理URL(包含磁盘缓存)
625
+ if HAS_IMAGE_PROCESSOR:
626
+ from .image_processor import encode_image_to_base64
627
+
628
+ return await encode_image_to_base64(
629
+ url, session, max_width, max_height, max_pixels,
630
+ return_with_mime, cache_config=self.disk_cache_config
631
+ )
632
+ else:
633
+ # 简单的URL处理实现
634
+ timeout = aiohttp.ClientTimeout(total=self.config.network_timeout)
635
+ async with session.get(url, timeout=timeout) as response:
636
+ if response.status == 200:
637
+ content = await response.read()
638
+ base64_data = base64.b64encode(content).decode("utf-8")
639
+
640
+ if return_with_mime:
641
+ content_type = response.headers.get(
642
+ "content-type", "image/jpeg"
643
+ )
644
+ return f"data:{content_type};base64,{base64_data}"
645
+ return base64_data
646
+ else:
647
+ raise ValueError(f"HTTP {response.status}")
648
+ except Exception as e:
649
+ raise ValueError(
650
+ f"处理URL失败: {safe_repr_source(url)}, 错误: {safe_repr_error(str(e))}"
651
+ )
652
+
653
+ async def process_single_source(
654
+ self,
655
+ source: str,
656
+ session: Optional[aiohttp.ClientSession] = None,
657
+ max_width: Optional[int] = None,
658
+ max_height: Optional[int] = None,
659
+ max_pixels: Optional[int] = None,
660
+ return_with_mime: bool = True,
661
+ ) -> str:
662
+ """Process a single image source (local file or URL)."""
663
+
664
+ # 1. 首先检查内存缓存
665
+ cached_result = self.memory_cache.get(
666
+ source,
667
+ max_width=max_width,
668
+ max_height=max_height,
669
+ max_pixels=max_pixels,
670
+ return_with_mime=return_with_mime,
671
+ )
672
+ if cached_result is not None:
673
+ return cached_result
674
+
675
+ # 获取处理锁
676
+ cache_key = self.memory_cache._generate_cache_key(
677
+ source,
678
+ max_width=max_width,
679
+ max_height=max_height,
680
+ max_pixels=max_pixels,
681
+ return_with_mime=return_with_mime,
682
+ )
683
+ file_lock = await self._get_processing_lock(cache_key)
684
+
685
+ async with file_lock:
686
+ # 再次检查内存缓存(双重检查锁定模式)
687
+ cached_result = self.memory_cache.get(
688
+ source,
689
+ max_width=max_width,
690
+ max_height=max_height,
691
+ max_pixels=max_pixels,
692
+ return_with_mime=return_with_mime,
693
+ )
694
+ if cached_result is not None:
695
+ return cached_result
696
+
697
+ # 开始性能计时
698
+ start_time = time.time()
699
+
700
+ try:
701
+ # 判断是本地文件还是URL
702
+ if os.path.exists(source) or source.startswith("file://"):
703
+ file_path = source[7:] if source.startswith("file://") else source
704
+
705
+ # 在线程池中处理本地文件
706
+ executor = self._get_executor()
707
+ loop = asyncio.get_running_loop()
708
+ result = await asyncio.wait_for(
709
+ loop.run_in_executor(
710
+ executor,
711
+ self._process_local_file_sync,
712
+ file_path,
713
+ max_width,
714
+ max_height,
715
+ max_pixels,
716
+ return_with_mime,
717
+ ),
718
+ timeout=self.config.single_file_timeout,
719
+ )
720
+ else:
721
+ # 处理URL(会自动使用磁盘缓存)
722
+ if session is None:
723
+ async with aiohttp.ClientSession() as temp_session:
724
+ result = await asyncio.wait_for(
725
+ self._process_url_async(
726
+ source,
727
+ temp_session,
728
+ max_width,
729
+ max_height,
730
+ max_pixels,
731
+ return_with_mime,
732
+ ),
733
+ timeout=self.config.network_timeout,
734
+ )
735
+ else:
736
+ result = await asyncio.wait_for(
737
+ self._process_url_async(
738
+ source,
739
+ session,
740
+ max_width,
741
+ max_height,
742
+ max_pixels,
743
+ return_with_mime,
744
+ ),
745
+ timeout=self.config.network_timeout,
746
+ )
747
+
748
+ # 将结果缓存到内存
749
+ self.memory_cache.put(
750
+ source,
751
+ result,
752
+ max_width=max_width,
753
+ max_height=max_height,
754
+ max_pixels=max_pixels,
755
+ return_with_mime=return_with_mime,
756
+ )
757
+
758
+ # 更新性能统计
759
+ processing_time = time.time() - start_time
760
+ self._total_processed += 1
761
+ self._total_processing_time += processing_time
762
+
763
+ return result
764
+
765
+ except asyncio.TimeoutError:
766
+ logger.warning(f"处理超时: {safe_repr_source(source)}")
767
+ return ""
768
+ except Exception as e:
769
+ logger.error(
770
+ f"处理失败: {safe_repr_source(source)}, 错误: {safe_repr_error(str(e))}"
771
+ )
772
+ return ""
773
+
774
+ async def process_batch(
775
+ self,
776
+ sources: List[str],
777
+ session: Optional[aiohttp.ClientSession] = None,
778
+ max_width: Optional[int] = None,
779
+ max_height: Optional[int] = None,
780
+ max_pixels: Optional[int] = None,
781
+ return_with_mime: bool = True,
782
+ ) -> List[str]:
783
+ """Process image sources in batch."""
784
+ if not sources:
785
+ return []
786
+
787
+ # 去重并保持顺序映射
788
+ unique_sources = []
789
+ source_indices = {}
790
+ for i, source in enumerate(sources):
791
+ if source not in source_indices:
792
+ source_indices[source] = []
793
+ unique_sources.append(source)
794
+ source_indices[source].append(i)
795
+
796
+ # 创建信号量控制并发
797
+ semaphore = asyncio.Semaphore(self.config.max_concurrent)
798
+
799
+ async def process_single_with_semaphore(source: str) -> Tuple[str, str]:
800
+ async with semaphore:
801
+ result = await self.process_single_source(
802
+ source, session, max_width, max_height, max_pixels, return_with_mime
803
+ )
804
+ return source, result
805
+
806
+ # 并发处理所有唯一源
807
+ tasks = [process_single_with_semaphore(source) for source in unique_sources]
808
+
809
+ try:
810
+ results = await asyncio.wait_for(
811
+ asyncio.gather(*tasks, return_exceptions=True),
812
+ timeout=self.config.batch_timeout,
813
+ )
814
+ except asyncio.TimeoutError:
815
+ logger.warning("批处理超时")
816
+ results = [(source, "") for source in unique_sources]
817
+
818
+ # 构建结果映射
819
+ result_mapping = {}
820
+ for result in results:
821
+ if isinstance(result, Exception):
822
+ continue
823
+ source, processed_result = result
824
+ result_mapping[source] = processed_result
825
+
826
+ # 根据原始顺序返回结果
827
+ final_results = []
828
+ for source in sources:
829
+ final_results.append(result_mapping.get(source, ""))
830
+
831
+ return final_results
832
+
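Duplicate sources are collapsed before processing and failures come back as empty strings, so the returned list lines up index-for-index with the input. A minimal editorial sketch (the file names are hypothetical):

import asyncio

from flexllm.msg_processors.unified_processor import (
    UnifiedImageProcessor,
    UnifiedProcessorConfig,
)

async def main():
    processor = UnifiedImageProcessor(UnifiedProcessorConfig.memory_optimized())
    sources = ["a.jpg", "b.png", "a.jpg"]  # hypothetical paths; the duplicate is processed once
    results = await processor.process_batch(sources, max_width=1024)
    for src, data in zip(sources, results):
        print(src, "ok" if data else "empty/failed", len(data))
    processor.cleanup()

asyncio.run(main())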
833
+ def get_cache_stats(self) -> Dict[str, Any]:
834
+ """Get cache statistics."""
835
+ stats = {"memory_cache": self.memory_cache.get_stats()}
836
+
837
+ # 如果启用了磁盘缓存,添加磁盘缓存统计
838
+ if self.config.enable_disk_cache and self.disk_cache_config and self.disk_cache_config.enabled:
839
+ disk_stats = self._get_disk_cache_stats()
840
+ stats["disk_cache"] = disk_stats
841
+
842
+ return stats
843
+
844
+ def _get_disk_cache_stats(self) -> Dict[str, Any]:
845
+ """获取磁盘缓存统计信息"""
846
+ if not self.disk_cache_config or not self.disk_cache_config.enabled:
847
+ return {"enabled": False}
848
+
849
+ try:
850
+ cache_dir = Path(self.disk_cache_config.cache_dir)
851
+ if not cache_dir.exists():
852
+ return {"enabled": True, "cached_files": 0, "total_size_mb": 0}
853
+
854
+ # 统计缓存文件
855
+ image_files = list(cache_dir.glob("*"))
856
+ image_files = [f for f in image_files if f.suffix.lower() in ['.jpg', '.jpeg', '.png', '.webp', '.gif']]
857
+ error_files = list(cache_dir.glob("*.error"))
858
+
859
+ # 计算总大小
860
+ total_size = sum(f.stat().st_size for f in image_files if f.is_file())
861
+
862
+ return {
863
+ "enabled": True,
864
+ "cache_dir": str(cache_dir),
865
+ "cached_images": len(image_files),
866
+ "error_cache_files": len(error_files),
867
+ "total_files": len(image_files) + len(error_files),
868
+ "total_size_mb": total_size / 1024 / 1024,
869
+ "force_refresh": self.disk_cache_config.force_refresh,
870
+ "retry_failed": self.disk_cache_config.retry_failed,
871
+ }
872
+ except Exception as e:
873
+ return {"enabled": True, "error": str(e)}
874
+
875
+ def get_performance_stats(self) -> Dict[str, Any]:
876
+ """Get performance statistics."""
877
+ uptime = time.time() - self._start_time
878
+ avg_processing_time = (
879
+ self._total_processing_time / self._total_processed
880
+ if self._total_processed > 0
881
+ else 0
882
+ )
883
+ throughput = self._total_processed / uptime if uptime > 0 else 0
884
+
885
+ return {
886
+ "total_processed": self._total_processed,
887
+ "total_processing_time": self._total_processing_time,
888
+ "uptime_seconds": uptime,
889
+ "avg_processing_time": avg_processing_time,
890
+ "throughput_per_second": throughput,
891
+ "cache_stats": self.get_cache_stats(),
892
+ "config": {
893
+ "max_workers": self.config.max_workers,
894
+ "max_concurrent": self.config.max_concurrent,
895
+ "memory_cache_size_mb": self.config.memory_cache_size_mb,
896
+ "jpeg_quality": self.config.jpeg_quality,
897
+ },
898
+ }
899
+
900
+ def clear_cache(self, clear_disk_cache: bool = False):
901
+ """Clear the caches."""
902
+ # 清空内存缓存
903
+ self.memory_cache.clear()
904
+
905
+ # 可选地清空磁盘缓存
906
+ if clear_disk_cache and self.disk_cache_config and self.disk_cache_config.enabled:
907
+ self._clear_disk_cache()
908
+
909
+ def _clear_disk_cache(self):
910
+ """清空磁盘缓存"""
911
+ try:
912
+ cache_dir = Path(self.disk_cache_config.cache_dir)
913
+ if cache_dir.exists():
914
+ # 删除所有缓存文件
915
+ for cache_file in cache_dir.iterdir():
916
+ if cache_file.is_file():
917
+ cache_file.unlink()
918
+
919
+ logger.info(f"已清空磁盘缓存目录: {cache_dir}")
920
+ except Exception as e:
921
+ logger.warning(f"清空磁盘缓存失败: {e}")
922
+
923
+ def cleanup(self):
924
+ """Clean up resources."""
925
+ try:
926
+ if self.executor:
927
+ self.executor.shutdown(wait=True)
928
+ self.clear_cache()
929
+ except Exception:
930
+ pass
931
+
932
+
933
+ # 全局处理器实例
934
+ _global_unified_processor = None
935
+ _unified_processor_lock = threading.Lock()
936
+
937
+
938
+ def get_global_unified_processor(
939
+ config: Optional[UnifiedProcessorConfig] = None,
940
+ ) -> UnifiedImageProcessor:
941
+ """Get the global unified processor instance (singleton)."""
942
+ global _global_unified_processor
943
+
944
+ if _global_unified_processor is None:
945
+ with _unified_processor_lock:
946
+ if _global_unified_processor is None:
947
+ _global_unified_processor = UnifiedImageProcessor(config)
948
+
949
+ return _global_unified_processor
950
+
951
+
952
+ async def process_content_recursive(
953
+ content: Any,
954
+ session: Optional[aiohttp.ClientSession] = None,
955
+ processor: Optional[UnifiedImageProcessor] = None,
956
+ **kwargs,
957
+ ):
958
+ """Recursively process image URLs inside content."""
959
+ if processor is None:
960
+ processor = get_global_unified_processor()
961
+
962
+ if isinstance(content, dict):
963
+ for key, value in content.items():
964
+ if key == "url" and isinstance(value, str):
965
+ # 处理图像URL
966
+ try:
967
+ base64_data = await processor.process_single_source(
968
+ value, session, **kwargs
969
+ )
970
+ if base64_data:
971
+ content[key] = base64_data
972
+ except Exception as e:
973
+ logger.error(
974
+ f"处理URL失败 {safe_repr_source(value)}: {safe_repr_error(str(e))}"
975
+ )
976
+ else:
977
+ await process_content_recursive(value, session, processor, **kwargs)
978
+ elif isinstance(content, list):
979
+ for item in content:
980
+ await process_content_recursive(item, session, processor, **kwargs)
981
+
982
+
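The recursion only rewrites string values stored under a "url" key, which matches OpenAI-style image_url content blocks. An illustrative message that would be rewritten in place (editorial; the path is hypothetical):

# Before preprocessing: the inner "url" holds a local path or a remote URL.
message = {
    "role": "user",
    "content": [
        {"type": "text", "text": "What is in this picture?"},
        {"type": "image_url", "image_url": {"url": "/data/images/example.jpg"}},
    ],
}
# After process_content_recursive(message, ...), that "url" value becomes a
# "data:image/...;base64,..." string; every other field is left untouched.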
983
+ async def unified_messages_preprocess(
984
+ messages: List[Dict[str, Any]],
985
+ inplace: bool = False,
986
+ processor_config: Optional[UnifiedProcessorConfig] = None,
987
+ **kwargs,
988
+ ) -> List[Dict[str, Any]]:
989
+ """
990
+ Unified preprocessing for a single message list.
991
+
992
+ Args:
993
+ messages: A single list of messages.
994
+ inplace: Whether to modify the messages in place.
995
+ processor_config: Processor configuration.
996
+ **kwargs: Additional processing parameters.
997
+
998
+ Returns:
999
+ The processed message list.
1000
+ """
1001
+ # 创建或获取处理器
1002
+ if processor_config:
1003
+ processor = UnifiedImageProcessor(processor_config)
1004
+ else:
1005
+ processor = get_global_unified_processor()
1006
+
1007
+ try:
1008
+ # 如果不是原地修改,创建副本
1009
+ if not inplace:
1010
+ messages = deepcopy(messages)
1011
+
1012
+ # 使用HTTP会话处理所有图像
1013
+ async with aiohttp.ClientSession() as session:
1014
+ # 递归处理所有消息内容
1015
+ for message in messages:
1016
+ await process_content_recursive(message, session, processor, **kwargs)
1017
+
1018
+ return messages
1019
+
1020
+ except Exception as e:
1021
+ logger.error(f"消息预处理失败: {e}")
1022
+ return messages
1023
+
1024
+
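A minimal end-to-end editorial sketch for one conversation, reusing the message shape shown above (the path is hypothetical):

import asyncio

from flexllm.msg_processors.unified_processor import unified_messages_preprocess

async def main():
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Describe this image."},
                {"type": "image_url", "image_url": {"url": "/data/images/example.jpg"}},
            ],
        }
    ]
    processed = await unified_messages_preprocess(messages, max_width=1024, max_pixels=1_000_000)
    # With inplace=False (the default), the original messages list is left untouched.
    print(processed[0]["content"][1]["image_url"]["url"][:60])

asyncio.run(main())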
1025
+ async def unified_batch_messages_preprocess(
1026
+ messages_list: Union[List[List[Dict[str, Any]]], Any],
1027
+ max_concurrent: int = 10,
1028
+ inplace: bool = False,
1029
+ processor_config: Optional[UnifiedProcessorConfig] = None,
1030
+ as_iterator: bool = False,
1031
+ progress_callback: Optional[Callable] = None,
1032
+ show_progress: bool = False,
1033
+ progress_desc: str = "统一处理消息",
1034
+ max_width: Optional[int] = None,
1035
+ max_height: Optional[int] = None,
1036
+ max_pixels: Optional[int] = None,
1037
+ **kwargs,
1038
+ ) -> Union[List[List[Dict[str, Any]]], Any]:
1039
+ """
1040
+ Unified batch message preprocessing function.
1041
+
1042
+ Fully compatible with the messages_processor.py API; supports high-performance processing of local files and URLs.
1043
+
1044
+ Args:
1045
+ messages_list: A list of message lists; may be a list, an iterator, or an async iterator.
1046
+ max_concurrent: Maximum number of concurrent batches.
1047
+ inplace: Whether to modify the messages in place.
1048
+ processor_config: Processor configuration.
1049
+ as_iterator: Whether to return an async iterator.
1050
+ progress_callback: Progress callback function.
1051
+ show_progress: Whether to show a progress bar.
1052
+ progress_desc: Progress bar description.
1053
+ max_width: Maximum width.
1054
+ max_height: Maximum height.
1055
+ max_pixels: Maximum number of pixels.
1056
+ **kwargs: Additional processing parameters.
1057
+
1058
+ Returns:
1059
+ The processed message lists, or an async iterator over them.
1060
+ """
1061
+
1062
+ # 创建或获取处理器
1063
+ if processor_config:
1064
+ processor = UnifiedImageProcessor(processor_config)
1065
+ else:
1066
+ processor = get_global_unified_processor()
1067
+
1068
+ logger.debug(f"{processor.config=}")
1069
+
1070
+ # 创建处理单个消息列表的函数
1071
+ async def process_single_batch(messages, semaphore, index=None):
1072
+ async with semaphore:
1073
+ try:
1074
+ processed_messages = await unified_messages_preprocess(
1075
+ messages,
1076
+ inplace=inplace,
1077
+ processor_config=processor_config,
1078
+ max_width=max_width,
1079
+ max_height=max_height,
1080
+ max_pixels=max_pixels,
1081
+ **kwargs,
1082
+ )
1083
+ except Exception as e:
1084
+ logger.error(f"批处理错误 {index}: {e}")
1085
+ processed_messages = messages
1086
+ return processed_messages, index
1087
+
1088
+ # 进度报告函数
1089
+ def report_progress(current: int, total: int, start_time: float = None):
1090
+ if progress_callback:
1091
+ try:
1092
+ # 计算时间信息
1093
+ elapsed_time = time.time() - start_time if start_time else 0
1094
+
1095
+ # 创建扩展的进度信息
1096
+ progress_info = {
1097
+ "current": current,
1098
+ "total": total,
1099
+ "percentage": (current / total * 100) if total > 0 else 0,
1100
+ "elapsed_time": elapsed_time,
1101
+ "estimated_total_time": (elapsed_time / current * total)
1102
+ if current > 0
1103
+ else 0,
1104
+ "estimated_remaining_time": (
1105
+ elapsed_time / current * (total - current)
1106
+ )
1107
+ if current > 0
1108
+ else 0,
1109
+ "rate": current / elapsed_time if elapsed_time > 0 else 0,
1110
+ }
1111
+
1112
+ # 如果回调函数接受单个参数,传递扩展信息;否则保持兼容性
1113
+ import inspect
1114
+
1115
+ sig = inspect.signature(progress_callback)
1116
+ if len(sig.parameters) == 1:
1117
+ progress_callback(progress_info)
1118
+ else:
1119
+ progress_callback(current, total)
1120
+
1121
+ except Exception as e:
1122
+ logger.warning(f"进度回调函数执行失败: {e}")
1123
+
1124
+ # 如果要求返回迭代器
1125
+ if as_iterator:
1126
+
1127
+ async def process_iterator():
1128
+ semaphore = asyncio.Semaphore(max_concurrent)
1129
+
1130
+ # 检查是否为异步迭代器
1131
+ is_async_iterator = hasattr(messages_list, "__aiter__")
1132
+
1133
+ processed_count = 0
1134
+ total_count = None
1135
+ messages_to_process = messages_list
1136
+
1137
+ # 如果可以获取总数,先计算总数
1138
+ if not is_async_iterator and hasattr(messages_list, "__len__"):
1139
+ total_count = len(messages_list)
1140
+ elif not is_async_iterator:
1141
+ # 对于迭代器,先转换为列表获取长度
1142
+ messages_list_converted = list(messages_list)
1143
+ total_count = len(messages_list_converted)
1144
+ messages_to_process = iter(messages_list_converted)
1145
+
1146
+ # 创建进度条
1147
+ pbar = None
1148
+ start_time = time.time()
1149
+ if show_progress and TQDM_AVAILABLE and total_count:
1150
+ bar_format = "{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]"
1151
+ pbar = tqdm(
1152
+ total=total_count,
1153
+ desc=progress_desc,
1154
+ unit="批次",
1155
+ bar_format=bar_format,
1156
+ ncols=100,
1157
+ miniters=1,
1158
+ )
1159
+
1160
+ try:
1161
+ # 处理异步迭代器
1162
+ if is_async_iterator:
1163
+ pending_tasks = []
1164
+ task_index = 0
1165
+ async for messages in messages_to_process:
1166
+ # 如果已经达到最大并发数,等待一个任务完成
1167
+ if len(pending_tasks) >= max_concurrent:
1168
+ done, pending_set = await asyncio.wait(
1169
+ pending_tasks, return_when=asyncio.FIRST_COMPLETED
1170
+ )
+ # asyncio.wait returns sets; convert back to a list so .append below keeps working
+ pending_tasks = list(pending_set)
1171
+ for task in done:
1172
+ result, _ = await task
1173
+ processed_count += 1
1174
+ if pbar:
1175
+ pbar.update(1)
1176
+ report_progress(
1177
+ processed_count,
1178
+ total_count or processed_count,
1179
+ start_time,
1180
+ )
1181
+ yield result
1182
+
1183
+ # 创建新任务
1184
+ task = asyncio.create_task(
1185
+ process_single_batch(messages, semaphore, task_index)
1186
+ )
1187
+ pending_tasks.append(task)
1188
+ task_index += 1
1189
+
1190
+ # 等待所有剩余任务完成
1191
+ if pending_tasks:
1192
+ for task in asyncio.as_completed(pending_tasks):
1193
+ result, _ = await task
1194
+ processed_count += 1
1195
+ if pbar:
1196
+ pbar.update(1)
1197
+ report_progress(
1198
+ processed_count,
1199
+ total_count or processed_count,
1200
+ start_time,
1201
+ )
1202
+ yield result
1203
+
1204
+ # 处理同步迭代器或列表
1205
+ else:
1206
+ # 转换为列表以避免消耗迭代器
1207
+ if not isinstance(messages_to_process, (list, tuple)):
1208
+ messages_list_converted = list(messages_to_process)
1209
+ else:
1210
+ messages_list_converted = messages_to_process
1211
+
1212
+ if not total_count:
1213
+ total_count = len(messages_list_converted)
1214
+ if pbar:
1215
+ pbar.total = total_count
1216
+
1217
+ # 分批处理
1218
+ for i in range(0, len(messages_list_converted), max_concurrent):
1219
+ batch = messages_list_converted[i : i + max_concurrent]
1220
+ tasks = [
1221
+ process_single_batch(messages, semaphore, i + j)
1222
+ for j, messages in enumerate(batch)
1223
+ ]
1224
+ results = await asyncio.gather(*tasks)
1225
+
1226
+ for result, _ in results:
1227
+ processed_count += 1
1228
+ if pbar:
1229
+ pbar.update(1)
1230
+ report_progress(processed_count, total_count, start_time)
1231
+ yield result
1232
+
1233
+ finally:
1234
+ if pbar:
1235
+ pbar.close()
1236
+
1237
+ return process_iterator()
1238
+
1239
+ # 原始实现,返回列表
1240
+ else:
1241
+ semaphore = asyncio.Semaphore(max_concurrent)
1242
+
1243
+ # 检查是否为异步迭代器
1244
+ is_async_iterator = hasattr(messages_list, "__aiter__")
1245
+
1246
+ # 转换为列表
1247
+ if is_async_iterator:
1248
+ messages_list_converted = []
1249
+ async for messages in messages_list:
1250
+ messages_list_converted.append(messages)
1251
+ elif not isinstance(messages_list, (list, tuple)):
1252
+ messages_list_converted = list(messages_list)
1253
+ else:
1254
+ messages_list_converted = messages_list
1255
+
1256
+ if not messages_list_converted:
1257
+ return []
1258
+
1259
+ total_count = len(messages_list_converted)
1260
+ processed_count = 0
1261
+
1262
+ # 创建进度条
1263
+ pbar = None
1264
+ start_time = time.time()
1265
+ if show_progress and TQDM_AVAILABLE:
1266
+ bar_format = (
1267
+ "{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]"
1268
+ )
1269
+ pbar = tqdm(
1270
+ total=total_count,
1271
+ desc=progress_desc,
1272
+ unit=" items",
1273
+ bar_format=bar_format,
1274
+ ncols=100,
1275
+ miniters=1,
1276
+ )
1277
+
1278
+ try:
1279
+ # 分批处理以实现进度更新
1280
+ results = []
1281
+ for i in range(0, len(messages_list_converted), max_concurrent):
1282
+ batch = messages_list_converted[i : i + max_concurrent]
1283
+ tasks = [
1284
+ process_single_batch(messages, semaphore, i + j)
1285
+ for j, messages in enumerate(batch)
1286
+ ]
1287
+ batch_results = await asyncio.gather(*tasks)
1288
+
1289
+ for result, _ in batch_results:
1290
+ results.append(result)
1291
+ processed_count += 1
1292
+ if pbar:
1293
+ pbar.update(1)
1294
+ report_progress(processed_count, total_count, start_time)
1295
+
1296
+ return results
1297
+
1298
+ finally:
1299
+ if pbar:
1300
+ pbar.close()
1301
+
1302
+
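An editorial sketch of the batch entry point with a one-argument progress callback, which (per the signature check above) receives the extended progress dict (paths are hypothetical):

import asyncio

from flexllm.msg_processors.unified_processor import unified_batch_messages_preprocess

def on_progress(info):  # one-parameter callbacks receive the extended progress dict
    print(f"{info['current']}/{info['total']} ({info['percentage']:.0f}%)")

async def main():
    conversations = [
        [{"role": "user", "content": [
            {"type": "image_url", "image_url": {"url": f"/data/images/img_{i}.jpg"}},
        ]}]
        for i in range(8)
    ]
    processed = await unified_batch_messages_preprocess(
        conversations, max_concurrent=4, progress_callback=on_progress, max_width=768
    )
    print(len(processed), "conversations processed")

asyncio.run(main())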
1303
+ # 向后兼容的别名和完整的API兼容性
1304
+ messages_preprocess = unified_messages_preprocess
1305
+ batch_messages_preprocess = unified_batch_messages_preprocess
1306
+ batch_process_messages = unified_batch_messages_preprocess
1307
+
1308
+ # 专用别名
1309
+ optimized_batch_messages_preprocess = unified_batch_messages_preprocess
1310
+ improved_batch_messages_preprocess = unified_batch_messages_preprocess
1311
+ opencv_batch_messages_preprocess = unified_batch_messages_preprocess
1312
+
1313
+
1314
+ # 便捷函数
1315
+ async def unified_encode_image_to_base64(
1316
+ image_source: Union[str, List[str]],
1317
+ session: Optional[aiohttp.ClientSession] = None,
1318
+ max_width: Optional[int] = None,
1319
+ max_height: Optional[int] = None,
1320
+ max_pixels: Optional[int] = None,
1321
+ return_with_mime: bool = True,
1322
+ processor_config: Optional[UnifiedProcessorConfig] = None,
1323
+ ) -> Union[str, List[str]]:
1324
+ """
1325
+ Unified image encoding function supporting local files and URLs.
1326
+
1327
+ Args:
1328
+ image_source: The image source; a single path/URL or a list of them.
1329
+ session: HTTP session (optional).
1330
+ max_width: Maximum width.
1331
+ max_height: Maximum height.
1332
+ max_pixels: Maximum number of pixels.
1333
+ return_with_mime: Whether to return the result with a MIME data-URI prefix.
1334
+ processor_config: Processor configuration.
1335
+
1336
+ Returns:
1337
+ Base64-encoded image data.
1338
+ """
1339
+ processor = (
1340
+ UnifiedImageProcessor(processor_config)
1341
+ if processor_config
1342
+ else get_global_unified_processor()
1343
+ )
1344
+
1345
+ if isinstance(image_source, str):
1346
+ return await processor.process_single_source(
1347
+ image_source, session, max_width, max_height, max_pixels, return_with_mime
1348
+ )
1349
+ elif isinstance(image_source, list):
1350
+ return await processor.process_batch(
1351
+ image_source, session, max_width, max_height, max_pixels, return_with_mime
1352
+ )
1353
+ else:
1354
+ raise ValueError(f"不支持的图像源类型: {type(image_source)}")
1355
+
1356
+
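A minimal editorial sketch of the convenience encoder, which accepts either a single source or a list (the URL and paths are placeholders):

import asyncio

from flexllm.msg_processors.unified_processor import unified_encode_image_to_base64

async def main():
    one = await unified_encode_image_to_base64("https://example.com/cat.jpg", max_width=512)
    many = await unified_encode_image_to_base64(["a.jpg", "b.jpg"], max_pixels=500_000)
    print(len(one), [len(x) for x in many])

asyncio.run(main())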
1357
+ # 向后兼容别名
1358
+ encode_image_to_base64 = unified_encode_image_to_base64
1359
+ safe_optimized_encode_image_to_base64 = unified_encode_image_to_base64
1360
+
1361
+
1362
+ def cleanup_global_unified_processor():
1363
+ """Clean up the global unified processor."""
1364
+ global _global_unified_processor
1365
+ if _global_unified_processor:
1366
+ _global_unified_processor.cleanup()
1367
+ _global_unified_processor = None
1368
+
1369
+
1370
+ # 示例用法
1371
+ if __name__ == "__main__":
1372
+
1373
+ async def test_unified_processor():
1374
+ config = UnifiedProcessorConfig.high_performance()
1375
+ processor = UnifiedImageProcessor(config)
1376
+
1377
+ # 测试本地文件
1378
+ # local_result = await processor.process_single_source(
1379
+ # "test_image.jpg", max_width=800, max_height=600
1380
+ # )
1381
+ # print(f"本地文件处理完成,长度: {len(local_result)}")
1382
+
1383
+ # 测试URL
1384
+ async with aiohttp.ClientSession() as session:
1385
+ url_result = await processor.process_single_source(
1386
+ "https://p2.itc.cn/q_70/images03/20230402/1853ae33e80b499ebc120426a80b19d3.jpeg",
1387
+ session,
1388
+ max_width=80,
1389
+ max_height=60,
1390
+ )
1391
+ # 安全打印,避免打印整个base64数据
1392
+ print(f"URL处理完成,长度: {len(url_result)}")
1393
+ if len(url_result) > 100:
1394
+ print(f"结果预览: {url_result[:100]}...")
1395
+ else:
1396
+ print(f"完整结果: {url_result}")
1397
+
1398
+ # 获取统计信息
1399
+ stats = processor.get_cache_stats()
1400
+ print(f"缓存统计: {stats}")
1401
+
1402
+ processor.cleanup()
1403
+
1404
+ asyncio.run(test_unified_processor())