flexllm 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. flexllm/__init__.py +224 -0
  2. flexllm/__main__.py +1096 -0
  3. flexllm/async_api/__init__.py +9 -0
  4. flexllm/async_api/concurrent_call.py +100 -0
  5. flexllm/async_api/concurrent_executor.py +1036 -0
  6. flexllm/async_api/core.py +373 -0
  7. flexllm/async_api/interface.py +12 -0
  8. flexllm/async_api/progress.py +277 -0
  9. flexllm/base_client.py +988 -0
  10. flexllm/batch_tools/__init__.py +16 -0
  11. flexllm/batch_tools/folder_processor.py +317 -0
  12. flexllm/batch_tools/table_processor.py +363 -0
  13. flexllm/cache/__init__.py +10 -0
  14. flexllm/cache/response_cache.py +293 -0
  15. flexllm/chain_of_thought_client.py +1120 -0
  16. flexllm/claudeclient.py +402 -0
  17. flexllm/client_pool.py +698 -0
  18. flexllm/geminiclient.py +563 -0
  19. flexllm/llm_client.py +523 -0
  20. flexllm/llm_parser.py +60 -0
  21. flexllm/mllm_client.py +559 -0
  22. flexllm/msg_processors/__init__.py +174 -0
  23. flexllm/msg_processors/image_processor.py +729 -0
  24. flexllm/msg_processors/image_processor_helper.py +485 -0
  25. flexllm/msg_processors/messages_processor.py +341 -0
  26. flexllm/msg_processors/unified_processor.py +1404 -0
  27. flexllm/openaiclient.py +256 -0
  28. flexllm/pricing/__init__.py +104 -0
  29. flexllm/pricing/data.json +1201 -0
  30. flexllm/pricing/updater.py +223 -0
  31. flexllm/provider_router.py +213 -0
  32. flexllm/token_counter.py +270 -0
  33. flexllm/utils/__init__.py +1 -0
  34. flexllm/utils/core.py +41 -0
  35. flexllm-0.3.3.dist-info/METADATA +573 -0
  36. flexllm-0.3.3.dist-info/RECORD +39 -0
  37. flexllm-0.3.3.dist-info/WHEEL +4 -0
  38. flexllm-0.3.3.dist-info/entry_points.txt +3 -0
  39. flexllm-0.3.3.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,293 @@
1
+ #! /usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ LLM 响应缓存模块
6
+
7
+ 使用 FlaxKV2 作为存储后端,提供高性能缓存。
8
+ 支持两种模式:
9
+ - IPC 模式(默认):通过 Unix Socket 访问,支持多进程共享缓存
10
+ - 本地模式:直接读写 LevelDB,单进程场景
11
+ """
12
+
13
+ import os
14
+ from dataclasses import dataclass, field
15
+ from typing import Optional, Dict, Any, List, TYPE_CHECKING
16
+
17
+ from loguru import logger
18
+
19
+ from ..token_counter import messages_hash
20
+
21
+ if TYPE_CHECKING:
22
+ from flaxkv2 import FlaxKV
23
+
24
+
25
DEFAULT_CACHE_DIR = os.path.expanduser("~/.cache/maque/llm_response")


@dataclass
class ResponseCacheConfig:
    """
    Configuration for the LLM response cache.

    Attributes:
        enabled: Whether caching is enabled.
        cache_dir: Cache directory (local mode) or data directory (IPC mode).
        ttl: Entry lifetime in seconds; 0 means entries never expire.
        use_ipc: Whether to use IPC mode (default True, so the cache can be
            shared between processes).
    """
    enabled: bool = False
    cache_dir: str = DEFAULT_CACHE_DIR
    ttl: int = 86400  # 24 hours
    use_ipc: bool = True  # IPC mode is the default

    @classmethod
    def disabled(cls) -> "ResponseCacheConfig":
        """Return a configuration with caching turned off."""
        return cls(enabled=False)

    @classmethod
    def default(cls) -> "ResponseCacheConfig":
        """Return the default configuration, which has caching turned off."""
        # Delegate so "default" and "disabled" can never drift apart.
        return cls.disabled()

    @classmethod
    def with_ttl(
        cls,
        ttl: int = 3600,
        cache_dir: Optional[str] = None,
        use_ipc: bool = True,
    ) -> "ResponseCacheConfig":
        """
        Enable caching with a custom TTL (IPC mode by default).

        Args:
            ttl: Entry lifetime in seconds.
            cache_dir: Cache directory; falls back to DEFAULT_CACHE_DIR when
                None or empty.
            use_ipc: Whether to use IPC mode (default True).
        """
        return cls(
            enabled=True,
            ttl=ttl,
            cache_dir=cache_dir or DEFAULT_CACHE_DIR,
            use_ipc=use_ipc,
        )

    @classmethod
    def persistent(
        cls,
        cache_dir: Optional[str] = DEFAULT_CACHE_DIR,
        use_ipc: bool = True,
    ) -> "ResponseCacheConfig":
        """
        Enable caching with entries that never expire (IPC mode by default).

        Args:
            cache_dir: Cache directory; falls back to DEFAULT_CACHE_DIR when
                None or empty, like the other factory methods.
            use_ipc: Whether to use IPC mode (default True).
        """
        return cls(
            enabled=True,
            cache_dir=cache_dir or DEFAULT_CACHE_DIR,
            ttl=0,  # 0 is the "never expires" marker
            use_ipc=use_ipc,
        )

    @classmethod
    def ipc(
        cls, ttl: int = 86400, cache_dir: Optional[str] = None
    ) -> "ResponseCacheConfig":
        """
        Enable caching in IPC mode (shared between processes, the default mode).

        Communicates over a Unix socket and starts the daemon server
        automatically. Intended for multiple processes calling the LLM API
        concurrently.

        Args:
            ttl: Entry lifetime in seconds, 24 hours by default.
            cache_dir: Data directory; falls back to DEFAULT_CACHE_DIR when
                None or empty.
        """
        return cls(
            enabled=True,
            ttl=ttl,
            cache_dir=cache_dir or DEFAULT_CACHE_DIR,
            use_ipc=True,
        )

    @classmethod
    def local(
        cls, ttl: int = 86400, cache_dir: Optional[str] = None
    ) -> "ResponseCacheConfig":
        """
        Enable caching in local mode (single process).

        Reads and writes LevelDB directly and cannot be shared between
        processes. Intended for single-process use, where it is slightly
        faster than IPC mode.

        Args:
            ttl: Entry lifetime in seconds, 24 hours by default.
            cache_dir: Cache directory; falls back to DEFAULT_CACHE_DIR when
                None or empty.
        """
        return cls(
            enabled=True,
            ttl=ttl,
            cache_dir=cache_dir or DEFAULT_CACHE_DIR,
            use_ipc=False,
        )
122
+
123
+
124
class ResponseCache:
    """
    LLM response cache.

    Stores responses in FlaxKV2 with optional TTL expiry.

    Two modes are supported:
    - IPC mode (default): talks over a Unix socket, starts the daemon
      automatically, and can be shared between processes.
    - Local mode: reads and writes LevelDB directly; suited to a single process.

    When the configuration is disabled no backend is opened and every
    operation is a no-op (``get`` returns None, ``clear`` returns 0).
    """

    def __init__(self, config: Optional["ResponseCacheConfig"] = None):
        """
        Open the cache backend described by ``config``.

        Args:
            config: Cache configuration; a disabled configuration is used
                when omitted.

        Raises:
            ImportError: Caching is enabled but ``flaxkv2`` is not installed.
        """
        self.config = config or ResponseCacheConfig.disabled()
        self._stats = {"hits": 0, "misses": 0}
        self._db: Optional["FlaxKV"] = None

        if not self.config.enabled:
            return

        # Imported lazily so flaxkv2 is only required when caching is enabled.
        try:
            from flaxkv2 import FlaxKV
        except ImportError as exc:
            raise ImportError(
                "缓存功能需要安装 flaxkv2。请运行: pip install flexllm[cache]"
            ) from exc

        # A non-positive TTL means "never expire", passed to the backend as None.
        ttl = self.config.ttl if self.config.ttl > 0 else None

        if self.config.use_ipc:
            # IPC mode: access through a Unix socket, daemon started automatically.
            logger.debug(f"使用 IPC 模式缓存: data_dir={self.config.cache_dir}")
            self._db = FlaxKV(
                "llm_cache",
                self.config.cache_dir,
                use_ipc=True,  # start the daemon automatically
                default_ttl=ttl,
            )
        else:
            # Local mode: read and write LevelDB directly.
            logger.debug(f"使用本地模式缓存: cache_dir={self.config.cache_dir}")
            self._db = FlaxKV(
                "llm_cache",
                self.config.cache_dir,
                default_ttl=ttl,
                read_cache_size=10000,
                write_buffer_size=100,
                async_flush=True,
            )

    def _make_key(self, messages: List[Dict], model: str, **kwargs) -> str:
        """Build the cache key from the messages, model and extra parameters."""
        return messages_hash(messages, model, **kwargs)

    def get(
        self,
        messages: List[Dict],
        model: str = "",
        **kwargs
    ) -> Optional[Any]:
        """
        Look up a cached response.

        Args:
            messages: Message list.
            model: Model name.
            **kwargs: Other request parameters (temperature, max_tokens, ...).

        Returns:
            The cached response, or None on a miss or when caching is disabled.
            Lookups made while caching is disabled are not counted in the
            hit/miss statistics.
        """
        if self._db is None:
            return None

        cache_key = self._make_key(messages, model, **kwargs)
        result = self._db.get(cache_key)

        if result is not None:
            self._stats["hits"] += 1
        else:
            self._stats["misses"] += 1

        return result

    def set(
        self,
        messages: List[Dict],
        response: Any,
        model: str = "",
        **kwargs
    ) -> None:
        """
        Store a response in the cache.

        Does nothing when caching is disabled.

        Args:
            messages: Message list.
            response: API response to store.
            model: Model name.
            **kwargs: Other request parameters.
        """
        if self._db is None:
            return

        cache_key = self._make_key(messages, model, **kwargs)
        self._db[cache_key] = response

    def get_batch(
        self,
        messages_list: List[List[Dict]],
        model: str = "",
        **kwargs
    ) -> tuple[List[Optional[Any]], List[int]]:
        """
        Look up several message lists at once.

        Args:
            messages_list: One message list per request.
            model: Model name.
            **kwargs: Other request parameters, applied to every lookup.

        Returns:
            ``(cached_responses, uncached_indices)``: the first list has one
            entry per input (None for a miss), the second holds the positions
            of the misses.
        """
        cached = [self.get(messages, model, **kwargs) for messages in messages_list]
        uncached_indices = [i for i, result in enumerate(cached) if result is None]
        return cached, uncached_indices

    def set_batch(
        self,
        messages_list: List[List[Dict]],
        responses: List[Any],
        model: str = "",
        **kwargs
    ) -> None:
        """
        Store several responses at once.

        Inputs are paired positionally; None responses are skipped, and if the
        two lists differ in length the surplus items of the longer one are
        ignored.
        """
        for messages, response in zip(messages_list, responses):
            if response is not None:
                self.set(messages, response, model, **kwargs)

    def clear(self) -> int:
        """
        Delete every entry from the cache.

        Returns:
            The number of keys removed (0 when caching is disabled).
        """
        if self._db is None:
            return 0
        # Snapshot the keys first so deletion does not disturb the iteration.
        keys = list(self._db.keys())
        for key in keys:
            del self._db[key]
        return len(keys)

    def close(self):
        """Close the backend; safe to call more than once."""
        # Detach the handle before closing so a failing close() cannot leave a
        # half-closed backend attached and be retried by a later close().
        db, self._db = self._db, None
        if db is not None:
            db.close()

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()

    @property
    def stats(self) -> Dict[str, Any]:
        """
        Return cache statistics.

        Returns:
            A dict with ``hits``, ``misses``, ``total`` and ``hit_rate``
            (a float rounded to 4 decimals, 0.0 when nothing was looked up).
        """
        hits = self._stats["hits"]
        total = hits + self._stats["misses"]
        hit_rate = hits / total if total > 0 else 0.0
        return {
            **self._stats,
            "total": total,
            "hit_rate": round(hit_rate, 4),
        }