flexllm-0.3.3-py3-none-any.whl
- flexllm/__init__.py +224 -0
- flexllm/__main__.py +1096 -0
- flexllm/async_api/__init__.py +9 -0
- flexllm/async_api/concurrent_call.py +100 -0
- flexllm/async_api/concurrent_executor.py +1036 -0
- flexllm/async_api/core.py +373 -0
- flexllm/async_api/interface.py +12 -0
- flexllm/async_api/progress.py +277 -0
- flexllm/base_client.py +988 -0
- flexllm/batch_tools/__init__.py +16 -0
- flexllm/batch_tools/folder_processor.py +317 -0
- flexllm/batch_tools/table_processor.py +363 -0
- flexllm/cache/__init__.py +10 -0
- flexllm/cache/response_cache.py +293 -0
- flexllm/chain_of_thought_client.py +1120 -0
- flexllm/claudeclient.py +402 -0
- flexllm/client_pool.py +698 -0
- flexllm/geminiclient.py +563 -0
- flexllm/llm_client.py +523 -0
- flexllm/llm_parser.py +60 -0
- flexllm/mllm_client.py +559 -0
- flexllm/msg_processors/__init__.py +174 -0
- flexllm/msg_processors/image_processor.py +729 -0
- flexllm/msg_processors/image_processor_helper.py +485 -0
- flexllm/msg_processors/messages_processor.py +341 -0
- flexllm/msg_processors/unified_processor.py +1404 -0
- flexllm/openaiclient.py +256 -0
- flexllm/pricing/__init__.py +104 -0
- flexllm/pricing/data.json +1201 -0
- flexllm/pricing/updater.py +223 -0
- flexllm/provider_router.py +213 -0
- flexllm/token_counter.py +270 -0
- flexllm/utils/__init__.py +1 -0
- flexllm/utils/core.py +41 -0
- flexllm-0.3.3.dist-info/METADATA +573 -0
- flexllm-0.3.3.dist-info/RECORD +39 -0
- flexllm-0.3.3.dist-info/WHEEL +4 -0
- flexllm-0.3.3.dist-info/entry_points.txt +3 -0
- flexllm-0.3.3.dist-info/licenses/LICENSE +201 -0
flexllm/cache/response_cache.py

@@ -0,0 +1,293 @@
#! /usr/bin/env python3
# -*- coding: utf-8 -*-

"""
LLM response cache module.

Uses FlaxKV2 as the storage backend to provide a high-performance cache.
Two modes are supported:
- IPC mode (default): accessed over a Unix socket, so the cache can be shared across processes
- Local mode: reads and writes LevelDB directly, for single-process use
"""

import os
from dataclasses import dataclass, field
from typing import Optional, Dict, Any, List, TYPE_CHECKING

from loguru import logger

from ..token_counter import messages_hash

if TYPE_CHECKING:
    from flaxkv2 import FlaxKV


DEFAULT_CACHE_DIR = os.path.expanduser("~/.cache/maque/llm_response")


@dataclass
class ResponseCacheConfig:
    """
    Response cache configuration.

    Attributes:
        enabled: whether caching is enabled
        cache_dir: cache directory (local mode) or data directory (IPC mode)
        ttl: cache expiration time in seconds; 0 means never expire
        use_ipc: whether to use IPC mode (default True, shares the cache across processes)
    """
    enabled: bool = False
    cache_dir: str = DEFAULT_CACHE_DIR
    ttl: int = 86400  # 24 hours
    use_ipc: bool = True  # IPC mode by default

    @classmethod
    def disabled(cls) -> "ResponseCacheConfig":
        """Caching disabled."""
        return cls(enabled=False)

    @classmethod
    def default(cls) -> "ResponseCacheConfig":
        """Default configuration: caching disabled."""
        return cls(enabled=False)

    @classmethod
    def with_ttl(
        cls,
        ttl: int = 3600,
        cache_dir: str = None,
        use_ipc: bool = True
    ) -> "ResponseCacheConfig":
        """
        Enable caching with a custom TTL (IPC mode by default).

        Args:
            ttl: expiration time in seconds
            cache_dir: cache directory
            use_ipc: whether to use IPC mode (default True)
        """
        return cls(
            enabled=True,
            ttl=ttl,
            cache_dir=cache_dir or DEFAULT_CACHE_DIR,
            use_ipc=use_ipc,
        )

    @classmethod
    def persistent(
        cls,
        cache_dir: str = DEFAULT_CACHE_DIR,
        use_ipc: bool = True
    ) -> "ResponseCacheConfig":
        """Persistent cache: entries never expire (IPC mode by default)."""
        return cls(enabled=True, cache_dir=cache_dir, ttl=0, use_ipc=use_ipc)

    @classmethod
    def ipc(cls, ttl: int = 86400, cache_dir: str = None) -> "ResponseCacheConfig":
        """
        IPC-mode cache (shared across processes; the default mode).

        Communicates over a Unix socket and starts the daemon server automatically.
        Suited to calling LLM APIs concurrently from multiple processes.

        Args:
            ttl: expiration time in seconds, default 24 hours
            cache_dir: data directory
        """
        return cls(
            enabled=True,
            ttl=ttl,
            cache_dir=cache_dir or DEFAULT_CACHE_DIR,
            use_ipc=True,
        )

    @classmethod
    def local(cls, ttl: int = 86400, cache_dir: str = None) -> "ResponseCacheConfig":
        """
        Local-mode cache (single process).

        Reads and writes LevelDB directly; the cache cannot be shared across processes.
        Suited to single-process use, with slightly better performance than IPC mode.

        Args:
            ttl: expiration time in seconds, default 24 hours
            cache_dir: cache directory
        """
        return cls(
            enabled=True,
            ttl=ttl,
            cache_dir=cache_dir or DEFAULT_CACHE_DIR,
            use_ipc=False,
        )


class ResponseCache:
    """
    LLM response cache.

    Backed by FlaxKV2; supports TTL expiration and high-performance reads and writes.

    Two modes are supported:
    - IPC mode (default): communicates over a Unix socket, auto-starts a daemon, shareable across processes
    - Local mode: reads and writes LevelDB directly, suited to a single process
    """

    def __init__(self, config: Optional[ResponseCacheConfig] = None):
        self.config = config or ResponseCacheConfig.disabled()
        self._stats = {"hits": 0, "misses": 0}
        self._db: Optional["FlaxKV"] = None

        if self.config.enabled:
            try:
                from flaxkv2 import FlaxKV
            except ImportError:
                raise ImportError(
                    "Caching requires flaxkv2. Run: pip install flexllm[cache]"
                )

            ttl = self.config.ttl if self.config.ttl > 0 else None

            if self.config.use_ipc:
                # IPC mode: access over a Unix socket, auto-start the daemon
                logger.debug(f"Using IPC-mode cache: data_dir={self.config.cache_dir}")
                self._db = FlaxKV(
                    "llm_cache",
                    self.config.cache_dir,
                    use_ipc=True,  # auto-start the daemon
                    default_ttl=ttl,
                )
            else:
                # Local mode: read and write LevelDB directly
                logger.debug(f"Using local-mode cache: cache_dir={self.config.cache_dir}")
                self._db = FlaxKV(
                    "llm_cache",
                    self.config.cache_dir,
                    default_ttl=ttl,
                    read_cache_size=10000,
                    write_buffer_size=100,
                    async_flush=True,
                )

    def _make_key(self, messages: List[Dict], model: str, **kwargs) -> str:
        """Build the cache key."""
        return messages_hash(messages, model, **kwargs)

    def get(
        self,
        messages: List[Dict],
        model: str = "",
        **kwargs
    ) -> Optional[Any]:
        """
        Fetch a cached response.

        Args:
            messages: list of messages
            model: model name
            **kwargs: other parameters (temperature, max_tokens, etc.)

        Returns:
            The cached response, or None on a miss.
        """
        if self._db is None:
            return None

        cache_key = self._make_key(messages, model, **kwargs)
        result = self._db.get(cache_key)

        if result is not None:
            self._stats["hits"] += 1
        else:
            self._stats["misses"] += 1

        return result

    def set(
        self,
        messages: List[Dict],
        response: Any,
        model: str = "",
        **kwargs
    ) -> None:
        """
        Store a response in the cache.

        Args:
            messages: list of messages
            response: API response
            model: model name
            **kwargs: other parameters
        """
        if self._db is None:
            return

        cache_key = self._make_key(messages, model, **kwargs)
        self._db[cache_key] = response

    def get_batch(
        self,
        messages_list: List[List[Dict]],
        model: str = "",
        **kwargs
    ) -> tuple[List[Optional[Any]], List[int]]:
        """
        Batch cache lookup.

        Returns:
            (cached_responses, uncached_indices)
        """
        cached = []
        uncached_indices = []

        for i, messages in enumerate(messages_list):
            result = self.get(messages, model, **kwargs)
            cached.append(result)
            if result is None:
                uncached_indices.append(i)

        return cached, uncached_indices

    def set_batch(
        self,
        messages_list: List[List[Dict]],
        responses: List[Any],
        model: str = "",
        **kwargs
    ) -> None:
        """Batch cache store."""
        for messages, response in zip(messages_list, responses):
            if response is not None:
                self.set(messages, response, model, **kwargs)

    def clear(self) -> int:
        """Clear the cache."""
        if self._db is None:
            return 0
        keys = list(self._db.keys())
        count = len(keys)
        for key in keys:
            del self._db[key]
        return count

    def close(self):
        """Close the cache."""
        if self._db is not None:
            self._db.close()
            self._db = None

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()

    @property
    def stats(self) -> Dict[str, Any]:
        """Return cache statistics."""
        total = self._stats["hits"] + self._stats["misses"]
        hit_rate = self._stats["hits"] / total if total > 0 else 0
        return {
            **self._stats,
            "total": total,
            "hit_rate": round(hit_rate, 4),
        }
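
For orientation, below is a minimal usage sketch of the cache module added above. It is not part of the package: it assumes flaxkv2 is installed (pip install flexllm[cache]), imports straight from the module path flexllm/cache/response_cache.py rather than from whatever flexllm/cache/__init__.py re-exports, and uses an arbitrary model name and a placeholder dict in place of a real LLM call.

# Usage sketch (assumptions noted above); not part of the packaged code.
from flexllm.cache.response_cache import ResponseCache, ResponseCacheConfig

# Single-process local cache with a 1-hour TTL; ResponseCacheConfig.ipc()
# would instead share the cache across processes via the FlaxKV daemon.
config = ResponseCacheConfig.local(ttl=3600)

messages = [{"role": "user", "content": "Summarize FlaxKV2 in one sentence."}]

with ResponseCache(config) as cache:  # __exit__ closes the underlying store
    response = cache.get(messages, model="example-model", temperature=0.0)
    if response is None:
        # Placeholder for a real API call; any serializable object can be stored.
        response = {"role": "assistant", "content": "...model output..."}
        cache.set(messages, response, model="example-model", temperature=0.0)

    # Batch lookup returns cached entries plus the indices that still need a call.
    batch = [messages, [{"role": "user", "content": "Second prompt"}]]
    cached_responses, uncached_indices = cache.get_batch(
        batch, model="example-model", temperature=0.0
    )

    print(cache.stats)  # {"hits": ..., "misses": ..., "total": ..., "hit_rate": ...}

Because _make_key feeds the model name and every extra keyword argument into messages_hash, a lookup only hits when those values match the ones used at store time; IPC and local mode share the same key scheme and differ only in how the FlaxKV store is opened.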