pynomadic 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pynomadic-0.1.0/PKG-INFO +3440 -0
- pynomadic-0.1.0/README.md +3425 -0
- pynomadic-0.1.0/pyproject.toml +27 -0
- pynomadic-0.1.0/src/pynomad/__init__.py +87 -0
- pynomadic-0.1.0/src/pynomad/cache/__init__.py +0 -0
- pynomadic-0.1.0/src/pynomad/cache/core/__init__.py +0 -0
- pynomadic-0.1.0/src/pynomad/cache/core/cache_interface.py +70 -0
- pynomadic-0.1.0/src/pynomad/cache/core/cache_registry.py +84 -0
- pynomadic-0.1.0/src/pynomad/cache/core/meta.py +73 -0
- pynomadic-0.1.0/src/pynomad/cache/core/types.py +73 -0
- pynomadic-0.1.0/src/pynomad/cache/dataframe/__init__.py +0 -0
- pynomadic-0.1.0/src/pynomad/cache/dataframe/base_df_decorator.py +431 -0
- pynomadic-0.1.0/src/pynomad/cache/dataframe/df_memory_decorator.py +167 -0
- pynomadic-0.1.0/src/pynomad/cache/dataframe/df_pickle_decorator.py +234 -0
- pynomadic-0.1.0/src/pynomad/cache/dataframe/df_redis_decorator.py +268 -0
- pynomadic-0.1.0/src/pynomad/cache/dataframe/types.py +196 -0
- pynomadic-0.1.0/src/pynomad/cache/decorator/__init__.py +0 -0
- pynomadic-0.1.0/src/pynomad/cache/decorator/base_decorator.py +224 -0
- pynomadic-0.1.0/src/pynomad/cache/decorator/keygenerator.py +409 -0
- pynomadic-0.1.0/src/pynomad/cache/decorator/memory_decorator.py +128 -0
- pynomadic-0.1.0/src/pynomad/cache/decorator/multi_level.py +459 -0
- pynomadic-0.1.0/src/pynomad/cache/decorator/pickle_decorator.py +190 -0
- pynomadic-0.1.0/src/pynomad/cache/decorator/redis_decorator.py +217 -0
- pynomadic-0.1.0/src/pynomad/cache/decorator/sql_decorator.py +227 -0
- pynomadic-0.1.0/src/pynomad/cache/impl/__init__.py +0 -0
- pynomadic-0.1.0/src/pynomad/cache/impl/memory_cache.py +500 -0
- pynomadic-0.1.0/src/pynomad/cache/impl/pickle_cache.py +881 -0
- pynomadic-0.1.0/src/pynomad/cache/impl/redis_cache.py +898 -0
- pynomadic-0.1.0/src/pynomad/cache/impl/sql_cache.py +1090 -0
- pynomadic-0.1.0/src/pynomad/cache/properties.py +74 -0
- pynomadic-0.1.0/src/pynomad/common/__init__.py +0 -0
- pynomadic-0.1.0/src/pynomad/common/ansi.py +239 -0
- pynomadic-0.1.0/src/pynomad/common/converter.py +65 -0
- pynomadic-0.1.0/src/pynomad/common/datetime_util.py +413 -0
- pynomadic-0.1.0/src/pynomad/common/decorator.py +208 -0
- pynomadic-0.1.0/src/pynomad/common/dict_utils.py +673 -0
- pynomadic-0.1.0/src/pynomad/common/enhandce_dict.py +558 -0
- pynomadic-0.1.0/src/pynomad/common/environment.py +848 -0
- pynomadic-0.1.0/src/pynomad/common/exceptions.py +200 -0
- pynomadic-0.1.0/src/pynomad/common/network.py +117 -0
- pynomadic-0.1.0/src/pynomad/common/serializer.py +287 -0
- pynomadic-0.1.0/src/pynomad/common/strings.py +206 -0
- pynomadic-0.1.0/src/pynomad/common/types.py +208 -0
- pynomadic-0.1.0/src/pynomad/config/__init__.py +0 -0
- pynomadic-0.1.0/src/pynomad/config/auto/__init__.py +0 -0
- pynomadic-0.1.0/src/pynomad/config/auto/autoconfig.py +1560 -0
- pynomadic-0.1.0/src/pynomad/config/auto/field_info.py +138 -0
- pynomadic-0.1.0/src/pynomad/config/auto/generate/__init__.py +0 -0
- pynomadic-0.1.0/src/pynomad/config/auto/generate/attributeInfo.py +625 -0
- pynomadic-0.1.0/src/pynomad/config/auto/generate/generator.py +487 -0
- pynomadic-0.1.0/src/pynomad/config/auto/properties.py +17 -0
- pynomadic-0.1.0/src/pynomad/config/config_utils.py +267 -0
- pynomadic-0.1.0/src/pynomad/config/loader/__init__.py +0 -0
- pynomadic-0.1.0/src/pynomad/config/loader/base.py +631 -0
- pynomadic-0.1.0/src/pynomad/config/loader/env_manager.py +715 -0
- pynomadic-0.1.0/src/pynomad/config/loader/file_loader.py +583 -0
- pynomadic-0.1.0/src/pynomad/config/loader/manager.py +285 -0
- pynomadic-0.1.0/src/pynomad/config/loader/resource_loader.py +551 -0
- pynomadic-0.1.0/src/pynomad/config/parser/__init__.py +0 -0
- pynomadic-0.1.0/src/pynomad/config/parser/base.py +163 -0
- pynomadic-0.1.0/src/pynomad/config/parser/common_parser.py +420 -0
- pynomadic-0.1.0/src/pynomad/config/parser/file_parser.py +352 -0
- pynomadic-0.1.0/src/pynomad/config/parser/toml_parser.py +855 -0
- pynomadic-0.1.0/src/pynomad/config/parser/yaml_parser.py +833 -0
- pynomadic-0.1.0/src/pynomad/console/__init__.py +0 -0
- pynomadic-0.1.0/src/pynomad/console/base.py +209 -0
- pynomadic-0.1.0/src/pynomad/console/logger.py +890 -0
- pynomadic-0.1.0/src/pynomad/console/printer.py +254 -0
- pynomadic-0.1.0/src/pynomad/console/progress.py +238 -0
- pynomadic-0.1.0/src/pynomad/logsystem/__init__.py +0 -0
- pynomadic-0.1.0/src/pynomad/logsystem/builder.py +1095 -0
- pynomadic-0.1.0/src/pynomad/logsystem/formatters.py +332 -0
- pynomadic-0.1.0/src/pynomad/logsystem/logger.py +482 -0
- pynomadic-0.1.0/src/pynomad/logsystem/logtypes.py +144 -0
- pynomadic-0.1.0/src/pynomad/logsystem/manager.py +317 -0
- pynomadic-0.1.0/src/pynomad/naming/__init__.py +0 -0
- pynomadic-0.1.0/src/pynomad/naming/exceptions.py +25 -0
- pynomadic-0.1.0/src/pynomad/naming/keywords.json +512 -0
- pynomadic-0.1.0/src/pynomad/naming/keywords.py +209 -0
- pynomadic-0.1.0/src/pynomad/naming/naming.py +404 -0
- pynomadic-0.1.0/src/pynomad/naming/naming_types.py +45 -0
- pynomadic-0.1.0/src/pynomad/naming/patterns.py +176 -0
- pynomadic-0.1.0/src/pynomad/naming/system.py +920 -0
- pynomadic-0.1.0/src/pynomad/result/__init__.py +0 -0
- pynomadic-0.1.0/src/pynomad/result/decorator.py +433 -0
- pynomadic-0.1.0/src/pynomad/result/result.py +916 -0
pynomadic-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,3440 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pynomadic
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: 一个python工具包,包括自动配置系统,常用工具包,控制台输出系统,日志系统,命名系统,缓存系统,返回值封装类
|
|
5
|
+
License-Expression: MIT
|
|
6
|
+
Author: nomadicooer
|
|
7
|
+
Author-email: nomadicooer@qq.com
|
|
8
|
+
Requires-Python: >=3.13
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
12
|
+
Requires-Dist: redis (>=7.3.0,<8.0.0)
|
|
13
|
+
Description-Content-Type: text/markdown
|
|
14
|
+
|
|
15
|
+
# pynomad
|
|
16
|
+
|
|
17
|
+
[](https://www.python.org/downloads/)
|
|
18
|
+
[](LICENSE)
|
|
19
|
+
|
|
20
|
+
**仓库地址**: https://gitee.com/nomadicooer/pynomad.git
|
|
21
|
+
|
|
22
|
+
> ⚠️ **重要声明**
|
|
23
|
+
>
|
|
24
|
+
> - 本系统大部分代码由 AI 生成
|
|
25
|
+
> - 本介绍文档完全由 AI 生成,当中存在大量错误介绍
|
|
26
|
+
> - 正确用法请自行发掘
|
|
27
|
+
> - 具体使用方法可参照 [findata 项目](https://gitee.com/nomadicooer/findata.git)
|
|
28
|
+
|
|
29
|
+
一个功能丰富的 Python 工具包,提供自动配置系统、多级缓存、DataFrame 缓存、日志系统、命名系统和结果封装等核心功能。
|
|
30
|
+
|
|
31
|
+
## 功能特性
|
|
32
|
+
|
|
33
|
+
### 🚀 缓存系统
|
|
34
|
+
|
|
35
|
+
提供多种缓存实现和灵活的缓存策略:
|
|
36
|
+
|
|
37
|
+
- **Memory Cache** (`@memcached`) - 内存缓存
|
|
38
|
+
- 支持淘汰策略 (LFU/LRU/FIFO)
|
|
39
|
+
- 线程安全实现
|
|
40
|
+
- 适合临时数据缓存
|
|
41
|
+
|
|
42
|
+
- **Pickle Cache** (`@pickled`) - Pickle 文件缓存
|
|
43
|
+
- 持久化到本地文件
|
|
44
|
+
- 支持对象序列化
|
|
45
|
+
|
|
46
|
+
- **Redis Cache** (`@rediscached`) - Redis 分布式缓存
|
|
47
|
+
- 支持自定义 Redis 客户端和连接池
|
|
48
|
+
- 支持加密存储
|
|
49
|
+
- 提供分布式缓存能力
|
|
50
|
+
|
|
51
|
+
- **DataFrame 缓存** - 专为 DataFrame 设计的缓存系统
|
|
52
|
+
- **Memory** (`@df_memcached`) - DataFrame 内存缓存
|
|
53
|
+
- **Pickle** (`@df_pickled`) - DataFrame 文件缓存
|
|
54
|
+
- **Redis** (`@df_rediscached`) - DataFrame Redis 缓存
|
|
55
|
+
- **SQL** (`@sqlcached`) - DataFrame SQL 数据库缓存
|
|
56
|
+
- 采用三阶段处理模型(GET → PUT → EXTRACT)
|
|
57
|
+
- 支持自定义 ValueLoader 实现灵活的数据处理逻辑
|
|
58
|
+
|
|
59
|
+
- **多级缓存** (`@multi_level_cached`)
|
|
60
|
+
- 支持三级缓存 (L1/L2/L3)
|
|
61
|
+
- 写穿透策略 (Write-Through)
|
|
62
|
+
- 读穿透策略 (Read-Through)
|
|
63
|
+
- 独立的 TTL 和淘汰策略配置
|
|
64
|
+
|
|
65
|
+
### 🎯 Result 结果封装
|
|
66
|
+
|
|
67
|
+
统一的结果封装模式,优雅的错误处理:
|
|
68
|
+
|
|
69
|
+
```python
|
|
70
|
+
Result.success(data) # 成功
|
|
71
|
+
Result.client_error(exception) # 客户端错误
|
|
72
|
+
Result.network_error(exception) # 网络错误
|
|
73
|
+
Result.server_error(exception) # 服务端错误
|
|
74
|
+
Result.third_party_error(ex) # 第三方错误
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
**链式操作**:
|
|
78
|
+
```python
|
|
79
|
+
result.map(fn) # 转换数据
|
|
80
|
+
result.map_err(fn) # 转换异常
|
|
81
|
+
result.unwrap() # 获取数据或抛异常
|
|
82
|
+
result.unwrap_or(default) # 获取数据或返回默认值
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
### ⚙️ 自动配置系统
|
|
86
|
+
|
|
87
|
+
类似 Spring Boot `@ConfigurationProperties` 的配置注入:
|
|
88
|
+
|
|
89
|
+
```python
|
|
90
|
+
@inject(prefix="database")
|
|
91
|
+
class DatabaseConfig:
|
|
92
|
+
host: str = "localhost"
|
|
93
|
+
port: int = 3306
|
|
94
|
+
|
|
95
|
+
config = DatabaseConfig() # 自动从配置加载
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
**支持的配置格式**:
|
|
99
|
+
- TOML (`.toml`) - 推荐使用 `settings.toml`
|
|
100
|
+
- 环境变量(配置文件中支持 `${ENV_VAR}` 格式)
|
|
101
|
+
|
|
102
|
+
### 📝 日志系统
|
|
103
|
+
|
|
104
|
+
功能完善的日志系统:
|
|
105
|
+
|
|
106
|
+
- 自定义 Logger 类
|
|
107
|
+
- 彩色输出 (`ColorFormatter`)
|
|
108
|
+
- 支持 TRACE/DEBUG/INFO/WARNING/ERROR/CRITICAL 级别
|
|
109
|
+
- 动态日志级别配置
|
|
110
|
+
|
|
111
|
+
```python
|
|
112
|
+
from pynomad.logsystem.manager import get_logger
|
|
113
|
+
|
|
114
|
+
logger = get_logger()
|
|
115
|
+
logger.trace("追踪信息")
|
|
116
|
+
logger.debug("调试信息")
|
|
117
|
+
logger.info("信息")
|
|
118
|
+
logger.warning("警告")
|
|
119
|
+
logger.error("错误")
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
### 🏷️ 命名系统
|
|
123
|
+
|
|
124
|
+
支持 16 种命名风格的转换和检测:
|
|
125
|
+
|
|
126
|
+
- `camelCase` - 驼峰命名
|
|
127
|
+
- `PascalCase` - 帕斯卡命名
|
|
128
|
+
- `snake_case` - 蛇形命名
|
|
129
|
+
- `kebab-case` - 短横线命名
|
|
130
|
+
- `SCREAMING_SNAKE_CASE` - 大写蛇形
|
|
131
|
+
- 等其他变体
|
|
132
|
+
|
|
133
|
+
```python
|
|
134
|
+
from pynomad.naming.naming import convert_name, detect_style
from pynomad.naming.naming_types import NameStyle
|
|
135
|
+
|
|
136
|
+
style = detect_style("myVariable") # 返回 NameStyle.CAMEL
|
|
137
|
+
converted = convert_name("myVariable", NameStyle.SNAKE) # 返回 "my_variable"
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
### 🔧 通用工具
|
|
141
|
+
|
|
142
|
+
- **单例装饰器** - 线程安全的单例模式
|
|
143
|
+
- **线程安全装饰器** - 并发控制
|
|
144
|
+
- **重试装饰器** - 自动重试机制
|
|
145
|
+
- **环境检测** - 自动检测项目根目录
|
|
146
|
+
- **网络检测** - 网络连接检查
|
|
147
|
+
- **字典工具** - 字典序列化、展平、转换
|
|
148
|
+
- **序列化工具** - 支持 INI/TOML/JSON
|
|
149
|
+
|
|
150
|
+
## 安装
|
|
151
|
+
|
|
152
|
+
```bash
|
|
153
|
+
pip install pynomadic
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
### 可选依赖
|
|
157
|
+
|
|
158
|
+
```bash
|
|
159
|
+
# Redis 缓存
|
|
160
|
+
pip install "redis>=7.3.0"
|
|
161
|
+
|
|
162
|
+
# SQL 缓存
|
|
163
|
+
pip install "sqlalchemy>=2.0.48"
|
|
164
|
+
|
|
165
|
+
# DataFrame 缓存
|
|
166
|
+
pip install "pandas>=3.0.1"
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
## 快速开始
|
|
170
|
+
|
|
171
|
+
### 基本缓存使用
|
|
172
|
+
|
|
173
|
+
```python
|
|
174
|
+
from pynomad import memcached, rediscached
|
|
175
|
+
|
|
176
|
+
@memcached(ttl=60)
|
|
177
|
+
def expensive_calc(x: int, y: int) -> int:
|
|
178
|
+
return x + y
|
|
179
|
+
|
|
180
|
+
@rediscached(ttl=300, host="localhost")
|
|
181
|
+
def get_user_data(user_id: str) -> dict:
|
|
182
|
+
return fetch_from_db(user_id)
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
### DataFrame 缓存
|
|
186
|
+
|
|
187
|
+
```python
|
|
188
|
+
from pynomad import df_memcached
|
|
189
|
+
from pandas import DataFrame
|
|
190
|
+
|
|
191
|
+
@df_memcached(ttl=3600)
|
|
192
|
+
def load_data(symbol: str) -> DataFrame:
|
|
193
|
+
# 从 API 加载数据
|
|
194
|
+
return DataFrame()
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
### SQL 缓存
|
|
198
|
+
|
|
199
|
+
```python
|
|
200
|
+
from pynomad import sqlcached
|
|
201
|
+
from pynomad.result import Result
|
|
202
|
+
from pandas import DataFrame
|
|
203
|
+
|
|
204
|
+
@sqlcached(db_type="sqlite", db_url="sqlite:///cache.db", ttl=3600)
|
|
205
|
+
def fetch_data(symbol: str) -> Result[DataFrame]:
|
|
206
|
+
# 从 API 加载数据
|
|
207
|
+
df = DataFrame({"symbol": [symbol], "price": [100.0]})
|
|
208
|
+
return Result.success(df)
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
### 多级缓存
|
|
212
|
+
|
|
213
|
+
```python
|
|
214
|
+
from pynomad import multi_level_cached
|
|
215
|
+
|
|
216
|
+
@multi_level_cached(
|
|
217
|
+
name="my_cache",
|
|
218
|
+
l1_code="memory", # L1: 内存
|
|
219
|
+
l2_code="pickle", # L2: 文件
|
|
220
|
+
l3_code="redis", # L3: Redis
|
|
221
|
+
l1_ttl=10, # L1: 10秒
|
|
222
|
+
l2_ttl=60, # L2: 60秒
|
|
223
|
+
l3_ttl=300, # L3: 300秒
|
|
224
|
+
)
|
|
225
|
+
def get_data(key: str):
|
|
226
|
+
return expensive_operation(key)
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
### Result 使用
|
|
230
|
+
|
|
231
|
+
```python
|
|
232
|
+
from pynomad import Result
|
|
233
|
+
|
|
234
|
+
result = Result.success({"id": 1, "name": "Alice"})
|
|
235
|
+
if result.is_success():
|
|
236
|
+
data = result.data
|
|
237
|
+
|
|
238
|
+
result = Result.client_error(ValueError("Invalid parameter"))
|
|
239
|
+
result.unwrap_or(None)
|
|
240
|
+
|
|
241
|
+
# 链式操作
|
|
242
|
+
result = result.map(lambda x: x * 2)
|
|
243
|
+
```
|
|
244
|
+
|
|
245
|
+
## 自动配置系统详解
|
|
246
|
+
|
|
247
|
+
pynomad 提供类似 Spring Boot `@ConfigurationProperties` 的自动配置系统,支持从 `settings.toml` 配置文件自动注入配置值。
|
|
248
|
+
|
|
249
|
+
### 1. 基本用法
|
|
250
|
+
|
|
251
|
+
#### 1.1 使用 prefix 参数
|
|
252
|
+
|
|
253
|
+
`@inject` 装饰器的 `prefix` 参数用于指定配置文件中的配置前缀:
|
|
254
|
+
|
|
255
|
+
**settings.toml:**
|
|
256
|
+
|
|
257
|
+
```toml
|
|
258
|
+
[database]
|
|
259
|
+
host = "localhost"
|
|
260
|
+
port = 3306
|
|
261
|
+
username = "root"
|
|
262
|
+
password = "${DB_PASSWORD}"
|
|
263
|
+
|
|
264
|
+
[cache.redis]
|
|
265
|
+
host = "localhost"
|
|
266
|
+
port = 6379
|
|
267
|
+
db = 0
|
|
268
|
+
password = "${REDIS_PASSWORD}"
|
|
269
|
+
ttl = 3600
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+
**Python 代码:**
|
|
273
|
+
|
|
274
|
+
```python
|
|
275
|
+
from pynomad.config.auto.autoconfig import inject
|
|
276
|
+
|
|
277
|
+
@inject(prefix="database")
|
|
278
|
+
class DatabaseConfig:
|
|
279
|
+
host: str = "localhost"
|
|
280
|
+
port: int = 3306
|
|
281
|
+
username: str = "root"
|
|
282
|
+
password: str = ""
|
|
283
|
+
|
|
284
|
+
# 创建实例时自动注入配置
|
|
285
|
+
config = DatabaseConfig()
|
|
286
|
+
print(config.host) # 从 settings.toml 的 database.host 读取
|
|
287
|
+
print(config.port) # 从 settings.toml 的 database.port 读取
|
|
288
|
+
```
|
|
289
|
+
|
|
290
|
+
#### 1.2 使用 field() 函数
|
|
291
|
+
|
|
292
|
+
`field()` 函数用于指定默认值、自定义转换器和别名(不是配置路径):
|
|
293
|
+
|
|
294
|
+
```python
|
|
295
|
+
from pynomad.config.auto.autoconfig import inject
|
|
296
|
+
from pynomad.config.auto.field_info import field
|
|
297
|
+
|
|
298
|
+
@inject(prefix="cache.redis")
|
|
299
|
+
class RedisConfig:
|
|
300
|
+
host: str = field(default="localhost") # 默认值
|
|
301
|
+
port: int = field(default=6379)
|
|
302
|
+
db: int = field(default=0)
|
|
303
|
+
password: str = field(default="")
|
|
304
|
+
ttl: int = field(default=3600)
|
|
305
|
+
|
|
306
|
+
redis_config = RedisConfig()
|
|
307
|
+
print(redis_config.host) # 从 cache.redis.host 读取
|
|
308
|
+
```
|
|
309
|
+
|
|
310
|
+
**注意:**
|
|
311
|
+
- `field()` 的第一个参数是 `default`(默认值),不是配置路径
|
|
312
|
+
- 配置路径由 `@inject(prefix="...")` 和字段名自动确定
|
|
313
|
+
- `field()` 主要用于:
|
|
314
|
+
- 指定默认值(`default`)
|
|
315
|
+
- 自定义类型转换器(`converter`)
|
|
316
|
+
- 指定配置文件中的别名(`alias`)
|
|
317
|
+
|
|
318
|
+
---
|
|
319
|
+
|
|
320
|
+
### 2. 高级功能
|
|
321
|
+
|
|
322
|
+
#### 2.1 单例模式
|
|
323
|
+
|
|
324
|
+
使用 `@inject` 装饰的类采用单例模式,多次创建返回同一实例:
|
|
325
|
+
|
|
326
|
+
```python
|
|
327
|
+
config1 = DatabaseConfig()
|
|
328
|
+
config2 = DatabaseConfig()
|
|
329
|
+
|
|
330
|
+
print(config1 is config2) # True,同一实例
|
|
331
|
+
```
|
|
332
|
+
|
|
333
|
+
**单例模式的优势:**
|
|
334
|
+
- 全局配置一致性
|
|
335
|
+
- 避免重复加载配置
|
|
336
|
+
- 提高性能
|
|
337
|
+
|
|
338
|
+
**注意事项:**
|
|
339
|
+
- 配置对象应为只读,不建议运行时修改
|
|
340
|
+
- 如需临时修改,使用 `copy_config()` 创建副本
|
|
341
|
+
|
|
342
|
+
#### 2.2 配置副本
|
|
343
|
+
|
|
344
|
+
使用 `copy_config()` 创建配置对象的深拷贝副本:
|
|
345
|
+
|
|
346
|
+
```python
|
|
347
|
+
from pynomad.config.auto.autoconfig import inject, copy_config
|
|
348
|
+
|
|
349
|
+
@inject(prefix="database")
|
|
350
|
+
class DatabaseConfig:
|
|
351
|
+
host: str = "localhost"
|
|
352
|
+
port: int = 3306
|
|
353
|
+
|
|
354
|
+
config = DatabaseConfig()
|
|
355
|
+
config_copy = copy_config(config)
|
|
356
|
+
|
|
357
|
+
# 修改副本不影响原始配置
|
|
358
|
+
config_copy.host = "modified.host"
|
|
359
|
+
print(config.host) # 仍然是原始值
|
|
360
|
+
print(config_copy.host) # 已修改
|
|
361
|
+
```
|
|
362
|
+
|
|
363
|
+
#### 2.3 重新加载配置
|
|
364
|
+
|
|
365
|
+
使用 `reload_config()` 重新加载配置文件:
|
|
366
|
+
|
|
367
|
+
```python
|
|
368
|
+
from pynomad.config.auto.autoconfig import inject, reload_config
|
|
369
|
+
|
|
370
|
+
@inject(prefix="database")
|
|
371
|
+
class DatabaseConfig:
|
|
372
|
+
host: str = "localhost"
|
|
373
|
+
port: int = 3306
|
|
374
|
+
|
|
375
|
+
config = DatabaseConfig()
|
|
376
|
+
|
|
377
|
+
# 修改 settings.toml 文件后
|
|
378
|
+
reload_config(config)
|
|
379
|
+
# config 的属性会更新为最新的配置值
|
|
380
|
+
```
|
|
381
|
+
|
|
382
|
+
#### 2.4 清除配置缓存
|
|
383
|
+
|
|
384
|
+
使用 `clear_config_cache()` 清除配置缓存:
|
|
385
|
+
|
|
386
|
+
```python
|
|
387
|
+
from pynomad.config.auto.autoconfig import clear_config_cache  # DatabaseConfig 为前文用户自定义的配置类
|
|
388
|
+
|
|
389
|
+
# 清除缓存
|
|
390
|
+
clear_config_cache()
|
|
391
|
+
|
|
392
|
+
# 下次创建实例时会重新加载配置
|
|
393
|
+
config = DatabaseConfig()
|
|
394
|
+
```
|
|
395
|
+
|
|
396
|
+
#### 2.5 获取配置值
|
|
397
|
+
|
|
398
|
+
使用 `get_config_value()` 便捷函数获取配置值:
|
|
399
|
+
|
|
400
|
+
```python
|
|
401
|
+
from pynomad.config.auto.autoconfig import get_config_value
|
|
402
|
+
|
|
403
|
+
# 获取 database.host,默认为 "localhost"
|
|
404
|
+
host = get_config_value("database", "host", "localhost")
|
|
405
|
+
print(host)
|
|
406
|
+
```
|
|
407
|
+
|
|
408
|
+
---
|
|
409
|
+
|
|
410
|
+
### 3. 类型支持
|
|
411
|
+
|
|
412
|
+
#### 3.1 基本类型
|
|
413
|
+
|
|
414
|
+
自动配置系统支持所有基本 Python 类型:
|
|
415
|
+
|
|
416
|
+
```python
|
|
417
|
+
@inject(prefix="app")
|
|
418
|
+
class AppConfig:
|
|
419
|
+
# 字符串
|
|
420
|
+
name: str = field(default="my_app")
|
|
421
|
+
# 整数
|
|
422
|
+
port: int = field(default=8080)
|
|
423
|
+
# 浮点数
|
|
424
|
+
timeout: float = field(default=3.5)
|
|
425
|
+
# 布尔值
|
|
426
|
+
enabled: bool = field(default=True)
|
|
427
|
+
```
|
|
428
|
+
|
|
429
|
+
**settings.toml:**
|
|
430
|
+
|
|
431
|
+
```toml
|
|
432
|
+
[app]
|
|
433
|
+
name = "my_application"
|
|
434
|
+
port = 9090
|
|
435
|
+
timeout = 5.0
|
|
436
|
+
enabled = false
|
|
437
|
+
```
|
|
438
|
+
|
|
439
|
+
#### 3.2 路径类型
|
|
440
|
+
|
|
441
|
+
支持 `Path` 类型,并自动展开路径变量:
|
|
442
|
+
|
|
443
|
+
```python
|
|
444
|
+
from pathlib import Path
|
|
445
|
+
|
|
446
|
+
@inject(prefix="paths")
|
|
447
|
+
class PathConfig:
|
|
448
|
+
home_dir: Path = field(default="{home}/app")
|
|
449
|
+
workspace_dir: Path = field(default="{workspace}/data")
|
|
450
|
+
cache_dir: Path = field(default="./cache")
|
|
451
|
+
```
|
|
452
|
+
|
|
453
|
+
**settings.toml:**
|
|
454
|
+
|
|
455
|
+
```toml
|
|
456
|
+
[paths]
|
|
457
|
+
home_dir = "{home}/my_app"
|
|
458
|
+
workspace_dir = "{workspace}/project"
|
|
459
|
+
cache_dir = "./cache"
|
|
460
|
+
```
|
|
461
|
+
|
|
462
|
+
**支持的路径变量:**
|
|
463
|
+
- `{home}` - 用户主目录
|
|
464
|
+
- `{workspace}` - 项目根目录
|
|
465
|
+
- `{temp}` - 临时目录
|
|
466
|
+
|
|
467
|
+
#### 3.3 列表类型
|
|
468
|
+
|
|
469
|
+
支持 `list` 和 `List[T]` 类型:
|
|
470
|
+
|
|
471
|
+
```python
|
|
472
|
+
@inject(prefix="servers")
|
|
473
|
+
class ServerConfig:
|
|
474
|
+
hosts: list[str] = field(default=[])
|
|
475
|
+
ports: list[int] = field(default=[])
|
|
476
|
+
enabled: list[bool] = field(default=[])
|
|
477
|
+
```
|
|
478
|
+
|
|
479
|
+
**settings.toml:**
|
|
480
|
+
|
|
481
|
+
```toml
|
|
482
|
+
[servers]
|
|
483
|
+
hosts = ["server1.example.com", "server2.example.com"]
|
|
484
|
+
ports = [8080, 9090]
|
|
485
|
+
enabled = [true, false]
|
|
486
|
+
```
|
|
487
|
+
|
|
488
|
+
#### 3.4 嵌套配置类
|
|
489
|
+
|
|
490
|
+
支持嵌套配置类:
|
|
491
|
+
|
|
492
|
+
```python
|
|
493
|
+
from dataclasses import dataclass
|
|
494
|
+
|
|
495
|
+
@inject(prefix="app")
|
|
496
|
+
class AppConfig:
|
|
497
|
+
name: str = "my_app"
|
|
498
|
+
debug: bool = False
|
|
499
|
+
|
|
500
|
+
@dataclass
|
|
501
|
+
class DatabaseSection:
|
|
502
|
+
host: str = "localhost"
|
|
503
|
+
port: int = 3306
|
|
504
|
+
|
|
505
|
+
@inject(prefix="database")
|
|
506
|
+
class DatabaseConfig:
|
|
507
|
+
connection: DatabaseSection = DatabaseSection()
|
|
508
|
+
username: str = "root"
|
|
509
|
+
password: str = ""
|
|
510
|
+
```
|
|
511
|
+
|
|
512
|
+
**settings.toml:**
|
|
513
|
+
|
|
514
|
+
```toml
|
|
515
|
+
[app]
|
|
516
|
+
name = "production_app"
|
|
517
|
+
debug = false
|
|
518
|
+
|
|
519
|
+
[database.connection]
|
|
520
|
+
host = "db.example.com"
|
|
521
|
+
port = 3306
|
|
522
|
+
|
|
523
|
+
[database]
|
|
524
|
+
username = "db_user"
|
|
525
|
+
password = "${DB_PASSWORD}"
|
|
526
|
+
```
|
|
527
|
+
|
|
528
|
+
---
|
|
529
|
+
|
|
530
|
+
### 4. 自定义转换器
|
|
531
|
+
|
|
532
|
+
使用 `field()` 的 `converter` 参数自定义类型转换:
|
|
533
|
+
|
|
534
|
+
```python
|
|
535
|
+
from pynomad.config.auto.autoconfig import inject
|
|
536
|
+
from pynomad.config.auto.field_info import field
|
|
537
|
+
|
|
538
|
+
def to_seconds(value) -> int:
|
|
539
|
+
"""将时间字符串转换为秒数"""
|
|
540
|
+
if isinstance(value, int):
|
|
541
|
+
return value
|
|
542
|
+
if isinstance(value, str):
|
|
543
|
+
if value.endswith("s"):
|
|
544
|
+
return int(value[:-1])
|
|
545
|
+
if value.endswith("m"):
|
|
546
|
+
return int(value[:-1]) * 60
|
|
547
|
+
if value.endswith("h"):
|
|
548
|
+
return int(value[:-1]) * 3600
|
|
549
|
+
return int(value)
|
|
550
|
+
|
|
551
|
+
@inject(prefix="cache")
|
|
552
|
+
class CacheConfig:
|
|
553
|
+
timeout: int = field(default=60, converter=to_seconds)
|
|
554
|
+
ttl: int = field(default=3600, converter=to_seconds)
|
|
555
|
+
```
|
|
556
|
+
|
|
557
|
+
**settings.toml:**
|
|
558
|
+
|
|
559
|
+
```toml
|
|
560
|
+
[cache]
|
|
561
|
+
timeout = "5m" # 自动转换为 300 秒
|
|
562
|
+
ttl = "1h" # 自动转换为 3600 秒
|
|
563
|
+
```
|
|
564
|
+
|
|
565
|
+
---
|
|
566
|
+
|
|
567
|
+
### 5. 别名支持
|
|
568
|
+
|
|
569
|
+
使用 `alias` 参数解决 Python 关键字冲突:
|
|
570
|
+
|
|
571
|
+
```python
|
|
572
|
+
@inject(prefix="app")
|
|
573
|
+
class AppConfig:
|
|
574
|
+
# Python 中 class 是关键字,使用 class_ 作为属性名
|
|
575
|
+
class_: str = field(default="default_class", alias="class")
|
|
576
|
+
# 配置文件中使用 "class",代码中使用 "class_"
|
|
577
|
+
type_: str = field(default="default_type", alias="type")
|
|
578
|
+
```
|
|
579
|
+
|
|
580
|
+
**settings.toml:**
|
|
581
|
+
|
|
582
|
+
```toml
|
|
583
|
+
[app]
|
|
584
|
+
class = "my_class"
|
|
585
|
+
type = "my_type"
|
|
586
|
+
```
|
|
587
|
+
|
|
588
|
+
---
|
|
589
|
+
|
|
590
|
+
### 6. 环境变量替换
|
|
591
|
+
|
|
592
|
+
配置文件支持环境变量替换:
|
|
593
|
+
|
|
594
|
+
**settings.toml:**
|
|
595
|
+
|
|
596
|
+
```toml
|
|
597
|
+
[database]
|
|
598
|
+
host = "localhost"
|
|
599
|
+
port = 3306
|
|
600
|
+
username = "root"
|
|
601
|
+
password = "${DB_PASSWORD}"
|
|
602
|
+
|
|
603
|
+
[cache.redis]
|
|
604
|
+
host = "localhost"
|
|
605
|
+
port = 6379
|
|
606
|
+
password = "${REDIS_PASSWORD}"
|
|
607
|
+
```
|
|
608
|
+
|
|
609
|
+
**设置环境变量:**
|
|
610
|
+
|
|
611
|
+
```bash
|
|
612
|
+
# Linux/Mac
|
|
613
|
+
export DB_PASSWORD="my_db_password"
|
|
614
|
+
export REDIS_PASSWORD="my_redis_password"
|
|
615
|
+
|
|
616
|
+
# Windows
|
|
617
|
+
set DB_PASSWORD=my_db_password
|
|
618
|
+
set REDIS_PASSWORD=my_redis_password
|
|
619
|
+
```
|
|
620
|
+
|
|
621
|
+
---
|
|
622
|
+
|
|
623
|
+
### 7. 配置文件位置
|
|
624
|
+
|
|
625
|
+
`settings.toml` 文件应放在以下位置之一(按优先级顺序):
|
|
626
|
+
|
|
627
|
+
1. **项目根目录**:`./settings.toml`
|
|
628
|
+
2. **配置目录**:`./config/settings.toml`
|
|
629
|
+
3. **用户目录**:`~/.pynomad/settings.toml`
|
|
630
|
+
4. **环境变量指定**:`PYNOMAD_CONFIG_PATH`
|
|
631
|
+
|
|
632
|
+
```bash
|
|
633
|
+
# 通过环境变量指定配置文件路径
|
|
634
|
+
export PYNOMAD_CONFIG_PATH="/path/to/settings.toml"
|
|
635
|
+
```
|
|
636
|
+
|
|
637
|
+
---
|
|
638
|
+
|
|
639
|
+
### 8. 完整示例
|
|
640
|
+
|
|
641
|
+
**settings.toml:**
|
|
642
|
+
|
|
643
|
+
```toml
|
|
644
|
+
[app]
|
|
645
|
+
name = "my_application"
|
|
646
|
+
version = "1.0.0"
|
|
647
|
+
debug = false
|
|
648
|
+
timeout = "5m"
|
|
649
|
+
|
|
650
|
+
[database]
|
|
651
|
+
host = "localhost"
|
|
652
|
+
port = 3306
|
|
653
|
+
username = "root"
|
|
654
|
+
password = "${DB_PASSWORD}"
|
|
655
|
+
pool_size = 10
|
|
656
|
+
|
|
657
|
+
[cache.memory]
|
|
658
|
+
ttl = 600
|
|
659
|
+
maxsize = 1000
|
|
660
|
+
|
|
661
|
+
[cache.redis]
|
|
662
|
+
host = "localhost"
|
|
663
|
+
port = 6379
|
|
664
|
+
db = 0
|
|
665
|
+
password = "${REDIS_PASSWORD}"
|
|
666
|
+
ttl = 1800
|
|
667
|
+
|
|
668
|
+
[paths]
|
|
669
|
+
home_dir = "{home}/my_app"
|
|
670
|
+
workspace_dir = "{workspace}/data"
|
|
671
|
+
cache_dir = "./cache"
|
|
672
|
+
|
|
673
|
+
[servers]
|
|
674
|
+
hosts = ["server1.example.com", "server2.example.com"]
|
|
675
|
+
ports = [8080, 9090]
|
|
676
|
+
```
|
|
677
|
+
|
|
678
|
+
**main.py:**
|
|
679
|
+
|
|
680
|
+
```python
|
|
681
|
+
from pynomad.config.auto.autoconfig import inject
|
|
682
|
+
from pynomad.config.auto.field_info import field
|
|
683
|
+
from pathlib import Path
|
|
684
|
+
|
|
685
|
+
def to_seconds(value) -> int:
|
|
686
|
+
"""将时间字符串转换为秒数"""
|
|
687
|
+
if isinstance(value, int):
|
|
688
|
+
return value
|
|
689
|
+
if isinstance(value, str):
|
|
690
|
+
if value.endswith("m"):
|
|
691
|
+
return int(value[:-1]) * 60
|
|
692
|
+
if value.endswith("h"):
|
|
693
|
+
return int(value[:-1]) * 3600
|
|
694
|
+
return int(value)
|
|
695
|
+
|
|
696
|
+
@inject(prefix="app")
|
|
697
|
+
class AppConfig:
|
|
698
|
+
name: str = "my_app"
|
|
699
|
+
version: str = "0.0.1"
|
|
700
|
+
debug: bool = False
|
|
701
|
+
timeout: int = field(default=60, converter=to_seconds)
|
|
702
|
+
|
|
703
|
+
@inject(prefix="database")
|
|
704
|
+
class DatabaseConfig:
|
|
705
|
+
host: str = "localhost"
|
|
706
|
+
port: int = 3306
|
|
707
|
+
username: str = "root"
|
|
708
|
+
password: str = ""
|
|
709
|
+
pool_size: int = 10
|
|
710
|
+
|
|
711
|
+
@inject(prefix="cache.redis")
|
|
712
|
+
class RedisCacheConfig:
|
|
713
|
+
host: str = "localhost"
|
|
714
|
+
port: int = 6379
|
|
715
|
+
db: int = 0
|
|
716
|
+
password: str = ""
|
|
717
|
+
ttl: int = 3600
|
|
718
|
+
|
|
719
|
+
@inject(prefix="paths")
|
|
720
|
+
class PathConfig:
|
|
721
|
+
home_dir: Path = field(default="{home}/app")
|
|
722
|
+
workspace_dir: Path = field(default="{workspace}/data")
|
|
723
|
+
cache_dir: Path = field(default="./cache")
|
|
724
|
+
|
|
725
|
+
@inject(prefix="servers")
|
|
726
|
+
class ServerConfig:
|
|
727
|
+
hosts: list[str] = []
|
|
728
|
+
ports: list[int] = []
|
|
729
|
+
|
|
730
|
+
# 使用配置
|
|
731
|
+
app_config = AppConfig()
|
|
732
|
+
db_config = DatabaseConfig()
|
|
733
|
+
redis_config = RedisCacheConfig()
|
|
734
|
+
path_config = PathConfig()
|
|
735
|
+
server_config = ServerConfig()
|
|
736
|
+
|
|
737
|
+
print(f"应用名称: {app_config.name}")
|
|
738
|
+
print(f"应用版本: {app_config.version}")
|
|
739
|
+
print(f"调试模式: {app_config.debug}")
|
|
740
|
+
print(f"超时时间: {app_config.timeout} 秒")
|
|
741
|
+
print(f"数据库主机: {db_config.host}")
|
|
742
|
+
print(f"数据库端口: {db_config.port}")
|
|
743
|
+
print(f"Redis 主机: {redis_config.host}")
|
|
744
|
+
print(f"缓存目录: {path_config.cache_dir}")
|
|
745
|
+
print(f"服务器列表: {server_config.hosts}")
|
|
746
|
+
```
|
|
747
|
+
|
|
748
|
+
---
|
|
749
|
+
|
|
750
|
+
### 9. 测试场景
|
|
751
|
+
|
|
752
|
+
#### 9.1 重置单例
|
|
753
|
+
|
|
754
|
+
测试时使用 `reset_singleton()` 重置单例:
|
|
755
|
+
|
|
756
|
+
```python
|
|
757
|
+
from pynomad.config.auto.autoconfig import reset_singleton  # DatabaseConfig 为前文用户自定义的配置类
|
|
758
|
+
|
|
759
|
+
def test_database_config():
|
|
760
|
+
# 测试前重置单例
|
|
761
|
+
reset_singleton(DatabaseConfig)
|
|
762
|
+
|
|
763
|
+
config = DatabaseConfig()
|
|
764
|
+
assert config.host == "localhost"
|
|
765
|
+
|
|
766
|
+
# 清理
|
|
767
|
+
reset_singleton(DatabaseConfig)
|
|
768
|
+
```
|
|
769
|
+
|
|
770
|
+
#### 9.2 使用配置副本
|
|
771
|
+
|
|
772
|
+
测试时使用配置副本避免影响原始配置:
|
|
773
|
+
|
|
774
|
+
```python
|
|
775
|
+
from pynomad.config.auto.autoconfig import copy_config  # DatabaseConfig 为前文用户自定义的配置类
|
|
776
|
+
|
|
777
|
+
def test_with_modified_config():
|
|
778
|
+
original_config = DatabaseConfig()
|
|
779
|
+
test_config = copy_config(original_config)
|
|
780
|
+
|
|
781
|
+
# 修改副本用于测试
|
|
782
|
+
test_config.host = "test.host"
|
|
783
|
+
|
|
784
|
+
# 原始配置不受影响
|
|
785
|
+
assert original_config.host != test_config.host
|
|
786
|
+
```
|
|
787
|
+
|
|
788
|
+
---
|
|
789
|
+
|
|
790
|
+
## 缓存系统完整指南
|
|
791
|
+
|
|
792
|
+
### 缓存类型总览
|
|
793
|
+
|
|
794
|
+
pynomad 提供多种缓存实现,适用于不同的使用场景:
|
|
795
|
+
|
|
796
|
+
| 缓存类型 | 装饰器 | 适用场景 | 持久化 | 分布式 |
|
|
797
|
+
|---------|--------|---------|--------|--------|
|
|
798
|
+
| Memory Cache | `@memcached` | 临时数据、高频访问 | ❌ | ❌ |
|
|
799
|
+
| Pickle Cache | `@pickled` | 本地持久化、离线场景 | ✅ | ❌ |
|
|
800
|
+
| Redis Cache | `@rediscached` | 分布式缓存、多实例共享 | ✅ | ✅ |
|
|
801
|
+
| DataFrame Memory | `@df_memcached` | DataFrame 临时缓存 | ❌ | ❌ |
|
|
802
|
+
| DataFrame Pickle | `@df_pickled` | DataFrame 持久化 | ✅ | ❌ |
|
|
803
|
+
| DataFrame Redis | `@df_rediscached` | DataFrame 分布式 | ✅ | ✅ |
|
|
804
|
+
| DataFrame SQL | `@sqlcached` | DataFrame SQL 数据库缓存 | ✅ | ✅ |
|
|
805
|
+
| 多级缓存 | `@multi_level_cached` | 性能与可靠性平衡 | 混合 | 混合 |
|
|
806
|
+
|
|
807
|
+
### 1. Memory Cache (内存缓存)
|
|
808
|
+
|
|
809
|
+
#### 基本用法
|
|
810
|
+
|
|
811
|
+
```python
|
|
812
|
+
from pynomad import memcached
|
|
813
|
+
|
|
814
|
+
# 无参数装饰(使用默认配置)
|
|
815
|
+
@memcached
|
|
816
|
+
def expensive_calc(x: int, y: int) -> int:
|
|
817
|
+
print("执行计算...")
|
|
818
|
+
return x + y
|
|
819
|
+
```
|
|
820
|
+
|
|
821
|
+
#### 带参数配置
|
|
822
|
+
|
|
823
|
+
```python
|
|
824
|
+
from pynomad import memcached
|
|
825
|
+
|
|
826
|
+
@memcached(
|
|
827
|
+
name="my_cache", # 缓存名称
|
|
828
|
+
ttl=60, # 缓存存活时间(秒),0 表示永不过期
|
|
829
|
+
maxsize=100, # 最大缓存数量
|
|
830
|
+
eviction_policy="lru" # 淘汰策略: lru/lfu/fifo/none
|
|
831
|
+
)
|
|
832
|
+
def fetch_user(user_id: int) -> dict:
|
|
833
|
+
print(f"从数据库查询用户 {user_id}...")
|
|
834
|
+
return {"id": user_id, "name": f"User{user_id}"}
|
|
835
|
+
```
|
|
836
|
+
|
|
837
|
+
#### 淘汰策略说明
|
|
838
|
+
|
|
839
|
+
- **LRU (Least Recently Used)**: 淘汰最久未使用的数据
|
|
840
|
+
- **LFU (Least Frequently Used)**: 淘汰使用频率最低的数据
|
|
841
|
+
- **FIFO (First In First Out)**: 淘汰最早缓存的数据
|
|
842
|
+
- **None**: 不淘汰,缓存满时拒绝写入
|
|
843
|
+
|
|
844
|
+
#### 使用场景
|
|
845
|
+
|
|
846
|
+
- 高频访问的临时数据
|
|
847
|
+
- API 响应缓存
|
|
848
|
+
- 计算密集型操作结果缓存
|
|
849
|
+
|
|
850
|
+
---
|
|
851
|
+
|
|
852
|
+
### 2. Pickle Cache (文件缓存)
|
|
853
|
+
|
|
854
|
+
#### 基本用法
|
|
855
|
+
|
|
856
|
+
```python
|
|
857
|
+
from pynomad import pickled
|
|
858
|
+
|
|
859
|
+
@pickled(ttl=3600)
|
|
860
|
+
def load_config() -> dict:
|
|
861
|
+
print("加载配置文件...")
|
|
862
|
+
return {"debug": True, "timeout": 30}
|
|
863
|
+
```
|
|
864
|
+
|
|
865
|
+
#### 带参数配置
|
|
866
|
+
|
|
867
|
+
```python
|
|
868
|
+
from pynomad import pickled
|
|
869
|
+
|
|
870
|
+
@pickled(
|
|
871
|
+
name="app_cache",
|
|
872
|
+
ttl=3600, # 1小时过期
|
|
873
|
+
maxsize=500,
|
|
874
|
+
cache_dir="./cache", # 缓存文件存储目录
|
|
875
|
+
enable_encryption=True, # 启用加密
|
|
876
|
+
salt="my_app_v1", # 加密盐值
|
|
877
|
+
enable_background_cleanup=True, # 启用后台清理
|
|
878
|
+
cleanup_interval=300 # 每5分钟清理一次
|
|
879
|
+
)
|
|
880
|
+
def process_data(data_id: str) -> dict:
|
|
881
|
+
print(f"处理数据 {data_id}...")
|
|
882
|
+
return {"id": data_id, "processed": True}
|
|
883
|
+
```
|
|
884
|
+
|
|
885
|
+
#### 使用场景
|
|
886
|
+
|
|
887
|
+
- 需要持久化的缓存数据
|
|
888
|
+
- 离线场景的数据访问
|
|
889
|
+
- 跨进程共享(通过共享目录)
|
|
890
|
+
- 数据迁移和备份
|
|
891
|
+
- 数据分析和审计
|
|
892
|
+
|
|
893
|
+
#### 加密说明
|
|
894
|
+
|
|
895
|
+
- 使用机器派生密钥进行加密
|
|
896
|
+
- `salt` 参数用于生成密钥,建议使用项目版本号
|
|
897
|
+
- 不同机器使用不同的加密密钥,增强安全性
|
|
898
|
+
|
|
899
|
+
---
|
|
900
|
+
|
|
901
|
+
### 3. Redis Cache (Redis 分布式缓存)
|
|
902
|
+
|
|
903
|
+
#### 基本用法
|
|
904
|
+
|
|
905
|
+
```python
|
|
906
|
+
from pynomad import rediscached
|
|
907
|
+
|
|
908
|
+
@rediscached(ttl=300)
|
|
909
|
+
def get_product(product_id: str) -> dict:
|
|
910
|
+
print(f"从数据库查询产品 {product_id}...")
|
|
911
|
+
return {"id": product_id, "price": 99.99}
|
|
912
|
+
```
|
|
913
|
+
|
|
914
|
+
#### 带参数配置
|
|
915
|
+
|
|
916
|
+
```python
|
|
917
|
+
from pynomad import rediscached
|
|
918
|
+
|
|
919
|
+
@rediscached(
|
|
920
|
+
name="product_cache",
|
|
921
|
+
ttl=1800, # 30分钟过期
|
|
922
|
+
host="localhost", # Redis 主机
|
|
923
|
+
port=6379, # Redis 端口
|
|
924
|
+
db=0, # 数据库编号
|
|
925
|
+
username="default", # 用户名
|
|
926
|
+
password="password", # 密码
|
|
927
|
+
enable_encryption=True, # 启用加密
|
|
928
|
+
salt="redis_cache_v1" # 加密盐值
|
|
929
|
+
)
|
|
930
|
+
def get_order(order_id: str) -> dict:
|
|
931
|
+
print(f"从数据库查询订单 {order_id}...")
|
|
932
|
+
return {"id": order_id, "status": "completed"}
|
|
933
|
+
```
|
|
934
|
+
|
|
935
|
+
#### 使用自定义 Redis 客户端
|
|
936
|
+
|
|
937
|
+
```python
|
|
938
|
+
from redis import Redis, ConnectionPool
|
|
939
|
+
from pynomad import rediscached
|
|
940
|
+
|
|
941
|
+
# 创建连接池
|
|
942
|
+
pool = ConnectionPool(
|
|
943
|
+
host="localhost",
|
|
944
|
+
port=6379,
|
|
945
|
+
db=0,
|
|
946
|
+
password="password",
|
|
947
|
+
max_connections=10
|
|
948
|
+
)
|
|
949
|
+
|
|
950
|
+
# 创建 Redis 客户端
|
|
951
|
+
redis_client = Redis(connection_pool=pool)
|
|
952
|
+
|
|
953
|
+
@rediscached(
|
|
954
|
+
name="custom_redis",
|
|
955
|
+
redis_client=redis_client, # 使用自定义客户端
|
|
956
|
+
ttl=600
|
|
957
|
+
)
|
|
958
|
+
def get_user(user_id: int) -> dict:
|
|
959
|
+
print(f"查询用户 {user_id}...")
|
|
960
|
+
return {"id": user_id, "name": f"User{user_id}"}
|
|
961
|
+
```
|
|
962
|
+
|
|
963
|
+
#### 使用场景
|
|
964
|
+
|
|
965
|
+
- 分布式系统缓存
|
|
966
|
+
- 多实例共享缓存
|
|
967
|
+
- 高并发场景
|
|
968
|
+
- 实时数据同步
|
|
969
|
+
|
|
970
|
+
---
|
|
971
|
+
|
|
972
|
+
### 4. DataFrame 缓存
|
|
973
|
+
|
|
974
|
+
DataFrame 缓存采用三阶段处理模型:**GET → PUT → EXTRACT**
|
|
975
|
+
|
|
976
|
+
**重要规范**:所有 DataFrame 缓存装饰器装饰的函数必须显式声明返回类型注解。
|
|
977
|
+
|
|
978
|
+
#### 4.1 DataFrame 内存缓存 (`@df_memcached`)
|
|
979
|
+
|
|
980
|
+
```python
|
|
981
|
+
from pynomad import df_memcached
|
|
982
|
+
from pandas import DataFrame
|
|
983
|
+
import pandas as pd
|
|
984
|
+
|
|
985
|
+
# 无参数装饰(使用配置默认值)
|
|
986
|
+
@df_memcached
|
|
987
|
+
def load_stock_data(symbol: str) -> DataFrame:
|
|
988
|
+
"""必须声明返回类型为 DataFrame"""
|
|
989
|
+
print(f"加载股票数据: {symbol}")
|
|
990
|
+
return DataFrame({
|
|
991
|
+
"symbol": [symbol] * 5,
|
|
992
|
+
"date": pd.date_range("2024-01-01", periods=5),
|
|
993
|
+
"price": [100.0, 101.0, 102.0, 101.5, 103.0]
|
|
994
|
+
})
|
|
995
|
+
|
|
996
|
+
# 带参数装饰
|
|
997
|
+
@df_memcached(
|
|
998
|
+
name="stock_cache",
|
|
999
|
+
ttl=600, # 10分钟过期
|
|
1000
|
+
maxsize=100, # 最多缓存100个symbol
|
|
1001
|
+
eviction_policy="lru" # LRU淘汰策略
|
|
1002
|
+
)
|
|
1003
|
+
def load_market_data(symbol: str, period: str = "1d") -> DataFrame:
|
|
1004
|
+
print(f"加载市场数据: {symbol}, 周期: {period}")
|
|
1005
|
+
return DataFrame()
|
|
1006
|
+
```
|
|
1007
|
+
|
|
1008
|
+
#### 4.2 DataFrame 文件缓存 (`@df_pickled`)
|
|
1009
|
+
|
|
1010
|
+
```python
|
|
1011
|
+
from pynomad import df_pickled
|
|
1012
|
+
from pandas import DataFrame
|
|
1013
|
+
|
|
1014
|
+
# 基本用法
|
|
1015
|
+
@df_pickled(ttl=3600)
|
|
1016
|
+
def load_historical_data(symbol: str) -> DataFrame:
|
|
1017
|
+
print(f"加载历史数据: {symbol}")
|
|
1018
|
+
# 从 API 获取数据
|
|
1019
|
+
return DataFrame()
|
|
1020
|
+
|
|
1021
|
+
# 完整配置
|
|
1022
|
+
@df_pickled(
|
|
1023
|
+
name="historical_cache",
|
|
1024
|
+
cache_dir="./df_cache", # 缓存文件存储目录
|
|
1025
|
+
ttl=86400, # 24小时过期
|
|
1026
|
+
maxsize=500,
|
|
1027
|
+
enable_encryption=True, # 启用加密
|
|
1028
|
+
salt="finance_data_v1", # 加密盐值
|
|
1029
|
+
enable_background_cleanup=True, # 启用后台清理
|
|
1030
|
+
cleanup_interval=300 # 每5分钟清理一次
|
|
1031
|
+
)
|
|
1032
|
+
def load_daily_data(date: str) -> DataFrame:
|
|
1033
|
+
print(f"加载日线数据: {date}")
|
|
1034
|
+
return DataFrame()
|
|
1035
|
+
```
|
|
1036
|
+
|
|
1037
|
+
#### 4.3 DataFrame Redis 缓存 (`@df_rediscached`)
|
|
1038
|
+
|
|
1039
|
+
```python
|
|
1040
|
+
from pynomad import df_rediscached
|
|
1041
|
+
from pandas import DataFrame
|
|
1042
|
+
|
|
1043
|
+
# 基本用法
|
|
1044
|
+
@df_rediscached(ttl=1800)
|
|
1045
|
+
def load_realtime_data(symbol: str) -> DataFrame:
|
|
1046
|
+
print(f"加载实时数据: {symbol}")
|
|
1047
|
+
# 从实时数据源获取
|
|
1048
|
+
return DataFrame()
|
|
1049
|
+
|
|
1050
|
+
# 完整配置
|
|
1051
|
+
@df_rediscached(
|
|
1052
|
+
name="realtime_cache",
|
|
1053
|
+
host="localhost",
|
|
1054
|
+
port=6379,
|
|
1055
|
+
db=0,
|
|
1056
|
+
username="default",
|
|
1057
|
+
password="password",
|
|
1058
|
+
ttl=300, # 5分钟过期
|
|
1059
|
+
maxsize=1000,
|
|
1060
|
+
enable_encryption=True, # 启用加密
|
|
1061
|
+
salt="realtime_v1" # 加密盐值
|
|
1062
|
+
)
|
|
1063
|
+
def load_tick_data(symbol: str) -> DataFrame:
|
|
1064
|
+
print(f"加载Tick数据: {symbol}")
|
|
1065
|
+
return DataFrame()
|
|
1066
|
+
```
|
|
1067
|
+
|
|
1068
|
+
#### 4.3.1 使用自定义 Redis 客户端
|
|
1069
|
+
|
|
1070
|
+
```python
|
|
1071
|
+
from redis import Redis, ConnectionPool
|
|
1072
|
+
from pynomad import df_rediscached
|
|
1073
|
+
|
|
1074
|
+
# 创建连接池
|
|
1075
|
+
pool = ConnectionPool(
|
|
1076
|
+
host="localhost",
|
|
1077
|
+
port=6379,
|
|
1078
|
+
db=0,
|
|
1079
|
+
password="password",
|
|
1080
|
+
max_connections=10
|
|
1081
|
+
)
|
|
1082
|
+
|
|
1083
|
+
# 创建 Redis 客户端
|
|
1084
|
+
redis_client = Redis(connection_pool=pool)
|
|
1085
|
+
|
|
1086
|
+
@df_rediscached(
|
|
1087
|
+
name="custom_redis_cache",
|
|
1088
|
+
redis_client=redis_client, # 使用自定义客户端
|
|
1089
|
+
ttl=600
|
|
1090
|
+
)
|
|
1091
|
+
def load_custom_data(symbol: str) -> DataFrame:
|
|
1092
|
+
print(f"加载自定义数据: {symbol}")
|
|
1093
|
+
return DataFrame()
|
|
1094
|
+
```
|
|
1095
|
+
|
|
1096
|
+
#### 4.4 DataFrame SQL 缓存 (`@sqlcached`)
|
|
1097
|
+
|
|
1098
|
+
`@sqlcached` 是专门用于 DataFrame 的 SQL 数据库缓存装饰器,支持 SQLite、MySQL、PostgreSQL 等多种数据库。
|
|
1099
|
+
|
|
1100
|
+
##### SQLite 缓存
|
|
1101
|
+
|
|
1102
|
+
```python
|
|
1103
|
+
from pynomad import sqlcached
|
|
1104
|
+
from pynomad.result import Result
|
|
1105
|
+
from pandas import DataFrame
|
|
1106
|
+
|
|
1107
|
+
@sqlcached(
|
|
1108
|
+
db_type="sqlite",
|
|
1109
|
+
db_url="sqlite:///cache.db", # SQLite 数据库文件
|
|
1110
|
+
ttl=3600
|
|
1111
|
+
)
|
|
1112
|
+
def fetch_data(symbol: str) -> Result[DataFrame]:
|
|
1113
|
+
"""必须声明返回类型为 Result[DataFrame]"""
|
|
1114
|
+
print(f"从 API 获取 {symbol} 数据...")
|
|
1115
|
+
df = DataFrame({
|
|
1116
|
+
"symbol": [symbol],
|
|
1117
|
+
"price": [100.0],
|
|
1118
|
+
"timestamp": ["2024-01-01"]
|
|
1119
|
+
})
|
|
1120
|
+
return Result.success(df)
|
|
1121
|
+
```
|
|
1122
|
+
|
|
1123
|
+
##### MySQL 缓存
|
|
1124
|
+
|
|
1125
|
+
```python
|
|
1126
|
+
from pynomad import sqlcached
|
|
1127
|
+
from pynomad.result import Result
|
|
1128
|
+
from pandas import DataFrame
|
|
1129
|
+
|
|
1130
|
+
@sqlcached(
|
|
1131
|
+
db_type="mysql",
|
|
1132
|
+
host="localhost",
|
|
1133
|
+
port=3306,
|
|
1134
|
+
db_name="cache_db",
|
|
1135
|
+
username="root",
|
|
1136
|
+
password="password",
|
|
1137
|
+
pool_size=5, # 连接池大小
|
|
1138
|
+
max_overflow=10, # 最大溢出连接数
|
|
1139
|
+
pool_timeout=30, # 连接超时(秒)
|
|
1140
|
+
pool_recycle=3600, # 连接回收时间(秒)
|
|
1141
|
+
pool_pre_ping=True, # 连接前测试
|
|
1142
|
+
max_retry_attempts=3, # 最大重试次数
|
|
1143
|
+
retry_delay=1.0, # 重试延迟(秒)
|
|
1144
|
+
ttl=7200
|
|
1145
|
+
)
|
|
1146
|
+
def fetch_market_data(symbol: str) -> Result[DataFrame]:
|
|
1147
|
+
print(f"获取 {symbol} 市场数据...")
|
|
1148
|
+
df = DataFrame({
|
|
1149
|
+
"symbol": [symbol],
|
|
1150
|
+
"open": [100.0],
|
|
1151
|
+
"high": [105.0],
|
|
1152
|
+
"low": [99.0],
|
|
1153
|
+
"close": [104.0]
|
|
1154
|
+
})
|
|
1155
|
+
return Result.success(df)
|
|
1156
|
+
```
|
|
1157
|
+
|
|
1158
|
+
##### PostgreSQL 缓存
|
|
1159
|
+
|
|
1160
|
+
```python
|
|
1161
|
+
from pynomad import sqlcached
|
|
1162
|
+
from pynomad.result import Result
|
|
1163
|
+
from pandas import DataFrame
|
|
1164
|
+
|
|
1165
|
+
@sqlcached(
|
|
1166
|
+
db_type="postgresql",
|
|
1167
|
+
host="localhost",
|
|
1168
|
+
port=5432,
|
|
1169
|
+
db_name="cache_db",
|
|
1170
|
+
username="postgres",
|
|
1171
|
+
password="password",
|
|
1172
|
+
ttl=3600
|
|
1173
|
+
)
|
|
1174
|
+
def fetch_analytics_data(date: str) -> Result[DataFrame]:
|
|
1175
|
+
print(f"获取 {date} 分析数据...")
|
|
1176
|
+
df = DataFrame({
|
|
1177
|
+
"date": [date],
|
|
1178
|
+
"views": [1000],
|
|
1179
|
+
"clicks": [100]
|
|
1180
|
+
})
|
|
1181
|
+
return Result.success(df)
|
|
1182
|
+
```
|
|
1183
|
+
|
|
1184
|
+
##### 完整配置参数
|
|
1185
|
+
|
|
1186
|
+
```python
|
|
1187
|
+
from pynomad import sqlcached
from pynomad.result import Result
from pandas import DataFrame
|
|
1188
|
+
|
|
1189
|
+
@sqlcached(
|
|
1190
|
+
name="sql_cache", # 缓存名称
|
|
1191
|
+
second_name="data", # 二级名称
|
|
1192
|
+
maxsize=128, # 最大缓存数量
|
|
1193
|
+
eviction_policy="lru", # 淘汰策略: lru/lfu/fifo/none
|
|
1194
|
+
ttl=3600, # 缓存存活时间(秒)
|
|
1195
|
+
value_loader=custom_loader, # 自定义 ValueLoader
|
|
1196
|
+
|
|
1197
|
+
# 数据库配置
|
|
1198
|
+
db_type="sqlite", # 数据库类型: sqlite/mysql/postgresql
|
|
1199
|
+
db_url="sqlite:///cache.db", # 数据库连接 URL(与 host/port/db_name 互斥)
|
|
1200
|
+
|
|
1201
|
+
# 连接参数(与 db_url 互斥)
|
|
1202
|
+
host="localhost", # 数据库主机
|
|
1203
|
+
port=3306, # 数据库端口
|
|
1204
|
+
db_name="cache_db", # 数据库名称
|
|
1205
|
+
username="root", # 用户名
|
|
1206
|
+
password="password", # 密码
|
|
1207
|
+
|
|
1208
|
+
# 连接池配置
|
|
1209
|
+
pool_size=5, # 连接池大小
|
|
1210
|
+
max_overflow=10, # 最大溢出连接数
|
|
1211
|
+
pool_timeout=30, # 连接超时(秒)
|
|
1212
|
+
pool_recycle=3600, # 连接回收时间(秒)
|
|
1213
|
+
pool_pre_ping=True, # 连接前测试
|
|
1214
|
+
|
|
1215
|
+
# 重试配置
|
|
1216
|
+
max_retry_attempts=3, # 最大重试次数
|
|
1217
|
+
retry_delay=1.0, # 重试延迟(秒)
|
|
1218
|
+
)
|
|
1219
|
+
def load_data(key: str) -> Result[DataFrame]:
|
|
1220
|
+
return Result.success(DataFrame())
|
|
1221
|
+
```
|
|
1222
|
+
|
|
1223
|
+
##### 使用场景
|
|
1224
|
+
|
|
1225
|
+
- **结构化数据持久化**: 适合需要结构化存储的 DataFrame 数据
|
|
1226
|
+
- **需要复杂查询的场景**: 可以通过 SQL 查询筛选缓存数据
|
|
1227
|
+
- **数据分析和报表**: 支持大量数据的存储和查询
|
|
1228
|
+
- **大数据量缓存**: 相比文件缓存,SQL 数据库更擅长处理大数据量
|
|
1229
|
+
|
|
1230
|
+
##### 重要提示
|
|
1231
|
+
|
|
1232
|
+
**必须声明返回类型为 Result[DataFrame]**:
|
|
1233
|
+
|
|
1234
|
+
```python
|
|
1235
|
+
# ✅ 正确
|
|
1236
|
+
@sqlcached(db_type="sqlite", db_url="sqlite:///cache.db", ttl=3600)
|
|
1237
|
+
def fetch_data(symbol: str) -> Result[DataFrame]:
|
|
1238
|
+
df = DataFrame({"symbol": [symbol]})
|
|
1239
|
+
return Result.success(df)
|
|
1240
|
+
|
|
1241
|
+
# ❌ 错误(缺少类型注解)
|
|
1242
|
+
@sqlcached(db_type="sqlite", db_url="sqlite:///cache.db", ttl=3600)
|
|
1243
|
+
def fetch_data(symbol: str):
|
|
1244
|
+
df = DataFrame({"symbol": [symbol]})
|
|
1245
|
+
return Result.success(df)
|
|
1246
|
+
|
|
1247
|
+
# ❌ 错误(返回类型不对)
|
|
1248
|
+
@sqlcached(db_type="sqlite", db_url="sqlite:///cache.db", ttl=3600)
|
|
1249
|
+
def fetch_data(symbol: str) -> DataFrame:
|
|
1250
|
+
df = DataFrame({"symbol": [symbol]})
|
|
1251
|
+
return df
|
|
1252
|
+
```
|
|
1253
|
+
|
|
1254
|
+
---
|
|
1255
|
+
|
|
1256
|
+
#### 4.5 三阶段处理模型详解
|
|
1257
|
+
|
|
1258
|
+
DataFrame 缓存采用独特的三阶段处理模型,实现灵活的数据处理逻辑:
|
|
1259
|
+
|
|
1260
|
+
```
|
|
1261
|
+
┌─────────────────────────────────────────────────────────────┐
|
|
1262
|
+
│ DataFrame 缓存流程 │
|
|
1263
|
+
├─────────────────────────────────────────────────────────────┤
|
|
1264
|
+
│ │
|
|
1265
|
+
│ 1. GET 阶段(缓存提取) │
|
|
1266
|
+
│ ├─ 从缓存读取 DataFrame │
|
|
1267
|
+
│ ├─ 比较当前参数与缓存参数 │
|
|
1268
|
+
│ ├─ 判断缓存是否可用 │
|
|
1269
|
+
│ └─ 返回缓存数据 或 刷新参数 │
|
|
1270
|
+
│ │
|
|
1271
|
+
│ 2. PUT 阶段(数据合并) │
|
|
1272
|
+
│ ├─ 获取新数据(函数执行) │
|
|
1273
|
+
│ ├─ 合并缓存数据和新数据 │
|
|
1274
|
+
│ └─ 缓存合并后的数据 │
|
|
1275
|
+
│ │
|
|
1276
|
+
│ 3. EXTRACT 阶段(数据提取) │
|
|
1277
|
+
│ ├─ 从合并后的数据中提取返回数据 │
|
|
1278
|
+
│ └─ 支持过滤、转换、切片等操作 │
|
|
1279
|
+
│ │
|
|
1280
|
+
└─────────────────────────────────────────────────────────────┘
|
|
1281
|
+
```
|
|
1282
|
+
|
|
1283
|
+
#### 4.6 自定义 ValueLoader
|
|
1284
|
+
|
|
1285
|
+
`ValueLoader` 是实现三阶段处理逻辑的核心接口。
|
|
1286
|
+
|
|
1287
|
+
##### 基本用法(使用默认加载器)
|
|
1288
|
+
|
|
1289
|
+
```python
|
|
1290
|
+
from pynomad import df_memcached
|
|
1291
|
+
from pandas import DataFrame
|
|
1292
|
+
|
|
1293
|
+
# 使用默认的 DefaultDataFrameValueLoader
|
|
1294
|
+
# 默认行为:参数相同返回缓存,参数不同刷新数据
|
|
1295
|
+
@df_memcached(ttl=600)
|
|
1296
|
+
def load_data(symbol: str, start_date: str) -> DataFrame:
|
|
1297
|
+
print(f"加载数据: {symbol}, {start_date}")
|
|
1298
|
+
return DataFrame()
|
|
1299
|
+
```
|
|
1300
|
+
|
|
1301
|
+
##### 自定义 ValueLoader - 增量更新场景
|
|
1302
|
+
|
|
1303
|
+
```python
|
|
1304
|
+
from pynomad.cache.dataframe.types import DataFrameValueLoader
|
|
1305
|
+
from pynomad import Result
|
|
1306
|
+
from pandas import DataFrame
|
|
1307
|
+
from datetime import datetime
from typing import Any
import pandas as pd
from pynomad import df_memcached
|
|
1308
|
+
|
|
1309
|
+
class IncrementalValueLoader(DataFrameValueLoader):
|
|
1310
|
+
"""增量更新 ValueLoader"""
|
|
1311
|
+
|
|
1312
|
+
def get(
|
|
1313
|
+
self,
|
|
1314
|
+
cached_df: DataFrame,
|
|
1315
|
+
extra_params: dict,
|
|
1316
|
+
args: tuple,
|
|
1317
|
+
kwargs: dict
|
|
1318
|
+
) -> Result[DataFrame | dict[str, Any]]:
|
|
1319
|
+
"""GET 阶段:检查缓存是否需要更新"""
|
|
1320
|
+
# 获取缓存的参数
|
|
1321
|
+
last_args = tuple(extra_params.get("args", ()))
|
|
1322
|
+
last_kwargs = extra_params.get("kwargs", {})
|
|
1323
|
+
|
|
1324
|
+
# 参数相同,使用缓存
|
|
1325
|
+
if last_args == args and last_kwargs == kwargs:
|
|
1326
|
+
return Result.success(data=cached_df)
|
|
1327
|
+
|
|
1328
|
+
# 参数不同,返回刷新参数
|
|
1329
|
+
return Result.client_error(
|
|
1330
|
+
exception=Exception("参数变化,需要刷新"),
|
|
1331
|
+
data={"args": args, "kwargs": kwargs}
|
|
1332
|
+
)
|
|
1333
|
+
|
|
1334
|
+
def put(
|
|
1335
|
+
self,
|
|
1336
|
+
cached_df: DataFrame,
|
|
1337
|
+
new_df: DataFrame,
|
|
1338
|
+
extra_params: dict,
|
|
1339
|
+
args: tuple,
|
|
1340
|
+
kwargs: dict
|
|
1341
|
+
) -> Result[DataFrame]:
|
|
1342
|
+
"""PUT 阶段:合并新旧数据(增量更新)"""
|
|
1343
|
+
# 如果缓存为空,直接返回新数据
|
|
1344
|
+
if cached_df.empty:
|
|
1345
|
+
return Result.success(data=new_df)
|
|
1346
|
+
|
|
1347
|
+
# 合并数据(去重)
|
|
1348
|
+
merged_df = pd.concat([cached_df, new_df]).drop_duplicates()
|
|
1349
|
+
return Result.success(data=merged_df)
|
|
1350
|
+
|
|
1351
|
+
def extract(
|
|
1352
|
+
self,
|
|
1353
|
+
merged_df: DataFrame,
|
|
1354
|
+
extra_params: dict,
|
|
1355
|
+
args: tuple,
|
|
1356
|
+
kwargs: dict
|
|
1357
|
+
) -> Result[DataFrame]:
|
|
1358
|
+
"""EXTRACT 阶段:提取返回数据"""
|
|
1359
|
+
# 直接返回合并后的数据
|
|
1360
|
+
return Result.success(data=merged_df)
|
|
1361
|
+
|
|
1362
|
+
# 使用自定义 ValueLoader
|
|
1363
|
+
@df_memcached(value_loader=IncrementalValueLoader(), ttl=600)
|
|
1364
|
+
def load_incremental_data(symbol: str) -> DataFrame:
|
|
1365
|
+
print(f"加载增量数据: {symbol}")
|
|
1366
|
+
return DataFrame()
|
|
1367
|
+
```
|
|
1368
|
+
|
|
1369
|
+
##### 自定义 ValueLoader - 数据过滤场景
|
|
1370
|
+
|
|
1371
|
+
```python
|
|
1372
|
+
from typing import Any

import pandas as pd
from pandas import DataFrame

from pynomad import Result, df_memcached
from pynomad.cache.dataframe.types import DataFrameValueLoader

class FilteredValueLoader(DataFrameValueLoader):
|
|
1373
|
+
"""支持参数过滤的 ValueLoader"""
|
|
1374
|
+
|
|
1375
|
+
def get(
|
|
1376
|
+
self,
|
|
1377
|
+
cached_df: DataFrame,
|
|
1378
|
+
extra_params: dict,
|
|
1379
|
+
args: tuple,
|
|
1380
|
+
kwargs: dict
|
|
1381
|
+
) -> Result[DataFrame | dict[str, Any]]:
|
|
1382
|
+
"""GET 阶段:直接返回缓存,过滤在 EXTRACT 阶段进行"""
|
|
1383
|
+
if not cached_df.empty:
|
|
1384
|
+
return Result.success(data=cached_df)
|
|
1385
|
+
|
|
1386
|
+
# 缓存为空,需要刷新
|
|
1387
|
+
return Result.client_error(
|
|
1388
|
+
exception=Exception("缓存为空"),
|
|
1389
|
+
data={"args": args, "kwargs": kwargs}
|
|
1390
|
+
)
|
|
1391
|
+
|
|
1392
|
+
def put(
|
|
1393
|
+
self,
|
|
1394
|
+
cached_df: DataFrame,
|
|
1395
|
+
new_df: DataFrame,
|
|
1396
|
+
extra_params: dict,
|
|
1397
|
+
args: tuple,
|
|
1398
|
+
kwargs: dict
|
|
1399
|
+
) -> Result[DataFrame]:
|
|
1400
|
+
"""PUT 阶段:直接缓存新数据(不合并)"""
|
|
1401
|
+
return Result.success(data=new_df)
|
|
1402
|
+
|
|
1403
|
+
def extract(
|
|
1404
|
+
self,
|
|
1405
|
+
merged_df: DataFrame,
|
|
1406
|
+
extra_params: dict,
|
|
1407
|
+
args: tuple,
|
|
1408
|
+
kwargs: dict
|
|
1409
|
+
) -> Result[DataFrame]:
|
|
1410
|
+
"""EXTRACT 阶段:根据参数过滤数据"""
|
|
1411
|
+
# 从 kwargs 中获取过滤条件
|
|
1412
|
+
symbol = kwargs.get("symbol")
|
|
1413
|
+
start_date = kwargs.get("start_date")
|
|
1414
|
+
end_date = kwargs.get("end_date")
|
|
1415
|
+
|
|
1416
|
+
# 应用过滤条件
|
|
1417
|
+
filtered_df = merged_df
|
|
1418
|
+
if symbol:
|
|
1419
|
+
filtered_df = filtered_df[filtered_df["symbol"] == symbol]
|
|
1420
|
+
if start_date:
|
|
1421
|
+
filtered_df = filtered_df[filtered_df["date"] >= start_date]
|
|
1422
|
+
if end_date:
|
|
1423
|
+
filtered_df = filtered_df[filtered_df["date"] <= end_date]
|
|
1424
|
+
|
|
1425
|
+
return Result.success(data=filtered_df)
|
|
1426
|
+
|
|
1427
|
+
# 使用过滤 ValueLoader
|
|
1428
|
+
@df_memcached(value_loader=FilteredValueLoader(), ttl=3600)
|
|
1429
|
+
def load_and_filter_data(
|
|
1430
|
+
symbol: str,
|
|
1431
|
+
start_date: str,
|
|
1432
|
+
end_date: str
|
|
1433
|
+
) -> DataFrame:
|
|
1434
|
+
print(f"加载数据: {symbol}, {start_date} ~ {end_date}")
|
|
1435
|
+
# 从数据库加载全量数据
|
|
1436
|
+
return DataFrame()
|
|
1437
|
+
```
|
|
1438
|
+
|
|
1439
|
+
##### 自定义 ValueLoader - 时间窗口缓存
|
|
1440
|
+
|
|
1441
|
+
```python
|
|
1442
|
+
from typing import Any

import pandas as pd
from pandas import DataFrame

from pynomad import Result, df_memcached
from pynomad.cache.dataframe.types import DataFrameValueLoader

class TimeWindowValueLoader(DataFrameValueLoader):
|
|
1443
|
+
"""时间窗口缓存 ValueLoader"""
|
|
1444
|
+
|
|
1445
|
+
def get(
|
|
1446
|
+
self,
|
|
1447
|
+
cached_df: DataFrame,
|
|
1448
|
+
extra_params: dict,
|
|
1449
|
+
args: tuple,
|
|
1450
|
+
kwargs: dict
|
|
1451
|
+
) -> Result[DataFrame | dict[str, Any]]:
|
|
1452
|
+
"""GET 阶段:检查缓存是否覆盖请求的时间范围"""
|
|
1453
|
+
if cached_df.empty:
|
|
1454
|
+
return Result.client_error(
|
|
1455
|
+
exception=Exception("缓存为空"),
|
|
1456
|
+
data={"args": args, "kwargs": kwargs}
|
|
1457
|
+
)
|
|
1458
|
+
|
|
1459
|
+
# 获取请求的时间范围
|
|
1460
|
+
start_date = kwargs.get("start_date")
|
|
1461
|
+
end_date = kwargs.get("end_date")
|
|
1462
|
+
|
|
1463
|
+
# 检查缓存是否覆盖请求范围
|
|
1464
|
+
cache_start = cached_df["date"].min()
|
|
1465
|
+
cache_end = cached_df["date"].max()
|
|
1466
|
+
|
|
1467
|
+
if start_date and cache_start > pd.Timestamp(start_date):
|
|
1468
|
+
return Result.client_error(
|
|
1469
|
+
exception=Exception(f"缓存起始时间 {cache_start} 晚于请求时间 {start_date}"),
|
|
1470
|
+
data={"args": args, "kwargs": kwargs}
|
|
1471
|
+
)
|
|
1472
|
+
|
|
1473
|
+
if end_date and cache_end < pd.Timestamp(end_date):
|
|
1474
|
+
return Result.client_error(
|
|
1475
|
+
exception=Exception(f"缓存结束时间 {cache_end} 早于请求时间 {end_date}"),
|
|
1476
|
+
data={"args": args, "kwargs": kwargs}
|
|
1477
|
+
)
|
|
1478
|
+
|
|
1479
|
+
# 缓存覆盖请求范围
|
|
1480
|
+
return Result.success(data=cached_df)
|
|
1481
|
+
|
|
1482
|
+
def put(
|
|
1483
|
+
self,
|
|
1484
|
+
cached_df: DataFrame,
|
|
1485
|
+
new_df: DataFrame,
|
|
1486
|
+
extra_params: dict,
|
|
1487
|
+
args: tuple,
|
|
1488
|
+
kwargs: dict
|
|
1489
|
+
) -> Result[DataFrame]:
|
|
1490
|
+
"""PUT 阶段:扩展缓存的时间范围"""
|
|
1491
|
+
# 合并数据并去重
|
|
1492
|
+
merged_df = pd.concat([cached_df, new_df]).drop_duplicates()
|
|
1493
|
+
return Result.success(data=merged_df)
|
|
1494
|
+
|
|
1495
|
+
def extract(
|
|
1496
|
+
self,
|
|
1497
|
+
merged_df: DataFrame,
|
|
1498
|
+
extra_params: dict,
|
|
1499
|
+
args: tuple,
|
|
1500
|
+
kwargs: dict
|
|
1501
|
+
) -> Result[DataFrame]:
|
|
1502
|
+
"""EXTRACT 阶段:返回请求时间范围内的数据"""
|
|
1503
|
+
start_date = kwargs.get("start_date")
|
|
1504
|
+
end_date = kwargs.get("end_date")
|
|
1505
|
+
|
|
1506
|
+
filtered_df = merged_df
|
|
1507
|
+
if start_date:
|
|
1508
|
+
filtered_df = filtered_df[filtered_df["date"] >= pd.Timestamp(start_date)]
|
|
1509
|
+
if end_date:
|
|
1510
|
+
filtered_df = filtered_df[filtered_df["date"] <= pd.Timestamp(end_date)]
|
|
1511
|
+
|
|
1512
|
+
return Result.success(data=filtered_df)
|
|
1513
|
+
|
|
1514
|
+
# 使用时间窗口 ValueLoader
|
|
1515
|
+
@df_memcached(value_loader=TimeWindowValueLoader(), ttl=7200)
|
|
1516
|
+
def load_time_series_data(
|
|
1517
|
+
symbol: str,
|
|
1518
|
+
start_date: str,
|
|
1519
|
+
end_date: str
|
|
1520
|
+
) -> DataFrame:
|
|
1521
|
+
print(f"加载时间序列: {symbol}, {start_date} ~ {end_date}")
|
|
1522
|
+
return DataFrame()
|
|
1523
|
+
```
|
|
1524
|
+
|
|
1525
|
+
#### 4.7 DataFrame 缓存常见场景
|
|
1526
|
+
|
|
1527
|
+
##### 场景1:股票数据缓存
|
|
1528
|
+
|
|
1529
|
+
```python
|
|
1530
|
+
from pynomad import df_pickled
|
|
1531
|
+
from pandas import DataFrame
|
|
1532
|
+
from datetime import datetime, timedelta
import pandas as pd
|
|
1533
|
+
|
|
1534
|
+
@df_pickled(
|
|
1535
|
+
name="stock_cache",
|
|
1536
|
+
cache_dir="./stock_cache",
|
|
1537
|
+
ttl=86400 # 24小时
|
|
1538
|
+
)
|
|
1539
|
+
def get_stock_daily(
|
|
1540
|
+
symbol: str,
|
|
1541
|
+
start_date: str,
|
|
1542
|
+
end_date: str
|
|
1543
|
+
) -> DataFrame:
|
|
1544
|
+
"""获取股票日线数据"""
|
|
1545
|
+
print(f"从 API 获取 {symbol} 数据: {start_date} ~ {end_date}")
|
|
1546
|
+
# 模拟数据
|
|
1547
|
+
dates = pd.date_range(start_date, end_date)
|
|
1548
|
+
return DataFrame({
|
|
1549
|
+
"symbol": [symbol] * len(dates),
|
|
1550
|
+
"date": dates,
|
|
1551
|
+
"open": [100.0] * len(dates),
|
|
1552
|
+
"high": [105.0] * len(dates),
|
|
1553
|
+
"low": [98.0] * len(dates),
|
|
1554
|
+
"close": [103.0] * len(dates),
|
|
1555
|
+
"volume": [1000000] * len(dates)
|
|
1556
|
+
})
|
|
1557
|
+
|
|
1558
|
+
# 第一次调用(从 API 加载)
|
|
1559
|
+
df1 = get_stock_daily("AAPL", "2024-01-01", "2024-01-10")
|
|
1560
|
+
# 第二次调用(从缓存读取)
|
|
1561
|
+
df2 = get_stock_daily("AAPL", "2024-01-01", "2024-01-10")
|
|
1562
|
+
```
|
|
1563
|
+
|
|
1564
|
+
##### 场景2:实时行情缓存
|
|
1565
|
+
|
|
1566
|
+
```python
|
|
1567
|
+
from pynomad import df_rediscached
from pandas import DataFrame
from datetime import datetime
|
|
1568
|
+
|
|
1569
|
+
@df_rediscached(
|
|
1570
|
+
name="realtime_cache",
|
|
1571
|
+
host="localhost",
|
|
1572
|
+
port=6379,
|
|
1573
|
+
ttl=5 # 5秒过期
|
|
1574
|
+
)
|
|
1575
|
+
def get_realtime_quotes(symbols: list[str]) -> DataFrame:
|
|
1576
|
+
"""获取实时行情"""
|
|
1577
|
+
print(f"获取实时行情: {symbols}")
|
|
1578
|
+
# 模拟数据
|
|
1579
|
+
return DataFrame({
|
|
1580
|
+
"symbol": symbols,
|
|
1581
|
+
"price": [100.0 + i for i in range(len(symbols))],
|
|
1582
|
+
"change": [1.0] * len(symbols),
|
|
1583
|
+
"change_pct": [1.0] * len(symbols),
|
|
1584
|
+
"timestamp": [datetime.now()] * len(symbols)
|
|
1585
|
+
})
|
|
1586
|
+
|
|
1587
|
+
# 首次调用(从数据源获取)
|
|
1588
|
+
quotes1 = get_realtime_quotes(["AAPL", "GOOGL", "MSFT"])
|
|
1589
|
+
|
|
1590
|
+
# 5秒内调用(从缓存读取)
|
|
1591
|
+
quotes2 = get_realtime_quotes(["AAPL", "GOOGL", "MSFT"])
|
|
1592
|
+
|
|
1593
|
+
# 5秒后调用(重新获取)
|
|
1594
|
+
# time.sleep(6)
|
|
1595
|
+
# quotes3 = get_realtime_quotes(["AAPL", "GOOGL", "MSFT"])
|
|
1596
|
+
```
|
|
1597
|
+
|
|
1598
|
+
##### 场景3:技术指标缓存
|
|
1599
|
+
|
|
1600
|
+
```python
|
|
1601
|
+
from pynomad import df_memcached
from pandas import DataFrame
|
|
1602
|
+
|
|
1603
|
+
@df_memcached(
|
|
1604
|
+
name="indicator_cache",
|
|
1605
|
+
ttl=3600 # 1小时
|
|
1606
|
+
)
|
|
1607
|
+
def calculate_sma(
|
|
1608
|
+
df: DataFrame,
|
|
1609
|
+
period: int = 20
|
|
1610
|
+
) -> DataFrame:
|
|
1611
|
+
"""计算简单移动平均线"""
|
|
1612
|
+
print(f"计算 SMA({period})")
|
|
1613
|
+
df = df.copy()
|
|
1614
|
+
df[f"sma_{period}"] = df["close"].rolling(window=period).mean()
|
|
1615
|
+
return df
|
|
1616
|
+
|
|
1617
|
+
# 使用示例
|
|
1618
|
+
import pandas as pd
|
|
1619
|
+
prices = pd.DataFrame({
|
|
1620
|
+
"date": pd.date_range("2024-01-01", periods=100),
|
|
1621
|
+
"close": [100 + i * 0.5 for i in range(100)]
|
|
1622
|
+
})
|
|
1623
|
+
|
|
1624
|
+
# 首次计算
|
|
1625
|
+
result1 = calculate_sma(prices, period=20)
|
|
1626
|
+
|
|
1627
|
+
# 再次使用相同参数(从缓存)
|
|
1628
|
+
result2 = calculate_sma(prices, period=20)
|
|
1629
|
+
```
|
|
1630
|
+
|
|
1631
|
+
#### 4.8 DataFrame 缓存最佳实践
|
|
1632
|
+
|
|
1633
|
+
1. **选择合适的缓存类型**:
|
|
1634
|
+
- 临时数据 → `@df_memcached`
|
|
1635
|
+
- 需要持久化 → `@df_pickled`
|
|
1636
|
+
- 分布式场景 → `@df_rediscached`
|
|
1637
|
+
- 结构化数据/大数据量 → `@sqlcached`
|
|
1638
|
+
|
|
1639
|
+
2. **合理设置 TTL**:
|
|
1640
|
+
- 实时数据 → 短 TTL(秒级)
|
|
1641
|
+
- 日线数据 → 中 TTL(小时级)
|
|
1642
|
+
- 历史数据 → 长 TTL(天级)
|
|
1643
|
+
|
|
1644
|
+
3. **使用自定义 ValueLoader**:
|
|
1645
|
+
- 增量更新场景
|
|
1646
|
+
- 数据过滤场景
|
|
1647
|
+
- 时间窗口缓存
|
|
1648
|
+
|
|
1649
|
+
4. **注意内存管理**:
|
|
1650
|
+
- 设置合理的 `maxsize`
|
|
1651
|
+
- 使用合适的淘汰策略
|
|
1652
|
+
- 定期清理过期缓存
|
|
1653
|
+
|
|
1654
|
+
5. **根据数据量选择存储**:
|
|
1655
|
+
- 小数据量 → 内存或文件缓存
|
|
1656
|
+
- 中等数据量 → Redis 缓存
|
|
1657
|
+
- 大数据量 → SQL 缓存
|
|
1658
|
+
|
|
1659
|
+
#### 4.9 DataFrame 缓存常见问题
|
|
1660
|
+
|
|
1661
|
+
**Q: 为什么函数必须声明返回类型?**
|
|
1662
|
+
|
|
1663
|
+
A: DataFrame 缓存装饰器需要区分返回 `DataFrame` 和 `Result[DataFrame]`,以便正确处理装箱和缓存一致性。这是 Python 动态类型系统的局限性。
|
|
1664
|
+
|
|
1665
|
+
```python
|
|
1666
|
+
# ✅ 正确 - @df_memcached, @df_pickled, @df_rediscached 返回 DataFrame
|
|
1667
|
+
@df_memcached(ttl=60)
|
|
1668
|
+
def load_data() -> DataFrame:
|
|
1669
|
+
return DataFrame()
|
|
1670
|
+
|
|
1671
|
+
# ✅ 正确 - @sqlcached 返回 Result[DataFrame]
|
|
1672
|
+
@sqlcached(db_type="sqlite", db_url="sqlite:///cache.db", ttl=60)
|
|
1673
|
+
def fetch_data() -> Result[DataFrame]:
|
|
1674
|
+
return Result.success(DataFrame())
|
|
1675
|
+
|
|
1676
|
+
# ❌ 错误(缺少类型注解)
|
|
1677
|
+
@df_memcached(ttl=60)
|
|
1678
|
+
def load_data():
|
|
1679
|
+
return DataFrame()
|
|
1680
|
+
```
|
|
1681
|
+
|
|
1682
|
+
**Q: 如何处理大数据量的 DataFrame?**
|
|
1683
|
+
|
|
1684
|
+
A: 建议使用 `@df_pickled`、`@df_rediscached` 或 `@sqlcached`,避免内存溢出。同时可以:
|
|
1685
|
+
- 设置合理的 `maxsize`
|
|
1686
|
+
- 使用数据分片
|
|
1687
|
+
- 在 `extract` 阶段过滤返回的数据
|
|
1688
|
+
- 对于大数据量,优先使用 `@sqlcached`
|
|
1689
|
+
|
|
1690
|
+
**Q: 如何实现缓存预热?**
|
|
1691
|
+
|
|
1692
|
+
A: 在应用启动时,主动调用一次带所有必要参数的函数:
|
|
1693
|
+
|
|
1694
|
+
```python
|
|
1695
|
+
# 预热缓存
|
|
1696
|
+
get_stock_daily("AAPL", "2024-01-01", "2024-12-31")
|
|
1697
|
+
get_stock_daily("GOOGL", "2024-01-01", "2024-12-31")
|
|
1698
|
+
get_stock_daily("MSFT", "2024-01-01", "2024-12-31")
|
|
1699
|
+
```
|
|
1700
|
+
|
|
1701
|
+
---
|
|
1702
|
+
|
|
1703
|
+
### 5. 多级缓存
|
|
1704
|
+
|
|
1705
|
+
#### 双级缓存(L1 + L2)
|
|
1706
|
+
|
|
1707
|
+
```python
|
|
1708
|
+
from pynomad import multi_level_cached
|
|
1709
|
+
|
|
1710
|
+
@multi_level_cached(
|
|
1711
|
+
name="my_cache",
|
|
1712
|
+
l1_code="memory", # L1: 内存缓存
|
|
1713
|
+
l2_code="pickle", # L2: 文件缓存
|
|
1714
|
+
l1_ttl=60, # L1: 1分钟
|
|
1715
|
+
l2_ttl=3600 # L2: 1小时
|
|
1716
|
+
)
|
|
1717
|
+
def get_config(key: str) -> str:
|
|
1718
|
+
print(f"加载配置: {key}")
|
|
1719
|
+
return "config_value"
|
|
1720
|
+
```
|
|
1721
|
+
|
|
1722
|
+
#### 三级缓存(L1 + L2 + L3)
|
|
1723
|
+
|
|
1724
|
+
```python
|
|
1725
|
+
from pynomad import multi_level_cached
|
|
1726
|
+
|
|
1727
|
+
@multi_level_cached(
|
|
1728
|
+
name="my_cache",
|
|
1729
|
+
l1_code="memory", # L1: 内存
|
|
1730
|
+
l2_code="pickle", # L2: 文件
|
|
1731
|
+
l3_code="redis", # L3: Redis
|
|
1732
|
+
l1_ttl=10, # L1: 10秒
|
|
1733
|
+
l2_ttl=60, # L2: 1分钟
|
|
1734
|
+
l3_ttl=300 # L3: 5分钟
|
|
1735
|
+
)
|
|
1736
|
+
def get_data(key: str) -> dict:
|
|
1737
|
+
print(f"获取数据: {key}")
|
|
1738
|
+
return {"key": key, "value": "data"}
|
|
1739
|
+
```
|
|
1740
|
+
|
|
1741
|
+
#### 读穿透策略
|
|
1742
|
+
|
|
1743
|
+
```
|
|
1744
|
+
请求 → L1 缓存
|
|
1745
|
+
├─ 命中 → 返回数据
|
|
1746
|
+
└─ 未命中 → L2 缓存
|
|
1747
|
+
├─ 命中 → 缓存到 L1 → 返回数据
|
|
1748
|
+
└─ 未命中 → L3 缓存
|
|
1749
|
+
├─ 命中 → 缓存到 L2, L1 → 返回数据
|
|
1750
|
+
└─ 未命中 → 执行函数 → 缓存到 L3, L2, L1 → 返回数据
|
|
1751
|
+
```
|
|
1752
|
+
|
|
1753
|
+
#### 写穿透策略
|
|
1754
|
+
|
|
1755
|
+
```
|
|
1756
|
+
函数执行 → 写入 L1
|
|
1757
|
+
→ 写入 L2
|
|
1758
|
+
→ 写入 L3
|
|
1759
|
+
```
|
|
1760
|
+
|
|
1761
|
+
#### 管理缓存
|
|
1762
|
+
|
|
1763
|
+
```python
|
|
1764
|
+
from pynomad import multi_level_cached
|
|
1765
|
+
|
|
1766
|
+
# 获取统计信息
|
|
1767
|
+
@multi_level_cached(name="my_cache", l1_code="memory", l2_code="pickle")
|
|
1768
|
+
def get_data(key: str):
|
|
1769
|
+
return fetch_from_db(key)
|
|
1770
|
+
|
|
1771
|
+
# 访问装饰器实例
|
|
1772
|
+
decorator = get_data.__wrapped__.__closure__[0].cell_contents
|
|
1773
|
+
|
|
1774
|
+
# 清空所有缓存
|
|
1775
|
+
decorator.clear()
|
|
1776
|
+
|
|
1777
|
+
# 获取缓存统计
|
|
1778
|
+
stats = decorator.get_stats()
|
|
1779
|
+
print(stats)
|
|
1780
|
+
# 输出: {'l1': {'size': 10}, 'l2': {'size': 5}}
|
|
1781
|
+
```
|
|
1782
|
+
|
|
1783
|
+
#### 使用场景
|
|
1784
|
+
|
|
1785
|
+
- 需要高性能和高可靠性
|
|
1786
|
+
- 热点数据频繁访问
|
|
1787
|
+
- 降低后端压力
|
|
1788
|
+
- 容灾备份
|
|
1789
|
+
|
|
1790
|
+
---
|
|
1791
|
+
|
|
1792
|
+
### 6. 缓存键生成器
|
|
1793
|
+
|
|
1794
|
+
#### 默认键生成器
|
|
1795
|
+
|
|
1796
|
+
```python
|
|
1797
|
+
from pynomad import memcached
|
|
1798
|
+
|
|
1799
|
+
@memcached
|
|
1800
|
+
def get_user(user_id: int) -> dict:
|
|
1801
|
+
# 键格式: module.function:args_kwargs
|
|
1802
|
+
pass
|
|
1803
|
+
|
|
1804
|
+
# 键示例: mymodule.get_user:(1,):{}
|
|
1805
|
+
```
|
|
1806
|
+
|
|
1807
|
+
#### 模块函数键生成器
|
|
1808
|
+
|
|
1809
|
+
```python
|
|
1810
|
+
from pynomad import memcached
from pynomad.cache.decorator.keygenerator import module_function_key_generator
|
|
1811
|
+
|
|
1812
|
+
@memcached(keygenerator=module_function_key_generator)
|
|
1813
|
+
def get_product(product_id: str) -> dict:
|
|
1814
|
+
# 键格式: module.function:args_kwargs
|
|
1815
|
+
pass
|
|
1816
|
+
```
|
|
1817
|
+
|
|
1818
|
+
#### 自定义键生成器
|
|
1819
|
+
|
|
1820
|
+
```python
|
|
1821
|
+
from pynomad import memcached
|
|
1822
|
+
from pynomad.cache.core.types import KeyGenerator
|
|
1823
|
+
|
|
1824
|
+
def custom_key_generator(func, instance, args, kwargs) -> str:
|
|
1825
|
+
"""自定义键生成逻辑"""
|
|
1826
|
+
# 简化键,只使用特定参数
|
|
1827
|
+
return f"{func.__name__}:{args[0]}"
|
|
1828
|
+
|
|
1829
|
+
@memcached(keygenerator=custom_key_generator)
|
|
1830
|
+
def get_data(user_id: int, timestamp: int) -> dict:
|
|
1831
|
+
# 键格式: get_data:1 (忽略 timestamp)
|
|
1832
|
+
pass
|
|
1833
|
+
```
|
|
1834
|
+
|
|
1835
|
+
---
|
|
1836
|
+
|
|
1837
|
+
### 7. 缓存配置详解
|
|
1838
|
+
|
|
1839
|
+
#### 7.1 通用配置参数
|
|
1840
|
+
|
|
1841
|
+
所有缓存类型都支持的通用参数:
|
|
1842
|
+
|
|
1843
|
+
| 参数 | 类型 | 默认值 | 说明 | 适用缓存类型 |
|
|
1844
|
+
|------|------|--------|------|-------------|
|
|
1845
|
+
| `name` | `str` | `"default"` | 缓存名称,用于区分不同的缓存实例 | 所有 |
|
|
1846
|
+
| `ttl` | `int` | `3600` | 缓存存活时间(秒),`0` 表示永不过期 | 所有 |
|
|
1847
|
+
| `maxsize` | `int` | `100` | 最大缓存数量,`0` 表示无限制 | 所有 |
|
|
1848
|
+
| `eviction_policy` | `str` | `"lru"` | 淘汰策略:`lru`/`lfu`/`fifo`/`none` | 内存类缓存 |
|
|
1849
|
+
| `enable_encryption` | `bool` | `False` | 是否启用数据加密 | 持久化缓存 |
|
|
1850
|
+
| `salt` | `str` | `None` | 加密盐值,用于生成加密密钥 | 持久化缓存 |
|
|
1851
|
+
|
|
1852
|
+
**通用配置示例:**
|
|
1853
|
+
|
|
1854
|
+
```python
|
|
1855
|
+
from pynomad import memcached, pickled, rediscached
|
|
1856
|
+
|
|
1857
|
+
# 内存缓存配置
|
|
1858
|
+
@memcached(
|
|
1859
|
+
name="user_cache",
|
|
1860
|
+
ttl=600, # 10分钟过期
|
|
1861
|
+
maxsize=1000, # 最多缓存1000个用户
|
|
1862
|
+
eviction_policy="lru" # LRU淘汰策略
|
|
1863
|
+
)
|
|
1864
|
+
def get_user(user_id: int) -> dict:
|
|
1865
|
+
return fetch_user_from_db(user_id)
|
|
1866
|
+
|
|
1867
|
+
# 文件缓存配置
|
|
1868
|
+
@pickled(
|
|
1869
|
+
name="config_cache",
|
|
1870
|
+
ttl=86400, # 24小时过期
|
|
1871
|
+
maxsize=500,
|
|
1872
|
+
cache_dir="./cache", # 缓存目录
|
|
1873
|
+
enable_encryption=True, # 启用加密
|
|
1874
|
+
salt="app_v1.0" # 加密盐值
|
|
1875
|
+
)
|
|
1876
|
+
def load_app_config() -> dict:
|
|
1877
|
+
return load_config_file()
|
|
1878
|
+
|
|
1879
|
+
# Redis 缓存配置
|
|
1880
|
+
@rediscached(
|
|
1881
|
+
name="product_cache",
|
|
1882
|
+
ttl=1800, # 30分钟过期
|
|
1883
|
+
maxsize=2000,
|
|
1884
|
+
host="localhost",
|
|
1885
|
+
port=6379,
|
|
1886
|
+
db=0,
|
|
1887
|
+
enable_encryption=True,
|
|
1888
|
+
salt="redis_cache_v1"
|
|
1889
|
+
)
|
|
1890
|
+
def get_product(product_id: str) -> dict:
|
|
1891
|
+
return fetch_product_from_db(product_id)
|
|
1892
|
+
```
|
|
1893
|
+
|
|
1894
|
+
---
|
|
1895
|
+
|
|
1896
|
+
#### 7.2 Memory Cache 专属配置
|
|
1897
|
+
|
|
1898
|
+
Memory Cache(内存缓存)的专属配置:
|
|
1899
|
+
|
|
1900
|
+
| 参数 | 类型 | 默认值 | 说明 |
|
|
1901
|
+
|------|------|--------|------|
|
|
1902
|
+
| 无专属参数 | - | - | 使用通用配置 |
|
|
1903
|
+
|
|
1904
|
+
**淘汰策略详解:**
|
|
1905
|
+
|
|
1906
|
+
```python
|
|
1907
|
+
from pynomad import memcached
|
|
1908
|
+
|
|
1909
|
+
# LRU (Least Recently Used) - 最久未使用淘汰
|
|
1910
|
+
@memcached(
|
|
1911
|
+
ttl=60,
|
|
1912
|
+
maxsize=100,
|
|
1913
|
+
eviction_policy="lru" # ✅ 最常用,淘汰最久未访问的数据
|
|
1914
|
+
)
|
|
1915
|
+
def get_lru_cache(key: str):
|
|
1916
|
+
pass
|
|
1917
|
+
|
|
1918
|
+
# LFU (Least Frequently Used) - 最少使用频率淘汰
|
|
1919
|
+
@memcached(
|
|
1920
|
+
ttl=60,
|
|
1921
|
+
maxsize=100,
|
|
1922
|
+
eviction_policy="lfu" # 适用于访问模式稳定的数据
|
|
1923
|
+
)
|
|
1924
|
+
def get_lfu_cache(key: str):
|
|
1925
|
+
pass
|
|
1926
|
+
|
|
1927
|
+
# FIFO (First In First Out) - 先进先出淘汰
|
|
1928
|
+
@memcached(
|
|
1929
|
+
ttl=60,
|
|
1930
|
+
maxsize=100,
|
|
1931
|
+
eviction_policy="fifo" # 适用于时间序列数据
|
|
1932
|
+
)
|
|
1933
|
+
def get_fifo_cache(key: str):
|
|
1934
|
+
pass
|
|
1935
|
+
|
|
1936
|
+
# None - 不淘汰,缓存满时拒绝写入
|
|
1937
|
+
@memcached(
|
|
1938
|
+
ttl=60,
|
|
1939
|
+
maxsize=100,
|
|
1940
|
+
eviction_policy="none" # 适用于大小可控的缓存
|
|
1941
|
+
)
|
|
1942
|
+
def get_none_cache(key: str):
|
|
1943
|
+
pass
|
|
1944
|
+
```
|
|
1945
|
+
|
|
1946
|
+
---
|
|
1947
|
+
|
|
1948
|
+
#### 7.3 Pickle Cache 专属配置
|
|
1949
|
+
|
|
1950
|
+
Pickle Cache(文件缓存)的专属配置:
|
|
1951
|
+
|
|
1952
|
+
| 参数 | 类型 | 默认值 | 说明 |
|
|
1953
|
+
|------|------|--------|------|
|
|
1954
|
+
| `cache_dir` | `str` | `"./.pynomad_cache"` | 缓存文件存储目录 |
|
|
1955
|
+
| `enable_background_cleanup` | `bool` | `False` | 是否启用后台清理任务 |
|
|
1956
|
+
| `cleanup_interval` | `int` | `300` | 后台清理间隔(秒) |
|
|
1957
|
+
|
|
1958
|
+
**完整配置示例:**
|
|
1959
|
+
|
|
1960
|
+
```python
|
|
1961
|
+
from pynomad import pickled
|
|
1962
|
+
|
|
1963
|
+
@pickled(
|
|
1964
|
+
name="historical_data",
|
|
1965
|
+
ttl=86400, # 24小时过期
|
|
1966
|
+
maxsize=500,
|
|
1967
|
+
cache_dir="./cache/data", # 自定义缓存目录
|
|
1968
|
+
enable_encryption=True, # 启用加密
|
|
1969
|
+
salt="finance_v2.0", # 加密盐值
|
|
1970
|
+
enable_background_cleanup=True, # 启用后台清理
|
|
1971
|
+
cleanup_interval=600 # 每10分钟清理一次
|
|
1972
|
+
)
|
|
1973
|
+
def load_historical_data(symbol: str) -> dict:
|
|
1974
|
+
return fetch_historical_data(symbol)
|
|
1975
|
+
```
|
|
1976
|
+
|
|
1977
|
+
**目录结构示例:**
|
|
1978
|
+
|
|
1979
|
+
```
|
|
1980
|
+
cache/
|
|
1981
|
+
└── data/
|
|
1982
|
+
├── historical_data/
|
|
1983
|
+
│ ├── key1.pkl
|
|
1984
|
+
│ ├── key2.pkl
|
|
1985
|
+
│ └── .metadata.json
|
|
1986
|
+
└── historical_data_cleanup.log
|
|
1987
|
+
```
|
|
1988
|
+
|
|
1989
|
+
---
|
|
1990
|
+
|
|
1991
|
+
#### 7.4 Redis Cache 专属配置
|
|
1992
|
+
|
|
1993
|
+
Redis Cache 的专属配置:
|
|
1994
|
+
|
|
1995
|
+
| 参数 | 类型 | 默认值 | 说明 |
|
|
1996
|
+
|------|------|--------|------|
|
|
1997
|
+
| `host` | `str` | `"localhost"` | Redis 主机地址 |
|
|
1998
|
+
| `port` | `int` | `6379` | Redis 端口 |
|
|
1999
|
+
| `db` | `int` | `0` | Redis 数据库编号(0-15) |
|
|
2000
|
+
| `username` | `str` | `None` | Redis 用户名(Redis 6.0+) |
|
|
2001
|
+
| `password` | `str` | `None` | Redis 密码 |
|
|
2002
|
+
| `redis_client` | `Redis` | `None` | 自定义 Redis 客户端实例 |
|
|
2003
|
+
|
|
2004
|
+
**方式一:使用连接参数配置**
|
|
2005
|
+
|
|
2006
|
+
```python
|
|
2007
|
+
from pynomad import rediscached
|
|
2008
|
+
|
|
2009
|
+
@rediscached(
|
|
2010
|
+
name="order_cache",
|
|
2011
|
+
ttl=1800, # 30分钟过期
|
|
2012
|
+
maxsize=1000,
|
|
2013
|
+
host="localhost", # Redis 主机
|
|
2014
|
+
port=6379, # Redis 端口
|
|
2015
|
+
db=0, # 数据库编号
|
|
2016
|
+
username="default", # 用户名(可选)
|
|
2017
|
+
password="your_password", # 密码(可选)
|
|
2018
|
+
enable_encryption=True,
|
|
2019
|
+
salt="redis_v1"
|
|
2020
|
+
)
|
|
2021
|
+
def get_order(order_id: str) -> dict:
|
|
2022
|
+
return fetch_order_from_db(order_id)
|
|
2023
|
+
```
|
|
2024
|
+
|
|
2025
|
+
**方式二:使用自定义 Redis 客户端**
|
|
2026
|
+
|
|
2027
|
+
```python
|
|
2028
|
+
from redis import Redis, ConnectionPool
|
|
2029
|
+
from pynomad import rediscached
|
|
2030
|
+
|
|
2031
|
+
# 创建连接池
|
|
2032
|
+
pool = ConnectionPool(
|
|
2033
|
+
host="localhost",
|
|
2034
|
+
port=6379,
|
|
2035
|
+
db=0,
|
|
2036
|
+
username="default",
|
|
2037
|
+
password="your_password",
|
|
2038
|
+
max_connections=20, # 最大连接数
|
|
2039
|
+
socket_timeout=5, # Socket 超时(秒)
|
|
2040
|
+
socket_connect_timeout=5, # 连接超时(秒)
|
|
2041
|
+
retry_on_timeout=True # 超时重试
|
|
2042
|
+
)
|
|
2043
|
+
|
|
2044
|
+
# 创建 Redis 客户端
|
|
2045
|
+
redis_client = Redis(connection_pool=pool)
|
|
2046
|
+
|
|
2047
|
+
@rediscached(
|
|
2048
|
+
name="product_cache",
|
|
2049
|
+
redis_client=redis_client, # 使用自定义客户端
|
|
2050
|
+
ttl=3600
|
|
2051
|
+
)
|
|
2052
|
+
def get_product(product_id: str) -> dict:
|
|
2053
|
+
return fetch_product_from_db(product_id)
|
|
2054
|
+
```
|
|
2055
|
+
|
|
2056
|
+
**连接池配置建议:**
|
|
2057
|
+
|
|
2058
|
+
```python
|
|
2059
|
+
# 高并发场景
|
|
2060
|
+
pool = ConnectionPool(
|
|
2061
|
+
host="redis.example.com",
|
|
2062
|
+
port=6379,
|
|
2063
|
+
password="password",
|
|
2064
|
+
max_connections=50, # 更多连接
|
|
2065
|
+
socket_timeout=3,
|
|
2066
|
+
socket_connect_timeout=3,
|
|
2067
|
+
decode_responses=False # 保持字节返回(适合二进制数据)
|
|
2068
|
+
)
|
|
2069
|
+
|
|
2070
|
+
# 低延迟场景
|
|
2071
|
+
pool = ConnectionPool(
|
|
2072
|
+
host="127.0.0.1", # 使用本地回环地址
|
|
2073
|
+
port=6379,
|
|
2074
|
+
max_connections=10,
|
|
2075
|
+
socket_timeout=1, # 短超时
|
|
2076
|
+
socket_connect_timeout=1
|
|
2077
|
+
)
|
|
2078
|
+
```
|
|
2079
|
+
|
|
2080
|
+
---
|
|
2081
|
+
|
|
2082
|
+
#### 7.5 SQL Cache 专属配置
|
|
2083
|
+
|
|
2084
|
+
SQL Cache 的专属配置:
|
|
2085
|
+
|
|
2086
|
+
| 参数 | 类型 | 默认值 | 说明 |
|
|
2087
|
+
|------|------|--------|------|
|
|
2088
|
+
| `db_type` | `str` | `"sqlite"` | 数据库类型:`sqlite`/`mysql`/`postgresql` |
|
|
2089
|
+
| `db_url` | `str` | `None` | 完整数据库连接 URL(优先级高于其他参数) |
|
|
2090
|
+
| `host` | `str` | `"localhost"` | 数据库主机地址 |
|
|
2091
|
+
| `port` | `int` | `None` | 数据库端口 |
|
|
2092
|
+
| `db_name` | `str` | `None` | 数据库名称 |
|
|
2093
|
+
| `username` | `str` | `None` | 数据库用户名 |
|
|
2094
|
+
| `password` | `str` | `None` | 数据库密码 |
|
|
2095
|
+
| `schema` | `str` | `"public"` | 数据库 schema(PostgreSQL) |
|
|
2096
|
+
| `pool_size` | `int` | `5` | 连接池大小 |
|
|
2097
|
+
| `max_overflow` | `int` | `10` | 最大溢出连接数 |
|
|
2098
|
+
| `pool_timeout` | `int` | `30` | 连接超时(秒) |
|
|
2099
|
+
| `pool_recycle` | `int` | `3600` | 连接回收时间(秒) |
|
|
2100
|
+
| `pool_pre_ping` | `bool` | `True` | 连接前测试可用性 |
|
|
2101
|
+
| `max_retry_attempts` | `int` | `3` | 最大重试次数 |
|
|
2102
|
+
| `retry_delay` | `float` | `1.0` | 重试延迟(秒) |
|
|
2103
|
+
|
|
2104
|
+
**方式一:使用 db_url(推荐)**
|
|
2105
|
+
|
|
2106
|
+
```python
|
|
2107
|
+
from pynomad import sqlcached
|
|
2108
|
+
from pynomad.result import Result
|
|
2109
|
+
from pandas import DataFrame
|
|
2110
|
+
|
|
2111
|
+
# SQLite
|
|
2112
|
+
@sqlcached(
|
|
2113
|
+
db_type="sqlite",
|
|
2114
|
+
db_url="sqlite:///./cache.db", # 相对路径
|
|
2115
|
+
ttl=3600
|
|
2116
|
+
)
|
|
2117
|
+
def fetch_data(symbol: str) -> Result[DataFrame]:
|
|
2118
|
+
df = fetch_from_api(symbol)
|
|
2119
|
+
return Result.success(df)
|
|
2120
|
+
|
|
2121
|
+
# MySQL
|
|
2122
|
+
@sqlcached(
|
|
2123
|
+
db_type="mysql",
|
|
2124
|
+
db_url="mysql+pymysql://user:password@localhost:3306/cache_db",
|
|
2125
|
+
ttl=3600
|
|
2126
|
+
)
|
|
2127
|
+
def fetch_market_data(symbol: str) -> Result[DataFrame]:
|
|
2128
|
+
df = fetch_from_api(symbol)
|
|
2129
|
+
return Result.success(df)
|
|
2130
|
+
|
|
2131
|
+
# PostgreSQL
|
|
2132
|
+
@sqlcached(
|
|
2133
|
+
db_type="postgresql",
|
|
2134
|
+
db_url="postgresql://user:password@localhost:5432/cache_db",
|
|
2135
|
+
ttl=3600
|
|
2136
|
+
)
|
|
2137
|
+
def fetch_stock_data(symbol: str) -> Result[DataFrame]:
|
|
2138
|
+
df = fetch_from_api(symbol)
|
|
2139
|
+
return Result.success(df)
|
|
2140
|
+
```
|
|
2141
|
+
|
|
2142
|
+
**方式二:使用连接参数**
|
|
2143
|
+
|
|
2144
|
+
```python
|
|
2145
|
+
from pynomad import sqlcached
|
|
2146
|
+
|
|
2147
|
+
@sqlcached(
|
|
2148
|
+
db_type="mysql",
|
|
2149
|
+
host="localhost",
|
|
2150
|
+
port=3306,
|
|
2151
|
+
db_name="cache_db",
|
|
2152
|
+
username="root",
|
|
2153
|
+
password="password",
|
|
2154
|
+
schema="cache_schema", # 自定义 schema
|
|
2155
|
+
pool_size=5, # 连接池大小
|
|
2156
|
+
max_overflow=10, # 最大溢出连接数
|
|
2157
|
+
pool_timeout=30, # 连接超时
|
|
2158
|
+
pool_recycle=3600, # 连接回收时间(1小时)
|
|
2159
|
+
pool_pre_ping=True, # 连接前测试
|
|
2160
|
+
max_retry_attempts=3, # 最大重试次数
|
|
2161
|
+
retry_delay=1.0, # 重试延迟
|
|
2162
|
+
ttl=7200
|
|
2163
|
+
)
|
|
2164
|
+
def fetch_historical_data(symbol: str) -> Result[DataFrame]:
|
|
2165
|
+
df = fetch_from_api(symbol)
|
|
2166
|
+
return Result.success(df)
|
|
2167
|
+
```
|
|
2168
|
+
|
|
2169
|
+
**连接池配置建议:**
|
|
2170
|
+
|
|
2171
|
+
```python
|
|
2172
|
+
# 小型应用
|
|
2173
|
+
pool_size=5,
|
|
2174
|
+
max_overflow=10,
|
|
2175
|
+
pool_timeout=30
|
|
2176
|
+
|
|
2177
|
+
# 中型应用
|
|
2178
|
+
pool_size=10,
|
|
2179
|
+
max_overflow=20,
|
|
2180
|
+
pool_timeout=20
|
|
2181
|
+
|
|
2182
|
+
# 大型应用/高并发
|
|
2183
|
+
pool_size=20,
|
|
2184
|
+
max_overflow=30,
|
|
2185
|
+
pool_timeout=10,
|
|
2186
|
+
pool_recycle=1800 # 30分钟回收,防止连接僵死
|
|
2187
|
+
```
|
|
2188
|
+
|
|
2189
|
+
---
|
|
2190
|
+
|
|
2191
|
+
#### 7.6 DataFrame 缓存专属配置
|
|
2192
|
+
|
|
2193
|
+
DataFrame 缓存除了继承各自存储类型的配置外,还支持以下参数:
|
|
2194
|
+
|
|
2195
|
+
| 参数 | 类型 | 默认值 | 说明 | 适用装饰器 |
|
|
2196
|
+
|------|------|--------|------|-----------|
|
|
2197
|
+
| `value_loader` | `DataFrameValueLoader` | `None` | 自定义数据加载器 | 所有 DataFrame 缓存 |
|
|
2198
|
+
| `keygenerator` | `KeyGenerator` | `None` | 自定义键生成器 | 所有 DataFrame 缓存 |
|
|
2199
|
+
|
|
2200
|
+
**自定义 ValueLoader 配置:**
|
|
2201
|
+
|
|
2202
|
+
```python
|
|
2203
|
+
from pynomad import df_memcached
|
|
2204
|
+
from pynomad.cache.dataframe.types import DataFrameValueLoader
|
|
2205
|
+
from pandas import DataFrame
|
|
2206
|
+
|
|
2207
|
+
class StockDataValueLoader(DataFrameValueLoader):
|
|
2208
|
+
def get(self, cached_df, extra_params, args, kwargs):
|
|
2209
|
+
# GET 阶段逻辑
|
|
2210
|
+
pass
|
|
2211
|
+
|
|
2212
|
+
def put(self, cached_df, new_df, extra_params, args, kwargs):
|
|
2213
|
+
# PUT 阶段逻辑
|
|
2214
|
+
pass
|
|
2215
|
+
|
|
2216
|
+
def extract(self, merged_df, extra_params, args, kwargs):
|
|
2217
|
+
# EXTRACT 阶段逻辑
|
|
2218
|
+
pass
|
|
2219
|
+
|
|
2220
|
+
@df_memcached(
|
|
2221
|
+
name="stock_cache",
|
|
2222
|
+
ttl=600,
|
|
2223
|
+
value_loader=StockDataValueLoader() # 自定义加载器
|
|
2224
|
+
)
|
|
2225
|
+
def get_stock_data(code: str, start_date: str, end_date: str) -> DataFrame:
|
|
2226
|
+
return fetch_stock_from_api(code, start_date, end_date)
|
|
2227
|
+
```
|
|
2228
|
+
|
|
2229
|
+
**自定义 KeyGenerator 配置:**
|
|
2230
|
+
|
|
2231
|
+
```python
|
|
2232
|
+
from pynomad import df_memcached
|
|
2233
|
+
from pynomad.cache.core.keygenerator import KeyGenerator
|
|
2234
|
+
|
|
2235
|
+
def stock_key_generator(func, args, kwargs):
|
|
2236
|
+
# 只使用 code 和 fq 生成键,忽略时间范围
|
|
2237
|
+
code = args[0]
|
|
2238
|
+
fq = kwargs.get("fq", "qfq")
|
|
2239
|
+
return f"{func.__name__}:{code}:{fq}"
|
|
2240
|
+
|
|
2241
|
+
@df_memcached(
|
|
2242
|
+
name="stock_cache",
|
|
2243
|
+
ttl=600,
|
|
2244
|
+
keygenerator=stock_key_generator # 自定义键生成器
|
|
2245
|
+
)
|
|
2246
|
+
def get_stock_data(code: str, start_date: str, end_date: str, fq: str = "qfq") -> DataFrame:
|
|
2247
|
+
return fetch_stock_from_api(code, start_date, end_date, fq)
|
|
2248
|
+
```
|
|
2249
|
+
|
|
2250
|
+
---
|
|
2251
|
+
|
|
2252
|
+
#### 7.7 配置文件集成
|
|
2253
|
+
|
|
2254
|
+
使用 `@inject` 装饰器从 `settings.toml` 配置文件加载缓存配置:
|
|
2255
|
+
|
|
2256
|
+
**配置文件 `settings.toml`:**
|
|
2257
|
+
|
|
2258
|
+
```toml
|
|
2259
|
+
# settings.toml - pynomad 配置文件
|
|
2260
|
+
[cache.redis]
|
|
2261
|
+
host = "redis.example.com"
|
|
2262
|
+
port = 6379
|
|
2263
|
+
db = 0
|
|
2264
|
+
password = "${REDIS_PASSWORD}"
|
|
2265
|
+
ttl = 3600
|
|
2266
|
+
|
|
2267
|
+
[cache.mysql]
|
|
2268
|
+
host = "mysql.example.com"
|
|
2269
|
+
port = 3306
|
|
2270
|
+
db_name = "cache_db"
|
|
2271
|
+
username = "cache_user"
|
|
2272
|
+
password = "${DB_PASSWORD}"
|
|
2273
|
+
pool_size = 10
|
|
2274
|
+
max_overflow = 20
|
|
2275
|
+
|
|
2276
|
+
[cache.postgresql]
|
|
2277
|
+
host = "postgres.example.com"
|
|
2278
|
+
port = 5432
|
|
2279
|
+
db_name = "cache_db"
|
|
2280
|
+
username = "cache_user"
|
|
2281
|
+
password = "${DB_PASSWORD}"
|
|
2282
|
+
pool_size = 5
|
|
2283
|
+
|
|
2284
|
+
[cache.memory]
|
|
2285
|
+
ttl = 600
|
|
2286
|
+
maxsize = 1000
|
|
2287
|
+
|
|
2288
|
+
[cache.pickle]
|
|
2289
|
+
cache_dir = "./cache"
|
|
2290
|
+
enable_encryption = true
|
|
2291
|
+
salt = "app_v1.0"
|
|
2292
|
+
```
|
|
2293
|
+
|
|
2294
|
+
**使用配置类加载 Redis 配置:**
|
|
2295
|
+
|
|
2296
|
+
```python
|
|
2297
|
+
from pynomad.config.auto import inject
|
|
2298
|
+
from pynomad.config.auto.field_info import field
|
|
2299
|
+
from pynomad import rediscached
|
|
2300
|
+
|
|
2301
|
+
@inject(prefix="cache.redis")
|
|
2302
|
+
class RedisCacheConfig:
|
|
2303
|
+
"""Redis 缓存配置"""
|
|
2304
|
+
host: str = field(default="localhost")
|
|
2305
|
+
port: int = field(default=6379)
|
|
2306
|
+
db: int = field(default=0)
|
|
2307
|
+
password: str = field(default="")
|
|
2308
|
+
ttl: int = field(default=3600)
|
|
2309
|
+
|
|
2310
|
+
redis_config = RedisCacheConfig()
|
|
2311
|
+
|
|
2312
|
+
@rediscached(
|
|
2313
|
+
name="user_cache",
|
|
2314
|
+
host=redis_config.host,
|
|
2315
|
+
port=redis_config.port,
|
|
2316
|
+
db=redis_config.db,
|
|
2317
|
+
password=redis_config.password,
|
|
2318
|
+
ttl=redis_config.ttl
|
|
2319
|
+
)
|
|
2320
|
+
def get_user(user_id: int) -> dict:
|
|
2321
|
+
return fetch_user_from_db(user_id)
|
|
2322
|
+
```
|
|
2323
|
+
|
|
2324
|
+
**使用配置类加载 MySQL 缓存配置:**
|
|
2325
|
+
|
|
2326
|
+
```python
|
|
2327
|
+
from pynomad import sqlcached
|
|
2328
|
+
from pynomad.result import Result
|
|
2329
|
+
|
|
2330
|
+
@inject(prefix="cache.mysql")
|
|
2331
|
+
class MySQLCacheConfig:
|
|
2332
|
+
"""MySQL 缓存配置"""
|
|
2333
|
+
host: str = field(default="localhost")
|
|
2334
|
+
port: int = field(default=3306)
|
|
2335
|
+
db_name: str = field(default="cache_db")
|
|
2336
|
+
username: str = field(default="root")
|
|
2337
|
+
password: str = field(default="")
|
|
2338
|
+
pool_size: int = field(default=5)
|
|
2339
|
+
max_overflow: int = field(default=10)
|
|
2340
|
+
|
|
2341
|
+
mysql_config = MySQLCacheConfig()
|
|
2342
|
+
|
|
2343
|
+
@sqlcached(
|
|
2344
|
+
db_type="mysql",
|
|
2345
|
+
host=mysql_config.host,
|
|
2346
|
+
port=mysql_config.port,
|
|
2347
|
+
db_name=mysql_config.db_name,
|
|
2348
|
+
username=mysql_config.username,
|
|
2349
|
+
password=mysql_config.password,
|
|
2350
|
+
pool_size=mysql_config.pool_size,
|
|
2351
|
+
max_overflow=mysql_config.max_overflow,
|
|
2352
|
+
ttl=3600
|
|
2353
|
+
)
|
|
2354
|
+
def fetch_market_data(symbol: str) -> Result[DataFrame]:
|
|
2355
|
+
df = fetch_from_api(symbol)
|
|
2356
|
+
return Result.success(df)
|
|
2357
|
+
```
|
|
2358
|
+
|
|
2359
|
+
**使用配置类加载 Memory 缓存配置:**
|
|
2360
|
+
|
|
2361
|
+
```python
|
|
2362
|
+
from pynomad import memcached
|
|
2363
|
+
|
|
2364
|
+
@inject(prefix="cache.memory")
|
|
2365
|
+
class MemoryCacheConfig:
|
|
2366
|
+
"""内存缓存配置"""
|
|
2367
|
+
ttl: int = field(default=600)
|
|
2368
|
+
maxsize: int = field(default=1000)
|
|
2369
|
+
|
|
2370
|
+
memory_config = MemoryCacheConfig()
|
|
2371
|
+
|
|
2372
|
+
@memcached(
|
|
2373
|
+
name="session_cache",
|
|
2374
|
+
ttl=memory_config.ttl,
|
|
2375
|
+
maxsize=memory_config.maxsize
|
|
2376
|
+
)
|
|
2377
|
+
def get_session_data(session_id: str) -> dict:
|
|
2378
|
+
return fetch_session_from_redis(session_id)
|
|
2379
|
+
```
|
|
2380
|
+
|
|
2381
|
+
**配置文件路径说明:**
|
|
2382
|
+
|
|
2383
|
+
`settings.toml` 文件应放在以下位置之一(按优先级顺序):
|
|
2384
|
+
|
|
2385
|
+
1. 项目根目录:`./settings.toml`
|
|
2386
|
+
2. 配置目录:`./config/settings.toml`
|
|
2387
|
+
3. 用户目录:`~/.pynomad/settings.toml`
|
|
2388
|
+
4. 环境变量指定的路径:`PYNOMAD_CONFIG_PATH`
|
|
2389
|
+
|
|
2390
|
+
**环境变量替换:**
|
|
2391
|
+
|
|
2392
|
+
配置文件支持环境变量替换:
|
|
2393
|
+
|
|
2394
|
+
```toml
|
|
2395
|
+
# settings.toml
|
|
2396
|
+
[cache.redis]
|
|
2397
|
+
password = "${REDIS_PASSWORD}" # 从环境变量读取
|
|
2398
|
+
|
|
2399
|
+
[cache.mysql]
|
|
2400
|
+
password = "${DB_PASSWORD}"
|
|
2401
|
+
```
|
|
2402
|
+
|
|
2403
|
+
**完整示例:项目结构**
|
|
2404
|
+
|
|
2405
|
+
```
|
|
2406
|
+
my_project/
|
|
2407
|
+
├── settings.toml # 配置文件
|
|
2408
|
+
├── main.py # 主程序
|
|
2409
|
+
└── cache/ # 缓存目录
|
|
2410
|
+
```
|
|
2411
|
+
|
|
2412
|
+
**settings.toml:**
|
|
2413
|
+
|
|
2414
|
+
```toml
|
|
2415
|
+
[cache.redis]
|
|
2416
|
+
host = "localhost"
|
|
2417
|
+
port = 6379
|
|
2418
|
+
db = 0
|
|
2419
|
+
password = "${REDIS_PASSWORD}"
|
|
2420
|
+
ttl = 1800
|
|
2421
|
+
|
|
2422
|
+
[cache.mysql]
|
|
2423
|
+
host = "localhost"
|
|
2424
|
+
port = 3306
|
|
2425
|
+
db_name = "cache_db"
|
|
2426
|
+
username = "cache_user"
|
|
2427
|
+
password = "${DB_PASSWORD}"
|
|
2428
|
+
pool_size = 10
|
|
2429
|
+
max_overflow = 20
|
|
2430
|
+
|
|
2431
|
+
[cache.memory]
|
|
2432
|
+
ttl = 600
|
|
2433
|
+
maxsize = 1000
|
|
2434
|
+
|
|
2435
|
+
[cache.pickle]
|
|
2436
|
+
cache_dir = "./cache"
|
|
2437
|
+
enable_encryption = true
|
|
2438
|
+
salt = "my_app_v1"
|
|
2439
|
+
```
|
|
2440
|
+
|
|
2441
|
+
---
|
|
2442
|
+
|
|
2443
|
+
#### 7.8 配置优先级
|
|
2444
|
+
|
|
2445
|
+
参数优先级(从高到低):
|
|
2446
|
+
|
|
2447
|
+
1. **装饰器参数**:直接在装饰器中传入的参数(优先级最高)
|
|
2448
|
+
2. **配置文件**:通过 `@inject` 从配置文件加载的值
|
|
2449
|
+
3. **硬编码默认值**:代码中定义的默认值(优先级最低)
|
|
2450
|
+
|
|
2451
|
+
```python
|
|
2452
|
+
from pynomad import rediscached
|
|
2453
|
+
|
|
2454
|
+
# 优先级1: 装饰器参数(优先级最高)
|
|
2455
|
+
@rediscached(
|
|
2456
|
+
host="custom.host", # ✅ 使用此值
|
|
2457
|
+
ttl=60 # ✅ 使用此值
|
|
2458
|
+
)
|
|
2459
|
+
def my_function():
|
|
2460
|
+
pass
|
|
2461
|
+
|
|
2462
|
+
# 如果配置文件中设置了:
|
|
2463
|
+
# cache.redis.host = "config.host"
|
|
2464
|
+
# cache.redis.ttl = 300
|
|
2465
|
+
#
|
|
2466
|
+
# 实际使用 host="custom.host", ttl=60
|
|
2467
|
+
```
|
|
2468
|
+
|
|
2469
|
+
---
|
|
2470
|
+
|
|
2471
|
+
### 8. 缓存最佳实践
|
|
2472
|
+
|
|
2473
|
+
#### 选择合适的缓存类型
|
|
2474
|
+
|
|
2475
|
+
```python
|
|
2476
|
+
# 临时数据 → Memory Cache
|
|
2477
|
+
@memcached(ttl=60)
|
|
2478
|
+
def get_session_data():
|
|
2479
|
+
pass
|
|
2480
|
+
|
|
2481
|
+
# 需要持久化 → Pickle Cache
|
|
2482
|
+
@pickled(ttl=3600)
|
|
2483
|
+
def get_cached_config():
|
|
2484
|
+
pass
|
|
2485
|
+
|
|
2486
|
+
# 分布式系统 → Redis Cache
|
|
2487
|
+
@rediscached(ttl=300)
|
|
2488
|
+
def get_shared_data():
|
|
2489
|
+
pass
|
|
2490
|
+
|
|
2491
|
+
# DataFrame 结构化数据/大数据量 → SQL Cache
|
|
2492
|
+
@sqlcached(db_type="sqlite")
|
|
2493
|
+
def get_structured_data() -> Result[DataFrame]:
|
|
2494
|
+
return Result.success(DataFrame())
|
|
2495
|
+
```
|
|
2496
|
+
|
|
2497
|
+
#### 合理设置 TTL
|
|
2498
|
+
|
|
2499
|
+
```python
|
|
2500
|
+
# 高频变化的数据 → 短 TTL
|
|
2501
|
+
@memcached(ttl=10)
|
|
2502
|
+
def get_realtime_price():
|
|
2503
|
+
pass
|
|
2504
|
+
|
|
2505
|
+
# 相对稳定的数据 → 长 TTL
|
|
2506
|
+
@pickled(ttl=86400) # 24小时
|
|
2507
|
+
def get_static_config():
|
|
2508
|
+
pass
|
|
2509
|
+
```
|
|
2510
|
+
|
|
2511
|
+
#### 处理缓存失效
|
|
2512
|
+
|
|
2513
|
+
```python
|
|
2514
|
+
from pynomad import memcached, rediscached
|
|
2515
|
+
from pynomad.common.exceptions import NeedsRefreshException
|
|
2516
|
+
|
|
2517
|
+
@memcached(ttl=60)
|
|
2518
|
+
def get_data_with_refresh(key: str) -> dict:
|
|
2519
|
+
# 如果需要强制刷新,抛出 NeedsRefreshException
|
|
2520
|
+
if needs_refresh:
|
|
2521
|
+
raise NeedsRefreshException({"refresh_key": key})
|
|
2522
|
+
return fetch_from_db(key)
|
|
2523
|
+
```
|
|
2524
|
+
|
|
2525
|
+
#### 监控缓存效果
|
|
2526
|
+
|
|
2527
|
+
```python
|
|
2528
|
+
import time
|
|
2529
|
+
from pynomad import memcached
|
|
2530
|
+
|
|
2531
|
+
@memcached(ttl=60)
|
|
2532
|
+
def timed_function():
|
|
2533
|
+
start = time.time()
|
|
2534
|
+
result = expensive_operation()
|
|
2535
|
+
print(f"执行时间: {time.time() - start:.2f}秒")
|
|
2536
|
+
return result
|
|
2537
|
+
|
|
2538
|
+
# 第一次调用(缓存未命中)
|
|
2539
|
+
timed_function() # 执行时间: 2.50秒
|
|
2540
|
+
|
|
2541
|
+
# 第二次调用(缓存命中)
|
|
2542
|
+
timed_function() # 执行时间: 0.00秒
|
|
2543
|
+
```
|
|
2544
|
+
|
|
2545
|
+
---
|
|
2546
|
+
|
|
2547
|
+
### 9. 清除缓存
|
|
2548
|
+
|
|
2549
|
+
#### 9.1 等待 TTL 过期(推荐)
|
|
2550
|
+
|
|
2551
|
+
缓存会在设定的 TTL 时间后自动过期,这是最简单和可靠的方式:
|
|
2552
|
+
|
|
2553
|
+
```python
|
|
2554
|
+
from pynomad import memcached
|
|
2555
|
+
|
|
2556
|
+
@memcached(ttl=60) # 60秒后自动过期
|
|
2557
|
+
def get_data():
|
|
2558
|
+
return expensive_operation()
|
|
2559
|
+
```
|
|
2560
|
+
|
|
2561
|
+
#### 9.2 通过 CacheRegistry 清除指定缓存
|
|
2562
|
+
|
|
2563
|
+
使用缓存名称清除所有关联的缓存:
|
|
2564
|
+
|
|
2565
|
+
```python
|
|
2566
|
+
from pynomad import memcached
|
|
2567
|
+
from pynomad.cache.core.cache_registry import CacheRegistry
|
|
2568
|
+
|
|
2569
|
+
@memcached(name="user_cache", ttl=3600)
|
|
2570
|
+
def get_user(user_id: int) -> dict:
|
|
2571
|
+
return fetch_user_from_db(user_id)
|
|
2572
|
+
|
|
2573
|
+
# 清除所有名为 "user_cache" 的缓存
|
|
2574
|
+
cache = CacheRegistry.get("user_cache")
|
|
2575
|
+
cache.clear()
|
|
2576
|
+
```
|
|
2577
|
+
|
|
2578
|
+
#### 9.3 重启应用
|
|
2579
|
+
|
|
2580
|
+
重启应用可以清除所有内存缓存:
|
|
2581
|
+
|
|
2582
|
+
- **内存缓存**:应用重启时自动清空
|
|
2583
|
+
- **文件/Redis/SQL 缓存**:在缓存过期后失效
|
|
2584
|
+
|
|
2585
|
+
#### 9.4 清除特定键的缓存
|
|
2586
|
+
|
|
2587
|
+
某些缓存类型支持清除特定键:
|
|
2588
|
+
|
|
2589
|
+
```python
|
|
2590
|
+
# Redis 缓存示例
|
|
2591
|
+
from pynomad import rediscached
|
|
2592
|
+
|
|
2593
|
+
@rediscached(name="product_cache", ttl=1800)
|
|
2594
|
+
def get_product(product_id: str) -> dict:
|
|
2595
|
+
return fetch_product_from_db(product_id)
|
|
2596
|
+
|
|
2597
|
+
# 获取缓存实例
|
|
2598
|
+
cache = CacheRegistry.get("product_cache")
|
|
2599
|
+
|
|
2600
|
+
# 删除特定键
|
|
2601
|
+
cache.delete("get_product:12345")
|
|
2602
|
+
```
|
|
2603
|
+
|
|
2604
|
+
---
|
|
2605
|
+
|
|
2606
|
+
### 10. 常见问题
|
|
2607
|
+
|
|
2608
|
+
#### Q: 缓存未生效?
|
|
2609
|
+
|
|
2610
|
+
A: 检查以下几点:
|
|
2611
|
+
1. 函数参数是否可哈希(避免使用列表、字典等)
|
|
2612
|
+
2. TTL 是否设置为 0(永不过期)
|
|
2613
|
+
3. 缓存装饰器是否正确应用
|
|
2614
|
+
|
|
2615
|
+
#### Q: 如何调试缓存?
|
|
2616
|
+
|
|
2617
|
+
```python
|
|
2618
|
+
from pynomad.logsystem.manager import get_logger
|
|
2619
|
+
|
|
2620
|
+
logger = get_logger()
|
|
2621
|
+
logger.set_level("TRACE") # 开启详细日志
|
|
2622
|
+
|
|
2623
|
+
@memcached(ttl=60)
|
|
2624
|
+
def my_function():
|
|
2625
|
+
pass
|
|
2626
|
+
```
|
|
2627
|
+
|
|
2628
|
+
#### Q: DataFrame 缓存报错?
|
|
2629
|
+
|
|
2630
|
+
A: 确保函数声明了正确的返回类型注解:
|
|
2631
|
+
|
|
2632
|
+
```python
|
|
2633
|
+
# ✅ 正确
|
|
2634
|
+
@df_memcached(ttl=60)
|
|
2635
|
+
def load_data() -> DataFrame:
|
|
2636
|
+
return DataFrame()
|
|
2637
|
+
|
|
2638
|
+
# ❌ 错误(缺少类型注解)
|
|
2639
|
+
@df_memcached(ttl=60)
|
|
2640
|
+
def load_data():
|
|
2641
|
+
return DataFrame()
|
|
2642
|
+
```
|
|
2643
|
+
|
|
2644
|
+
---
|
|
2645
|
+
|
|
2646
|
+
### 11. 自定义缓存实践
|
|
2647
|
+
|
|
2648
|
+
#### 11.1 自定义键生成器
|
|
2649
|
+
|
|
2650
|
+
键生成器决定了缓存的存储键,可以根据业务需求自定义键的生成逻辑。
|
|
2651
|
+
|
|
2652
|
+
##### 基本用法
|
|
2653
|
+
|
|
2654
|
+
```python
|
|
2655
|
+
from pynomad import memcached
|
|
2656
|
+
from pynomad.cache.core.types import KeyGenerator
|
|
2657
|
+
|
|
2658
|
+
def custom_key_generator(func, instance, args, kwargs) -> str:
|
|
2659
|
+
"""自定义键生成逻辑"""
|
|
2660
|
+
# 只使用函数名和第一个参数作为键
|
|
2661
|
+
return f"{func.__name__}:{args[0]}"
|
|
2662
|
+
|
|
2663
|
+
@memcached(keygenerator=custom_key_generator)
|
|
2664
|
+
def get_data(user_id: int, timestamp: int) -> dict:
|
|
2665
|
+
# 键格式: get_data:1 (忽略 timestamp)
|
|
2666
|
+
return fetch_from_db(user_id)
|
|
2667
|
+
```
|
|
2668
|
+
|
|
2669
|
+
##### 场景1: 简化键(忽略某些参数)
|
|
2670
|
+
|
|
2671
|
+
```python
|
|
2672
|
+
from pynomad import memcached
|
|
2673
|
+
|
|
2674
|
+
def ignore_timestamp_key_generator(func, instance, args, kwargs) -> str:
|
|
2675
|
+
"""忽略时间戳参数"""
|
|
2676
|
+
# 只使用 user_id 作为键,忽略 timestamp
|
|
2677
|
+
user_id = kwargs.get("user_id") or args[0]
|
|
2678
|
+
return f"{func.__name__}:user_{user_id}"
|
|
2679
|
+
|
|
2680
|
+
@memcached(keygenerator=ignore_timestamp_key_generator)
|
|
2681
|
+
def get_user_data(user_id: int, timestamp: int) -> dict:
|
|
2682
|
+
# 即使 timestamp 变化,也使用相同的缓存
|
|
2683
|
+
return fetch_user_data(user_id)
|
|
2684
|
+
```
|
|
2685
|
+
|
|
2686
|
+
##### 场景2: 组合多个参数
|
|
2687
|
+
|
|
2688
|
+
```python
|
|
2689
|
+
def combined_key_generator(func, instance, args, kwargs) -> str:
|
|
2690
|
+
"""组合多个参数作为键"""
|
|
2691
|
+
user_id = kwargs.get("user_id") or args[0]
|
|
2692
|
+
region = kwargs.get("region") or args[1]
|
|
2693
|
+
# 使用 region 和 user_id 的组合
|
|
2694
|
+
return f"{func.__name__}:{region}:{user_id}"
|
|
2695
|
+
|
|
2696
|
+
@memcached(keygenerator=combined_key_generator)
|
|
2697
|
+
def get_user_data_by_region(user_id: int, region: str) -> dict:
|
|
2698
|
+
return fetch_user_data(user_id, region)
|
|
2699
|
+
```
|
|
2700
|
+
|
|
2701
|
+
#### 11.2 自定义 ValueLoader(DataFrame 缓存)
|
|
2702
|
+
|
|
2703
|
+
ValueLoader 是 DataFrame 缓存三阶段处理的核心,可以实现灵活的数据处理逻辑。
|
|
2704
|
+
|
|
2705
|
+
##### 场景1: 增量更新
|
|
2706
|
+
|
|
2707
|
+
```python
|
|
2708
|
+
from pynomad.cache.dataframe.types import DataFrameValueLoader
|
|
2709
|
+
from pynomad import Result, df_memcached
|
|
2710
|
+
from pandas import DataFrame
|
|
2711
|
+
|
|
2712
|
+
class IncrementalValueLoader(DataFrameValueLoader):
|
|
2713
|
+
"""增量更新 ValueLoader"""
|
|
2714
|
+
|
|
2715
|
+
def get(
|
|
2716
|
+
self,
|
|
2717
|
+
cached_df: DataFrame,
|
|
2718
|
+
extra_params: dict,
|
|
2719
|
+
args: tuple,
|
|
2720
|
+
kwargs: dict
|
|
2721
|
+
) -> Result[DataFrame | dict]:
|
|
2722
|
+
"""GET 阶段:参数变化时需要刷新"""
|
|
2723
|
+
last_args = tuple(extra_params.get("args", ()))
|
|
2724
|
+
if last_args == args:
|
|
2725
|
+
return Result.success(data=cached_df)
|
|
2726
|
+
return Result.client_error(
|
|
2727
|
+
exception=Exception("参数变化"),
|
|
2728
|
+
data={"args": args, "kwargs": kwargs}
|
|
2729
|
+
)
|
|
2730
|
+
|
|
2731
|
+
def put(
|
|
2732
|
+
self,
|
|
2733
|
+
cached_df: DataFrame,
|
|
2734
|
+
new_df: DataFrame,
|
|
2735
|
+
extra_params: dict,
|
|
2736
|
+
args: tuple,
|
|
2737
|
+
kwargs: dict
|
|
2738
|
+
) -> Result[DataFrame]:
|
|
2739
|
+
"""PUT 阶段:合并新旧数据(去重)"""
|
|
2740
|
+
if cached_df.empty:
|
|
2741
|
+
return Result.success(data=new_df)
|
|
2742
|
+
merged_df = pd.concat([cached_df, new_df]).drop_duplicates()
|
|
2743
|
+
return Result.success(data=merged_df)
|
|
2744
|
+
|
|
2745
|
+
def extract(
|
|
2746
|
+
self,
|
|
2747
|
+
merged_df: DataFrame,
|
|
2748
|
+
extra_params: dict,
|
|
2749
|
+
args: tuple,
|
|
2750
|
+
kwargs: dict
|
|
2751
|
+
) -> Result[DataFrame]:
|
|
2752
|
+
"""EXTRACT 阶段:返回合并数据"""
|
|
2753
|
+
return Result.success(data=merged_df)
|
|
2754
|
+
|
|
2755
|
+
@df_memcached(value_loader=IncrementalValueLoader(), ttl=600)
|
|
2756
|
+
def load_incremental_data(symbol: str) -> DataFrame:
|
|
2757
|
+
"""增量加载数据"""
|
|
2758
|
+
return fetch_data(symbol)
|
|
2759
|
+
```
|
|
2760
|
+
|
|
2761
|
+
##### 场景2: 数据过滤
|
|
2762
|
+
|
|
2763
|
+
```python
|
|
2764
|
+
from pynomad.cache.dataframe.types import DataFrameValueLoader
|
|
2765
|
+
|
|
2766
|
+
class FilteredValueLoader(DataFrameValueLoader):
|
|
2767
|
+
"""支持参数过滤的 ValueLoader"""
|
|
2768
|
+
|
|
2769
|
+
def get(
|
|
2770
|
+
self,
|
|
2771
|
+
cached_df: DataFrame,
|
|
2772
|
+
extra_params: dict,
|
|
2773
|
+
args: tuple,
|
|
2774
|
+
kwargs: dict
|
|
2775
|
+
) -> Result[DataFrame | dict]:
|
|
2776
|
+
"""GET 阶段:直接返回缓存"""
|
|
2777
|
+
if not cached_df.empty:
|
|
2778
|
+
return Result.success(data=cached_df)
|
|
2779
|
+
return Result.client_error(
|
|
2780
|
+
exception=Exception("缓存为空"),
|
|
2781
|
+
data={"args": args, "kwargs": kwargs}
|
|
2782
|
+
)
|
|
2783
|
+
|
|
2784
|
+
def put(
|
|
2785
|
+
self,
|
|
2786
|
+
cached_df: DataFrame,
|
|
2787
|
+
new_df: DataFrame,
|
|
2788
|
+
extra_params: dict,
|
|
2789
|
+
args: tuple,
|
|
2790
|
+
kwargs: dict
|
|
2791
|
+
) -> Result[DataFrame]:
|
|
2792
|
+
"""PUT 阶段:缓存新数据"""
|
|
2793
|
+
return Result.success(data=new_df)
|
|
2794
|
+
|
|
2795
|
+
def extract(
|
|
2796
|
+
self,
|
|
2797
|
+
merged_df: DataFrame,
|
|
2798
|
+
extra_params: dict,
|
|
2799
|
+
args: tuple,
|
|
2800
|
+
kwargs: dict
|
|
2801
|
+
) -> Result[DataFrame]:
|
|
2802
|
+
"""EXTRACT 阶段:根据参数过滤数据"""
|
|
2803
|
+
symbol = kwargs.get("symbol")
|
|
2804
|
+
start_date = kwargs.get("start_date")
|
|
2805
|
+
end_date = kwargs.get("end_date")
|
|
2806
|
+
|
|
2807
|
+
filtered_df = merged_df
|
|
2808
|
+
if symbol:
|
|
2809
|
+
filtered_df = filtered_df[filtered_df["symbol"] == symbol]
|
|
2810
|
+
if start_date:
|
|
2811
|
+
filtered_df = filtered_df[filtered_df["date"] >= start_date]
|
|
2812
|
+
if end_date:
|
|
2813
|
+
filtered_df = filtered_df[filtered_df["date"] <= end_date]
|
|
2814
|
+
|
|
2815
|
+
return Result.success(data=filtered_df)
|
|
2816
|
+
|
|
2817
|
+
@df_memcached(value_loader=FilteredValueLoader(), ttl=3600)
|
|
2818
|
+
def load_and_filter_data(symbol: str, start_date: str, end_date: str) -> DataFrame:
|
|
2819
|
+
"""加载并过滤数据"""
|
|
2820
|
+
return fetch_full_data()
|
|
2821
|
+
```
|
|
2822
|
+
|
|
2823
|
+
##### 场景3: 时间窗口缓存
|
|
2824
|
+
|
|
2825
|
+
```python
|
|
2826
|
+
class TimeWindowValueLoader(DataFrameValueLoader):
|
|
2827
|
+
"""时间窗口缓存 ValueLoader"""
|
|
2828
|
+
|
|
2829
|
+
def get(
|
|
2830
|
+
self,
|
|
2831
|
+
cached_df: DataFrame,
|
|
2832
|
+
extra_params: dict,
|
|
2833
|
+
args: tuple,
|
|
2834
|
+
kwargs: dict
|
|
2835
|
+
) -> Result[DataFrame | dict]:
|
|
2836
|
+
"""GET 阶段:检查缓存是否覆盖请求范围"""
|
|
2837
|
+
if cached_df.empty:
|
|
2838
|
+
return Result.client_error(
|
|
2839
|
+
exception=Exception("缓存为空"),
|
|
2840
|
+
data={"args": args, "kwargs": kwargs}
|
|
2841
|
+
)
|
|
2842
|
+
|
|
2843
|
+
start_date = kwargs.get("start_date")
|
|
2844
|
+
end_date = kwargs.get("end_date")
|
|
2845
|
+
|
|
2846
|
+
# 检查缓存是否覆盖请求范围
|
|
2847
|
+
cache_start = cached_df["date"].min()
|
|
2848
|
+
cache_end = cached_df["date"].max()
|
|
2849
|
+
|
|
2850
|
+
if start_date and cache_start > pd.Timestamp(start_date):
|
|
2851
|
+
return Result.client_error(
|
|
2852
|
+
exception=Exception(f"缓存起始时间 {cache_start} 晚于请求时间 {start_date}"),
|
|
2853
|
+
data={"args": args, "kwargs": kwargs}
|
|
2854
|
+
)
|
|
2855
|
+
|
|
2856
|
+
if end_date and cache_end < pd.Timestamp(end_date):
|
|
2857
|
+
return Result.client_error(
|
|
2858
|
+
exception=Exception(f"缓存结束时间 {cache_end} 早于请求时间 {end_date}"),
|
|
2859
|
+
data={"args": args, "kwargs": kwargs}
|
|
2860
|
+
)
|
|
2861
|
+
|
|
2862
|
+
return Result.success(data=cached_df)
|
|
2863
|
+
|
|
2864
|
+
def put(
|
|
2865
|
+
self,
|
|
2866
|
+
cached_df: DataFrame,
|
|
2867
|
+
new_df: DataFrame,
|
|
2868
|
+
extra_params: dict,
|
|
2869
|
+
args: tuple,
|
|
2870
|
+
kwargs: dict
|
|
2871
|
+
) -> Result[DataFrame]:
|
|
2872
|
+
"""PUT 阶段:扩展缓存的时间范围"""
|
|
2873
|
+
merged_df = pd.concat([cached_df, new_df]).drop_duplicates()
|
|
2874
|
+
return Result.success(data=merged_df)
|
|
2875
|
+
|
|
2876
|
+
def extract(
|
|
2877
|
+
self,
|
|
2878
|
+
merged_df: DataFrame,
|
|
2879
|
+
extra_params: dict,
|
|
2880
|
+
args: tuple,
|
|
2881
|
+
kwargs: dict
|
|
2882
|
+
) -> Result[DataFrame]:
|
|
2883
|
+
"""EXTRACT 阶段:返回请求时间范围内的数据"""
|
|
2884
|
+
start_date = kwargs.get("start_date")
|
|
2885
|
+
end_date = kwargs.get("end_date")
|
|
2886
|
+
|
|
2887
|
+
filtered_df = merged_df
|
|
2888
|
+
if start_date:
|
|
2889
|
+
filtered_df = filtered_df[filtered_df["date"] >= pd.Timestamp(start_date)]
|
|
2890
|
+
if end_date:
|
|
2891
|
+
filtered_df = filtered_df[filtered_df["date"] <= pd.Timestamp(end_date)]
|
|
2892
|
+
|
|
2893
|
+
return Result.success(data=filtered_df)
|
|
2894
|
+
|
|
2895
|
+
@df_memcached(value_loader=TimeWindowValueLoader(), ttl=7200)
|
|
2896
|
+
def load_time_series_data(symbol: str, start_date: str, end_date: str) -> DataFrame:
|
|
2897
|
+
"""加载时间序列数据"""
|
|
2898
|
+
return fetch_time_series(symbol, start_date, end_date)
|
|
2899
|
+
```
|
|
2900
|
+
|
|
2901
|
+
#### 10.3 自定义缓存最佳实践
|
|
2902
|
+
|
|
2903
|
+
##### 10.3.1 键生成器设计原则
|
|
2904
|
+
|
|
2905
|
+
键是缓存命中的关键,设计合理的键生成器可以大幅提升缓存效率。
|
|
2906
|
+
|
|
2907
|
+
**基本原则:**
|
|
2908
|
+
- 键应该唯一且确定
|
|
2909
|
+
- 避免包含易变的参数(如时间戳)
|
|
2910
|
+
- 保持键的简洁性
|
|
2911
|
+
|
|
2912
|
+
**实战示例:股票数据缓存的键生成**
|
|
2913
|
+
|
|
2914
|
+
```python
|
|
2915
|
+
from pynomad import df_memcached
|
|
2916
|
+
|
|
2917
|
+
def stock_key_generator(func, instance, args, kwargs):
|
|
2918
|
+
"""股票数据缓存键生成器"""
|
|
2919
|
+
# 获取参数
|
|
2920
|
+
code = kwargs.get("code") or args[0]
|
|
2921
|
+
start_date = kwargs.get("start_date") or args[1]
|
|
2922
|
+
end_date = kwargs.get("end_date") or args[2]
|
|
2923
|
+
fq = kwargs.get("fq", "qfq") # 复权类型,默认前复权
|
|
2924
|
+
|
|
2925
|
+
# ✅ 应该参与键生成的参数
|
|
2926
|
+
# - code: 不同股票的数据应该分别缓存
|
|
2927
|
+
# - fq: 复权类型不同,数据也不同
|
|
2928
|
+
#
|
|
2929
|
+
# ❌ 不应该参与键生成的参数
|
|
2930
|
+
# - start_date + end_date: 时间范围在 EXTRACT 阶段过滤
|
|
2931
|
+
# 这样不同时间范围的请求可以复用同一份缓存
|
|
2932
|
+
# 比如先缓存了 2024-01-01 到 2024-12-31 的数据
|
|
2933
|
+
# 之后请求 2024-01-01 到 2024-03-31 就能直接命中缓存
|
|
2934
|
+
return f"{func.__name__}:{code}:{fq}"
|
|
2935
|
+
|
|
2936
|
+
@df_memcached(keygenerator=stock_key_generator, ttl=3600)
|
|
2937
|
+
def get_stock_data(code: str, start_date: str, end_date: str, fq: str = "qfq") -> DataFrame:
|
|
2938
|
+
"""获取股票数据"""
|
|
2939
|
+
return fetch_stock_from_api(code, start_date, end_date, fq)
|
|
2940
|
+
```
|
|
2941
|
+
|
|
2942
|
+
**参数是否参与键生成的判断标准:**
|
|
2943
|
+
|
|
2944
|
+
| 参数类型 | 应该参与键生成? | 示例 | 原因 |
|
|
2945
|
+
|---------|----------------|------|------|
|
|
2946
|
+
| 数据标识 | ✅ 是 | `code`, `symbol`, `user_id` | 不同标识的数据应该分开缓存 |
|
|
2947
|
+
| 时间范围 | ❌ 否 | `start_date`, `end_date` | 在 EXTRACT 阶段过滤,可复用缓存 |
|
|
2948
|
+
| 业务参数 | ✅ 是 | `fq`, `period`, `interval` | 影响数据内容的参数 |
|
|
2949
|
+
| 查询参数 | ❌ 否 | `fields`, `columns` | 可以在 EXTRACT 阶段过滤 |
|
|
2950
|
+
| 分页参数 | ❌ 否 | `page`, `page_size` | 可以在 EXTRACT 阶段切片 |
|
|
2951
|
+
| 纯控制参数 | ❌ 否 | `format`, `verbose` | 不影响数据内容 |
|
|
2952
|
+
| 时间戳 | ❌ 否 | `timestamp`, `ts` | 每次都变,会导致缓存失效 |
|
|
2953
|
+
|
|
2954
|
+
**反例分析:**
|
|
2955
|
+
|
|
2956
|
+
```python
|
|
2957
|
+
# ❌ 错误示例:时间戳参与键生成
|
|
2958
|
+
@memcached(ttl=60)
|
|
2959
|
+
def get_realtime_price(code: str, timestamp: int) -> dict:
|
|
2960
|
+
# timestamp 每次都不同,导致缓存永远不命中
|
|
2961
|
+
return fetch_price(code, time.time())
|
|
2962
|
+
|
|
2963
|
+
# ✅ 正确示例:移除时间戳
|
|
2964
|
+
@memcached(ttl=60)
|
|
2965
|
+
def get_realtime_price(code: str) -> dict:
|
|
2966
|
+
# 使用 TTL 控制缓存时效,而不是参数
|
|
2967
|
+
return fetch_price(code)
|
|
2968
|
+
|
|
2969
|
+
# ❌ 错误示例:时间范围参与键生成
|
|
2970
|
+
@df_memcached(ttl=3600)
|
|
2971
|
+
def get_stock_data(code: str, start_date: str, end_date: str, fq: str = "qfq") -> DataFrame:
|
|
2972
|
+
# start_date 和 end_date 参与键生成,每次时间参数变化都会从 API 拉取
|
|
2973
|
+
# 三阶段缓存的意义丧失了
|
|
2974
|
+
return fetch_stock_from_api(code, start_date, end_date, fq)
|
|
2975
|
+
|
|
2976
|
+
# ✅ 正确示例:时间范围不参与键生成,在 EXTRACT 阶段过滤
|
|
2977
|
+
@df_memcached(value_loader=StockDataValueLoader(), ttl=3600)
|
|
2978
|
+
def get_stock_data(code: str, start_date: str, end_date: str, fq: str = "qfq") -> DataFrame:
|
|
2979
|
+
# 键只包含 code 和 fq,时间范围在 EXTRACT 阶段过滤
|
|
2980
|
+
# 这样不同时间范围的请求可以复用同一份缓存
|
|
2981
|
+
return fetch_stock_from_api(code, start_date, end_date, fq)
|
|
2982
|
+
|
|
2983
|
+
# ❌ 错误示例:fields 参数参与键生成
|
|
2984
|
+
@df_memcached(ttl=3600)
|
|
2985
|
+
def get_stock_data(code: str, fields: list[str]) -> DataFrame:
|
|
2986
|
+
    # fields=["open", "close"] 和 fields=["open", "close", "volume"] 会被视为不同缓存
|
|
2987
|
+
return fetch_all_fields(code)
|
|
2988
|
+
|
|
2989
|
+
# ✅ 正确示例:在 EXTRACT 阶段过滤
|
|
2990
|
+
@df_memcached(ttl=3600)
|
|
2991
|
+
def get_stock_data(code: str, fields: list[str] = None) -> DataFrame:
|
|
2992
|
+
# 始终缓存完整数据,在返回前过滤字段
|
|
2993
|
+
df = fetch_all_fields(code)
|
|
2994
|
+
if fields:
|
|
2995
|
+
return df[fields]
|
|
2996
|
+
return df
|
|
2997
|
+
```
|
|
2998
|
+
|
|
2999
|
+
##### 10.3.2 ValueLoader 各阶段返回值详解
|
|
3000
|
+
|
|
3001
|
+
ValueLoader 的三阶段处理是 DataFrame 缓存的核心,每个阶段的返回值决定了后续的执行流程。
|
|
3002
|
+
|
|
3003
|
+
**阶段返回值总览:**
|
|
3004
|
+
|
|
3005
|
+
```
|
|
3006
|
+
┌─────────────────────────────────────────────────────────────┐
|
|
3007
|
+
│ ValueLoader 执行流程 │
|
|
3008
|
+
├─────────────────────────────────────────────────────────────┤
|
|
3009
|
+
│ │
|
|
3010
|
+
│ GET 阶段 │
|
|
3011
|
+
│ ├─ Result.success(cached_df) → 跳过 PUT 和 EXTRACT │
|
|
3012
|
+
│ │ → 直接返回 cached_df │
|
|
3013
|
+
│ ├─ Result.client_error(...) → 执行函数(获取新数据) │
|
|
3014
|
+
│ │ └─ error.data 包含刷新参数 → 进入 PUT 阶段 │
|
|
3015
|
+
│ └─ Result.server_error(...) → 直接抛出异常 │
|
|
3016
|
+
│ │
|
|
3017
|
+
│ PUT 阶段(仅在 GET 返回 error 时执行) │
|
|
3018
|
+
│ ├─ Result.success(merged_df) → 进入 EXTRACT 阶段 │
|
|
3019
|
+
│ └─ Result.error(...) → 直接抛出异常 │
|
|
3020
|
+
│ │
|
|
3021
|
+
│ EXTRACT 阶段(仅在 PUT 返回 success 时执行) │
|
|
3022
|
+
│ ├─ Result.success(extracted_df) → 最终返回数据 │
|
|
3023
|
+
│ └─ Result.error(...) → 直接抛出异常 │
|
|
3024
|
+
│ │
|
|
3025
|
+
└─────────────────────────────────────────────────────────────┘
|
|
3026
|
+
```
|
|
3027
|
+
|
|
3028
|
+
**GET 阶段详解:**
|
|
3029
|
+
|
|
3030
|
+
```python
|
|
3031
|
+
def get(
|
|
3032
|
+
self,
|
|
3033
|
+
cached_df: DataFrame,
|
|
3034
|
+
extra_params: dict,
|
|
3035
|
+
args: tuple,
|
|
3036
|
+
kwargs: dict
|
|
3037
|
+
) -> Result[DataFrame | dict]:
|
|
3038
|
+
"""
|
|
3039
|
+
GET 阶段:判断缓存是否可用
|
|
3040
|
+
|
|
3041
|
+
参数说明:
|
|
3042
|
+
- cached_df: 缓存中的 DataFrame(可能为空)
|
|
3043
|
+
- extra_params: 缓存时保存的额外信息(如参数、时间戳)
|
|
3044
|
+
- args: 当前调用的位置参数
|
|
3045
|
+
- kwargs: 当前调用的关键字参数
|
|
3046
|
+
|
|
3047
|
+
返回值:
|
|
3048
|
+
1. Result.success(cached_df)
|
|
3049
|
+
- 表示缓存可用,直接使用缓存数据
|
|
3050
|
+
- 跳过 PUT 和 EXTRACT 阶段
|
|
3051
|
+
- 返回 cached_df
|
|
3052
|
+
|
|
3053
|
+
2. Result.client_error(exception, data={"args": args, "kwargs": kwargs})
|
|
3054
|
+
- 表示缓存不可用,需要刷新
|
|
3055
|
+
- error.data 中的参数会传递给 PUT 阶段
|
|
3056
|
+
- 触发函数执行获取新数据
|
|
3057
|
+
|
|
3058
|
+
3. Result.error(exception)
|
|
3059
|
+
- 表示发生错误,直接抛出异常
|
|
3060
|
+
"""
|
|
3061
|
+
```
|
|
3062
|
+
|
|
3063
|
+
**GET 阶段返回值示例:**
|
|
3064
|
+
|
|
3065
|
+
```python
|
|
3066
|
+
# 场景1: 参数相同,直接使用缓存
|
|
3067
|
+
def get(self, cached_df, extra_params, args, kwargs):
|
|
3068
|
+
last_args = tuple(extra_params.get("args", ()))
|
|
3069
|
+
if last_args == args:
|
|
3070
|
+
return Result.success(data=cached_df) # ✅ 命中缓存
|
|
3071
|
+
|
|
3072
|
+
# 场景2: 参数不同,需要刷新
|
|
3073
|
+
def get(self, cached_df, extra_params, args, kwargs):
|
|
3074
|
+
if args != self.last_args:
|
|
3075
|
+
return Result.client_error(
|
|
3076
|
+
exception=Exception("参数变化"),
|
|
3077
|
+
data={"args": args, "kwargs": kwargs} # ✅ 触发刷新
|
|
3078
|
+
)
|
|
3079
|
+
|
|
3080
|
+
# 场景3: 缓存为空,需要刷新
|
|
3081
|
+
def get(self, cached_df, extra_params, args, kwargs):
|
|
3082
|
+
if cached_df.empty:
|
|
3083
|
+
return Result.client_error(
|
|
3084
|
+
exception=Exception("缓存为空"),
|
|
3085
|
+
data={"args": args, "kwargs": kwargs} # ✅ 触发刷新
|
|
3086
|
+
)
|
|
3087
|
+
|
|
3088
|
+
# 场景4: 缓存过期,需要刷新
|
|
3089
|
+
def get(self, cached_df, extra_params, args, kwargs):
|
|
3090
|
+
cache_time = extra_params.get("timestamp", 0)
|
|
3091
|
+
if time.time() - cache_time > self.cache_interval:
|
|
3092
|
+
return Result.client_error(
|
|
3093
|
+
exception=Exception("缓存过期"),
|
|
3094
|
+
data={"args": args, "kwargs": kwargs} # ✅ 触发刷新
|
|
3095
|
+
)
|
|
3096
|
+
|
|
3097
|
+
# 场景5: 缓存不满足条件,需要刷新
|
|
3098
|
+
def get(self, cached_df, extra_params, args, kwargs):
|
|
3099
|
+
required_symbol = args[0]
|
|
3100
|
+
if cached_df.empty or cached_df["symbol"].iloc[0] != required_symbol:
|
|
3101
|
+
return Result.client_error(
|
|
3102
|
+
exception=Exception("缓存不匹配"),
|
|
3103
|
+
data={"args": args, "kwargs": kwargs} # ✅ 触发刷新
|
|
3104
|
+
)
|
|
3105
|
+
```
|
|
3106
|
+
|
|
3107
|
+
**PUT 阶段详解:**
|
|
3108
|
+
|
|
3109
|
+
```python
|
|
3110
|
+
def put(
|
|
3111
|
+
self,
|
|
3112
|
+
cached_df: DataFrame,
|
|
3113
|
+
new_df: DataFrame,
|
|
3114
|
+
extra_params: dict,
|
|
3115
|
+
args: tuple,
|
|
3116
|
+
kwargs: dict
|
|
3117
|
+
) -> Result[DataFrame]:
|
|
3118
|
+
"""
|
|
3119
|
+
PUT 阶段:合并新旧数据
|
|
3120
|
+
|
|
3121
|
+
参数说明:
|
|
3122
|
+
- cached_df: 缓存中的旧 DataFrame(可能为空)
|
|
3123
|
+
- new_df: 函数执行返回的新 DataFrame
|
|
3124
|
+
- extra_params: GET 阶段传递的参数
|
|
3125
|
+
- args: 当前调用的位置参数
|
|
3126
|
+
- kwargs: 当前调用的关键字参数
|
|
3127
|
+
|
|
3128
|
+
返回值:
|
|
3129
|
+
1. Result.success(merged_df)
|
|
3130
|
+
- 合并后的 DataFrame
|
|
3131
|
+
- 传递给 EXTRACT 阶段
|
|
3132
|
+
- merged_df 会被缓存
|
|
3133
|
+
|
|
3134
|
+
2. Result.error(exception)
|
|
3135
|
+
- 发生错误,直接抛出异常
|
|
3136
|
+
"""
|
|
3137
|
+
```
|
|
3138
|
+
|
|
3139
|
+
**PUT 阶段返回值示例:**
|
|
3140
|
+
|
|
3141
|
+
```python
|
|
3142
|
+
# 场景1: 缓存为空,直接返回新数据
|
|
3143
|
+
def put(self, cached_df, new_df, extra_params, args, kwargs):
|
|
3144
|
+
if cached_df.empty:
|
|
3145
|
+
return Result.success(data=new_df) # ✅ 使用新数据
|
|
3146
|
+
|
|
3147
|
+
# 场景2: 增量更新(追加新数据)
|
|
3148
|
+
def put(self, cached_df, new_df, extra_params, args, kwargs):
|
|
3149
|
+
merged_df = pd.concat([cached_df, new_df]).drop_duplicates()
|
|
3150
|
+
return Result.success(data=merged_df) # ✅ 合并数据
|
|
3151
|
+
|
|
3152
|
+
# 场景3: 替换旧数据
|
|
3153
|
+
def put(self, cached_df, new_df, extra_params, args, kwargs):
|
|
3154
|
+
# 直接使用新数据,不合并
|
|
3155
|
+
return Result.success(data=new_df) # ✅ 替换数据
|
|
3156
|
+
|
|
3157
|
+
# 场景4: 智能合并(按索引合并)
|
|
3158
|
+
def put(self, cached_df, new_df, extra_params, args, kwargs):
|
|
3159
|
+
# 假设按 'date' 列去重
|
|
3160
|
+
merged_df = pd.concat([cached_df, new_df])
|
|
3161
|
+
merged_df = merged_df.drop_duplicates(subset=["date"], keep="last")
|
|
3162
|
+
return Result.success(data=merged_df) # ✅ 智能合并
|
|
3163
|
+
|
|
3164
|
+
# 场景5: 数据聚合(统计场景)
|
|
3165
|
+
def put(self, cached_df, new_df, extra_params, args, kwargs):
|
|
3166
|
+
# 聚合相同键的数据
|
|
3167
|
+
merged_df = cached_df.merge(
|
|
3168
|
+
new_df,
|
|
3169
|
+
on=["date"],
|
|
3170
|
+
how="outer",
|
|
3171
|
+
suffixes=("_old", "_new")
|
|
3172
|
+
)
|
|
3173
|
+
return Result.success(data=merged_df) # ✅ 聚合数据
|
|
3174
|
+
```
|
|
3175
|
+
|
|
3176
|
+
**EXTRACT 阶段详解:**
|
|
3177
|
+
|
|
3178
|
+
```python
|
|
3179
|
+
def extract(
|
|
3180
|
+
self,
|
|
3181
|
+
merged_df: DataFrame,
|
|
3182
|
+
extra_params: dict,
|
|
3183
|
+
args: tuple,
|
|
3184
|
+
kwargs: dict
|
|
3185
|
+
) -> Result[DataFrame]:
|
|
3186
|
+
"""
|
|
3187
|
+
EXTRACT 阶段:提取返回数据
|
|
3188
|
+
|
|
3189
|
+
参数说明:
|
|
3190
|
+
- merged_df: PUT 阶段合并后的 DataFrame
|
|
3191
|
+
- extra_params: GET 阶段传递的参数
|
|
3192
|
+
- args: 当前调用的位置参数
|
|
3193
|
+
- kwargs: 当前调用的关键字参数
|
|
3194
|
+
|
|
3195
|
+
返回值:
|
|
3196
|
+
1. Result.success(extracted_df)
|
|
3197
|
+
- 最终返回给用户的 DataFrame
|
|
3198
|
+
- 这个结果不会被缓存
|
|
3199
|
+
|
|
3200
|
+
2. Result.error(exception)
|
|
3201
|
+
- 发生错误,直接抛出异常
|
|
3202
|
+
"""
|
|
3203
|
+
```
|
|
3204
|
+
|
|
3205
|
+
**EXTRACT 阶段返回值示例:**
|
|
3206
|
+
|
|
3207
|
+
```python
|
|
3208
|
+
# 场景1: 直接返回全部数据
|
|
3209
|
+
def extract(self, merged_df, extra_params, args, kwargs):
|
|
3210
|
+
return Result.success(data=merged_df) # ✅ 返回全部
|
|
3211
|
+
|
|
3212
|
+
# 场景2: 按字段过滤
|
|
3213
|
+
def extract(self, merged_df, extra_params, args, kwargs):
|
|
3214
|
+
fields = kwargs.get("fields")
|
|
3215
|
+
if fields:
|
|
3216
|
+
return Result.success(data=merged_df[fields]) # ✅ 字段过滤
|
|
3217
|
+
return Result.success(data=merged_df)
|
|
3218
|
+
|
|
3219
|
+
# 场景3: 按时间范围过滤
|
|
3220
|
+
def extract(self, merged_df, extra_params, args, kwargs):
|
|
3221
|
+
start_date = kwargs.get("start_date")
|
|
3222
|
+
end_date = kwargs.get("end_date")
|
|
3223
|
+
|
|
3224
|
+
filtered_df = merged_df
|
|
3225
|
+
if start_date:
|
|
3226
|
+
filtered_df = filtered_df[filtered_df["date"] >= start_date]
|
|
3227
|
+
if end_date:
|
|
3228
|
+
filtered_df = filtered_df[filtered_df["date"] <= end_date]
|
|
3229
|
+
|
|
3230
|
+
return Result.success(data=filtered_df) # ✅ 时间过滤
|
|
3231
|
+
|
|
3232
|
+
# 场景4: 数据切片(分页)
|
|
3233
|
+
def extract(self, merged_df, extra_params, args, kwargs):
|
|
3234
|
+
page = kwargs.get("page", 1)
|
|
3235
|
+
page_size = kwargs.get("page_size", 100)
|
|
3236
|
+
|
|
3237
|
+
start = (page - 1) * page_size
|
|
3238
|
+
end = start + page_size
|
|
3239
|
+
|
|
3240
|
+
return Result.success(data=merged_df.iloc[start:end]) # ✅ 分页切片
|
|
3241
|
+
|
|
3242
|
+
# 场景5: 数据转换
|
|
3243
|
+
def extract(self, merged_df, extra_params, args, kwargs):
|
|
3244
|
+
# 计算衍生字段
|
|
3245
|
+
df = merged_df.copy()
|
|
3246
|
+
df["pct_change"] = df["close"].pct_change()
|
|
3247
|
+
return Result.success(data=df) # ✅ 数据转换
|
|
3248
|
+
|
|
3249
|
+
# 场景6: 数据聚合
|
|
3250
|
+
def extract(self, merged_df, extra_params, args, kwargs):
|
|
3251
|
+
# 按字段聚合
|
|
3252
|
+
group_by = kwargs.get("group_by")
|
|
3253
|
+
if group_by:
|
|
3254
|
+
aggregated_df = merged_df.groupby(group_by).sum()
|
|
3255
|
+
return Result.success(data=aggregated_df) # ✅ 数据聚合
|
|
3256
|
+
return Result.success(data=merged_df)
|
|
3257
|
+
```
|
|
3258
|
+
|
|
3259
|
+
##### 10.3.3 完整示例:股票数据缓存
|
|
3260
|
+
|
|
3261
|
+
```python
|
|
3262
|
+
from pynomad.cache.dataframe.types import DataFrameValueLoader
|
|
3263
|
+
from pynomad import Result, df_memcached
|
|
3264
|
+
from pandas import DataFrame
|
|
3265
|
+
import pandas as pd
|
|
3266
|
+
|
|
3267
|
+
class StockDataValueLoader(DataFrameValueLoader):
|
|
3268
|
+
"""股票数据 ValueLoader"""
|
|
3269
|
+
|
|
3270
|
+
def get(
|
|
3271
|
+
self,
|
|
3272
|
+
cached_df: DataFrame,
|
|
3273
|
+
extra_params: dict,
|
|
3274
|
+
args: tuple,
|
|
3275
|
+
kwargs: dict
|
|
3276
|
+
) -> Result[DataFrame | dict]:
|
|
3277
|
+
"""
|
|
3278
|
+
GET 阶段:
|
|
3279
|
+
- 检查缓存是否覆盖请求的时间范围
|
|
3280
|
+
- 如果覆盖,返回缓存
|
|
3281
|
+
- 如果不覆盖,返回需要刷新的参数
|
|
3282
|
+
"""
|
|
3283
|
+
# 缓存为空,需要刷新
|
|
3284
|
+
if cached_df.empty:
|
|
3285
|
+
return Result.client_error(
|
|
3286
|
+
exception=Exception("缓存为空"),
|
|
3287
|
+
data={"args": args, "kwargs": kwargs}
|
|
3288
|
+
)
|
|
3289
|
+
|
|
3290
|
+
# 获取请求的时间范围
|
|
3291
|
+
start_date = pd.Timestamp(kwargs.get("start_date") or args[1])
|
|
3292
|
+
end_date = pd.Timestamp(kwargs.get("end_date") or args[2])
|
|
3293
|
+
|
|
3294
|
+
# 检查缓存是否覆盖请求范围
|
|
3295
|
+
cache_start = cached_df["date"].min()
|
|
3296
|
+
cache_end = cached_df["date"].max()
|
|
3297
|
+
|
|
3298
|
+
# 缓存起始时间晚于请求时间,需要刷新
|
|
3299
|
+
if cache_start > start_date:
|
|
3300
|
+
return Result.client_error(
|
|
3301
|
+
exception=Exception(f"缓存从 {cache_start} 开始,但需要从 {start_date} 开始"),
|
|
3302
|
+
data={"args": args, "kwargs": kwargs}
|
|
3303
|
+
)
|
|
3304
|
+
|
|
3305
|
+
# 缓存结束时间早于请求时间,需要刷新
|
|
3306
|
+
if cache_end < end_date:
|
|
3307
|
+
return Result.client_error(
|
|
3308
|
+
exception=Exception(f"缓存到 {cache_end} 结束,但需要到 {end_date} 结束"),
|
|
3309
|
+
data={"args": args, "kwargs": kwargs}
|
|
3310
|
+
)
|
|
3311
|
+
|
|
3312
|
+
# 缓存覆盖请求范围,直接返回
|
|
3313
|
+
return Result.success(data=cached_df)
|
|
3314
|
+
|
|
3315
|
+
def put(
|
|
3316
|
+
self,
|
|
3317
|
+
cached_df: DataFrame,
|
|
3318
|
+
new_df: DataFrame,
|
|
3319
|
+
extra_params: dict,
|
|
3320
|
+
args: tuple,
|
|
3321
|
+
kwargs: dict
|
|
3322
|
+
) -> Result[DataFrame]:
|
|
3323
|
+
"""
|
|
3324
|
+
PUT 阶段:
|
|
3325
|
+
- 合并旧数据和新数据
|
|
3326
|
+
- 按日期去重,保留最新的数据
|
|
3327
|
+
"""
|
|
3328
|
+
# 缓存为空,直接返回新数据
|
|
3329
|
+
if cached_df.empty:
|
|
3330
|
+
return Result.success(data=new_df)
|
|
3331
|
+
|
|
3332
|
+
# 合并数据并按日期去重
|
|
3333
|
+
merged_df = pd.concat([cached_df, new_df])
|
|
3334
|
+
merged_df = merged_df.drop_duplicates(subset=["date"], keep="last")
|
|
3335
|
+
|
|
3336
|
+
# 按日期排序
|
|
3337
|
+
merged_df = merged_df.sort_values("date").reset_index(drop=True)
|
|
3338
|
+
|
|
3339
|
+
return Result.success(data=merged_df)
|
|
3340
|
+
|
|
3341
|
+
def extract(
|
|
3342
|
+
self,
|
|
3343
|
+
merged_df: DataFrame,
|
|
3344
|
+
extra_params: dict,
|
|
3345
|
+
args: tuple,
|
|
3346
|
+
kwargs: dict
|
|
3347
|
+
) -> Result[DataFrame]:
|
|
3348
|
+
"""
|
|
3349
|
+
EXTRACT 阶段:
|
|
3350
|
+
- 根据请求的时间范围过滤数据
|
|
3351
|
+
- 返回指定范围的数据
|
|
3352
|
+
"""
|
|
3353
|
+
start_date = pd.Timestamp(kwargs.get("start_date") or args[1])
|
|
3354
|
+
end_date = pd.Timestamp(kwargs.get("end_date") or args[2])
|
|
3355
|
+
|
|
3356
|
+
# 过滤时间范围
|
|
3357
|
+
filtered_df = merged_df[
|
|
3358
|
+
(merged_df["date"] >= start_date) &
|
|
3359
|
+
(merged_df["date"] <= end_date)
|
|
3360
|
+
].copy()
|
|
3361
|
+
|
|
3362
|
+
return Result.success(data=filtered_df)
|
|
3363
|
+
|
|
3364
|
+
# 使用自定义 ValueLoader
|
|
3365
|
+
@df_memcached(value_loader=StockDataValueLoader(), ttl=86400)
|
|
3366
|
+
def get_stock_data(code: str, start_date: str, end_date: str, fq: str = "qfq") -> DataFrame:
|
|
3367
|
+
"""获取股票数据"""
|
|
3368
|
+
return fetch_stock_from_api(code, start_date, end_date, fq)
|
|
3369
|
+
|
|
3370
|
+
# 使用示例
|
|
3371
|
+
# 第一次请求 2024-01-01 到 2024-01-10,从 API 获取
|
|
3372
|
+
df1 = get_stock_data("AAPL", "2024-01-01", "2024-01-10")
|
|
3373
|
+
|
|
3374
|
+
# 第二次请求相同范围,从缓存返回(不调用 API)
|
|
3375
|
+
df2 = get_stock_data("AAPL", "2024-01-01", "2024-01-10")
|
|
3376
|
+
|
|
3377
|
+
# 第三次请求更大范围 2024-01-01 到 2024-01-20,
|
|
3378
|
+
# GET 判断缓存不完整,PUT 合并新旧数据,EXTRACT 返回完整范围
|
|
3379
|
+
df3 = get_stock_data("AAPL", "2024-01-01", "2024-01-20")
|
|
3380
|
+
```
|
|
3381
|
+
|
|
3382
|
+
##### 10.3.4 自定义缓存注意事项
|
|
3383
|
+
|
|
3384
|
+
1. **确保线程安全**
|
|
3385
|
+
- ValueLoader 的方法会被多线程调用
|
|
3386
|
+
- 避免在 ValueLoader 中使用共享的可变状态
|
|
3387
|
+
|
|
3388
|
+
2. **正确处理 TTL 和淘汰策略**
|
|
3389
|
+
- ValueLoader 主要控制数据处理逻辑
|
|
3390
|
+
- TTL 和淘汰策略由缓存系统自动管理
|
|
3391
|
+
|
|
3392
|
+
3. **考虑缓存穿透和雪崩问题**
|
|
3393
|
+
- GET 阶段检查缓存有效性
|
|
3394
|
+
- 合理设置缓存失效条件
|
|
3395
|
+
|
|
3396
|
+
4. **性能优化**
|
|
3397
|
+
- 避免在 ValueLoader 中进行耗时操作
|
|
3398
|
+
- 使用向量化操作(如 pandas 的向量化方法)
|
|
3399
|
+
|
|
3400
|
+
---
|
|
3401
|
+
|
|
3402
|
+
### 11.4 性能优化建议
|
|
3403
|
+
|
|
3404
|
+
1. **合理设置缓存大小**:根据内存和磁盘空间调整 `maxsize`
|
|
3405
|
+
2. **使用连接池**:Redis 和 SQL 缓存配置连接池参数
|
|
3406
|
+
3. **异步操作**:对于高并发场景,考虑使用异步装饰器
|
|
3407
|
+
4. **批量操作**:减少缓存读写次数,尽量批量处理
|
|
3408
|
+
5. **监控缓存命中率**:定期检查缓存效果,调整 TTL 和大小
|
|
3409
|
+
|
|
3410
|
+
---
|
|
3411
|
+
|
|
3412
|
+
## 项目结构
|
|
3413
|
+
|
|
3414
|
+
```
|
|
3415
|
+
pynomad/
|
|
3416
|
+
├── cache/ # 缓存系统
|
|
3417
|
+
│ ├── core/ # 核心接口
|
|
3418
|
+
│ ├── decorator/ # 缓存装饰器
|
|
3419
|
+
│ ├── dataframe/ # DataFrame 缓存
|
|
3420
|
+
│ └── impl/ # 缓存实现
|
|
3421
|
+
├── config/ # 配置系统
|
|
3422
|
+
│ └── auto/ # 自动配置
|
|
3423
|
+
├── logsystem/ # 日志系统
|
|
3424
|
+
├── naming/ # 命名系统
|
|
3425
|
+
├── result/ # 结果封装
|
|
3426
|
+
└── common/ # 通用工具
|
|
3427
|
+
```
|
|
3428
|
+
|
|
3429
|
+
## 许可证
|
|
3430
|
+
|
|
3431
|
+
MIT License
|
|
3432
|
+
|
|
3433
|
+
## 贡献
|
|
3434
|
+
|
|
3435
|
+
欢迎 Pull Request!
|
|
3436
|
+
|
|
3437
|
+
> ⚠️ **不欢迎 Issue**
|
|
3438
|
+
>
|
|
3439
|
+
> 如有 bug 请自行下载源码进行更改,因为作者正在工厂打螺丝进行着 777 工作制,没有时间更改代码
|
|
3440
|
+
|