rquote 0.3.9__tar.gz → 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. {rquote-0.3.9/rquote.egg-info → rquote-0.4.0}/PKG-INFO +57 -5
  2. rquote-0.3.9/PKG-INFO → rquote-0.4.0/README.md +53 -15
  3. {rquote-0.3.9 → rquote-0.4.0}/pyproject.toml +5 -2
  4. {rquote-0.3.9 → rquote-0.4.0}/rquote/__init__.py +6 -0
  5. rquote-0.4.0/rquote/cache/__init__.py +14 -0
  6. rquote-0.4.0/rquote/cache/persistent.py +421 -0
  7. rquote-0.4.0/rquote/markets/base.py +196 -0
  8. {rquote-0.3.9 → rquote-0.4.0}/rquote/markets/cn_stock.py +9 -12
  9. {rquote-0.3.9 → rquote-0.4.0}/rquote/markets/future.py +8 -9
  10. {rquote-0.3.9 → rquote-0.4.0}/rquote/markets/hk_stock.py +4 -13
  11. {rquote-0.3.9 → rquote-0.4.0}/rquote/markets/us_stock.py +8 -9
  12. rquote-0.3.9/README.md → rquote-0.4.0/rquote.egg-info/PKG-INFO +67 -3
  13. {rquote-0.3.9 → rquote-0.4.0}/rquote.egg-info/SOURCES.txt +1 -0
  14. {rquote-0.3.9 → rquote-0.4.0}/rquote.egg-info/requires.txt +3 -0
  15. rquote-0.4.0/tests/test_cache.py +357 -0
  16. rquote-0.3.9/rquote/cache/__init__.py +0 -9
  17. rquote-0.3.9/rquote/markets/base.py +0 -49
  18. rquote-0.3.9/tests/test_cache.py +0 -71
  19. {rquote-0.3.9 → rquote-0.4.0}/rquote/api/__init__.py +0 -0
  20. {rquote-0.3.9 → rquote-0.4.0}/rquote/api/lists.py +0 -0
  21. {rquote-0.3.9 → rquote-0.4.0}/rquote/api/price.py +0 -0
  22. {rquote-0.3.9 → rquote-0.4.0}/rquote/api/stock_info.py +0 -0
  23. {rquote-0.3.9 → rquote-0.4.0}/rquote/api/tick.py +0 -0
  24. {rquote-0.3.9 → rquote-0.4.0}/rquote/cache/base.py +0 -0
  25. {rquote-0.3.9 → rquote-0.4.0}/rquote/cache/memory.py +0 -0
  26. {rquote-0.3.9 → rquote-0.4.0}/rquote/config.py +0 -0
  27. {rquote-0.3.9 → rquote-0.4.0}/rquote/data_sources/__init__.py +0 -0
  28. {rquote-0.3.9 → rquote-0.4.0}/rquote/data_sources/base.py +0 -0
  29. {rquote-0.3.9 → rquote-0.4.0}/rquote/data_sources/sina.py +0 -0
  30. {rquote-0.3.9 → rquote-0.4.0}/rquote/data_sources/tencent.py +0 -0
  31. {rquote-0.3.9 → rquote-0.4.0}/rquote/exceptions.py +0 -0
  32. {rquote-0.3.9 → rquote-0.4.0}/rquote/factors/__init__.py +0 -0
  33. {rquote-0.3.9 → rquote-0.4.0}/rquote/factors/technical.py +0 -0
  34. {rquote-0.3.9 → rquote-0.4.0}/rquote/markets/__init__.py +0 -0
  35. {rquote-0.3.9 → rquote-0.4.0}/rquote/markets/factory.py +0 -0
  36. {rquote-0.3.9 → rquote-0.4.0}/rquote/parsers/__init__.py +0 -0
  37. {rquote-0.3.9 → rquote-0.4.0}/rquote/parsers/kline.py +0 -0
  38. {rquote-0.3.9 → rquote-0.4.0}/rquote/plots.py +0 -0
  39. {rquote-0.3.9 → rquote-0.4.0}/rquote/utils/__init__.py +0 -0
  40. {rquote-0.3.9 → rquote-0.4.0}/rquote/utils/date.py +0 -0
  41. {rquote-0.3.9 → rquote-0.4.0}/rquote/utils/helpers.py +0 -0
  42. {rquote-0.3.9 → rquote-0.4.0}/rquote/utils/http.py +0 -0
  43. {rquote-0.3.9 → rquote-0.4.0}/rquote/utils/logging.py +0 -0
  44. {rquote-0.3.9 → rquote-0.4.0}/rquote/utils/web.py +0 -0
  45. {rquote-0.3.9 → rquote-0.4.0}/rquote/utils.py +0 -0
  46. {rquote-0.3.9 → rquote-0.4.0}/rquote.egg-info/dependency_links.txt +0 -0
  47. {rquote-0.3.9 → rquote-0.4.0}/rquote.egg-info/top_level.txt +0 -0
  48. {rquote-0.3.9 → rquote-0.4.0}/setup.cfg +0 -0
  49. {rquote-0.3.9 → rquote-0.4.0}/tests/test_api.py +0 -0
  50. {rquote-0.3.9 → rquote-0.4.0}/tests/test_config.py +0 -0
  51. {rquote-0.3.9 → rquote-0.4.0}/tests/test_exceptions.py +0 -0
  52. {rquote-0.3.9 → rquote-0.4.0}/tests/test_utils.py +0 -0
@@ -1,14 +1,16 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rquote
3
- Version: 0.3.9
3
+ Version: 0.4.0
4
4
  Summary: Mostly day quotes of cn/hk/us/fund/future markets, side with quote list fetch
5
- Requires-Python: >=3.6.1
5
+ Requires-Python: >=3.9.0
6
6
  Description-Content-Type: text/markdown
7
7
  Requires-Dist: build>=0.9.0
8
8
  Requires-Dist: httpx>=0.20.0
9
9
  Requires-Dist: pandas>=1.0.0
10
10
  Requires-Dist: setuptools>=42
11
11
  Requires-Dist: twine>=3.8.0
12
+ Provides-Extra: persistent
13
+ Requires-Dist: duckdb>=0.9.0; extra == "persistent"
12
14
 
13
15
  # rquote
14
16
 
@@ -60,17 +62,67 @@ sid, name, df = get_price('sz000001', sdate='2024-01-01', edate='2024-02-01')
60
62
 
61
63
  ### 使用缓存
62
64
 
65
+ #### 内存缓存(MemoryCache)
66
+
63
67
  ```python
64
68
  from rquote import get_price, MemoryCache
65
69
 
66
70
  # 创建缓存实例
67
71
  cache = MemoryCache(ttl=3600) # 缓存1小时
68
72
 
69
- # 使用缓存(通过dd参数,向后兼容)
70
- cache_dict = {}
71
- sid, name, df = get_price('sh000001', dd=cache_dict)
73
+ # 使用缓存(通过dd参数传递MemoryCache实例)
74
+ sid, name, df = get_price('sh000001', dd=cache)
75
+
76
+ # 注意:MemoryCache 是内存缓存,数据仅在当前进程运行期间有效
77
+ # 脚本运行结束后,缓存数据会丢失
78
+ ```
79
+
80
+ **缓存生命周期说明:**
81
+ - `MemoryCache` 是纯内存缓存,数据存储在进程内存中
82
+ - 缓存数据仅在当前脚本运行期间有效
83
+ - 脚本运行结束后,所有缓存数据会丢失
84
+
85
+ #### 持久化缓存(PersistentCache)
86
+
87
+ 持久化缓存支持跨进程/跨运行的缓存持久化,数据会保存到本地文件。
88
+
89
+ **安装可选依赖:**
90
+ ```bash
91
+ pip install rquote[persistent]
92
+ # 或
93
+ uv pip install "rquote[persistent]"
94
+ ```
95
+
96
+ **使用持久化缓存:**
97
+ ```python
98
+ from rquote import get_price, PersistentCache
99
+
100
+ # 创建持久化缓存实例
101
+ # 默认使用 duckdb(如果已安装),否则使用 pickle 文件
102
+ cache = PersistentCache(ttl=86400) # 缓存24小时,默认路径:~/.rquote/cache.db(未安装 duckdb 时为 ~/.rquote/cache.pkl)
103
+
104
+ # 或指定自定义路径
105
+ cache = PersistentCache(db_path='./my_cache.db', use_duckdb=True)
106
+
107
+ # 使用缓存
108
+ sid, name, df = get_price('sh000001', dd=cache)
109
+
110
+ # 持久化缓存支持智能扩展:
111
+ # - 当请求的结束日期不在缓存中时,会自动从缓存的最新日期向前扩展
112
+ # - 当请求的开始日期不在缓存中时,会自动从缓存的最早日期向后扩展
113
+ # - 数据会自动合并,避免重复请求
114
+
115
+ # 关闭缓存(可选;数据在每次写入时已保存,close 仅关闭 duckdb 连接)
116
+ cache.close()
72
117
  ```
73
118
 
119
+ **持久化缓存特性:**
120
+ - ✅ 跨进程/跨运行持久化:数据保存在本地文件,下次运行仍可使用
121
+ - ✅ 智能数据合并:相同股票的数据会自动合并,key 不包含日期范围
122
+ - ✅ 智能扩展:当请求的日期范围超出缓存时,自动扩展并合并数据
123
+ - ✅ 支持 TTL:可设置缓存过期时间
124
+ - ✅ 可选 duckdb:如果安装了 duckdb,使用 duckdb 存储(性能更好),否则使用 pickle 文件
125
+
74
126
  ## 主要功能
75
127
 
76
128
  ### 历史价格数据获取
@@ -1,15 +1,3 @@
1
- Metadata-Version: 2.4
2
- Name: rquote
3
- Version: 0.3.9
4
- Summary: Mostly day quotes of cn/hk/us/fund/future markets, side with quote list fetch
5
- Requires-Python: >=3.6.1
6
- Description-Content-Type: text/markdown
7
- Requires-Dist: build>=0.9.0
8
- Requires-Dist: httpx>=0.20.0
9
- Requires-Dist: pandas>=1.0.0
10
- Requires-Dist: setuptools>=42
11
- Requires-Dist: twine>=3.8.0
12
-
13
1
  # rquote
14
2
 
15
3
  `rquote` 是一个提供 A股/港股/美股/ETF基金/期货 历史数据获取的Python库
@@ -60,17 +48,67 @@ sid, name, df = get_price('sz000001', sdate='2024-01-01', edate='2024-02-01')
60
48
 
61
49
  ### 使用缓存
62
50
 
51
+ #### 内存缓存(MemoryCache)
52
+
63
53
  ```python
64
54
  from rquote import get_price, MemoryCache
65
55
 
66
56
  # 创建缓存实例
67
57
  cache = MemoryCache(ttl=3600) # 缓存1小时
68
58
 
69
- # 使用缓存(通过dd参数,向后兼容)
70
- cache_dict = {}
71
- sid, name, df = get_price('sh000001', dd=cache_dict)
59
+ # 使用缓存(通过dd参数传递MemoryCache实例)
60
+ sid, name, df = get_price('sh000001', dd=cache)
61
+
62
+ # 注意:MemoryCache 是内存缓存,数据仅在当前进程运行期间有效
63
+ # 脚本运行结束后,缓存数据会丢失
64
+ ```
65
+
66
+ **缓存生命周期说明:**
67
+ - `MemoryCache` 是纯内存缓存,数据存储在进程内存中
68
+ - 缓存数据仅在当前脚本运行期间有效
69
+ - 脚本运行结束后,所有缓存数据会丢失
70
+
71
+ #### 持久化缓存(PersistentCache)
72
+
73
+ 持久化缓存支持跨进程/跨运行的缓存持久化,数据会保存到本地文件。
74
+
75
+ **安装可选依赖:**
76
+ ```bash
77
+ pip install rquote[persistent]
78
+ # 或
79
+ uv pip install "rquote[persistent]"
72
80
  ```
73
81
 
82
+ **使用持久化缓存:**
83
+ ```python
84
+ from rquote import get_price, PersistentCache
85
+
86
+ # 创建持久化缓存实例
87
+ # 默认使用 duckdb(如果已安装),否则使用 pickle 文件
88
+ cache = PersistentCache(ttl=86400) # 缓存24小时,默认路径:~/.rquote/cache.db(未安装 duckdb 时为 ~/.rquote/cache.pkl)
89
+
90
+ # 或指定自定义路径
91
+ cache = PersistentCache(db_path='./my_cache.db', use_duckdb=True)
92
+
93
+ # 使用缓存
94
+ sid, name, df = get_price('sh000001', dd=cache)
95
+
96
+ # 持久化缓存支持智能扩展:
97
+ # - 当请求的结束日期不在缓存中时,会自动从缓存的最新日期向前扩展
98
+ # - 当请求的开始日期不在缓存中时,会自动从缓存的最早日期向后扩展
99
+ # - 数据会自动合并,避免重复请求
100
+
101
+ # 关闭缓存(可选;数据在每次写入时已保存,close 仅关闭 duckdb 连接)
102
+ cache.close()
103
+ ```
104
+
105
+ **持久化缓存特性:**
106
+ - ✅ 跨进程/跨运行持久化:数据保存在本地文件,下次运行仍可使用
107
+ - ✅ 智能数据合并:相同股票的数据会自动合并,key 不包含日期范围
108
+ - ✅ 智能扩展:当请求的日期范围超出缓存时,自动扩展并合并数据
109
+ - ✅ 支持 TTL:可设置缓存过期时间
110
+ - ✅ 可选 duckdb:如果安装了 duckdb,使用 duckdb 存储(性能更好),否则使用 pickle 文件
111
+
74
112
  ## 主要功能
75
113
 
76
114
  ### 历史价格数据获取
@@ -4,10 +4,11 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "rquote"
7
- version = "0.3.9"
7
+ version = "0.4.0"
8
8
  description = "Mostly day quotes of cn/hk/us/fund/future markets, side with quote list fetch"
9
9
  readme = "README.md"
10
- requires-python = ">=3.6.1"
10
+ # requires-python = ">=3.6.1" # duckdb requires higher python version
11
+ requires-python = ">=3.9.0"
11
12
  dependencies = [
12
13
  "build>=0.9.0",
13
14
  "httpx>=0.20.0",
@@ -15,3 +16,5 @@ dependencies = [
15
16
  "setuptools>=42",
16
17
  "twine>=3.8.0",
17
18
  ]
19
+ [project.optional-dependencies]
20
+ persistent = ["duckdb>=0.9.0"]
@@ -35,6 +35,11 @@ from .plots import PlotUtils
35
35
  from . import config
36
36
  from . import exceptions
37
37
  from .cache import MemoryCache, Cache
38
+ # 尝试导入持久化缓存(可选依赖)
39
+ try:
40
+ from .cache import PersistentCache
41
+ except ImportError:
42
+ PersistentCache = None
38
43
  from .utils.http import HTTPClient
39
44
 
40
45
 
@@ -93,5 +98,6 @@ __all__ = [
93
98
  'exceptions',
94
99
  'MemoryCache',
95
100
  'Cache',
101
+ 'PersistentCache',
96
102
  'HTTPClient',
97
103
  ]
@@ -0,0 +1,14 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ 缓存模块
4
+ """
5
+ from .base import Cache
6
+ from .memory import MemoryCache
7
+
8
+ # 尝试导入持久化缓存(可选依赖)
9
+ try:
10
+ from .persistent import PersistentCache
11
+ __all__ = ['Cache', 'MemoryCache', 'PersistentCache']
12
+ except ImportError:
13
+ __all__ = ['Cache', 'MemoryCache']
14
+
@@ -0,0 +1,421 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ 持久化缓存实现
4
+ """
5
+ import os
6
+ import time
7
+ from pathlib import Path
8
+ from typing import Optional, Any, Tuple
9
+ import pandas as pd
10
+ from .base import Cache
11
+
12
+ # 尝试导入 duckdb(可选依赖)
13
+ try:
14
+ import duckdb
15
+ DUCKDB_AVAILABLE = True
16
+ except ImportError:
17
+ DUCKDB_AVAILABLE = False
18
+ duckdb = None
19
+
20
+
21
+ class PersistentCache(Cache):
22
+ """持久化缓存实现,使用 duckdb 或文件系统存储数据"""
23
+
24
def __init__(self, db_path: Optional[str] = None, use_duckdb: bool = True, ttl: Optional[int] = None):
    """
    Set up a persistent cache backed by duckdb or by a pickle file.

    Args:
        db_path: Storage file path. When None, a file under ``~/.rquote`` is
            used: ``cache.db`` for duckdb, ``cache.pkl`` for pickle.
        use_duckdb: Prefer duckdb. Pickle is used instead when this is False
            or when the duckdb package could not be imported.
        ttl: Default expiry in seconds; None means entries do not expire.
    """
    self.ttl = ttl
    # duckdb is only selected when it is both requested and importable.
    self.use_duckdb = use_duckdb and DUCKDB_AVAILABLE

    if db_path is None:
        default_dir = Path.home() / '.rquote'
        default_dir.mkdir(exist_ok=True)
        file_name = 'cache.db' if self.use_duckdb else 'cache.pkl'
        db_path = str(default_dir / file_name)
    self.db_path = db_path

    backend_init = self._init_duckdb if self.use_duckdb else self._init_pickle
    backend_init()
52
+
53
def _init_duckdb(self):
    """Open the duckdb file at ``self.db_path`` and ensure the cache table and index exist."""
    connection = duckdb.connect(self.db_path)
    self.conn = connection

    # cache_key is the primary key; the remaining columns hold the entry's
    # metadata and the serialized data blob.
    create_table_sql = """
        CREATE TABLE IF NOT EXISTS cache_data (
            cache_key TEXT PRIMARY KEY,
            symbol TEXT NOT NULL,
            name TEXT,
            data BLOB,
            earliest_date TEXT,
            latest_date TEXT,
            freq TEXT,
            fq TEXT,
            updated_at TIMESTAMP,
            expire_at TIMESTAMP
        )
    """
    create_index_sql = """
        CREATE INDEX IF NOT EXISTS idx_symbol_freq_fq
        ON cache_data(symbol, freq, fq)
    """
    for statement in (create_table_sql, create_index_sql):
        connection.execute(statement)
75
+
76
+ def _init_pickle(self):
77
+ """初始化 pickle 存储"""
78
+ import pickle
79
+ self.pickle = pickle
80
+ if os.path.exists(self.db_path):
81
+ try:
82
+ with open(self.db_path, 'rb') as f:
83
+ self._cache_data = self.pickle.load(f)
84
+ except:
85
+ self._cache_data = {}
86
+ else:
87
+ self._cache_data = {}
88
+
89
+ def _save_pickle(self):
90
+ """保存 pickle 数据"""
91
+ import pickle
92
+ with open(self.db_path, 'wb') as f:
93
+ self.pickle.dump(self._cache_data, f)
94
+
95
+ def _extract_key_parts(self, key: str) -> Tuple[str, str, str, str, str]:
96
+ """
97
+ 从完整 key 中提取各部分
98
+
99
+ Args:
100
+ key: 完整 key,格式如 "symbol:sdate:edate:freq:days:fq"
101
+
102
+ Returns:
103
+ (symbol, sdate, edate, freq, fq)
104
+ """
105
+ parts = key.split(':')
106
+ if len(parts) >= 6:
107
+ return parts[0], parts[1], parts[2], parts[3], parts[5]
108
+ elif len(parts) >= 4:
109
+ return parts[0], parts[1] if len(parts) > 1 else '', parts[2] if len(parts) > 2 else '', parts[3], parts[4] if len(parts) > 4 else 'qfq'
110
+ else:
111
+ return parts[0] if parts else '', '', '', 'day', 'qfq'
112
+
113
+ def _get_base_key(self, symbol: str, freq: str, fq: str) -> str:
114
+ """生成基础 key(不包含日期)"""
115
+ return f"{symbol}:{freq}:{fq}"
116
+
117
+ def _parse_date(self, date_str: str) -> Optional[pd.Timestamp]:
118
+ """解析日期字符串"""
119
+ if not date_str:
120
+ return None
121
+ try:
122
+ return pd.to_datetime(date_str)
123
+ except:
124
+ return None
125
+
126
+ def _get_dataframe_date_range(self, df: pd.DataFrame) -> Tuple[Optional[pd.Timestamp], Optional[pd.Timestamp]]:
127
+ """获取 DataFrame 的日期范围"""
128
+ if df.empty or not isinstance(df.index, pd.DatetimeIndex):
129
+ return None, None
130
+ return df.index.min(), df.index.max()
131
+
132
+ def _filter_dataframe_by_date(self, df: pd.DataFrame, sdate: Optional[str] = None,
133
+ edate: Optional[str] = None) -> pd.DataFrame:
134
+ """根据日期范围过滤 DataFrame"""
135
+ if df.empty:
136
+ return df
137
+
138
+ if not isinstance(df.index, pd.DatetimeIndex):
139
+ return df
140
+
141
+ start_date = self._parse_date(sdate) if sdate else None
142
+ end_date = self._parse_date(edate) if edate else None
143
+
144
+ if start_date is not None and end_date is not None:
145
+ mask = (df.index >= start_date) & (df.index <= end_date)
146
+ return df[mask]
147
+ elif start_date is not None:
148
+ return df[df.index >= start_date]
149
+ elif end_date is not None:
150
+ return df[df.index <= end_date]
151
+ else:
152
+ return df
153
+
154
+ def _merge_dataframes(self, df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
155
+ """合并两个 DataFrame,去重并排序"""
156
+ if df1.empty:
157
+ return df2
158
+ if df2.empty:
159
+ return df1
160
+
161
+ # 合并并去重
162
+ combined = pd.concat([df1, df2])
163
+ combined = combined[~combined.index.duplicated(keep='last')]
164
+ combined = combined.sort_index()
165
+ return combined
166
+
167
+ def get(self, key: str) -> Optional[Any]:
168
+ """
169
+ 获取缓存数据
170
+
171
+ Args:
172
+ key: 缓存 key,格式如 "symbol:sdate:edate:freq:days:fq"
173
+
174
+ Returns:
175
+ (symbol, name, DataFrame) 或 None
176
+ """
177
+ symbol, sdate, edate, freq, fq = self._extract_key_parts(key)
178
+ base_key = self._get_base_key(symbol, freq, fq)
179
+
180
+ if self.use_duckdb:
181
+ return self._get_duckdb(base_key, symbol, sdate, edate, freq, fq)
182
+ else:
183
+ return self._get_pickle(base_key, symbol, sdate, edate, freq, fq)
184
+
185
+ def _get_duckdb(self, base_key: str, symbol: str, sdate: str, edate: str,
186
+ freq: str, fq: str) -> Optional[Tuple[str, str, pd.DataFrame]]:
187
+ """从 duckdb 获取数据"""
188
+ result = self.conn.execute("""
189
+ SELECT name, data, earliest_date, latest_date, expire_at
190
+ FROM cache_data
191
+ WHERE cache_key = ?
192
+ """, [base_key]).fetchone()
193
+
194
+ if not result:
195
+ return None
196
+
197
+ name, data_blob, earliest_date, latest_date, expire_at = result
198
+
199
+ # 检查过期
200
+ if self.ttl and expire_at:
201
+ expire_ts = pd.to_datetime(expire_at)
202
+ if pd.Timestamp.now() > expire_ts:
203
+ self.delete(base_key)
204
+ return None
205
+
206
+ # 反序列化 DataFrame
207
+ import pickle
208
+ df = pickle.loads(data_blob)
209
+
210
+ # 获取缓存数据的日期范围
211
+ cached_earliest = self._parse_date(earliest_date)
212
+ cached_latest = self._parse_date(latest_date)
213
+
214
+ # 如果请求的日期范围完全在缓存范围内,直接返回过滤后的数据
215
+ request_sdate = self._parse_date(sdate) if sdate else None
216
+ request_edate = self._parse_date(edate) if edate else None
217
+
218
+ # 检查是否有重叠
219
+ if request_edate and cached_earliest and request_edate < cached_earliest:
220
+ # 请求的结束日期早于缓存的最早日期,无重叠
221
+ return None
222
+ if request_sdate and cached_latest and request_sdate > cached_latest:
223
+ # 请求的开始日期晚于缓存的最晚日期,无重叠
224
+ return None
225
+
226
+ # 有重叠,返回缓存中可用的部分数据
227
+ # 计算实际可用的日期范围
228
+ actual_sdate = max(request_sdate, cached_earliest) if request_sdate and cached_earliest else (request_sdate or cached_earliest)
229
+ actual_edate = min(request_edate, cached_latest) if request_edate and cached_latest else (request_edate or cached_latest)
230
+
231
+ # 过滤数据
232
+ filtered_df = self._filter_dataframe_by_date(
233
+ df,
234
+ actual_sdate.strftime('%Y-%m-%d') if actual_sdate else None,
235
+ actual_edate.strftime('%Y-%m-%d') if actual_edate else None
236
+ )
237
+
238
+ if filtered_df.empty:
239
+ return None
240
+
241
+ return (symbol, name, filtered_df)
242
+
243
+ def _get_pickle(self, base_key: str, symbol: str, sdate: str, edate: str,
244
+ freq: str, fq: str) -> Optional[Tuple[str, str, pd.DataFrame]]:
245
+ """从 pickle 文件获取数据"""
246
+ if base_key not in self._cache_data:
247
+ return None
248
+
249
+ cache_entry = self._cache_data[base_key]
250
+
251
+ # 检查过期
252
+ if self.ttl and 'expire_at' in cache_entry:
253
+ expire_ts = cache_entry['expire_at']
254
+ if pd.Timestamp.now() > expire_ts:
255
+ del self._cache_data[base_key]
256
+ self._save_pickle()
257
+ return None
258
+
259
+ df = cache_entry['data']
260
+ name = cache_entry.get('name', '')
261
+ earliest_date = cache_entry.get('earliest_date')
262
+ latest_date = cache_entry.get('latest_date')
263
+
264
+ # 获取缓存数据的日期范围
265
+ cached_earliest = self._parse_date(earliest_date)
266
+ cached_latest = self._parse_date(latest_date)
267
+
268
+ # 如果请求的日期范围完全在缓存范围内,直接返回过滤后的数据
269
+ request_sdate = self._parse_date(sdate) if sdate else None
270
+ request_edate = self._parse_date(edate) if edate else None
271
+
272
+ # 检查是否有重叠
273
+ if request_edate and cached_earliest and request_edate < cached_earliest:
274
+ # 请求的结束日期早于缓存的最早日期,无重叠
275
+ return None
276
+ if request_sdate and cached_latest and request_sdate > cached_latest:
277
+ # 请求的开始日期晚于缓存的最晚日期,无重叠
278
+ return None
279
+
280
+ # 有重叠,返回缓存中可用的部分数据
281
+ # 计算实际可用的日期范围
282
+ actual_sdate = max(request_sdate, cached_earliest) if request_sdate and cached_earliest else (request_sdate or cached_earliest)
283
+ actual_edate = min(request_edate, cached_latest) if request_edate and cached_latest else (request_edate or cached_latest)
284
+
285
+ # 过滤数据
286
+ filtered_df = self._filter_dataframe_by_date(
287
+ df,
288
+ actual_sdate.strftime('%Y-%m-%d') if actual_sdate else None,
289
+ actual_edate.strftime('%Y-%m-%d') if actual_edate else None
290
+ )
291
+
292
+ if filtered_df.empty:
293
+ return None
294
+
295
+ return (symbol, name, filtered_df)
296
+
297
+ def put(self, key: str, value: Any, ttl: Optional[int] = None) -> None:
298
+ """
299
+ 存储缓存数据
300
+
301
+ Args:
302
+ key: 缓存 key
303
+ value: (symbol, name, DataFrame) 元组
304
+ ttl: 过期时间(秒)
305
+ """
306
+ if not isinstance(value, tuple) or len(value) != 3:
307
+ return
308
+
309
+ symbol, name, df = value
310
+ if not isinstance(df, pd.DataFrame) or df.empty:
311
+ return
312
+
313
+ _, _, _, freq, fq = self._extract_key_parts(key)
314
+ base_key = self._get_base_key(symbol, freq, fq)
315
+
316
+ # 尝试从基础 key 获取完整数据并合并
317
+ existing = self._get_raw(base_key)
318
+ if existing:
319
+ _, existing_name, existing_df = existing
320
+ # 使用新数据的 name(如果有)
321
+ if not name:
322
+ name = existing_name
323
+ # 合并数据
324
+ df = self._merge_dataframes(existing_df, df)
325
+
326
+ # 获取日期范围
327
+ earliest_date, latest_date = self._get_dataframe_date_range(df)
328
+ earliest_str = earliest_date.strftime('%Y-%m-%d') if earliest_date else None
329
+ latest_str = latest_date.strftime('%Y-%m-%d') if latest_date else None
330
+
331
+ # 计算过期时间
332
+ expire_at = None
333
+ if ttl or self.ttl:
334
+ expire_seconds = (ttl or self.ttl)
335
+ expire_at = pd.Timestamp.now() + pd.Timedelta(seconds=expire_seconds)
336
+
337
+ if self.use_duckdb:
338
+ self._put_duckdb(base_key, symbol, name, df, earliest_str, latest_str, freq, fq, expire_at)
339
+ else:
340
+ self._put_pickle(base_key, symbol, name, df, earliest_str, latest_str, freq, fq, expire_at)
341
+
342
+ def _get_raw(self, base_key: str) -> Optional[Tuple[str, str, pd.DataFrame]]:
343
+ """获取原始数据(不进行日期过滤)"""
344
+ if self.use_duckdb:
345
+ result = self.conn.execute("""
346
+ SELECT name, data
347
+ FROM cache_data
348
+ WHERE cache_key = ?
349
+ """, [base_key]).fetchone()
350
+
351
+ if not result:
352
+ return None
353
+
354
+ import pickle
355
+ df = pickle.loads(result[1])
356
+ return (base_key.split(':')[0], result[0], df)
357
+ else:
358
+ if base_key not in self._cache_data:
359
+ return None
360
+ cache_entry = self._cache_data[base_key]
361
+ return (base_key.split(':')[0], cache_entry.get('name', ''), cache_entry['data'])
362
+
363
def _put_duckdb(self, base_key: str, symbol: str, name: str, df: pd.DataFrame,
                earliest_date: Optional[str], latest_date: Optional[str],
                freq: str, fq: str, expire_at: Optional[pd.Timestamp]):
    """
    Insert or replace the row for ``base_key`` in duckdb and commit.

    The DataFrame is pickled into the ``data`` column; ``updated_at`` is set
    to the current time.
    """
    import pickle

    row_values = [
        base_key,
        symbol,
        name,
        pickle.dumps(df),
        earliest_date,
        latest_date,
        freq,
        fq,
        pd.Timestamp.now(),
        expire_at,
    ]
    self.conn.execute("""
        INSERT OR REPLACE INTO cache_data
        (cache_key, symbol, name, data, earliest_date, latest_date, freq, fq, updated_at, expire_at)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    """, row_values)
    self.conn.commit()
377
+
378
+ def _put_pickle(self, base_key: str, symbol: str, name: str, df: pd.DataFrame,
379
+ earliest_date: Optional[str], latest_date: Optional[str],
380
+ freq: str, fq: str, expire_at: Optional[pd.Timestamp]):
381
+ """存储到 pickle 文件"""
382
+ self._cache_data[base_key] = {
383
+ 'symbol': symbol,
384
+ 'name': name,
385
+ 'data': df,
386
+ 'earliest_date': earliest_date,
387
+ 'latest_date': latest_date,
388
+ 'freq': freq,
389
+ 'fq': fq,
390
+ 'updated_at': pd.Timestamp.now(),
391
+ 'expire_at': expire_at
392
+ }
393
+ self._save_pickle()
394
+
395
+ def delete(self, key: str) -> None:
396
+ """删除缓存"""
397
+ symbol, _, _, freq, fq = self._extract_key_parts(key)
398
+ base_key = self._get_base_key(symbol, freq, fq)
399
+
400
+ if self.use_duckdb:
401
+ self.conn.execute("DELETE FROM cache_data WHERE cache_key = ?", [base_key])
402
+ self.conn.commit()
403
+ else:
404
+ if base_key in self._cache_data:
405
+ del self._cache_data[base_key]
406
+ self._save_pickle()
407
+
408
+ def clear(self) -> None:
409
+ """清空所有缓存"""
410
+ if self.use_duckdb:
411
+ self.conn.execute("DELETE FROM cache_data")
412
+ self.conn.commit()
413
+ else:
414
+ self._cache_data.clear()
415
+ self._save_pickle()
416
+
417
+ def close(self):
418
+ """关闭连接"""
419
+ if self.use_duckdb:
420
+ self.conn.close()
421
+