rquote-0.3.8-py3-none-any.whl → rquote-0.4.0-py3-none-any.whl
- rquote/__init__.py +6 -0
- rquote/api/price.py +43 -2
- rquote/cache/__init__.py +6 -1
- rquote/cache/persistent.py +421 -0
- rquote/markets/base.py +150 -3
- rquote/markets/cn_stock.py +13 -13
- rquote/markets/future.py +98 -13
- rquote/markets/hk_stock.py +5 -14
- rquote/markets/us_stock.py +8 -9
- rquote/parsers/kline.py +12 -4
- {rquote-0.3.8.dist-info → rquote-0.4.0.dist-info}/METADATA +57 -5
- {rquote-0.3.8.dist-info → rquote-0.4.0.dist-info}/RECORD +14 -13
- {rquote-0.3.8.dist-info → rquote-0.4.0.dist-info}/WHEEL +0 -0
- {rquote-0.3.8.dist-info → rquote-0.4.0.dist-info}/top_level.txt +0 -0
rquote/__init__.py CHANGED

@@ -35,6 +35,11 @@ from .plots import PlotUtils
 from . import config
 from . import exceptions
 from .cache import MemoryCache, Cache
+# Try to import the persistent cache (optional dependency)
+try:
+    from .cache import PersistentCache
+except ImportError:
+    PersistentCache = None
 from .utils.http import HTTPClient


@@ -93,5 +98,6 @@ __all__ = [
     'exceptions',
     'MemoryCache',
     'Cache',
+    'PersistentCache',
     'HTTPClient',
 ]
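Because of the guarded import above, `rquote.PersistentCache` may be `None` at runtime. A minimal caller-side sketch (hypothetical usage, not code from the package) of guarding on that:

```python
# Hedged sketch: fall back to MemoryCache when the optional persistent cache is unavailable.
import rquote

if rquote.PersistentCache is not None:
    cache = rquote.PersistentCache(ttl=3600)   # survives across runs
else:
    cache = rquote.MemoryCache(ttl=3600)       # in-process only
```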
rquote/api/price.py CHANGED

@@ -11,6 +11,33 @@ from ..utils.date import check_date_format
 from ..exceptions import SymbolError


+def _normalize_dataframe_index(df: pd.DataFrame) -> pd.DataFrame:
+    """
+    Normalize the DataFrame index, converting it to a DatetimeIndex
+
+    Args:
+        df: input DataFrame
+
+    Returns:
+        the processed DataFrame, with a DatetimeIndex
+    """
+    if df.empty:
+        return df
+
+    # Already a DatetimeIndex: return as-is
+    if isinstance(df.index, pd.DatetimeIndex):
+        return df
+
+    # Try to convert to a DatetimeIndex
+    try:
+        df.index = pd.to_datetime(df.index)
+    except (ValueError, TypeError) as e:
+        # Conversion failed: leave the index unchanged (it may be another index type)
+        pass
+
+    return df
+
+
 def get_price(i: str, sdate: str = '', edate: str = '', freq: str = 'day',
               days: int = 320, fq: str = 'qfq', dd=None) -> Tuple[str, str, pd.DataFrame]:
     '''
@@ -49,7 +76,12 @@ def get_price(i: str, sdate: str = '', edate: str = '', freq: str = 'day',
     market = MarketFactory.create_from_symbol(i, cache=cache)

     # Call the market's get_price method
-
+    symbol, name, df = market.get_price(i, sdate=sdate, edate=edate, freq=freq, days=days, fq=fq)
+
+    # Unified post-processing: convert the index to a DatetimeIndex
+    df = _normalize_dataframe_index(df)
+
+    return symbol, name, df


 def get_price_longer(i: str, l: int = 2, dd=None) -> Tuple[str, str, pd.DataFrame]:
@@ -65,7 +97,16 @@ def get_price_longer(i: str, l: int = 2, dd=None) -> Tuple[str, str, pd.DataFrame]:
         (symbol, name, DataFrame)
     """
     _, name, a = get_price(i, dd=dd)
-
+    # Use DatetimeIndex formatting (get_price now converts the index to a DatetimeIndex)
+    if isinstance(a.index, pd.DatetimeIndex) and len(a.index) > 0:
+        d1 = a.index[0].strftime('%Y%m%d')
+    else:
+        # Fallback: if the index is not a DatetimeIndex (should not happen), try to format it
+        try:
+            d1 = str(a.index[0])[:8] if len(str(a.index[0])) >= 8 else str(a.index[0])
+        except:
+            d1 = a.index.format()[0] if hasattr(a.index, 'format') else str(a.index[0])
+
     for y in range(1, l):
         d0 = str(int(d1[:4]) - 1) + d1[4:]
         a = pd.concat((get_price(i, d0, d1, dd=dd)[2], a), 0).drop_duplicates()
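To see what the new post-processing step does, a small sketch on toy data (`_normalize_dataframe_index` is the private helper added above, so importing it here is for demonstration only):

```python
import pandas as pd
from rquote.api.price import _normalize_dataframe_index

df = pd.DataFrame({'close': [10.1, 10.4]}, index=['2024-01-02', '2024-01-03'])
df = _normalize_dataframe_index(df)

print(type(df.index).__name__)          # DatetimeIndex
print(df.index[0].strftime('%Y%m%d'))   # 20240102 -- the format get_price_longer relies on
```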
rquote/cache/__init__.py CHANGED

@@ -5,5 +5,10 @@
 from .base import Cache
 from .memory import MemoryCache

-
+# Try to import the persistent cache (optional dependency)
+try:
+    from .persistent import PersistentCache
+    __all__ = ['Cache', 'MemoryCache', 'PersistentCache']
+except ImportError:
+    __all__ = ['Cache', 'MemoryCache']
rquote/cache/persistent.py ADDED

@@ -0,0 +1,421 @@
+# -*- coding: utf-8 -*-
+"""
+Persistent cache implementation
+"""
+import os
+import time
+from pathlib import Path
+from typing import Optional, Any, Tuple
+import pandas as pd
+from .base import Cache
+
+# Try to import duckdb (optional dependency)
+try:
+    import duckdb
+    DUCKDB_AVAILABLE = True
+except ImportError:
+    DUCKDB_AVAILABLE = False
+    duckdb = None
+
+
+class PersistentCache(Cache):
+    """Persistent cache backed by duckdb or the filesystem"""
+
+    def __init__(self, db_path: Optional[str] = None, use_duckdb: bool = True, ttl: Optional[int] = None):
+        """
+        Initialize the persistent cache
+
+        Args:
+            db_path: database file path, defaults to ~/.rquote/cache.db
+            use_duckdb: use duckdb if available, otherwise fall back to a pickle file
+            ttl: default time-to-live in seconds; None means never expire
+        """
+        self.use_duckdb = use_duckdb and DUCKDB_AVAILABLE
+        self.ttl = ttl
+
+        if db_path is None:
+            # Default path: ~/.rquote/cache.db or ~/.rquote/cache.pkl
+            home = Path.home()
+            cache_dir = home / '.rquote'
+            cache_dir.mkdir(exist_ok=True)
+            if self.use_duckdb:
+                db_path = str(cache_dir / 'cache.db')
+            else:
+                db_path = str(cache_dir / 'cache.pkl')
+
+        self.db_path = db_path
+
+        if self.use_duckdb:
+            self._init_duckdb()
+        else:
+            self._init_pickle()
+
+    def _init_duckdb(self):
+        """Initialize the duckdb database"""
+        self.conn = duckdb.connect(self.db_path)
+        # Create the cache table
+        self.conn.execute("""
+            CREATE TABLE IF NOT EXISTS cache_data (
+                cache_key TEXT PRIMARY KEY,
+                symbol TEXT NOT NULL,
+                name TEXT,
+                data BLOB,
+                earliest_date TEXT,
+                latest_date TEXT,
+                freq TEXT,
+                fq TEXT,
+                updated_at TIMESTAMP,
+                expire_at TIMESTAMP
+            )
+        """)
+        self.conn.execute("""
+            CREATE INDEX IF NOT EXISTS idx_symbol_freq_fq
+            ON cache_data(symbol, freq, fq)
+        """)
+
+    def _init_pickle(self):
+        """Initialize pickle storage"""
+        import pickle
+        self.pickle = pickle
+        if os.path.exists(self.db_path):
+            try:
+                with open(self.db_path, 'rb') as f:
+                    self._cache_data = self.pickle.load(f)
+            except:
+                self._cache_data = {}
+        else:
+            self._cache_data = {}
+
+    def _save_pickle(self):
+        """Save the pickle data"""
+        import pickle
+        with open(self.db_path, 'wb') as f:
+            self.pickle.dump(self._cache_data, f)
+
+    def _extract_key_parts(self, key: str) -> Tuple[str, str, str, str, str]:
+        """
+        Split a full key into its parts
+
+        Args:
+            key: full key in the form "symbol:sdate:edate:freq:days:fq"
+
+        Returns:
+            (symbol, sdate, edate, freq, fq)
+        """
+        parts = key.split(':')
+        if len(parts) >= 6:
+            return parts[0], parts[1], parts[2], parts[3], parts[5]
+        elif len(parts) >= 4:
+            return parts[0], parts[1] if len(parts) > 1 else '', parts[2] if len(parts) > 2 else '', parts[3], parts[4] if len(parts) > 4 else 'qfq'
+        else:
+            return parts[0] if parts else '', '', '', 'day', 'qfq'
+
+    def _get_base_key(self, symbol: str, freq: str, fq: str) -> str:
+        """Build the base key (without dates)"""
+        return f"{symbol}:{freq}:{fq}"
+
+    def _parse_date(self, date_str: str) -> Optional[pd.Timestamp]:
+        """Parse a date string"""
+        if not date_str:
+            return None
+        try:
+            return pd.to_datetime(date_str)
+        except:
+            return None
+
+    def _get_dataframe_date_range(self, df: pd.DataFrame) -> Tuple[Optional[pd.Timestamp], Optional[pd.Timestamp]]:
+        """Return the date range covered by a DataFrame"""
+        if df.empty or not isinstance(df.index, pd.DatetimeIndex):
+            return None, None
+        return df.index.min(), df.index.max()
+
+    def _filter_dataframe_by_date(self, df: pd.DataFrame, sdate: Optional[str] = None,
+                                  edate: Optional[str] = None) -> pd.DataFrame:
+        """Filter a DataFrame by date range"""
+        if df.empty:
+            return df
+
+        if not isinstance(df.index, pd.DatetimeIndex):
+            return df
+
+        start_date = self._parse_date(sdate) if sdate else None
+        end_date = self._parse_date(edate) if edate else None
+
+        if start_date is not None and end_date is not None:
+            mask = (df.index >= start_date) & (df.index <= end_date)
+            return df[mask]
+        elif start_date is not None:
+            return df[df.index >= start_date]
+        elif end_date is not None:
+            return df[df.index <= end_date]
+        else:
+            return df
+
+    def _merge_dataframes(self, df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
+        """Merge two DataFrames, dropping duplicates and sorting"""
+        if df1.empty:
+            return df2
+        if df2.empty:
+            return df1
+
+        # Concatenate and de-duplicate
+        combined = pd.concat([df1, df2])
+        combined = combined[~combined.index.duplicated(keep='last')]
+        combined = combined.sort_index()
+        return combined
+
+    def get(self, key: str) -> Optional[Any]:
+        """
+        Fetch cached data
+
+        Args:
+            key: cache key in the form "symbol:sdate:edate:freq:days:fq"
+
+        Returns:
+            (symbol, name, DataFrame) or None
+        """
+        symbol, sdate, edate, freq, fq = self._extract_key_parts(key)
+        base_key = self._get_base_key(symbol, freq, fq)
+
+        if self.use_duckdb:
+            return self._get_duckdb(base_key, symbol, sdate, edate, freq, fq)
+        else:
+            return self._get_pickle(base_key, symbol, sdate, edate, freq, fq)
+
+    def _get_duckdb(self, base_key: str, symbol: str, sdate: str, edate: str,
+                    freq: str, fq: str) -> Optional[Tuple[str, str, pd.DataFrame]]:
+        """Fetch data from duckdb"""
+        result = self.conn.execute("""
+            SELECT name, data, earliest_date, latest_date, expire_at
+            FROM cache_data
+            WHERE cache_key = ?
+        """, [base_key]).fetchone()
+
+        if not result:
+            return None
+
+        name, data_blob, earliest_date, latest_date, expire_at = result
+
+        # Expiry check
+        if self.ttl and expire_at:
+            expire_ts = pd.to_datetime(expire_at)
+            if pd.Timestamp.now() > expire_ts:
+                self.delete(base_key)
+                return None
+
+        # Deserialize the DataFrame
+        import pickle
+        df = pickle.loads(data_blob)
+
+        # Date range covered by the cached data
+        cached_earliest = self._parse_date(earliest_date)
+        cached_latest = self._parse_date(latest_date)
+
+        # If the requested range lies entirely inside the cached range, return the filtered data directly
+        request_sdate = self._parse_date(sdate) if sdate else None
+        request_edate = self._parse_date(edate) if edate else None
+
+        # Check for overlap
+        if request_edate and cached_earliest and request_edate < cached_earliest:
+            # Requested end date precedes the earliest cached date: no overlap
+            return None
+        if request_sdate and cached_latest and request_sdate > cached_latest:
+            # Requested start date follows the latest cached date: no overlap
+            return None
+
+        # There is overlap: return the portion available in the cache
+        # Compute the actually available date range
+        actual_sdate = max(request_sdate, cached_earliest) if request_sdate and cached_earliest else (request_sdate or cached_earliest)
+        actual_edate = min(request_edate, cached_latest) if request_edate and cached_latest else (request_edate or cached_latest)
+
+        # Filter the data
+        filtered_df = self._filter_dataframe_by_date(
+            df,
+            actual_sdate.strftime('%Y-%m-%d') if actual_sdate else None,
+            actual_edate.strftime('%Y-%m-%d') if actual_edate else None
+        )
+
+        if filtered_df.empty:
+            return None
+
+        return (symbol, name, filtered_df)
+
+    def _get_pickle(self, base_key: str, symbol: str, sdate: str, edate: str,
+                    freq: str, fq: str) -> Optional[Tuple[str, str, pd.DataFrame]]:
+        """Fetch data from the pickle file"""
+        if base_key not in self._cache_data:
+            return None
+
+        cache_entry = self._cache_data[base_key]
+
+        # Expiry check
+        if self.ttl and 'expire_at' in cache_entry:
+            expire_ts = cache_entry['expire_at']
+            if pd.Timestamp.now() > expire_ts:
+                del self._cache_data[base_key]
+                self._save_pickle()
+                return None
+
+        df = cache_entry['data']
+        name = cache_entry.get('name', '')
+        earliest_date = cache_entry.get('earliest_date')
+        latest_date = cache_entry.get('latest_date')
+
+        # Date range covered by the cached data
+        cached_earliest = self._parse_date(earliest_date)
+        cached_latest = self._parse_date(latest_date)
+
+        # If the requested range lies entirely inside the cached range, return the filtered data directly
+        request_sdate = self._parse_date(sdate) if sdate else None
+        request_edate = self._parse_date(edate) if edate else None
+
+        # Check for overlap
+        if request_edate and cached_earliest and request_edate < cached_earliest:
+            # Requested end date precedes the earliest cached date: no overlap
+            return None
+        if request_sdate and cached_latest and request_sdate > cached_latest:
+            # Requested start date follows the latest cached date: no overlap
+            return None
+
+        # There is overlap: return the portion available in the cache
+        # Compute the actually available date range
+        actual_sdate = max(request_sdate, cached_earliest) if request_sdate and cached_earliest else (request_sdate or cached_earliest)
+        actual_edate = min(request_edate, cached_latest) if request_edate and cached_latest else (request_edate or cached_latest)
+
+        # Filter the data
+        filtered_df = self._filter_dataframe_by_date(
+            df,
+            actual_sdate.strftime('%Y-%m-%d') if actual_sdate else None,
+            actual_edate.strftime('%Y-%m-%d') if actual_edate else None
+        )
+
+        if filtered_df.empty:
+            return None
+
+        return (symbol, name, filtered_df)
+
+    def put(self, key: str, value: Any, ttl: Optional[int] = None) -> None:
+        """
+        Store data in the cache
+
+        Args:
+            key: cache key
+            value: (symbol, name, DataFrame) tuple
+            ttl: time-to-live in seconds
+        """
+        if not isinstance(value, tuple) or len(value) != 3:
+            return
+
+        symbol, name, df = value
+        if not isinstance(df, pd.DataFrame) or df.empty:
+            return
+
+        _, _, _, freq, fq = self._extract_key_parts(key)
+        base_key = self._get_base_key(symbol, freq, fq)
+
+        # Try to fetch the full existing data under the base key and merge
+        existing = self._get_raw(base_key)
+        if existing:
+            _, existing_name, existing_df = existing
+            # Prefer the new name (if any)
+            if not name:
+                name = existing_name
+            # Merge the data
+            df = self._merge_dataframes(existing_df, df)
+
+        # Date range
+        earliest_date, latest_date = self._get_dataframe_date_range(df)
+        earliest_str = earliest_date.strftime('%Y-%m-%d') if earliest_date else None
+        latest_str = latest_date.strftime('%Y-%m-%d') if latest_date else None
+
+        # Compute the expiry time
+        expire_at = None
+        if ttl or self.ttl:
+            expire_seconds = (ttl or self.ttl)
+            expire_at = pd.Timestamp.now() + pd.Timedelta(seconds=expire_seconds)
+
+        if self.use_duckdb:
+            self._put_duckdb(base_key, symbol, name, df, earliest_str, latest_str, freq, fq, expire_at)
+        else:
+            self._put_pickle(base_key, symbol, name, df, earliest_str, latest_str, freq, fq, expire_at)
+
+    def _get_raw(self, base_key: str) -> Optional[Tuple[str, str, pd.DataFrame]]:
+        """Fetch the raw data (without date filtering)"""
+        if self.use_duckdb:
+            result = self.conn.execute("""
+                SELECT name, data
+                FROM cache_data
+                WHERE cache_key = ?
+            """, [base_key]).fetchone()
+
+            if not result:
+                return None
+
+            import pickle
+            df = pickle.loads(result[1])
+            return (base_key.split(':')[0], result[0], df)
+        else:
+            if base_key not in self._cache_data:
+                return None
+            cache_entry = self._cache_data[base_key]
+            return (base_key.split(':')[0], cache_entry.get('name', ''), cache_entry['data'])
+
+    def _put_duckdb(self, base_key: str, symbol: str, name: str, df: pd.DataFrame,
+                    earliest_date: Optional[str], latest_date: Optional[str],
+                    freq: str, fq: str, expire_at: Optional[pd.Timestamp]):
+        """Store to duckdb"""
+        import pickle
+        data_blob = pickle.dumps(df)
+
+        self.conn.execute("""
+            INSERT OR REPLACE INTO cache_data
+            (cache_key, symbol, name, data, earliest_date, latest_date, freq, fq, updated_at, expire_at)
+            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+        """, [base_key, symbol, name, data_blob, earliest_date, latest_date, freq, fq,
+              pd.Timestamp.now(), expire_at])
+        self.conn.commit()
+
+    def _put_pickle(self, base_key: str, symbol: str, name: str, df: pd.DataFrame,
+                    earliest_date: Optional[str], latest_date: Optional[str],
+                    freq: str, fq: str, expire_at: Optional[pd.Timestamp]):
+        """Store to the pickle file"""
+        self._cache_data[base_key] = {
+            'symbol': symbol,
+            'name': name,
+            'data': df,
+            'earliest_date': earliest_date,
+            'latest_date': latest_date,
+            'freq': freq,
+            'fq': fq,
+            'updated_at': pd.Timestamp.now(),
+            'expire_at': expire_at
+        }
+        self._save_pickle()
+
+    def delete(self, key: str) -> None:
+        """Delete a cache entry"""
+        symbol, _, _, freq, fq = self._extract_key_parts(key)
+        base_key = self._get_base_key(symbol, freq, fq)
+
+        if self.use_duckdb:
+            self.conn.execute("DELETE FROM cache_data WHERE cache_key = ?", [base_key])
+            self.conn.commit()
+        else:
+            if base_key in self._cache_data:
+                del self._cache_data[base_key]
+                self._save_pickle()
+
+    def clear(self) -> None:
+        """Clear the entire cache"""
+        if self.use_duckdb:
+            self.conn.execute("DELETE FROM cache_data")
+            self.conn.commit()
+        else:
+            self._cache_data.clear()
+            self._save_pickle()
+
+    def close(self):
+        """Close the connection"""
+        if self.use_duckdb:
+            self.conn.close()
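The storage scheme hinges on collapsing the request key to a date-free base key, so successive requests for the same series land on one row and merge. A sketch using the private helpers (pickle backend chosen so duckdb is not required; the path is illustrative):

```python
from rquote.cache.persistent import PersistentCache

cache = PersistentCache(db_path='/tmp/rquote_demo.pkl', use_duckdb=False)

full_key = 'sh000001:2024-01-01:2024-02-01:day:320:qfq'
# _extract_key_parts drops the days field; _get_base_key drops the dates too.
symbol, sdate, edate, freq, fq = cache._extract_key_parts(full_key)
print(cache._get_base_key(symbol, freq, fq))   # sh000001:day:qfq
```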
rquote/markets/base.py CHANGED

@@ -5,9 +5,16 @@
 from abc import ABC, abstractmethod
 from typing import Tuple, Optional
 import pandas as pd
+from datetime import datetime, timedelta
 from ..cache import Cache
 from ..data_sources.base import DataSource
+
+# Try to import the persistent cache (optional dependency)
+try:
+    from ..cache.persistent import PersistentCache
+except ImportError:
+    PersistentCache = None

 class Market(ABC):
     """Market base class"""
@@ -23,11 +30,54 @@ class Market(ABC):
         self.data_source = data_source
         self.cache = cache

-    @abstractmethod
     def get_price(self, symbol: str, sdate: str = '', edate: str = '',
                   freq: str = 'day', days: int = 320, fq: str = 'qfq') -> Tuple[str, str, pd.DataFrame]:
-        """
-
+        """
+        Fetch price data (template method; caching is handled here uniformly)
+
+        Subclasses may override this method to handle special cases, but should
+        call super().get_price() to keep the caching behaviour, or implement
+        _fetch_price_data and let the base class handle the cache automatically.
+        """
+        symbol = self.normalize_symbol(symbol)
+        cache_key = f"{symbol}:{sdate}:{edate}:{freq}:{days}:{fq}"
+
+        # For a persistent cache and day-level data, use the smart-extension logic
+        if PersistentCache and isinstance(self.cache, PersistentCache) and freq == 'day':
+            return self._get_price_with_persistent_cache(
+                symbol, sdate, edate, freq, days, fq,
+                lambda s, sd, ed, f, d, fq_param: self._fetch_price_data(s, sd, ed, f, d, fq_param)
+            )
+
+        # Plain cache logic
+        cached = self._get_cached(cache_key)
+        if cached:
+            return cached
+
+        # Fetch from the data source
+        result = self._fetch_price_data(symbol, sdate, edate, freq, days, fq)
+        self._put_cache(cache_key, result)
+        return result
+
+    def _fetch_price_data(self, symbol: str, sdate: str = '', edate: str = '',
+                          freq: str = 'day', days: int = 320, fq: str = 'qfq') -> Tuple[str, str, pd.DataFrame]:
+        """
+        Fetch price data from the data source (to be implemented by subclasses)
+
+        This method only fetches data and does not touch the cache; caching is
+        handled centrally by get_price.
+
+        Args:
+            symbol: security code (already normalized)
+            sdate: start date
+            edate: end date
+            freq: frequency
+            days: number of days
+            fq: price-adjustment mode
+
+        Returns:
+            (symbol, name, DataFrame)
+        """
+        # Default implementation: subclasses should override this method
+        return (symbol, '', pd.DataFrame())

     @abstractmethod
     def normalize_symbol(self, symbol: str) -> str:
@@ -46,4 +96,101 @@
         """Store into the cache"""
         if self.cache:
             self.cache.put(key, value)
+
+    def _get_price_with_persistent_cache(self, symbol: str, sdate: str, edate: str,
+                                         freq: str, days: int, fq: str,
+                                         fetch_func) -> Tuple[str, str, pd.DataFrame]:
+        """
+        Smart-extension logic for the persistent cache
+
+        When the requested edate is not in the cache, extend forward from the
+        latest cached date to edate; when the requested sdate is not in the
+        cache, extend backward from the earliest cached date to sdate.
+        """
+        cache_key = f"{symbol}:{sdate}:{edate}:{freq}:{days}:{fq}"
+
+        # Try the cache first
+        cached = self._get_cached(cache_key)
+        if cached:
+            _, name, cached_df = cached
+
+            # Check whether extension is needed
+            if cached_df.empty or not isinstance(cached_df.index, pd.DatetimeIndex):
+                # Cache is empty or the index is not a date index: fetch fresh data
+                result = fetch_func(symbol, sdate, edate, freq, days, fq)
+                self._put_cache(cache_key, result)
+                return result
+
+            cached_earliest = cached_df.index.min()
+            cached_latest = cached_df.index.max()
+            request_sdate = pd.to_datetime(sdate) if sdate else None
+            request_edate = pd.to_datetime(edate) if edate else None
+
+            need_extend_forward = False   # extend toward newer dates
+            need_extend_backward = False  # extend toward older dates
+            extend_sdate = sdate
+            extend_edate = edate
+
+            # Forward extension needed?
+            if request_edate and request_edate > cached_latest:
+                need_extend_forward = True
+                # Start one day after the latest cached date, out to the requested edate
+                extend_sdate = (cached_latest + pd.Timedelta(days=1)).strftime('%Y-%m-%d')
+                extend_edate = edate
+
+            # Backward extension needed?
+            if request_sdate and request_sdate < cached_earliest:
+                need_extend_backward = True
+                # Start at the requested sdate, up to one day before the earliest cached date
+                extend_sdate = sdate
+                extend_edate = (cached_earliest - pd.Timedelta(days=1)).strftime('%Y-%m-%d')
+
+            # If extension is needed, fetch the missing data
+            if need_extend_forward or need_extend_backward:
+                # Fetch the extension data
+                extended_result = fetch_func(symbol, extend_sdate, extend_edate, freq, days, fq)
+                _, _, extended_df = extended_result
+
+                if not extended_df.empty:
+                    # Merge the data
+                    merged_df = pd.concat([cached_df, extended_df])
+                    merged_df = merged_df[~merged_df.index.duplicated(keep='last')]
+                    merged_df = merged_df.sort_index()
+
+                    # Filter down to the requested date range
+                    if request_sdate or request_edate:
+                        if request_sdate and request_edate:
+                            mask = (merged_df.index >= request_sdate) & (merged_df.index <= request_edate)
+                        elif request_sdate:
+                            mask = merged_df.index >= request_sdate
+                        else:
+                            mask = merged_df.index <= request_edate
+                        merged_df = merged_df[mask]
+
+                    result = (symbol, name, merged_df)
+                    # Update the cache (same key; PersistentCache merges intelligently)
+                    self._put_cache(cache_key, result)
+                    return result
+
+            # No extension needed: return the cached data directly,
+            # filtered down to the requested date range
+            if request_sdate or request_edate:
+                if request_sdate and request_edate:
+                    mask = (cached_df.index >= request_sdate) & (cached_df.index <= request_edate)
+                elif request_sdate:
+                    mask = cached_df.index >= request_sdate
+                else:
+                    mask = cached_df.index <= request_edate
+                filtered_df = cached_df[mask]
+                return (symbol, name, filtered_df)
+
+            return (symbol, name, cached_df)
+
+        # Cache miss: fetch directly
+        if fetch_func:
+            result = fetch_func(symbol, sdate, edate, freq, days, fq)
+            self._put_cache(cache_key, result)
+            return result
+        else:
+            # Without a fetch_func, return empty data
+            return (symbol, '', pd.DataFrame())
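Under the new template method, a market subclass only supplies `_fetch_price_data`; cache lookup, store-back, and (for day-level data with a PersistentCache) the smart-extension path are inherited. A minimal sketch of the contract — `DemoMarket` and its fake data are hypothetical, and it assumes the two-argument constructor visible in the context lines (`data_source`, `cache`):

```python
import pandas as pd
from rquote.markets.base import Market

class DemoMarket(Market):
    def normalize_symbol(self, symbol: str) -> str:
        return symbol.lower()

    def _fetch_price_data(self, symbol, sdate='', edate='', freq='day',
                          days=320, fq='qfq'):
        # Fake fetch: three daily rows starting at sdate (or a fixed date).
        idx = pd.date_range(sdate or '2024-01-01', periods=3, freq='D')
        return (symbol, 'demo', pd.DataFrame({'close': [1.0, 2.0, 3.0]}, index=idx))

# get_price() is NOT overridden: caching comes entirely from the base class.
```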
rquote/markets/cn_stock.py CHANGED

@@ -27,21 +27,20 @@ class CNStockMarket(Market):
         """Fetch A-share price data"""
         symbol = self.normalize_symbol(symbol)

-        #
-        cache_key = f"{symbol}:{sdate}:{edate}:{freq}:{days}:{fq}"
-        cached = self._get_cached(cache_key)
-        if cached:
-            return cached
-
-        # Special handling for BK (sector) codes
+        # Special handling for BK (sector) codes (no caching)
         if symbol[:2] == 'BK':
             return self._get_bk_price(symbol)

-        # Special handling for PT
+        # Special handling for PT codes (no caching)
         if symbol[:2] == 'pt':
             return self._get_pt_price(symbol, sdate, edate, freq, days, fq)

-        #
+        # Use the base class's caching logic
+        return super().get_price(symbol, sdate, edate, freq, days, fq)
+
+    def _fetch_price_data(self, symbol: str, sdate: str = '', edate: str = '',
+                          freq: str = 'day', days: int = 320, fq: str = 'qfq') -> Tuple[str, str, pd.DataFrame]:
+        """Fetch A-share price data from the data source"""
         try:
             raw_data = self.data_source.fetch_kline(
                 symbol, freq=freq, sdate=sdate, edate=edate, days=days, fq=fq
@@ -51,9 +50,7 @@ class CNStockMarket(Market):
             parser = KlineParser()
             name, df = parser.parse_tencent_kline(raw_data, symbol)

-
-            self._put_cache(cache_key, result)
-            return result
+            return (symbol, name, df)
         except (DataSourceError, ParseError) as e:
             logger.warning(f'Failed to fetch {symbol} using new architecture: {e}')
             # Fall back to the legacy method
@@ -82,7 +79,10 @@ class CNStockMarket(Market):
         name = data['data']['name']
         df = pd.DataFrame([i.split(',') for i in data['data']['klines']],
                           columns=['date', 'open', 'close', 'high', 'low', 'vol', 'money', 'p'])
-        df = df.set_index(['date'])
+        df = df.set_index(['date'])
+        # Convert numeric columns
+        for col in ['open', 'close', 'high', 'low', 'vol', 'money', 'p']:
+            df[col] = pd.to_numeric(df[col], errors='coerce')

         result = (symbol, name, df)
         self._put_cache(symbol, result)
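The fallback path now coerces the kline columns, which arrive as strings, to numbers. A self-contained sketch of why `errors='coerce'` is used rather than a plain `astype(float)`:

```python
import pandas as pd

# A malformed cell, as quote feeds sometimes return.
col = pd.Series(['10.42', '-', '10.55'])
print(pd.to_numeric(col, errors='coerce').tolist())  # [10.42, nan, 10.55] -- no exception raised
```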
rquote/markets/future.py CHANGED

@@ -25,15 +25,19 @@ class FutureMarket(Market):
         """Fetch futures price data"""
         symbol = self.normalize_symbol(symbol)

-        # Special handling for BTC
+        # Special handling for BTC (no caching)
         if symbol[2:5].lower() == 'btc':
-
-
-
-
-            if cached:
-                return cached
+            if freq in ('min', '1min', 'minute'):
+                return self._get_btc_minute_price(symbol)
+            else:
+                return self._get_btc_price(symbol)

+        # Use the base class's caching logic
+        return super().get_price(symbol, sdate, edate, freq, days, fq)
+
+    def _fetch_price_data(self, symbol: str, sdate: str = '', edate: str = '',
+                          freq: str = 'day', days: int = 320, fq: str = 'qfq') -> Tuple[str, str, pd.DataFrame]:
+        """Fetch futures price data from the data source"""
         future_code = symbol[2:]  # strip the 'fu' prefix

         try:
@@ -41,15 +45,13 @@ class FutureMarket(Market):
             parser = KlineParser()
             df = parser.parse_sina_future_kline(raw_data, freq=freq)

-
-            self._put_cache(cache_key, result)
-            return result
+            return (symbol, future_code, df)
         except (DataSourceError, ParseError) as e:
             logger.warning(f'Failed to fetch {symbol} using new architecture, falling back: {e}')
             return self._get_price_fallback(symbol, future_code, freq)

     def _get_btc_price(self, symbol: str) -> Tuple[str, str, pd.DataFrame]:
-        """
+        """Fetch BTC daily prices"""
         url = 'https://quotes.sina.cn/fx/api/openapi.php/BtcService.getDayKLine?symbol=btcbtcusd'
         response = hget(url)
         if not response:
@@ -60,12 +62,95 @@ class FutureMarket(Market):
             columns=['date', 'open', 'high', 'low', 'close', 'vol', 'amount'])
         for col in ['open', 'high', 'low', 'close', 'vol', 'amount']:
             df[col] = pd.to_numeric(df[col], errors='coerce')
-        df = df.set_index('date')
+        df = df.set_index('date')

         result = (symbol, 'BTC', df)
         self._put_cache(symbol, result)
         return result

+    def _get_btc_minute_price(self, symbol: str, datalen: int = 1440) -> Tuple[str, str, pd.DataFrame]:
+        """
+        Fetch BTC minute-level prices
+
+        Args:
+            symbol: security code (e.g. 'fuBTC')
+            datalen: number of records, default 1440 (24 hours, one per minute)
+
+        Returns:
+            (symbol, name, DataFrame)
+        """
+        cache_key = f"{symbol}:min:{datalen}"
+        cached = self._get_cached(cache_key)
+        if cached:
+            return cached
+
+        url = f'https://quotes.sina.cn/fx/api/openapi.php/BtcService.getMinKline?symbol=btcbtcusd&scale=1&datalen={datalen}&callback=var%20_btcbtcusd'
+        response = hget(url)
+        if not response:
+            raise DataSourceError("Failed to fetch BTC minute data")
+
+        # Parse the JavaScript callback format: var _btcbtcusd({...})
+        text = response.text
+
+        # Strip leading comments and script tags
+        if '*/' in text:
+            text = text.split('*/', 1)[1]
+        text = text.strip()
+
+        # Locate the JSON part (from the first '{')
+        json_start = text.find('{')
+        if json_start == -1:
+            raise DataSourceError("Invalid BTC minute data format: no JSON found")
+
+        # Extract the JSON part, up to the matching final '}'
+        # Format: var _btcbtcusd({...}) or var _btcbtcusd({...});
+        json_str = text[json_start:]
+        # Strip a possible trailing ); or )
+        json_str = json_str.rstrip(');').rstrip(')')
+
+        try:
+            data = json.loads(json_str)
+        except json.JSONDecodeError as e:
+            raise DataSourceError(f"Failed to parse BTC minute data JSON: {e}")
+
+        # Check the response status
+        if data.get('result', {}).get('status', {}).get('code') != 0:
+            raise DataSourceError(f"BTC API error: {data.get('result', {}).get('status', {}).get('msg', 'Unknown error')}")
+
+        # Extract the data
+        kline_data = data.get('result', {}).get('data', [])
+        if not kline_data:
+            raise DataSourceError("No BTC minute data returned")
+
+        # Convert to a DataFrame
+        # Record format: {"d":"2025-11-16 15:35:00","o":"95835.37","h":"95919.90","l":"95835.37","c":"95919.89","v":"6","a":"551441.4297"}
+        records = []
+        for item in kline_data:
+            records.append({
+                'date': item.get('d', ''),
+                'open': item.get('o', '0'),
+                'high': item.get('h', '0'),
+                'low': item.get('l', '0'),
+                'close': item.get('c', '0'),
+                'vol': item.get('v', '0'),
+                'amount': item.get('a', '0')
+            })
+
+        df = pd.DataFrame(records)
+        if df.empty:
+            raise DataSourceError("Empty BTC minute data")
+
+        # Convert column types
+        for col in ['open', 'high', 'low', 'close', 'vol', 'amount']:
+            df[col] = pd.to_numeric(df[col], errors='coerce')
+
+        # Set the index
+        df = df.set_index('date')
+
+        result = (symbol, 'BTC', df)
+        self._put_cache(cache_key, result)
+        return result
+
     def _get_price_fallback(self, symbol: str, future_code: str, freq: str) -> Tuple[str, str, pd.DataFrame]:
         """Fallback method"""
         from ..utils.helpers import load_js_var_json
@@ -84,7 +169,7 @@ class FutureMarket(Market):
         df.columns = ['date', 'open', 'high', 'low', 'close', 'vol', 'p', 's']
         for col in ['open', 'high', 'low', 'close', 'vol', 'p', 's']:
             df[col] = pd.to_numeric(df[col], errors='coerce')
-        df = df.set_index('date')
+        df = df.set_index('date')
         result = (symbol, future_code, df)

         self._put_cache(f"{symbol}:{freq}", result)
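The minute endpoint wraps its JSON in a JavaScript callback; a standalone sketch of the stripping logic on a synthetic payload (the real response comes from quotes.sina.cn):

```python
import json

text = 'var _btcbtcusd({"result": {"status": {"code": 0}, "data": []}});'
json_start = text.find('{')
json_str = text[json_start:].rstrip(');')   # rstrip treats ')' and ';' as a character set
data = json.loads(json_str)
print(data['result']['status']['code'])     # 0
```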
rquote/markets/hk_stock.py CHANGED

@@ -19,27 +19,18 @@ class HKStockMarket(Market):
             return 'hk' + symbol
         return symbol

-    def
-
-        """
-        symbol = self.normalize_symbol(symbol)
-
-        cache_key = f"{symbol}:{sdate}:{edate}:{freq}:{days}:{fq}"
-        cached = self._get_cached(cache_key)
-        if cached:
-            return cached
-
+    def _fetch_price_data(self, symbol: str, sdate: str = '', edate: str = '',
+                          freq: str = 'day', days: int = 320, fq: str = 'qfq') -> Tuple[str, str, pd.DataFrame]:
+        """Fetch HK stock price data from the data source"""
         try:
             raw_data = self.data_source.fetch_kline(
                 symbol, freq=freq, sdate=sdate, edate=edate, days=days, fq=fq
             )

             parser = KlineParser()
-            name, df = parser.parse_tencent_kline(raw_data, symbol)
+            name, df = parser.parse_tencent_kline(raw_data, symbol, fq=fq)

-
-            self._put_cache(cache_key, result)
-            return result
+            return (symbol, name, df)
         except (DataSourceError, ParseError) as e:
             logger.warning(f'Failed to fetch {symbol}: {e}')
             raise
rquote/markets/us_stock.py CHANGED

@@ -25,15 +25,16 @@ class USStockMarket(Market):
         """Fetch US stock price data"""
         symbol = self.normalize_symbol(symbol)

-
-        cached = self._get_cached(cache_key)
-        if cached:
-            return cached
-
-        # Special handling for minute data
+        # Special handling for minute data (no caching)
         if freq in ('min', '1min', 'minute'):
             return self._get_minute_data(symbol)

+        # Use the base class's caching logic
+        return super().get_price(symbol, sdate, edate, freq, days, fq)
+
+    def _fetch_price_data(self, symbol: str, sdate: str = '', edate: str = '',
+                          freq: str = 'day', days: int = 320, fq: str = 'qfq') -> Tuple[str, str, pd.DataFrame]:
+        """Fetch US stock price data from the data source"""
         try:
             raw_data = self.data_source.fetch_kline(
                 symbol, freq=freq, sdate=sdate, edate=edate, days=days, fq=fq
@@ -42,9 +43,7 @@ class USStockMarket(Market):
             parser = KlineParser()
             name, df = parser.parse_tencent_kline(raw_data, symbol)

-
-            self._put_cache(cache_key, result)
-            return result
+            return (symbol, name, df)
         except (DataSourceError, ParseError) as e:
             logger.warning(f'Failed to fetch {symbol}: {e}')
             raise
rquote/parsers/kline.py CHANGED

@@ -11,7 +11,7 @@ class KlineParser:
     """K-line data parser"""

     @staticmethod
-    def parse_tencent_kline(data: Dict[str, Any], symbol: str) -> Tuple[str, pd.DataFrame]:
+    def parse_tencent_kline(data: Dict[str, Any], symbol: str, fq: str = 'qfq') -> Tuple[str, pd.DataFrame]:
         """
         Parse Tencent K-line data

@@ -27,9 +27,17 @@ class KlineParser:
         if not symbol_data:
             raise ParseError(f"No data for symbol {symbol}")

-        #
-
-
+        # Find the time key, preferring keys that match the fq parameter
+        # Priority by fq: qfq -> qfqday first, hfq -> hfqday first, otherwise day first
+        if fq == 'qfq':
+            time_keys = ['qfqday', 'day', 'hfqday', 'qfqweek', 'week', 'hfqweek',
+                         'qfqmonth', 'month', 'hfqmonth']
+        elif fq == 'hfq':
+            time_keys = ['hfqday', 'day', 'qfqday', 'hfqweek', 'week', 'qfqweek',
+                         'hfqmonth', 'month', 'qfqmonth']
+        else:
+            time_keys = ['day', 'qfqday', 'hfqday', 'week', 'qfqweek', 'hfqweek',
+                         'month', 'qfqmonth', 'hfqmonth']
         tk = None
         for tkt in time_keys:
            if tkt in symbol_data:
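Effect of the new key priority, sketched on a schematic payload: with `fq='hfq'`, `hfqday` now wins over plain `day` when both are present.

```python
symbol_data = {'day': ['...'], 'hfqday': ['...']}   # schematic Tencent payload keys
time_keys = ['hfqday', 'day', 'qfqday', 'hfqweek', 'week', 'qfqweek',
             'hfqmonth', 'month', 'qfqmonth']       # the fq == 'hfq' ordering above
tk = next((k for k in time_keys if k in symbol_data), None)
print(tk)   # hfqday
```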
{rquote-0.3.8.dist-info → rquote-0.4.0.dist-info}/METADATA CHANGED

@@ -1,14 +1,16 @@
 Metadata-Version: 2.4
 Name: rquote
-Version: 0.
+Version: 0.4.0
 Summary: Mostly day quotes of cn/hk/us/fund/future markets, side with quote list fetch
-Requires-Python: >=3.
+Requires-Python: >=3.9.0
 Description-Content-Type: text/markdown
 Requires-Dist: build>=0.9.0
 Requires-Dist: httpx>=0.20.0
 Requires-Dist: pandas>=1.0.0
 Requires-Dist: setuptools>=42
 Requires-Dist: twine>=3.8.0
+Provides-Extra: persistent
+Requires-Dist: duckdb>=0.9.0; extra == "persistent"

 # rquote

@@ -60,17 +62,67 @@ sid, name, df = get_price('sz000001', sdate='2024-01-01', edate='2024-02-01')

 ### Using a cache

+#### In-memory cache (MemoryCache)
+
 ```python
 from rquote import get_price, MemoryCache

 # Create a cache instance
 cache = MemoryCache(ttl=3600)  # cache for 1 hour

-# Use the cache (via the dd
-
-
+# Use the cache (pass the MemoryCache instance via the dd parameter)
+sid, name, df = get_price('sh000001', dd=cache)
+
+# Note: MemoryCache is an in-memory cache; data only lives while the current process runs
+# Once the script exits, the cached data is lost
+```
+
+**Cache lifecycle notes:**
+- `MemoryCache` is a pure in-memory cache; data lives in process memory
+- Cached data is only valid while the current script is running
+- When the script exits, all cached data is lost
+
+#### Persistent cache (PersistentCache)
+
+The persistent cache persists across processes/runs; data is saved to a local file.
+
+**Install the optional dependency:**
+```bash
+pip install rquote[persistent]
+# or
+uv pip install "rquote[persistent]"
+```
+
+**Using the persistent cache:**
+```python
+from rquote import get_price, PersistentCache
+
+# Create a persistent cache instance
+# duckdb is used by default (if installed), otherwise a pickle file
+cache = PersistentCache(ttl=86400)  # cache for 24 hours; default path: ~/.rquote/cache.db
+
+# Or give a custom path
+cache = PersistentCache(db_path='./my_cache.db', use_duckdb=True)
+
+# Use the cache
+sid, name, df = get_price('sh000001', dd=cache)
+
+# The persistent cache supports smart extension:
+# - If the requested end date is not cached, it extends forward from the latest cached date
+# - If the requested start date is not cached, it extends backward from the earliest cached date
+# - Data is merged automatically, avoiding duplicate requests
+
+# Close the cache (optional; data is saved automatically on exit)
+cache.close()
 ```

+**Persistent cache features:**
+- ✅ Persists across processes/runs: data is stored in a local file and reusable on the next run
+- ✅ Smart data merging: data for the same security is merged automatically; the key carries no date range
+- ✅ Smart extension: when the requested date range exceeds the cache, it is extended and merged automatically
+- ✅ TTL support: a cache expiry time can be configured
+- ✅ Optional duckdb: duckdb storage is used when installed (better performance), otherwise a pickle file
+
 ## Main features

 ### Historical price data fetching
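A hedged sketch of the smart extension the README describes (day-level data only; the file path is illustrative, and the calls hit the network):

```python
from rquote import get_price, PersistentCache

cache = PersistentCache(db_path='./demo_cache.db')

# First call fetches and stores the January window.
get_price('sh000001', sdate='2024-01-01', edate='2024-02-01', dd=cache)

# Second call only fetches the missing February-March slice, then merges it
# into the same symbol:freq:fq row.
get_price('sh000001', sdate='2024-01-01', edate='2024-03-01', dd=cache)
cache.close()
```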
{rquote-0.3.8.dist-info → rquote-0.4.0.dist-info}/RECORD CHANGED

@@ -1,16 +1,17 @@
-rquote/__init__.py,sha256
+rquote/__init__.py,sha256=HMXqZ_wfGoRqw1V3xm2MyBGYKB9ooGWIRnk60bisLZo,2370
 rquote/config.py,sha256=noep_VzY_nJehnkPQb4mkwzpeYLwkU1riqofQJ6Hhw0,1108
 rquote/exceptions.py,sha256=lJH2GC5dDhMoW_OtlBc03wlUn684-7jNPyF1NjmfVIE,569
 rquote/plots.py,sha256=UQn4sjhIzVwagfhUDM738b2HHjKo4tRdU2UCs_1-FbY,2341
 rquote/utils.py,sha256=bH0ZFIo-ZelNztzPS6BXFShXE3yGA9USI_P9INN0Y-s,310
 rquote/api/__init__.py,sha256=ptizO--im80HaxlzxkJo9BKdJPEnbu00R9UDgcoA0mU,656
 rquote/api/lists.py,sha256=fRebS02Fi0qe6KpWBA-9W1UG0It6__DmRlNimtMa7L8,5331
-rquote/api/price.py,sha256=
+rquote/api/price.py,sha256=I5lZl6cUQRlE4AtzNbR-uGZt1ho9vgP1cgNFDjaigMA,3575
 rquote/api/stock_info.py,sha256=912ICdIBr8z2lKWDbq3gG0E94czTPvbx9aXsKUi-QkE,1537
 rquote/api/tick.py,sha256=nEcjuAjtBHUaD8KPRLg643piVa21PhKDQvkVWNwvvME,1431
-rquote/cache/__init__.py,sha256=
+rquote/cache/__init__.py,sha256=S393I5Wmp0QooaRka9n7bvDUdEbg3jUhm6u815T86rM,317
 rquote/cache/base.py,sha256=orzG4Yo-6gzVG027j1-LTZPT718JohnCdLDnOLoLUQ4,515
 rquote/cache/memory.py,sha256=7z4keb3q91pzI4ASQWy1MU8T5nbWLCEUjJcStv_3hvk,1933
+rquote/cache/persistent.py,sha256=_ASNobFDYvZ51XIux3NZN4M24Z-rs6gOcWKGRpbd6tg,15741
 rquote/data_sources/__init__.py,sha256=WCe1aam4677jM5G6wP4a-dQFTeBzcU5PJqsKieAVMBo,215
 rquote/data_sources/base.py,sha256=JuKsTMxH7y8yRxHg3JbLzQwXPr43rS4pnwc5625u2U4,443
 rquote/data_sources/sina.py,sha256=T_3Dl0Mwlhx8CKRJll_UKobYecRWltGaIOiGkpHS43Q,3300
@@ -18,21 +19,21 @@ rquote/data_sources/tencent.py,sha256=ayt1O85pheLwzX3z5c6Qij1NrmUywcsz6YcSVzdDoM
 rquote/factors/__init__.py,sha256=_ZbH2XxYtXwCJpvRVdNvGncoPSpMqrtlYmf1_fMGIjM,116
 rquote/factors/technical.py,sha256=dPDs3pDEDRV9iQJBrSoKpGFLQMjOqyoBdN2rUntpOUU,4235
 rquote/markets/__init__.py,sha256=k4F8cZgb-phqemMqhZXFPdOKsR4P--DD3d5i21vKhbg,365
-rquote/markets/base.py,sha256=
-rquote/markets/cn_stock.py,sha256=
+rquote/markets/base.py,sha256=EYd9sDK0tfqtwXTXY9E6RtniWp0JNZ3REYGQ0b1Q7xU,8151
+rquote/markets/cn_stock.py,sha256=nu2ebTE4a6FAJkvpMN0FEPuqwom_hqTRjnUg96cQGKc,8320
 rquote/markets/factory.py,sha256=4Txpuok0LBOLT_vAiIU-NslwVnYF7sKHCdlacAboxpo,2875
-rquote/markets/future.py,sha256=
-rquote/markets/hk_stock.py,sha256=
-rquote/markets/us_stock.py,sha256=
+rquote/markets/future.py,sha256=yGMyUu9Fv75jbzPbvW6_36otEeebSij7vnzow_zyEn8,7358
+rquote/markets/hk_stock.py,sha256=AhRJpWp027ACew9ogxkVCJXbqbYQ1AkbFwDJccXbvAs,1183
+rquote/markets/us_stock.py,sha256=GT4IxCMTgxb0JSkDa6acZ6PpHLhK6lrskI0ftiYxGCA,2603
 rquote/parsers/__init__.py,sha256=q4g-FgpzxKBPfhJiQH3B5MEeZWUIXlyre-vAnOnfYmA,110
-rquote/parsers/kline.py,sha256=
+rquote/parsers/kline.py,sha256=g6k8W76-4hpYsuBgvwmb5G6ZkzHOJDX-JrVVXYksw4c,4020
 rquote/utils/__init__.py,sha256=-ZHABqFHQeJrCCsgnqEYWR57jl7GduCKn2V3hpFi-pE,348
 rquote/utils/date.py,sha256=nhK3xQ2kFvKhdkPw-2HR2V0PSzBfXmX8L7laG7VmG2E,913
 rquote/utils/helpers.py,sha256=V07n9BtRS8bEJH023Kca78-unk7iD3B9hn2UjELetYs,354
 rquote/utils/http.py,sha256=X0Alhnu0CNqyQeOt6ivUWmh2XwrWxXd2lSpQOKDdnzw,3249
 rquote/utils/logging.py,sha256=cbeRH4ODazn7iyQmGoEBT2lH5LX4Ca3zDfs_20J1T28,566
 rquote/utils/web.py,sha256=I8_pcThW6VUvahuRHdtp32iZwr85hEt1hB6TgznMy_U,3854
-rquote-0.
-rquote-0.
-rquote-0.
-rquote-0.
+rquote-0.4.0.dist-info/METADATA,sha256=vdD7qOgz0sX7iiQJ-jHpWNKuFCcMzk4j4cYFuxB-6iw,13213
+rquote-0.4.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+rquote-0.4.0.dist-info/top_level.txt,sha256=CehAiaZx7Fo8HGoV2zd5GhILUW1jQEN8YS-cWMlrK9Y,7
+rquote-0.4.0.dist-info/RECORD,,

{rquote-0.3.8.dist-info → rquote-0.4.0.dist-info}/WHEEL: file without changes
{rquote-0.3.8.dist-info → rquote-0.4.0.dist-info}/top_level.txt: file without changes