rquote 0.3.9__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rquote/__init__.py CHANGED
@@ -35,6 +35,11 @@ from .plots import PlotUtils
35
35
  from . import config
36
36
  from . import exceptions
37
37
  from .cache import MemoryCache, Cache
38
+ # 尝试导入持久化缓存(可选依赖)
39
+ try:
40
+ from .cache import PersistentCache
41
+ except ImportError:
42
+ PersistentCache = None
38
43
  from .utils.http import HTTPClient
39
44
 
40
45
 
@@ -93,5 +98,6 @@ __all__ = [
93
98
  'exceptions',
94
99
  'MemoryCache',
95
100
  'Cache',
101
+ 'PersistentCache',
96
102
  'HTTPClient',
97
103
  ]
rquote/cache/__init__.py CHANGED
@@ -5,5 +5,10 @@
5
5
  from .base import Cache
6
6
  from .memory import MemoryCache
7
7
 
8
- __all__ = ['Cache', 'MemoryCache']
8
+ # 尝试导入持久化缓存(可选依赖)
9
+ try:
10
+ from .persistent import PersistentCache
11
+ __all__ = ['Cache', 'MemoryCache', 'PersistentCache']
12
+ except ImportError:
13
+ __all__ = ['Cache', 'MemoryCache']
9
14
 
@@ -0,0 +1,421 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ 持久化缓存实现
4
+ """
5
+ import os
6
+ import time
7
+ from pathlib import Path
8
+ from typing import Optional, Any, Tuple
9
+ import pandas as pd
10
+ from .base import Cache
11
+
12
+ # 尝试导入 duckdb(可选依赖)
13
+ try:
14
+ import duckdb
15
+ DUCKDB_AVAILABLE = True
16
+ except ImportError:
17
+ DUCKDB_AVAILABLE = False
18
+ duckdb = None
19
+
20
+
21
class PersistentCache(Cache):
    """Persistent cache backed by a duckdb database or a pickle file.

    Entries are stored under a base key ``"symbol:freq:fq"`` (the date and
    ``days`` parts of the caller's key are dropped).  Every ``put`` for the
    same base key is merged into one DataFrame, and ``get`` slices the
    requested date range out of that merged frame.

    NOTE(security): DataFrames are serialized with ``pickle`` in both
    backends.  Only point ``db_path`` at files written by a trusted process;
    unpickling untrusted data can execute arbitrary code.
    """

    def __init__(self, db_path: Optional[str] = None, use_duckdb: bool = True, ttl: Optional[int] = None):
        """
        Initialize the persistent cache.

        Args:
            db_path: Storage file path. Defaults to ``~/.rquote/cache.db``
                (duckdb backend) or ``~/.rquote/cache.pkl`` (pickle backend).
            use_duckdb: Use duckdb when it is importable; otherwise (or when
                False) fall back to a pickle file.
            ttl: Default expiry in seconds; ``None`` means entries stored
                without an explicit TTL never expire.
        """
        self.use_duckdb = use_duckdb and DUCKDB_AVAILABLE
        self.ttl = ttl

        if db_path is None:
            cache_dir = Path.home() / '.rquote'
            cache_dir.mkdir(parents=True, exist_ok=True)
            file_name = 'cache.db' if self.use_duckdb else 'cache.pkl'
            db_path = str(cache_dir / file_name)

        self.db_path = db_path

        if self.use_duckdb:
            self._init_duckdb()
        else:
            self._init_pickle()

    def _init_duckdb(self):
        """Open the duckdb connection and create the cache table and index if missing."""
        self.conn = duckdb.connect(self.db_path)
        self.conn.execute("""
            CREATE TABLE IF NOT EXISTS cache_data (
                cache_key TEXT PRIMARY KEY,
                symbol TEXT NOT NULL,
                name TEXT,
                data BLOB,
                earliest_date TEXT,
                latest_date TEXT,
                freq TEXT,
                fq TEXT,
                updated_at TIMESTAMP,
                expire_at TIMESTAMP
            )
        """)
        self.conn.execute("""
            CREATE INDEX IF NOT EXISTS idx_symbol_freq_fq
            ON cache_data(symbol, freq, fq)
        """)

    def _init_pickle(self):
        """Load the pickle-backed store, starting empty if the file is missing or unreadable."""
        import pickle
        self.pickle = pickle
        self._cache_data = {}
        if not os.path.exists(self.db_path):
            return
        try:
            with open(self.db_path, 'rb') as f:
                loaded = pickle.load(f)
        except Exception:
            # Best effort: a corrupt or incompatible cache file must not break
            # the caller, so fall back to an empty cache (it is only a cache).
            return
        if isinstance(loaded, dict):
            self._cache_data = loaded

    def _save_pickle(self):
        """Write the in-memory store to ``db_path`` atomically."""
        # Write to a sibling temp file first so a crash mid-write cannot leave
        # a truncated cache file behind; os.replace swaps it in atomically.
        tmp_path = f"{self.db_path}.tmp"
        with open(tmp_path, 'wb') as f:
            self.pickle.dump(self._cache_data, f)
        os.replace(tmp_path, self.db_path)

    def _extract_key_parts(self, key: str) -> Tuple[str, str, str, str, str]:
        """
        Split a full cache key into its components.

        Args:
            key: Full key, e.g. ``"symbol:sdate:edate:freq:days:fq"``.

        Returns:
            ``(symbol, sdate, edate, freq, fq)``.  For 4- or 5-part keys the
            fifth part (if any) is taken as ``fq``; shorter keys fall back to
            ``freq='day'`` and ``fq='qfq'`` with empty dates.
        """
        parts = key.split(':')
        count = len(parts)
        if count >= 6:
            return parts[0], parts[1], parts[2], parts[3], parts[5]
        if count >= 4:
            fq = parts[4] if count > 4 else 'qfq'
            return parts[0], parts[1], parts[2], parts[3], fq
        # str.split always yields at least one element.
        return parts[0], '', '', 'day', 'qfq'

    def _get_base_key(self, symbol: str, freq: str, fq: str) -> str:
        """Build the date-independent storage key ``"symbol:freq:fq"``."""
        return f"{symbol}:{freq}:{fq}"

    def _parse_date(self, date_str: str) -> Optional[pd.Timestamp]:
        """Parse a date string into a Timestamp; return None if empty or unparseable."""
        if not date_str:
            return None
        try:
            parsed = pd.to_datetime(date_str)
        except (ValueError, TypeError, OverflowError):
            return None
        return None if parsed is pd.NaT else parsed

    @staticmethod
    def _format_date(ts: Optional[pd.Timestamp]) -> Optional[str]:
        """Format a Timestamp as ``YYYY-MM-DD``; None/NaT become None."""
        if ts is None or pd.isna(ts):
            return None
        return ts.strftime('%Y-%m-%d')

    @staticmethod
    def _is_expired(expire_at: Any) -> bool:
        """Return True when a stored expiry time exists and lies in the past."""
        if expire_at is None:
            return False
        expire_ts = pd.to_datetime(expire_at)
        if pd.isna(expire_ts):
            return False
        return pd.Timestamp.now() > expire_ts

    def _get_dataframe_date_range(self, df: pd.DataFrame) -> Tuple[Optional[pd.Timestamp], Optional[pd.Timestamp]]:
        """Return ``(min, max)`` of a DatetimeIndex, or ``(None, None)`` if unavailable."""
        if df.empty or not isinstance(df.index, pd.DatetimeIndex):
            return None, None
        return df.index.min(), df.index.max()

    def _filter_dataframe_by_date(self, df: pd.DataFrame, sdate: Optional[str] = None,
                                  edate: Optional[str] = None) -> pd.DataFrame:
        """Restrict ``df`` to rows whose index lies within ``[sdate, edate]`` (inclusive).

        Frames that are empty or not indexed by a DatetimeIndex are returned
        unchanged, as are frames when neither bound parses to a date.
        """
        if df.empty or not isinstance(df.index, pd.DatetimeIndex):
            return df

        start_date = self._parse_date(sdate) if sdate else None
        end_date = self._parse_date(edate) if edate else None

        if start_date is not None and end_date is not None:
            return df[(df.index >= start_date) & (df.index <= end_date)]
        if start_date is not None:
            return df[df.index >= start_date]
        if end_date is not None:
            return df[df.index <= end_date]
        return df

    def _merge_dataframes(self, df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
        """Concatenate two frames, keep the last row per duplicated index, and sort by index."""
        if df1.empty:
            return df2
        if df2.empty:
            return df1

        combined = pd.concat([df1, df2])
        # keep='last' lets rows from df2 (the newer data) win on overlap.
        combined = combined[~combined.index.duplicated(keep='last')]
        return combined.sort_index()

    def _slice_cached(self, symbol: str, name: str, df: pd.DataFrame,
                      earliest_date: Optional[str], latest_date: Optional[str],
                      sdate: str, edate: str) -> Optional[Tuple[str, str, pd.DataFrame]]:
        """Return the part of a cached frame that overlaps the requested range.

        Shared by both backends.  Returns None when the request does not
        overlap the cached range or the overlap contains no rows.
        """
        cached_earliest = self._parse_date(earliest_date)
        cached_latest = self._parse_date(latest_date)
        request_sdate = self._parse_date(sdate) if sdate else None
        request_edate = self._parse_date(edate) if edate else None

        # Request ends before the cache starts: no overlap.
        if (request_edate is not None and cached_earliest is not None
                and request_edate < cached_earliest):
            return None
        # Request starts after the cache ends: no overlap.
        if (request_sdate is not None and cached_latest is not None
                and request_sdate > cached_latest):
            return None

        # Clamp the requested range to what the cache actually holds.
        if request_sdate is not None and cached_earliest is not None:
            actual_sdate = max(request_sdate, cached_earliest)
        else:
            actual_sdate = request_sdate if request_sdate is not None else cached_earliest
        if request_edate is not None and cached_latest is not None:
            actual_edate = min(request_edate, cached_latest)
        else:
            actual_edate = request_edate if request_edate is not None else cached_latest

        filtered_df = self._filter_dataframe_by_date(
            df,
            self._format_date(actual_sdate),
            self._format_date(actual_edate),
        )
        if filtered_df.empty:
            return None
        return (symbol, name, filtered_df)

    def get(self, key: str) -> Optional[Any]:
        """
        Look up cached data.

        Args:
            key: Cache key, e.g. ``"symbol:sdate:edate:freq:days:fq"``.

        Returns:
            ``(symbol, name, DataFrame)`` limited to the cached part of the
            requested date range, or None on a miss / expired entry.
        """
        symbol, sdate, edate, freq, fq = self._extract_key_parts(key)
        base_key = self._get_base_key(symbol, freq, fq)

        if self.use_duckdb:
            return self._get_duckdb(base_key, symbol, sdate, edate, freq, fq)
        return self._get_pickle(base_key, symbol, sdate, edate, freq, fq)

    def _get_duckdb(self, base_key: str, symbol: str, sdate: str, edate: str,
                    freq: str, fq: str) -> Optional[Tuple[str, str, pd.DataFrame]]:
        """Fetch and slice an entry from the duckdb backend."""
        row = self.conn.execute("""
            SELECT name, data, earliest_date, latest_date, expire_at
            FROM cache_data
            WHERE cache_key = ?
        """, [base_key]).fetchone()

        if not row:
            return None

        name, data_blob, earliest_date, latest_date, expire_at = row

        if self._is_expired(expire_at):
            # Delete by base key directly: delete() expects a full key and
            # would re-parse "symbol:freq:fq" into the wrong base key.
            self._delete_base(base_key)
            return None

        # Trusted local data only -- see the class docstring.
        import pickle
        df = pickle.loads(data_blob)

        return self._slice_cached(symbol, name, df, earliest_date, latest_date, sdate, edate)

    def _get_pickle(self, base_key: str, symbol: str, sdate: str, edate: str,
                    freq: str, fq: str) -> Optional[Tuple[str, str, pd.DataFrame]]:
        """Fetch and slice an entry from the pickle backend."""
        cache_entry = self._cache_data.get(base_key)
        if cache_entry is None:
            return None

        # .get() tolerates entries stored without an expiry (expire_at None).
        if self._is_expired(cache_entry.get('expire_at')):
            self._delete_base(base_key)
            return None

        return self._slice_cached(
            symbol,
            cache_entry.get('name', ''),
            cache_entry['data'],
            cache_entry.get('earliest_date'),
            cache_entry.get('latest_date'),
            sdate,
            edate,
        )

    def put(self, key: str, value: Any, ttl: Optional[int] = None) -> None:
        """
        Store data, merging it with anything already cached for the same base key.

        Args:
            key: Cache key; only its ``freq`` and ``fq`` parts are used.
            value: ``(symbol, name, DataFrame)`` tuple.  Anything else, or an
                empty DataFrame, is silently ignored.
            ttl: Expiry in seconds for this entry; falls back to the
                instance-level ``ttl`` when falsy.
        """
        if not isinstance(value, tuple) or len(value) != 3:
            return

        symbol, name, df = value
        if not isinstance(df, pd.DataFrame) or df.empty:
            return

        _, _, _, freq, fq = self._extract_key_parts(key)
        base_key = self._get_base_key(symbol, freq, fq)

        existing = self._get_raw(base_key)
        if existing:
            _, existing_name, existing_df = existing
            # Prefer the new name; keep the stored one if the new one is empty.
            if not name:
                name = existing_name
            df = self._merge_dataframes(existing_df, df)

        earliest_date, latest_date = self._get_dataframe_date_range(df)
        earliest_str = self._format_date(earliest_date)
        latest_str = self._format_date(latest_date)

        expire_at = None
        expire_seconds = ttl or self.ttl
        if expire_seconds:
            expire_at = pd.Timestamp.now() + pd.Timedelta(seconds=expire_seconds)

        if self.use_duckdb:
            self._put_duckdb(base_key, symbol, name, df, earliest_str, latest_str, freq, fq, expire_at)
        else:
            self._put_pickle(base_key, symbol, name, df, earliest_str, latest_str, freq, fq, expire_at)

    def _get_raw(self, base_key: str) -> Optional[Tuple[str, str, pd.DataFrame]]:
        """Return the full stored ``(symbol, name, DataFrame)`` for a base key, without date filtering."""
        symbol = base_key.split(':')[0]
        if self.use_duckdb:
            row = self.conn.execute("""
                SELECT name, data
                FROM cache_data
                WHERE cache_key = ?
            """, [base_key]).fetchone()

            if not row:
                return None

            # Trusted local data only -- see the class docstring.
            import pickle
            return (symbol, row[0], pickle.loads(row[1]))

        if base_key not in self._cache_data:
            return None
        cache_entry = self._cache_data[base_key]
        return (symbol, cache_entry.get('name', ''), cache_entry['data'])

    def _put_duckdb(self, base_key: str, symbol: str, name: str, df: pd.DataFrame,
                    earliest_date: Optional[str], latest_date: Optional[str],
                    freq: str, fq: str, expire_at: Optional[pd.Timestamp]):
        """Insert or replace one entry in the duckdb backend."""
        import pickle
        data_blob = pickle.dumps(df)

        self.conn.execute("""
            INSERT OR REPLACE INTO cache_data
            (cache_key, symbol, name, data, earliest_date, latest_date, freq, fq, updated_at, expire_at)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """, [base_key, symbol, name, data_blob, earliest_date, latest_date, freq, fq,
              pd.Timestamp.now(), expire_at])
        self.conn.commit()

    def _put_pickle(self, base_key: str, symbol: str, name: str, df: pd.DataFrame,
                    earliest_date: Optional[str], latest_date: Optional[str],
                    freq: str, fq: str, expire_at: Optional[pd.Timestamp]):
        """Store one entry in the pickle backend and flush it to disk."""
        self._cache_data[base_key] = {
            'symbol': symbol,
            'name': name,
            'data': df,
            'earliest_date': earliest_date,
            'latest_date': latest_date,
            'freq': freq,
            'fq': fq,
            'updated_at': pd.Timestamp.now(),
            'expire_at': expire_at
        }
        self._save_pickle()

    def _delete_base(self, base_key: str) -> None:
        """Remove the entry stored under an already-computed base key."""
        if self.use_duckdb:
            self.conn.execute("DELETE FROM cache_data WHERE cache_key = ?", [base_key])
            self.conn.commit()
        elif base_key in self._cache_data:
            del self._cache_data[base_key]
            self._save_pickle()

    def delete(self, key: str) -> None:
        """Delete the entry that a full cache key maps to."""
        symbol, _, _, freq, fq = self._extract_key_parts(key)
        self._delete_base(self._get_base_key(symbol, freq, fq))

    def clear(self) -> None:
        """Remove every cached entry."""
        if self.use_duckdb:
            self.conn.execute("DELETE FROM cache_data")
            self.conn.commit()
        else:
            self._cache_data.clear()
            self._save_pickle()

    def close(self):
        """Close the duckdb connection (no-op for the pickle backend)."""
        if self.use_duckdb:
            self.conn.close()
421
+
rquote/markets/base.py CHANGED
@@ -5,9 +5,16 @@
5
5
  from abc import ABC, abstractmethod
6
6
  from typing import Tuple, Optional
7
7
  import pandas as pd
8
+ from datetime import datetime, timedelta
8
9
  from ..cache import Cache
9
10
  from ..data_sources.base import DataSource
10
11
 
12
+ # 尝试导入持久化缓存(可选依赖)
13
+ try:
14
+ from ..cache.persistent import PersistentCache
15
+ except ImportError:
16
+ PersistentCache = None
17
+
11
18
 
12
19
  class Market(ABC):
13
20
  """市场基类"""
@@ -23,11 +30,54 @@ class Market(ABC):
23
30
  self.data_source = data_source
24
31
  self.cache = cache
25
32
 
26
- @abstractmethod
27
33
  def get_price(self, symbol: str, sdate: str = '', edate: str = '',
28
34
  freq: str = 'day', days: int = 320, fq: str = 'qfq') -> Tuple[str, str, pd.DataFrame]:
29
- """获取价格数据"""
30
- pass
35
+ """
36
+ 获取价格数据(模板方法,统一处理缓存逻辑)
37
+
38
+ 子类可以重写此方法以处理特殊情况,但建议调用 super().get_price() 来使用缓存功能
39
+ 或者实现 _fetch_price_data 方法,让基类自动处理缓存
40
+ """
41
+ symbol = self.normalize_symbol(symbol)
42
+ cache_key = f"{symbol}:{sdate}:{edate}:{freq}:{days}:{fq}"
43
+
44
+ # 如果是持久化缓存且是日级别数据,使用智能扩展逻辑
45
+ if PersistentCache and isinstance(self.cache, PersistentCache) and freq == 'day':
46
+ return self._get_price_with_persistent_cache(
47
+ symbol, sdate, edate, freq, days, fq,
48
+ lambda s, sd, ed, f, d, fq_param: self._fetch_price_data(s, sd, ed, f, d, fq_param)
49
+ )
50
+
51
+ # 普通缓存逻辑
52
+ cached = self._get_cached(cache_key)
53
+ if cached:
54
+ return cached
55
+
56
+ # 从数据源获取
57
+ result = self._fetch_price_data(symbol, sdate, edate, freq, days, fq)
58
+ self._put_cache(cache_key, result)
59
+ return result
60
+
61
+ def _fetch_price_data(self, symbol: str, sdate: str = '', edate: str = '',
62
+ freq: str = 'day', days: int = 320, fq: str = 'qfq') -> Tuple[str, str, pd.DataFrame]:
63
+ """
64
+ 从数据源获取价格数据(子类需要实现)
65
+
66
+ 这个方法只负责获取数据,不处理缓存。缓存逻辑由 get_price 统一处理。
67
+
68
+ Args:
69
+ symbol: 股票代码(已标准化)
70
+ sdate: 开始日期
71
+ edate: 结束日期
72
+ freq: 频率
73
+ days: 天数
74
+ fq: 复权方式
75
+
76
+ Returns:
77
+ (symbol, name, DataFrame)
78
+ """
79
+ # 默认实现:子类应该重写此方法
80
+ return (symbol, '', pd.DataFrame())
31
81
 
32
82
  @abstractmethod
33
83
  def normalize_symbol(self, symbol: str) -> str:
@@ -46,4 +96,101 @@ class Market(ABC):
46
96
  """存入缓存"""
47
97
  if self.cache:
48
98
  self.cache.put(key, value)
99
+
100
+ def _get_price_with_persistent_cache(self, symbol: str, sdate: str, edate: str,
101
+ freq: str, days: int, fq: str,
102
+ fetch_func) -> Tuple[str, str, pd.DataFrame]:
103
+ """
104
+ 使用持久化缓存的智能扩展逻辑
105
+
106
+ 当请求的 edate 不在缓存中时,从缓存的最新日期向前扩展到 edate
107
+ 当请求的 sdate 不在缓存中时,从缓存的最早日期向后扩展到 sdate
108
+ """
109
+ cache_key = f"{symbol}:{sdate}:{edate}:{freq}:{days}:{fq}"
110
+
111
+ # 尝试从缓存获取
112
+ cached = self._get_cached(cache_key)
113
+ if cached:
114
+ _, name, cached_df = cached
115
+
116
+ # 检查是否需要扩展
117
+ if cached_df.empty or not isinstance(cached_df.index, pd.DatetimeIndex):
118
+ # 缓存为空或索引不是日期,直接获取新数据
119
+ result = fetch_func(symbol, sdate, edate, freq, days, fq)
120
+ self._put_cache(cache_key, result)
121
+ return result
122
+
123
+ cached_earliest = cached_df.index.min()
124
+ cached_latest = cached_df.index.max()
125
+ request_sdate = pd.to_datetime(sdate) if sdate else None
126
+ request_edate = pd.to_datetime(edate) if edate else None
127
+
128
+ need_extend_forward = False # 需要向前扩展(更新日期)
129
+ need_extend_backward = False # 需要向后扩展(更早日期)
130
+ extend_sdate = sdate
131
+ extend_edate = edate
132
+
133
+ # 检查是否需要向前扩展
134
+ if request_edate and request_edate > cached_latest:
135
+ need_extend_forward = True
136
+ # 从缓存的最新日期+1天开始,扩展到请求的 edate
137
+ extend_sdate = (cached_latest + pd.Timedelta(days=1)).strftime('%Y-%m-%d')
138
+ extend_edate = edate
139
+
140
+ # 检查是否需要向后扩展
141
+ if request_sdate and request_sdate < cached_earliest:
142
+ need_extend_backward = True
143
+ # 从请求的 sdate 开始,扩展到缓存的最早日期-1天
144
+ extend_sdate = sdate
145
+ extend_edate = (cached_earliest - pd.Timedelta(days=1)).strftime('%Y-%m-%d')
146
+
147
+ # 如果需要扩展,获取缺失的数据
148
+ if need_extend_forward or need_extend_backward:
149
+ # 获取扩展的数据
150
+ extended_result = fetch_func(symbol, extend_sdate, extend_edate, freq, days, fq)
151
+ _, _, extended_df = extended_result
152
+
153
+ if not extended_df.empty:
154
+ # 合并数据
155
+ merged_df = pd.concat([cached_df, extended_df])
156
+ merged_df = merged_df[~merged_df.index.duplicated(keep='last')]
157
+ merged_df = merged_df.sort_index()
158
+
159
+ # 过滤到请求的日期范围
160
+ if request_sdate or request_edate:
161
+ if request_sdate and request_edate:
162
+ mask = (merged_df.index >= request_sdate) & (merged_df.index <= request_edate)
163
+ elif request_sdate:
164
+ mask = merged_df.index >= request_sdate
165
+ else:
166
+ mask = merged_df.index <= request_edate
167
+ merged_df = merged_df[mask]
168
+
169
+ result = (symbol, name, merged_df)
170
+ # 更新缓存(使用原始 key,PersistentCache 会智能合并)
171
+ self._put_cache(cache_key, result)
172
+ return result
173
+
174
+ # 不需要扩展,直接返回缓存的数据
175
+ # 过滤到请求的日期范围
176
+ if request_sdate or request_edate:
177
+ if request_sdate and request_edate:
178
+ mask = (cached_df.index >= request_sdate) & (cached_df.index <= request_edate)
179
+ elif request_sdate:
180
+ mask = cached_df.index >= request_sdate
181
+ else:
182
+ mask = cached_df.index <= request_edate
183
+ filtered_df = cached_df[mask]
184
+ return (symbol, name, filtered_df)
185
+
186
+ return (symbol, name, cached_df)
187
+
188
+ # 缓存未命中,直接获取
189
+ if fetch_func:
190
+ result = fetch_func(symbol, sdate, edate, freq, days, fq)
191
+ self._put_cache(cache_key, result)
192
+ return result
193
+ else:
194
+ # 如果没有提供 fetch_func,返回空数据
195
+ return (symbol, '', pd.DataFrame())
49
196
 
@@ -27,21 +27,20 @@ class CNStockMarket(Market):
27
27
  """获取A股价格数据"""
28
28
  symbol = self.normalize_symbol(symbol)
29
29
 
30
- # 检查缓存
31
- cache_key = f"{symbol}:{sdate}:{edate}:{freq}:{days}:{fq}"
32
- cached = self._get_cached(cache_key)
33
- if cached:
34
- return cached
35
-
36
- # 特殊处理BK(板块)代码
30
+ # 特殊处理BK(板块)代码(不使用缓存)
37
31
  if symbol[:2] == 'BK':
38
32
  return self._get_bk_price(symbol)
39
33
 
40
- # 特殊处理PT代码
34
+ # 特殊处理PT代码(不使用缓存)
41
35
  if symbol[:2] == 'pt':
42
36
  return self._get_pt_price(symbol, sdate, edate, freq, days, fq)
43
37
 
44
- # 使用数据源获取数据
38
+ # 使用基类的缓存逻辑
39
+ return super().get_price(symbol, sdate, edate, freq, days, fq)
40
+
41
+ def _fetch_price_data(self, symbol: str, sdate: str = '', edate: str = '',
42
+ freq: str = 'day', days: int = 320, fq: str = 'qfq') -> Tuple[str, str, pd.DataFrame]:
43
+ """从数据源获取A股价格数据"""
45
44
  try:
46
45
  raw_data = self.data_source.fetch_kline(
47
46
  symbol, freq=freq, sdate=sdate, edate=edate, days=days, fq=fq
@@ -51,9 +50,7 @@ class CNStockMarket(Market):
51
50
  parser = KlineParser()
52
51
  name, df = parser.parse_tencent_kline(raw_data, symbol)
53
52
 
54
- result = (symbol, name, df)
55
- self._put_cache(cache_key, result)
56
- return result
53
+ return (symbol, name, df)
57
54
  except (DataSourceError, ParseError) as e:
58
55
  logger.warning(f'Failed to fetch {symbol} using new architecture: {e}')
59
56
  # 降级到旧方法
rquote/markets/future.py CHANGED
@@ -25,18 +25,19 @@ class FutureMarket(Market):
25
25
  """获取期货价格数据"""
26
26
  symbol = self.normalize_symbol(symbol)
27
27
 
28
- # 特殊处理BTC
28
+ # 特殊处理BTC(不使用缓存)
29
29
  if symbol[2:5].lower() == 'btc':
30
30
  if freq in ('min', '1min', 'minute'):
31
31
  return self._get_btc_minute_price(symbol)
32
32
  else:
33
33
  return self._get_btc_price(symbol)
34
34
 
35
- cache_key = f"{symbol}:{sdate}:{edate}:{freq}:{days}"
36
- cached = self._get_cached(cache_key)
37
- if cached:
38
- return cached
39
-
35
+ # 使用基类的缓存逻辑
36
+ return super().get_price(symbol, sdate, edate, freq, days, fq)
37
+
38
+ def _fetch_price_data(self, symbol: str, sdate: str = '', edate: str = '',
39
+ freq: str = 'day', days: int = 320, fq: str = 'qfq') -> Tuple[str, str, pd.DataFrame]:
40
+ """从数据源获取期货价格数据"""
40
41
  future_code = symbol[2:] # 去掉'fu'前缀
41
42
 
42
43
  try:
@@ -44,9 +45,7 @@ class FutureMarket(Market):
44
45
  parser = KlineParser()
45
46
  df = parser.parse_sina_future_kline(raw_data, freq=freq)
46
47
 
47
- result = (symbol, future_code, df)
48
- self._put_cache(cache_key, result)
49
- return result
48
+ return (symbol, future_code, df)
50
49
  except (DataSourceError, ParseError) as e:
51
50
  logger.warning(f'Failed to fetch {symbol} using new architecture, falling back: {e}')
52
51
  return self._get_price_fallback(symbol, future_code, freq)
@@ -19,16 +19,9 @@ class HKStockMarket(Market):
19
19
  return 'hk' + symbol
20
20
  return symbol
21
21
 
22
- def get_price(self, symbol: str, sdate: str = '', edate: str = '',
23
- freq: str = 'day', days: int = 320, fq: str = 'qfq') -> Tuple[str, str, pd.DataFrame]:
24
- """获取港股价格数据"""
25
- symbol = self.normalize_symbol(symbol)
26
-
27
- cache_key = f"{symbol}:{sdate}:{edate}:{freq}:{days}:{fq}"
28
- cached = self._get_cached(cache_key)
29
- if cached:
30
- return cached
31
-
22
+ def _fetch_price_data(self, symbol: str, sdate: str = '', edate: str = '',
23
+ freq: str = 'day', days: int = 320, fq: str = 'qfq') -> Tuple[str, str, pd.DataFrame]:
24
+ """从数据源获取港股价格数据"""
32
25
  try:
33
26
  raw_data = self.data_source.fetch_kline(
34
27
  symbol, freq=freq, sdate=sdate, edate=edate, days=days, fq=fq
@@ -37,9 +30,7 @@ class HKStockMarket(Market):
37
30
  parser = KlineParser()
38
31
  name, df = parser.parse_tencent_kline(raw_data, symbol, fq=fq)
39
32
 
40
- result = (symbol, name, df)
41
- self._put_cache(cache_key, result)
42
- return result
33
+ return (symbol, name, df)
43
34
  except (DataSourceError, ParseError) as e:
44
35
  logger.warning(f'Failed to fetch {symbol}: {e}')
45
36
  raise
@@ -25,15 +25,16 @@ class USStockMarket(Market):
25
25
  """获取美股价格数据"""
26
26
  symbol = self.normalize_symbol(symbol)
27
27
 
28
- cache_key = f"{symbol}:{sdate}:{edate}:{freq}:{days}:{fq}"
29
- cached = self._get_cached(cache_key)
30
- if cached:
31
- return cached
32
-
33
- # 特殊处理分钟数据
28
+ # 特殊处理分钟数据(不使用缓存)
34
29
  if freq in ('min', '1min', 'minute'):
35
30
  return self._get_minute_data(symbol)
36
31
 
32
+ # 使用基类的缓存逻辑
33
+ return super().get_price(symbol, sdate, edate, freq, days, fq)
34
+
35
+ def _fetch_price_data(self, symbol: str, sdate: str = '', edate: str = '',
36
+ freq: str = 'day', days: int = 320, fq: str = 'qfq') -> Tuple[str, str, pd.DataFrame]:
37
+ """从数据源获取美股价格数据"""
37
38
  try:
38
39
  raw_data = self.data_source.fetch_kline(
39
40
  symbol, freq=freq, sdate=sdate, edate=edate, days=days, fq=fq
@@ -42,9 +43,7 @@ class USStockMarket(Market):
42
43
  parser = KlineParser()
43
44
  name, df = parser.parse_tencent_kline(raw_data, symbol)
44
45
 
45
- result = (symbol, name, df)
46
- self._put_cache(cache_key, result)
47
- return result
46
+ return (symbol, name, df)
48
47
  except (DataSourceError, ParseError) as e:
49
48
  logger.warning(f'Failed to fetch {symbol}: {e}')
50
49
  raise
@@ -1,14 +1,16 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rquote
3
- Version: 0.3.9
3
+ Version: 0.4.0
4
4
  Summary: Mostly day quotes of cn/hk/us/fund/future markets, side with quote list fetch
5
- Requires-Python: >=3.6.1
5
+ Requires-Python: >=3.9.0
6
6
  Description-Content-Type: text/markdown
7
7
  Requires-Dist: build>=0.9.0
8
8
  Requires-Dist: httpx>=0.20.0
9
9
  Requires-Dist: pandas>=1.0.0
10
10
  Requires-Dist: setuptools>=42
11
11
  Requires-Dist: twine>=3.8.0
12
+ Provides-Extra: persistent
13
+ Requires-Dist: duckdb>=0.9.0; extra == "persistent"
12
14
 
13
15
  # rquote
14
16
 
@@ -60,17 +62,67 @@ sid, name, df = get_price('sz000001', sdate='2024-01-01', edate='2024-02-01')
60
62
 
61
63
  ### 使用缓存
62
64
 
65
+ #### 内存缓存(MemoryCache)
66
+
63
67
  ```python
64
68
  from rquote import get_price, MemoryCache
65
69
 
66
70
  # 创建缓存实例
67
71
  cache = MemoryCache(ttl=3600) # 缓存1小时
68
72
 
69
- # 使用缓存(通过dd参数,向后兼容)
70
- cache_dict = {}
71
- sid, name, df = get_price('sh000001', dd=cache_dict)
73
+ # 使用缓存(通过dd参数传递MemoryCache实例)
74
+ sid, name, df = get_price('sh000001', dd=cache)
75
+
76
+ # 注意:MemoryCache 是内存缓存,数据仅在当前进程运行期间有效
77
+ # 脚本运行结束后,缓存数据会丢失
78
+ ```
79
+
80
+ **缓存生命周期说明:**
81
+ - `MemoryCache` 是纯内存缓存,数据存储在进程内存中
82
+ - 缓存数据仅在当前脚本运行期间有效
83
+ - 脚本运行结束后,所有缓存数据会丢失
84
+
85
+ #### 持久化缓存(PersistentCache)
86
+
87
+ 持久化缓存支持跨进程/跨运行的缓存持久化,数据会保存到本地文件。
88
+
89
+ **安装可选依赖:**
90
+ ```bash
91
+ pip install rquote[persistent]
92
+ # 或
93
+ uv pip install "rquote[persistent]"
94
+ ```
95
+
96
+ **使用持久化缓存:**
97
+ ```python
98
+ from rquote import get_price, PersistentCache
99
+
100
+ # 创建持久化缓存实例
101
+ # 默认使用 duckdb(如果已安装),否则使用 pickle 文件
102
+ cache = PersistentCache(ttl=86400) # 缓存24小时,默认路径:~/.rquote/cache.db
103
+
104
+ # 或指定自定义路径
105
+ cache = PersistentCache(db_path='./my_cache.db', use_duckdb=True)
106
+
107
+ # 使用缓存
108
+ sid, name, df = get_price('sh000001', dd=cache)
109
+
110
+ # 持久化缓存支持智能扩展:
111
+ # - 当请求的结束日期不在缓存中时,会自动从缓存的最新日期向前扩展
112
+ # - 当请求的开始日期不在缓存中时,会自动从缓存的最早日期向后扩展
113
+ # - 数据会自动合并,避免重复请求
114
+
115
+ # 关闭缓存(可选,程序退出时会自动保存)
116
+ cache.close()
72
117
  ```
73
118
 
119
+ **持久化缓存特性:**
120
+ - ✅ 跨进程/跨运行持久化:数据保存在本地文件,下次运行仍可使用
121
+ - ✅ 智能数据合并:相同股票的数据会自动合并,key 不包含日期范围
122
+ - ✅ 智能扩展:当请求的日期范围超出缓存时,自动扩展并合并数据
123
+ - ✅ 支持 TTL:可设置缓存过期时间
124
+ - ✅ 可选 duckdb:如果安装了 duckdb,使用 duckdb 存储(性能更好),否则使用 pickle 文件
125
+
74
126
  ## 主要功能
75
127
 
76
128
  ### 历史价格数据获取
@@ -1,4 +1,4 @@
1
- rquote/__init__.py,sha256=-U5Uq4eT3Hhl2EkVmBgr5TAfU-ZfFzpOaGeJafnhyos,2208
1
+ rquote/__init__.py,sha256=HMXqZ_wfGoRqw1V3xm2MyBGYKB9ooGWIRnk60bisLZo,2370
2
2
  rquote/config.py,sha256=noep_VzY_nJehnkPQb4mkwzpeYLwkU1riqofQJ6Hhw0,1108
3
3
  rquote/exceptions.py,sha256=lJH2GC5dDhMoW_OtlBc03wlUn684-7jNPyF1NjmfVIE,569
4
4
  rquote/plots.py,sha256=UQn4sjhIzVwagfhUDM738b2HHjKo4tRdU2UCs_1-FbY,2341
@@ -8,9 +8,10 @@ rquote/api/lists.py,sha256=fRebS02Fi0qe6KpWBA-9W1UG0It6__DmRlNimtMa7L8,5331
8
8
  rquote/api/price.py,sha256=I5lZl6cUQRlE4AtzNbR-uGZt1ho9vgP1cgNFDjaigMA,3575
9
9
  rquote/api/stock_info.py,sha256=912ICdIBr8z2lKWDbq3gG0E94czTPvbx9aXsKUi-QkE,1537
10
10
  rquote/api/tick.py,sha256=nEcjuAjtBHUaD8KPRLg643piVa21PhKDQvkVWNwvvME,1431
11
- rquote/cache/__init__.py,sha256=IXGSRpvSgBlcM6twLuJEOEockbb09_VqORXdQpfwpCA,138
11
+ rquote/cache/__init__.py,sha256=S393I5Wmp0QooaRka9n7bvDUdEbg3jUhm6u815T86rM,317
12
12
  rquote/cache/base.py,sha256=orzG4Yo-6gzVG027j1-LTZPT718JohnCdLDnOLoLUQ4,515
13
13
  rquote/cache/memory.py,sha256=7z4keb3q91pzI4ASQWy1MU8T5nbWLCEUjJcStv_3hvk,1933
14
+ rquote/cache/persistent.py,sha256=_ASNobFDYvZ51XIux3NZN4M24Z-rs6gOcWKGRpbd6tg,15741
14
15
  rquote/data_sources/__init__.py,sha256=WCe1aam4677jM5G6wP4a-dQFTeBzcU5PJqsKieAVMBo,215
15
16
  rquote/data_sources/base.py,sha256=JuKsTMxH7y8yRxHg3JbLzQwXPr43rS4pnwc5625u2U4,443
16
17
  rquote/data_sources/sina.py,sha256=T_3Dl0Mwlhx8CKRJll_UKobYecRWltGaIOiGkpHS43Q,3300
@@ -18,12 +19,12 @@ rquote/data_sources/tencent.py,sha256=ayt1O85pheLwzX3z5c6Qij1NrmUywcsz6YcSVzdDoM
18
19
  rquote/factors/__init__.py,sha256=_ZbH2XxYtXwCJpvRVdNvGncoPSpMqrtlYmf1_fMGIjM,116
19
20
  rquote/factors/technical.py,sha256=dPDs3pDEDRV9iQJBrSoKpGFLQMjOqyoBdN2rUntpOUU,4235
20
21
  rquote/markets/__init__.py,sha256=k4F8cZgb-phqemMqhZXFPdOKsR4P--DD3d5i21vKhbg,365
21
- rquote/markets/base.py,sha256=DjvxRcJqwUsBTxnsE28Gd-zJLFsCGwdQpezLRAZ_9sQ,1347
22
- rquote/markets/cn_stock.py,sha256=fyF7jJHFUrI5jwuqBKHXpsIE51H4kbyc3q-uuviPLGk,8224
22
+ rquote/markets/base.py,sha256=EYd9sDK0tfqtwXTXY9E6RtniWp0JNZ3REYGQ0b1Q7xU,8151
23
+ rquote/markets/cn_stock.py,sha256=nu2ebTE4a6FAJkvpMN0FEPuqwom_hqTRjnUg96cQGKc,8320
23
24
  rquote/markets/factory.py,sha256=4Txpuok0LBOLT_vAiIU-NslwVnYF7sKHCdlacAboxpo,2875
24
- rquote/markets/future.py,sha256=7AqViPp0S9OQZsaU2hkJzh4My6gYFqLo1OUW2mVMSDo,7215
25
- rquote/markets/hk_stock.py,sha256=NlWaXQgXttpcQVFZjflcEkMTmXMxeP2C6Y7OGG50u7E,1452
26
- rquote/markets/us_stock.py,sha256=17mTg50g3ImOnGM4Re1MRSyvbD2mgFW6wjtMh86IEXA,2465
25
+ rquote/markets/future.py,sha256=yGMyUu9Fv75jbzPbvW6_36otEeebSij7vnzow_zyEn8,7358
26
+ rquote/markets/hk_stock.py,sha256=AhRJpWp027ACew9ogxkVCJXbqbYQ1AkbFwDJccXbvAs,1183
27
+ rquote/markets/us_stock.py,sha256=GT4IxCMTgxb0JSkDa6acZ6PpHLhK6lrskI0ftiYxGCA,2603
27
28
  rquote/parsers/__init__.py,sha256=q4g-FgpzxKBPfhJiQH3B5MEeZWUIXlyre-vAnOnfYmA,110
28
29
  rquote/parsers/kline.py,sha256=g6k8W76-4hpYsuBgvwmb5G6ZkzHOJDX-JrVVXYksw4c,4020
29
30
  rquote/utils/__init__.py,sha256=-ZHABqFHQeJrCCsgnqEYWR57jl7GduCKn2V3hpFi-pE,348
@@ -32,7 +33,7 @@ rquote/utils/helpers.py,sha256=V07n9BtRS8bEJH023Kca78-unk7iD3B9hn2UjELetYs,354
32
33
  rquote/utils/http.py,sha256=X0Alhnu0CNqyQeOt6ivUWmh2XwrWxXd2lSpQOKDdnzw,3249
33
34
  rquote/utils/logging.py,sha256=cbeRH4ODazn7iyQmGoEBT2lH5LX4Ca3zDfs_20J1T28,566
34
35
  rquote/utils/web.py,sha256=I8_pcThW6VUvahuRHdtp32iZwr85hEt1hB6TgznMy_U,3854
35
- rquote-0.3.9.dist-info/METADATA,sha256=gFYIx3AMMzbJgDx9BlXn-Xn3T9PgkTuYkQTXthyjeIA,11262
36
- rquote-0.3.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
37
- rquote-0.3.9.dist-info/top_level.txt,sha256=CehAiaZx7Fo8HGoV2zd5GhILUW1jQEN8YS-cWMlrK9Y,7
38
- rquote-0.3.9.dist-info/RECORD,,
36
+ rquote-0.4.0.dist-info/METADATA,sha256=vdD7qOgz0sX7iiQJ-jHpWNKuFCcMzk4j4cYFuxB-6iw,13213
37
+ rquote-0.4.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
38
+ rquote-0.4.0.dist-info/top_level.txt,sha256=CehAiaZx7Fo8HGoV2zd5GhILUW1jQEN8YS-cWMlrK9Y,7
39
+ rquote-0.4.0.dist-info/RECORD,,
File without changes