rquote 0.3.9__tar.gz → 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {rquote-0.3.9/rquote.egg-info → rquote-0.4.0}/PKG-INFO +57 -5
- rquote-0.3.9/PKG-INFO → rquote-0.4.0/README.md +53 -15
- {rquote-0.3.9 → rquote-0.4.0}/pyproject.toml +5 -2
- {rquote-0.3.9 → rquote-0.4.0}/rquote/__init__.py +6 -0
- rquote-0.4.0/rquote/cache/__init__.py +14 -0
- rquote-0.4.0/rquote/cache/persistent.py +421 -0
- rquote-0.4.0/rquote/markets/base.py +196 -0
- {rquote-0.3.9 → rquote-0.4.0}/rquote/markets/cn_stock.py +9 -12
- {rquote-0.3.9 → rquote-0.4.0}/rquote/markets/future.py +8 -9
- {rquote-0.3.9 → rquote-0.4.0}/rquote/markets/hk_stock.py +4 -13
- {rquote-0.3.9 → rquote-0.4.0}/rquote/markets/us_stock.py +8 -9
- rquote-0.3.9/README.md → rquote-0.4.0/rquote.egg-info/PKG-INFO +67 -3
- {rquote-0.3.9 → rquote-0.4.0}/rquote.egg-info/SOURCES.txt +1 -0
- {rquote-0.3.9 → rquote-0.4.0}/rquote.egg-info/requires.txt +3 -0
- rquote-0.4.0/tests/test_cache.py +357 -0
- rquote-0.3.9/rquote/cache/__init__.py +0 -9
- rquote-0.3.9/rquote/markets/base.py +0 -49
- rquote-0.3.9/tests/test_cache.py +0 -71
- {rquote-0.3.9 → rquote-0.4.0}/rquote/api/__init__.py +0 -0
- {rquote-0.3.9 → rquote-0.4.0}/rquote/api/lists.py +0 -0
- {rquote-0.3.9 → rquote-0.4.0}/rquote/api/price.py +0 -0
- {rquote-0.3.9 → rquote-0.4.0}/rquote/api/stock_info.py +0 -0
- {rquote-0.3.9 → rquote-0.4.0}/rquote/api/tick.py +0 -0
- {rquote-0.3.9 → rquote-0.4.0}/rquote/cache/base.py +0 -0
- {rquote-0.3.9 → rquote-0.4.0}/rquote/cache/memory.py +0 -0
- {rquote-0.3.9 → rquote-0.4.0}/rquote/config.py +0 -0
- {rquote-0.3.9 → rquote-0.4.0}/rquote/data_sources/__init__.py +0 -0
- {rquote-0.3.9 → rquote-0.4.0}/rquote/data_sources/base.py +0 -0
- {rquote-0.3.9 → rquote-0.4.0}/rquote/data_sources/sina.py +0 -0
- {rquote-0.3.9 → rquote-0.4.0}/rquote/data_sources/tencent.py +0 -0
- {rquote-0.3.9 → rquote-0.4.0}/rquote/exceptions.py +0 -0
- {rquote-0.3.9 → rquote-0.4.0}/rquote/factors/__init__.py +0 -0
- {rquote-0.3.9 → rquote-0.4.0}/rquote/factors/technical.py +0 -0
- {rquote-0.3.9 → rquote-0.4.0}/rquote/markets/__init__.py +0 -0
- {rquote-0.3.9 → rquote-0.4.0}/rquote/markets/factory.py +0 -0
- {rquote-0.3.9 → rquote-0.4.0}/rquote/parsers/__init__.py +0 -0
- {rquote-0.3.9 → rquote-0.4.0}/rquote/parsers/kline.py +0 -0
- {rquote-0.3.9 → rquote-0.4.0}/rquote/plots.py +0 -0
- {rquote-0.3.9 → rquote-0.4.0}/rquote/utils/__init__.py +0 -0
- {rquote-0.3.9 → rquote-0.4.0}/rquote/utils/date.py +0 -0
- {rquote-0.3.9 → rquote-0.4.0}/rquote/utils/helpers.py +0 -0
- {rquote-0.3.9 → rquote-0.4.0}/rquote/utils/http.py +0 -0
- {rquote-0.3.9 → rquote-0.4.0}/rquote/utils/logging.py +0 -0
- {rquote-0.3.9 → rquote-0.4.0}/rquote/utils/web.py +0 -0
- {rquote-0.3.9 → rquote-0.4.0}/rquote/utils.py +0 -0
- {rquote-0.3.9 → rquote-0.4.0}/rquote.egg-info/dependency_links.txt +0 -0
- {rquote-0.3.9 → rquote-0.4.0}/rquote.egg-info/top_level.txt +0 -0
- {rquote-0.3.9 → rquote-0.4.0}/setup.cfg +0 -0
- {rquote-0.3.9 → rquote-0.4.0}/tests/test_api.py +0 -0
- {rquote-0.3.9 → rquote-0.4.0}/tests/test_config.py +0 -0
- {rquote-0.3.9 → rquote-0.4.0}/tests/test_exceptions.py +0 -0
- {rquote-0.3.9 → rquote-0.4.0}/tests/test_utils.py +0 -0
|
@@ -1,14 +1,16 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: rquote
|
|
3
|
-
Version: 0.3.9
|
|
3
|
+
Version: 0.4.0
|
|
4
4
|
Summary: Mostly day quotes of cn/hk/us/fund/future markets, side with quote list fetch
|
|
5
|
-
Requires-Python: >=3.6.1
|
|
5
|
+
Requires-Python: >=3.9.0
|
|
6
6
|
Description-Content-Type: text/markdown
|
|
7
7
|
Requires-Dist: build>=0.9.0
|
|
8
8
|
Requires-Dist: httpx>=0.20.0
|
|
9
9
|
Requires-Dist: pandas>=1.0.0
|
|
10
10
|
Requires-Dist: setuptools>=42
|
|
11
11
|
Requires-Dist: twine>=3.8.0
|
|
12
|
+
Provides-Extra: persistent
|
|
13
|
+
Requires-Dist: duckdb>=0.9.0; extra == "persistent"
|
|
12
14
|
|
|
13
15
|
# rquote
|
|
14
16
|
|
|
@@ -60,17 +62,67 @@ sid, name, df = get_price('sz000001', sdate='2024-01-01', edate='2024-02-01')
|
|
|
60
62
|
|
|
61
63
|
### 使用缓存
|
|
62
64
|
|
|
65
|
+
#### 内存缓存(MemoryCache)
|
|
66
|
+
|
|
63
67
|
```python
|
|
64
68
|
from rquote import get_price, MemoryCache
|
|
65
69
|
|
|
66
70
|
# 创建缓存实例
|
|
67
71
|
cache = MemoryCache(ttl=3600) # 缓存1小时
|
|
68
72
|
|
|
69
|
-
# 使用缓存(通过dd
|
|
70
|
-
|
|
71
|
-
|
|
73
|
+
# 使用缓存(通过dd参数传递MemoryCache实例)
|
|
74
|
+
sid, name, df = get_price('sh000001', dd=cache)
|
|
75
|
+
|
|
76
|
+
# 注意:MemoryCache 是内存缓存,数据仅在当前进程运行期间有效
|
|
77
|
+
# 脚本运行结束后,缓存数据会丢失
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
**缓存生命周期说明:**
|
|
81
|
+
- `MemoryCache` 是纯内存缓存,数据存储在进程内存中
|
|
82
|
+
- 缓存数据仅在当前脚本运行期间有效
|
|
83
|
+
- 脚本运行结束后,所有缓存数据会丢失
|
|
84
|
+
|
|
85
|
+
#### 持久化缓存(PersistentCache)
|
|
86
|
+
|
|
87
|
+
持久化缓存支持跨进程/跨运行的缓存持久化,数据会保存到本地文件。
|
|
88
|
+
|
|
89
|
+
**安装可选依赖:**
|
|
90
|
+
```bash
|
|
91
|
+
pip install rquote[persistent]
|
|
92
|
+
# 或
|
|
93
|
+
uv pip install "rquote[persistent]"
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
**使用持久化缓存:**
|
|
97
|
+
```python
|
|
98
|
+
from rquote import get_price, PersistentCache
|
|
99
|
+
|
|
100
|
+
# 创建持久化缓存实例
|
|
101
|
+
# 默认使用 duckdb(如果已安装),否则使用 pickle 文件
|
|
102
|
+
cache = PersistentCache(ttl=86400) # 缓存24小时,默认路径:~/.rquote/cache.db
|
|
103
|
+
|
|
104
|
+
# 或指定自定义路径
|
|
105
|
+
cache = PersistentCache(db_path='./my_cache.db', use_duckdb=True)
|
|
106
|
+
|
|
107
|
+
# 使用缓存
|
|
108
|
+
sid, name, df = get_price('sh000001', dd=cache)
|
|
109
|
+
|
|
110
|
+
# 持久化缓存支持智能扩展:
|
|
111
|
+
# - 当请求的结束日期不在缓存中时,会自动从缓存的最新日期向前扩展
|
|
112
|
+
# - 当请求的开始日期不在缓存中时,会自动从缓存的最早日期向后扩展
|
|
113
|
+
# - 数据会自动合并,避免重复请求
|
|
114
|
+
|
|
115
|
+
# 关闭缓存(可选,程序退出时会自动保存)
|
|
116
|
+
cache.close()
|
|
72
117
|
```
|
|
73
118
|
|
|
119
|
+
**持久化缓存特性:**
|
|
120
|
+
- ✅ 跨进程/跨运行持久化:数据保存在本地文件,下次运行仍可使用
|
|
121
|
+
- ✅ 智能数据合并:相同股票的数据会自动合并,key 不包含日期范围
|
|
122
|
+
- ✅ 智能扩展:当请求的日期范围超出缓存时,自动扩展并合并数据
|
|
123
|
+
- ✅ 支持 TTL:可设置缓存过期时间
|
|
124
|
+
- ✅ 可选 duckdb:如果安装了 duckdb,使用 duckdb 存储(性能更好),否则使用 pickle 文件
|
|
125
|
+
|
|
74
126
|
## 主要功能
|
|
75
127
|
|
|
76
128
|
### 历史价格数据获取
|
|
@@ -1,15 +1,3 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: rquote
|
|
3
|
-
Version: 0.3.9
|
|
4
|
-
Summary: Mostly day quotes of cn/hk/us/fund/future markets, side with quote list fetch
|
|
5
|
-
Requires-Python: >=3.6.1
|
|
6
|
-
Description-Content-Type: text/markdown
|
|
7
|
-
Requires-Dist: build>=0.9.0
|
|
8
|
-
Requires-Dist: httpx>=0.20.0
|
|
9
|
-
Requires-Dist: pandas>=1.0.0
|
|
10
|
-
Requires-Dist: setuptools>=42
|
|
11
|
-
Requires-Dist: twine>=3.8.0
|
|
12
|
-
|
|
13
1
|
# rquote
|
|
14
2
|
|
|
15
3
|
`rquote` 是一个提供 A股/港股/美股/ETF基金/期货 历史数据获取的Python库
|
|
@@ -60,17 +48,67 @@ sid, name, df = get_price('sz000001', sdate='2024-01-01', edate='2024-02-01')
|
|
|
60
48
|
|
|
61
49
|
### 使用缓存
|
|
62
50
|
|
|
51
|
+
#### 内存缓存(MemoryCache)
|
|
52
|
+
|
|
63
53
|
```python
|
|
64
54
|
from rquote import get_price, MemoryCache
|
|
65
55
|
|
|
66
56
|
# 创建缓存实例
|
|
67
57
|
cache = MemoryCache(ttl=3600) # 缓存1小时
|
|
68
58
|
|
|
69
|
-
# 使用缓存(通过dd
|
|
70
|
-
|
|
71
|
-
|
|
59
|
+
# 使用缓存(通过dd参数传递MemoryCache实例)
|
|
60
|
+
sid, name, df = get_price('sh000001', dd=cache)
|
|
61
|
+
|
|
62
|
+
# 注意:MemoryCache 是内存缓存,数据仅在当前进程运行期间有效
|
|
63
|
+
# 脚本运行结束后,缓存数据会丢失
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
**缓存生命周期说明:**
|
|
67
|
+
- `MemoryCache` 是纯内存缓存,数据存储在进程内存中
|
|
68
|
+
- 缓存数据仅在当前脚本运行期间有效
|
|
69
|
+
- 脚本运行结束后,所有缓存数据会丢失
|
|
70
|
+
|
|
71
|
+
#### 持久化缓存(PersistentCache)
|
|
72
|
+
|
|
73
|
+
持久化缓存支持跨进程/跨运行的缓存持久化,数据会保存到本地文件。
|
|
74
|
+
|
|
75
|
+
**安装可选依赖:**
|
|
76
|
+
```bash
|
|
77
|
+
pip install rquote[persistent]
|
|
78
|
+
# 或
|
|
79
|
+
uv pip install "rquote[persistent]"
|
|
72
80
|
```
|
|
73
81
|
|
|
82
|
+
**使用持久化缓存:**
|
|
83
|
+
```python
|
|
84
|
+
from rquote import get_price, PersistentCache
|
|
85
|
+
|
|
86
|
+
# 创建持久化缓存实例
|
|
87
|
+
# 默认使用 duckdb(如果已安装),否则使用 pickle 文件
|
|
88
|
+
cache = PersistentCache(ttl=86400) # 缓存24小时,默认路径:~/.rquote/cache.db
|
|
89
|
+
|
|
90
|
+
# 或指定自定义路径
|
|
91
|
+
cache = PersistentCache(db_path='./my_cache.db', use_duckdb=True)
|
|
92
|
+
|
|
93
|
+
# 使用缓存
|
|
94
|
+
sid, name, df = get_price('sh000001', dd=cache)
|
|
95
|
+
|
|
96
|
+
# 持久化缓存支持智能扩展:
|
|
97
|
+
# - 当请求的结束日期不在缓存中时,会自动从缓存的最新日期向前扩展
|
|
98
|
+
# - 当请求的开始日期不在缓存中时,会自动从缓存的最早日期向后扩展
|
|
99
|
+
# - 数据会自动合并,避免重复请求
|
|
100
|
+
|
|
101
|
+
# 关闭缓存(可选,程序退出时会自动保存)
|
|
102
|
+
cache.close()
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
**持久化缓存特性:**
|
|
106
|
+
- ✅ 跨进程/跨运行持久化:数据保存在本地文件,下次运行仍可使用
|
|
107
|
+
- ✅ 智能数据合并:相同股票的数据会自动合并,key 不包含日期范围
|
|
108
|
+
- ✅ 智能扩展:当请求的日期范围超出缓存时,自动扩展并合并数据
|
|
109
|
+
- ✅ 支持 TTL:可设置缓存过期时间
|
|
110
|
+
- ✅ 可选 duckdb:如果安装了 duckdb,使用 duckdb 存储(性能更好),否则使用 pickle 文件
|
|
111
|
+
|
|
74
112
|
## 主要功能
|
|
75
113
|
|
|
76
114
|
### 历史价格数据获取
|
|
@@ -4,10 +4,11 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "rquote"
|
|
7
|
-
version = "0.3.9"
|
|
7
|
+
version = "0.4.0"
|
|
8
8
|
description = "Mostly day quotes of cn/hk/us/fund/future markets, side with quote list fetch"
|
|
9
9
|
readme = "README.md"
|
|
10
|
-
requires-python = ">=3.6.1"
|
|
10
|
+
# requires-python = ">=3.6.1" # duckdb requires higher python version
|
|
11
|
+
requires-python = ">=3.9.0"
|
|
11
12
|
dependencies = [
|
|
12
13
|
"build>=0.9.0",
|
|
13
14
|
"httpx>=0.20.0",
|
|
@@ -15,3 +16,5 @@ dependencies = [
|
|
|
15
16
|
"setuptools>=42",
|
|
16
17
|
"twine>=3.8.0",
|
|
17
18
|
]
|
|
19
|
+
[project.optional-dependencies]
|
|
20
|
+
persistent = ["duckdb>=0.9.0"]
|
|
@@ -35,6 +35,11 @@ from .plots import PlotUtils
|
|
|
35
35
|
from . import config
|
|
36
36
|
from . import exceptions
|
|
37
37
|
from .cache import MemoryCache, Cache
|
|
38
|
+
# 尝试导入持久化缓存(可选依赖)
|
|
39
|
+
try:
|
|
40
|
+
from .cache import PersistentCache
|
|
41
|
+
except ImportError:
|
|
42
|
+
PersistentCache = None
|
|
38
43
|
from .utils.http import HTTPClient
|
|
39
44
|
|
|
40
45
|
|
|
@@ -93,5 +98,6 @@ __all__ = [
|
|
|
93
98
|
'exceptions',
|
|
94
99
|
'MemoryCache',
|
|
95
100
|
'Cache',
|
|
101
|
+
'PersistentCache',
|
|
96
102
|
'HTTPClient',
|
|
97
103
|
]
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
缓存模块
|
|
4
|
+
"""
|
|
5
|
+
from .base import Cache
|
|
6
|
+
from .memory import MemoryCache
|
|
7
|
+
|
|
8
|
+
# 尝试导入持久化缓存(可选依赖)
|
|
9
|
+
try:
|
|
10
|
+
from .persistent import PersistentCache
|
|
11
|
+
__all__ = ['Cache', 'MemoryCache', 'PersistentCache']
|
|
12
|
+
except ImportError:
|
|
13
|
+
__all__ = ['Cache', 'MemoryCache']
|
|
14
|
+
|
|
@@ -0,0 +1,421 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
持久化缓存实现
|
|
4
|
+
"""
|
|
5
|
+
import os
|
|
6
|
+
import time
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Optional, Any, Tuple
|
|
9
|
+
import pandas as pd
|
|
10
|
+
from .base import Cache
|
|
11
|
+
|
|
12
|
+
# 尝试导入 duckdb(可选依赖)
|
|
13
|
+
try:
|
|
14
|
+
import duckdb
|
|
15
|
+
DUCKDB_AVAILABLE = True
|
|
16
|
+
except ImportError:
|
|
17
|
+
DUCKDB_AVAILABLE = False
|
|
18
|
+
duckdb = None
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class PersistentCache(Cache):
|
|
22
|
+
"""持久化缓存实现,使用 duckdb 或文件系统存储数据"""
|
|
23
|
+
|
|
24
|
+
def __init__(self, db_path: Optional[str] = None, use_duckdb: bool = True, ttl: Optional[int] = None):
|
|
25
|
+
"""
|
|
26
|
+
初始化持久化缓存
|
|
27
|
+
|
|
28
|
+
Args:
|
|
29
|
+
db_path: 数据库文件路径,默认为 ~/.rquote/cache.db
|
|
30
|
+
use_duckdb: 是否使用 duckdb(如果可用),否则使用 pickle 文件
|
|
31
|
+
ttl: 默认过期时间(秒),None 表示不过期
|
|
32
|
+
"""
|
|
33
|
+
self.use_duckdb = use_duckdb and DUCKDB_AVAILABLE
|
|
34
|
+
self.ttl = ttl
|
|
35
|
+
|
|
36
|
+
if db_path is None:
|
|
37
|
+
# 默认路径:~/.rquote/cache.db 或 ~/.rquote/cache.pkl
|
|
38
|
+
home = Path.home()
|
|
39
|
+
cache_dir = home / '.rquote'
|
|
40
|
+
cache_dir.mkdir(exist_ok=True)
|
|
41
|
+
if self.use_duckdb:
|
|
42
|
+
db_path = str(cache_dir / 'cache.db')
|
|
43
|
+
else:
|
|
44
|
+
db_path = str(cache_dir / 'cache.pkl')
|
|
45
|
+
|
|
46
|
+
self.db_path = db_path
|
|
47
|
+
|
|
48
|
+
if self.use_duckdb:
|
|
49
|
+
self._init_duckdb()
|
|
50
|
+
else:
|
|
51
|
+
self._init_pickle()
|
|
52
|
+
|
|
53
|
+
def _init_duckdb(self):
|
|
54
|
+
"""初始化 duckdb 数据库"""
|
|
55
|
+
self.conn = duckdb.connect(self.db_path)
|
|
56
|
+
# 创建缓存表
|
|
57
|
+
self.conn.execute("""
|
|
58
|
+
CREATE TABLE IF NOT EXISTS cache_data (
|
|
59
|
+
cache_key TEXT PRIMARY KEY,
|
|
60
|
+
symbol TEXT NOT NULL,
|
|
61
|
+
name TEXT,
|
|
62
|
+
data BLOB,
|
|
63
|
+
earliest_date TEXT,
|
|
64
|
+
latest_date TEXT,
|
|
65
|
+
freq TEXT,
|
|
66
|
+
fq TEXT,
|
|
67
|
+
updated_at TIMESTAMP,
|
|
68
|
+
expire_at TIMESTAMP
|
|
69
|
+
)
|
|
70
|
+
""")
|
|
71
|
+
self.conn.execute("""
|
|
72
|
+
CREATE INDEX IF NOT EXISTS idx_symbol_freq_fq
|
|
73
|
+
ON cache_data(symbol, freq, fq)
|
|
74
|
+
""")
|
|
75
|
+
|
|
76
|
+
def _init_pickle(self):
|
|
77
|
+
"""初始化 pickle 存储"""
|
|
78
|
+
import pickle
|
|
79
|
+
self.pickle = pickle
|
|
80
|
+
if os.path.exists(self.db_path):
|
|
81
|
+
try:
|
|
82
|
+
with open(self.db_path, 'rb') as f:
|
|
83
|
+
self._cache_data = self.pickle.load(f)
|
|
84
|
+
except:
|
|
85
|
+
self._cache_data = {}
|
|
86
|
+
else:
|
|
87
|
+
self._cache_data = {}
|
|
88
|
+
|
|
89
|
+
def _save_pickle(self):
|
|
90
|
+
"""保存 pickle 数据"""
|
|
91
|
+
import pickle
|
|
92
|
+
with open(self.db_path, 'wb') as f:
|
|
93
|
+
self.pickle.dump(self._cache_data, f)
|
|
94
|
+
|
|
95
|
+
def _extract_key_parts(self, key: str) -> Tuple[str, str, str, str, str]:
|
|
96
|
+
"""
|
|
97
|
+
从完整 key 中提取各部分
|
|
98
|
+
|
|
99
|
+
Args:
|
|
100
|
+
key: 完整 key,格式如 "symbol:sdate:edate:freq:days:fq"
|
|
101
|
+
|
|
102
|
+
Returns:
|
|
103
|
+
(symbol, sdate, edate, freq, fq)
|
|
104
|
+
"""
|
|
105
|
+
parts = key.split(':')
|
|
106
|
+
if len(parts) >= 6:
|
|
107
|
+
return parts[0], parts[1], parts[2], parts[3], parts[5]
|
|
108
|
+
elif len(parts) >= 4:
|
|
109
|
+
return parts[0], parts[1] if len(parts) > 1 else '', parts[2] if len(parts) > 2 else '', parts[3], parts[4] if len(parts) > 4 else 'qfq'
|
|
110
|
+
else:
|
|
111
|
+
return parts[0] if parts else '', '', '', 'day', 'qfq'
|
|
112
|
+
|
|
113
|
+
def _get_base_key(self, symbol: str, freq: str, fq: str) -> str:
|
|
114
|
+
"""生成基础 key(不包含日期)"""
|
|
115
|
+
return f"{symbol}:{freq}:{fq}"
|
|
116
|
+
|
|
117
|
+
def _parse_date(self, date_str: str) -> Optional[pd.Timestamp]:
|
|
118
|
+
"""解析日期字符串"""
|
|
119
|
+
if not date_str:
|
|
120
|
+
return None
|
|
121
|
+
try:
|
|
122
|
+
return pd.to_datetime(date_str)
|
|
123
|
+
except:
|
|
124
|
+
return None
|
|
125
|
+
|
|
126
|
+
def _get_dataframe_date_range(self, df: pd.DataFrame) -> Tuple[Optional[pd.Timestamp], Optional[pd.Timestamp]]:
|
|
127
|
+
"""获取 DataFrame 的日期范围"""
|
|
128
|
+
if df.empty or not isinstance(df.index, pd.DatetimeIndex):
|
|
129
|
+
return None, None
|
|
130
|
+
return df.index.min(), df.index.max()
|
|
131
|
+
|
|
132
|
+
def _filter_dataframe_by_date(self, df: pd.DataFrame, sdate: Optional[str] = None,
|
|
133
|
+
edate: Optional[str] = None) -> pd.DataFrame:
|
|
134
|
+
"""根据日期范围过滤 DataFrame"""
|
|
135
|
+
if df.empty:
|
|
136
|
+
return df
|
|
137
|
+
|
|
138
|
+
if not isinstance(df.index, pd.DatetimeIndex):
|
|
139
|
+
return df
|
|
140
|
+
|
|
141
|
+
start_date = self._parse_date(sdate) if sdate else None
|
|
142
|
+
end_date = self._parse_date(edate) if edate else None
|
|
143
|
+
|
|
144
|
+
if start_date is not None and end_date is not None:
|
|
145
|
+
mask = (df.index >= start_date) & (df.index <= end_date)
|
|
146
|
+
return df[mask]
|
|
147
|
+
elif start_date is not None:
|
|
148
|
+
return df[df.index >= start_date]
|
|
149
|
+
elif end_date is not None:
|
|
150
|
+
return df[df.index <= end_date]
|
|
151
|
+
else:
|
|
152
|
+
return df
|
|
153
|
+
|
|
154
|
+
def _merge_dataframes(self, df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
|
|
155
|
+
"""合并两个 DataFrame,去重并排序"""
|
|
156
|
+
if df1.empty:
|
|
157
|
+
return df2
|
|
158
|
+
if df2.empty:
|
|
159
|
+
return df1
|
|
160
|
+
|
|
161
|
+
# 合并并去重
|
|
162
|
+
combined = pd.concat([df1, df2])
|
|
163
|
+
combined = combined[~combined.index.duplicated(keep='last')]
|
|
164
|
+
combined = combined.sort_index()
|
|
165
|
+
return combined
|
|
166
|
+
|
|
167
|
+
def get(self, key: str) -> Optional[Any]:
|
|
168
|
+
"""
|
|
169
|
+
获取缓存数据
|
|
170
|
+
|
|
171
|
+
Args:
|
|
172
|
+
key: 缓存 key,格式如 "symbol:sdate:edate:freq:days:fq"
|
|
173
|
+
|
|
174
|
+
Returns:
|
|
175
|
+
(symbol, name, DataFrame) 或 None
|
|
176
|
+
"""
|
|
177
|
+
symbol, sdate, edate, freq, fq = self._extract_key_parts(key)
|
|
178
|
+
base_key = self._get_base_key(symbol, freq, fq)
|
|
179
|
+
|
|
180
|
+
if self.use_duckdb:
|
|
181
|
+
return self._get_duckdb(base_key, symbol, sdate, edate, freq, fq)
|
|
182
|
+
else:
|
|
183
|
+
return self._get_pickle(base_key, symbol, sdate, edate, freq, fq)
|
|
184
|
+
|
|
185
|
+
def _get_duckdb(self, base_key: str, symbol: str, sdate: str, edate: str,
|
|
186
|
+
freq: str, fq: str) -> Optional[Tuple[str, str, pd.DataFrame]]:
|
|
187
|
+
"""从 duckdb 获取数据"""
|
|
188
|
+
result = self.conn.execute("""
|
|
189
|
+
SELECT name, data, earliest_date, latest_date, expire_at
|
|
190
|
+
FROM cache_data
|
|
191
|
+
WHERE cache_key = ?
|
|
192
|
+
""", [base_key]).fetchone()
|
|
193
|
+
|
|
194
|
+
if not result:
|
|
195
|
+
return None
|
|
196
|
+
|
|
197
|
+
name, data_blob, earliest_date, latest_date, expire_at = result
|
|
198
|
+
|
|
199
|
+
# 检查过期
|
|
200
|
+
if self.ttl and expire_at:
|
|
201
|
+
expire_ts = pd.to_datetime(expire_at)
|
|
202
|
+
if pd.Timestamp.now() > expire_ts:
|
|
203
|
+
self.delete(base_key)
|
|
204
|
+
return None
|
|
205
|
+
|
|
206
|
+
# 反序列化 DataFrame
|
|
207
|
+
import pickle
|
|
208
|
+
df = pickle.loads(data_blob)
|
|
209
|
+
|
|
210
|
+
# 获取缓存数据的日期范围
|
|
211
|
+
cached_earliest = self._parse_date(earliest_date)
|
|
212
|
+
cached_latest = self._parse_date(latest_date)
|
|
213
|
+
|
|
214
|
+
# 如果请求的日期范围完全在缓存范围内,直接返回过滤后的数据
|
|
215
|
+
request_sdate = self._parse_date(sdate) if sdate else None
|
|
216
|
+
request_edate = self._parse_date(edate) if edate else None
|
|
217
|
+
|
|
218
|
+
# 检查是否有重叠
|
|
219
|
+
if request_edate and cached_earliest and request_edate < cached_earliest:
|
|
220
|
+
# 请求的结束日期早于缓存的最早日期,无重叠
|
|
221
|
+
return None
|
|
222
|
+
if request_sdate and cached_latest and request_sdate > cached_latest:
|
|
223
|
+
# 请求的开始日期晚于缓存的最晚日期,无重叠
|
|
224
|
+
return None
|
|
225
|
+
|
|
226
|
+
# 有重叠,返回缓存中可用的部分数据
|
|
227
|
+
# 计算实际可用的日期范围
|
|
228
|
+
actual_sdate = max(request_sdate, cached_earliest) if request_sdate and cached_earliest else (request_sdate or cached_earliest)
|
|
229
|
+
actual_edate = min(request_edate, cached_latest) if request_edate and cached_latest else (request_edate or cached_latest)
|
|
230
|
+
|
|
231
|
+
# 过滤数据
|
|
232
|
+
filtered_df = self._filter_dataframe_by_date(
|
|
233
|
+
df,
|
|
234
|
+
actual_sdate.strftime('%Y-%m-%d') if actual_sdate else None,
|
|
235
|
+
actual_edate.strftime('%Y-%m-%d') if actual_edate else None
|
|
236
|
+
)
|
|
237
|
+
|
|
238
|
+
if filtered_df.empty:
|
|
239
|
+
return None
|
|
240
|
+
|
|
241
|
+
return (symbol, name, filtered_df)
|
|
242
|
+
|
|
243
|
+
def _get_pickle(self, base_key: str, symbol: str, sdate: str, edate: str,
|
|
244
|
+
freq: str, fq: str) -> Optional[Tuple[str, str, pd.DataFrame]]:
|
|
245
|
+
"""从 pickle 文件获取数据"""
|
|
246
|
+
if base_key not in self._cache_data:
|
|
247
|
+
return None
|
|
248
|
+
|
|
249
|
+
cache_entry = self._cache_data[base_key]
|
|
250
|
+
|
|
251
|
+
# 检查过期
|
|
252
|
+
if self.ttl and 'expire_at' in cache_entry:
|
|
253
|
+
expire_ts = cache_entry['expire_at']
|
|
254
|
+
if pd.Timestamp.now() > expire_ts:
|
|
255
|
+
del self._cache_data[base_key]
|
|
256
|
+
self._save_pickle()
|
|
257
|
+
return None
|
|
258
|
+
|
|
259
|
+
df = cache_entry['data']
|
|
260
|
+
name = cache_entry.get('name', '')
|
|
261
|
+
earliest_date = cache_entry.get('earliest_date')
|
|
262
|
+
latest_date = cache_entry.get('latest_date')
|
|
263
|
+
|
|
264
|
+
# 获取缓存数据的日期范围
|
|
265
|
+
cached_earliest = self._parse_date(earliest_date)
|
|
266
|
+
cached_latest = self._parse_date(latest_date)
|
|
267
|
+
|
|
268
|
+
# 如果请求的日期范围完全在缓存范围内,直接返回过滤后的数据
|
|
269
|
+
request_sdate = self._parse_date(sdate) if sdate else None
|
|
270
|
+
request_edate = self._parse_date(edate) if edate else None
|
|
271
|
+
|
|
272
|
+
# 检查是否有重叠
|
|
273
|
+
if request_edate and cached_earliest and request_edate < cached_earliest:
|
|
274
|
+
# 请求的结束日期早于缓存的最早日期,无重叠
|
|
275
|
+
return None
|
|
276
|
+
if request_sdate and cached_latest and request_sdate > cached_latest:
|
|
277
|
+
# 请求的开始日期晚于缓存的最晚日期,无重叠
|
|
278
|
+
return None
|
|
279
|
+
|
|
280
|
+
# 有重叠,返回缓存中可用的部分数据
|
|
281
|
+
# 计算实际可用的日期范围
|
|
282
|
+
actual_sdate = max(request_sdate, cached_earliest) if request_sdate and cached_earliest else (request_sdate or cached_earliest)
|
|
283
|
+
actual_edate = min(request_edate, cached_latest) if request_edate and cached_latest else (request_edate or cached_latest)
|
|
284
|
+
|
|
285
|
+
# 过滤数据
|
|
286
|
+
filtered_df = self._filter_dataframe_by_date(
|
|
287
|
+
df,
|
|
288
|
+
actual_sdate.strftime('%Y-%m-%d') if actual_sdate else None,
|
|
289
|
+
actual_edate.strftime('%Y-%m-%d') if actual_edate else None
|
|
290
|
+
)
|
|
291
|
+
|
|
292
|
+
if filtered_df.empty:
|
|
293
|
+
return None
|
|
294
|
+
|
|
295
|
+
return (symbol, name, filtered_df)
|
|
296
|
+
|
|
297
|
+
def put(self, key: str, value: Any, ttl: Optional[int] = None) -> None:
|
|
298
|
+
"""
|
|
299
|
+
存储缓存数据
|
|
300
|
+
|
|
301
|
+
Args:
|
|
302
|
+
key: 缓存 key
|
|
303
|
+
value: (symbol, name, DataFrame) 元组
|
|
304
|
+
ttl: 过期时间(秒)
|
|
305
|
+
"""
|
|
306
|
+
if not isinstance(value, tuple) or len(value) != 3:
|
|
307
|
+
return
|
|
308
|
+
|
|
309
|
+
symbol, name, df = value
|
|
310
|
+
if not isinstance(df, pd.DataFrame) or df.empty:
|
|
311
|
+
return
|
|
312
|
+
|
|
313
|
+
_, _, _, freq, fq = self._extract_key_parts(key)
|
|
314
|
+
base_key = self._get_base_key(symbol, freq, fq)
|
|
315
|
+
|
|
316
|
+
# 尝试从基础 key 获取完整数据并合并
|
|
317
|
+
existing = self._get_raw(base_key)
|
|
318
|
+
if existing:
|
|
319
|
+
_, existing_name, existing_df = existing
|
|
320
|
+
# 使用新数据的 name(如果有)
|
|
321
|
+
if not name:
|
|
322
|
+
name = existing_name
|
|
323
|
+
# 合并数据
|
|
324
|
+
df = self._merge_dataframes(existing_df, df)
|
|
325
|
+
|
|
326
|
+
# 获取日期范围
|
|
327
|
+
earliest_date, latest_date = self._get_dataframe_date_range(df)
|
|
328
|
+
earliest_str = earliest_date.strftime('%Y-%m-%d') if earliest_date else None
|
|
329
|
+
latest_str = latest_date.strftime('%Y-%m-%d') if latest_date else None
|
|
330
|
+
|
|
331
|
+
# 计算过期时间
|
|
332
|
+
expire_at = None
|
|
333
|
+
if ttl or self.ttl:
|
|
334
|
+
expire_seconds = (ttl or self.ttl)
|
|
335
|
+
expire_at = pd.Timestamp.now() + pd.Timedelta(seconds=expire_seconds)
|
|
336
|
+
|
|
337
|
+
if self.use_duckdb:
|
|
338
|
+
self._put_duckdb(base_key, symbol, name, df, earliest_str, latest_str, freq, fq, expire_at)
|
|
339
|
+
else:
|
|
340
|
+
self._put_pickle(base_key, symbol, name, df, earliest_str, latest_str, freq, fq, expire_at)
|
|
341
|
+
|
|
342
|
+
def _get_raw(self, base_key: str) -> Optional[Tuple[str, str, pd.DataFrame]]:
|
|
343
|
+
"""获取原始数据(不进行日期过滤)"""
|
|
344
|
+
if self.use_duckdb:
|
|
345
|
+
result = self.conn.execute("""
|
|
346
|
+
SELECT name, data
|
|
347
|
+
FROM cache_data
|
|
348
|
+
WHERE cache_key = ?
|
|
349
|
+
""", [base_key]).fetchone()
|
|
350
|
+
|
|
351
|
+
if not result:
|
|
352
|
+
return None
|
|
353
|
+
|
|
354
|
+
import pickle
|
|
355
|
+
df = pickle.loads(result[1])
|
|
356
|
+
return (base_key.split(':')[0], result[0], df)
|
|
357
|
+
else:
|
|
358
|
+
if base_key not in self._cache_data:
|
|
359
|
+
return None
|
|
360
|
+
cache_entry = self._cache_data[base_key]
|
|
361
|
+
return (base_key.split(':')[0], cache_entry.get('name', ''), cache_entry['data'])
|
|
362
|
+
|
|
363
|
+
def _put_duckdb(self, base_key: str, symbol: str, name: str, df: pd.DataFrame,
|
|
364
|
+
earliest_date: Optional[str], latest_date: Optional[str],
|
|
365
|
+
freq: str, fq: str, expire_at: Optional[pd.Timestamp]):
|
|
366
|
+
"""存储到 duckdb"""
|
|
367
|
+
import pickle
|
|
368
|
+
data_blob = pickle.dumps(df)
|
|
369
|
+
|
|
370
|
+
self.conn.execute("""
|
|
371
|
+
INSERT OR REPLACE INTO cache_data
|
|
372
|
+
(cache_key, symbol, name, data, earliest_date, latest_date, freq, fq, updated_at, expire_at)
|
|
373
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
374
|
+
""", [base_key, symbol, name, data_blob, earliest_date, latest_date, freq, fq,
|
|
375
|
+
pd.Timestamp.now(), expire_at])
|
|
376
|
+
self.conn.commit()
|
|
377
|
+
|
|
378
|
+
def _put_pickle(self, base_key: str, symbol: str, name: str, df: pd.DataFrame,
|
|
379
|
+
earliest_date: Optional[str], latest_date: Optional[str],
|
|
380
|
+
freq: str, fq: str, expire_at: Optional[pd.Timestamp]):
|
|
381
|
+
"""存储到 pickle 文件"""
|
|
382
|
+
self._cache_data[base_key] = {
|
|
383
|
+
'symbol': symbol,
|
|
384
|
+
'name': name,
|
|
385
|
+
'data': df,
|
|
386
|
+
'earliest_date': earliest_date,
|
|
387
|
+
'latest_date': latest_date,
|
|
388
|
+
'freq': freq,
|
|
389
|
+
'fq': fq,
|
|
390
|
+
'updated_at': pd.Timestamp.now(),
|
|
391
|
+
'expire_at': expire_at
|
|
392
|
+
}
|
|
393
|
+
self._save_pickle()
|
|
394
|
+
|
|
395
|
+
def delete(self, key: str) -> None:
|
|
396
|
+
"""删除缓存"""
|
|
397
|
+
symbol, _, _, freq, fq = self._extract_key_parts(key)
|
|
398
|
+
base_key = self._get_base_key(symbol, freq, fq)
|
|
399
|
+
|
|
400
|
+
if self.use_duckdb:
|
|
401
|
+
self.conn.execute("DELETE FROM cache_data WHERE cache_key = ?", [base_key])
|
|
402
|
+
self.conn.commit()
|
|
403
|
+
else:
|
|
404
|
+
if base_key in self._cache_data:
|
|
405
|
+
del self._cache_data[base_key]
|
|
406
|
+
self._save_pickle()
|
|
407
|
+
|
|
408
|
+
def clear(self) -> None:
|
|
409
|
+
"""清空所有缓存"""
|
|
410
|
+
if self.use_duckdb:
|
|
411
|
+
self.conn.execute("DELETE FROM cache_data")
|
|
412
|
+
self.conn.commit()
|
|
413
|
+
else:
|
|
414
|
+
self._cache_data.clear()
|
|
415
|
+
self._save_pickle()
|
|
416
|
+
|
|
417
|
+
def close(self):
|
|
418
|
+
"""关闭连接"""
|
|
419
|
+
if self.use_duckdb:
|
|
420
|
+
self.conn.close()
|
|
421
|
+
|