deeptrade-quant 0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deeptrade/__init__.py +8 -0
- deeptrade/channels_builtin/__init__.py +0 -0
- deeptrade/channels_builtin/stdout/__init__.py +0 -0
- deeptrade/channels_builtin/stdout/deeptrade_plugin.yaml +25 -0
- deeptrade/channels_builtin/stdout/migrations/20260429_001_init.sql +13 -0
- deeptrade/channels_builtin/stdout/stdout_channel/__init__.py +0 -0
- deeptrade/channels_builtin/stdout/stdout_channel/channel.py +180 -0
- deeptrade/cli.py +214 -0
- deeptrade/cli_config.py +396 -0
- deeptrade/cli_data.py +33 -0
- deeptrade/cli_plugin.py +176 -0
- deeptrade/core/__init__.py +8 -0
- deeptrade/core/config.py +344 -0
- deeptrade/core/config_migrations.py +138 -0
- deeptrade/core/db.py +176 -0
- deeptrade/core/llm_client.py +591 -0
- deeptrade/core/llm_manager.py +174 -0
- deeptrade/core/logging_config.py +61 -0
- deeptrade/core/migrations/__init__.py +0 -0
- deeptrade/core/migrations/core/20260427_001_init.sql +121 -0
- deeptrade/core/migrations/core/20260501_002_drop_llm_calls_stage.sql +10 -0
- deeptrade/core/migrations/core/__init__.py +0 -0
- deeptrade/core/notifier.py +302 -0
- deeptrade/core/paths.py +49 -0
- deeptrade/core/plugin_manager.py +616 -0
- deeptrade/core/run_status.py +29 -0
- deeptrade/core/secrets.py +152 -0
- deeptrade/core/tushare_client.py +824 -0
- deeptrade/plugins_api/__init__.py +44 -0
- deeptrade/plugins_api/base.py +66 -0
- deeptrade/plugins_api/channel.py +42 -0
- deeptrade/plugins_api/events.py +61 -0
- deeptrade/plugins_api/llm.py +46 -0
- deeptrade/plugins_api/metadata.py +84 -0
- deeptrade/plugins_api/notify.py +67 -0
- deeptrade/strategies_builtin/__init__.py +0 -0
- deeptrade/strategies_builtin/limit_up_board/__init__.py +0 -0
- deeptrade/strategies_builtin/limit_up_board/deeptrade_plugin.yaml +101 -0
- deeptrade/strategies_builtin/limit_up_board/limit_up_board/__init__.py +0 -0
- deeptrade/strategies_builtin/limit_up_board/limit_up_board/calendar.py +65 -0
- deeptrade/strategies_builtin/limit_up_board/limit_up_board/cli.py +269 -0
- deeptrade/strategies_builtin/limit_up_board/limit_up_board/config.py +76 -0
- deeptrade/strategies_builtin/limit_up_board/limit_up_board/data.py +1191 -0
- deeptrade/strategies_builtin/limit_up_board/limit_up_board/pipeline.py +869 -0
- deeptrade/strategies_builtin/limit_up_board/limit_up_board/plugin.py +30 -0
- deeptrade/strategies_builtin/limit_up_board/limit_up_board/profiles.py +85 -0
- deeptrade/strategies_builtin/limit_up_board/limit_up_board/prompts.py +485 -0
- deeptrade/strategies_builtin/limit_up_board/limit_up_board/render.py +890 -0
- deeptrade/strategies_builtin/limit_up_board/limit_up_board/runner.py +1087 -0
- deeptrade/strategies_builtin/limit_up_board/limit_up_board/runtime.py +172 -0
- deeptrade/strategies_builtin/limit_up_board/limit_up_board/schemas.py +178 -0
- deeptrade/strategies_builtin/limit_up_board/migrations/20260430_001_init.sql +150 -0
- deeptrade/strategies_builtin/limit_up_board/migrations/20260501_002_lub_stage_results_llm_provider.sql +8 -0
- deeptrade/strategies_builtin/limit_up_board/migrations/20260508_001_lub_lhb_tables.sql +36 -0
- deeptrade/strategies_builtin/limit_up_board/migrations/20260508_002_lub_cyq_perf.sql +18 -0
- deeptrade/strategies_builtin/limit_up_board/migrations/20260508_003_lub_lhb_pk_fix.sql +46 -0
- deeptrade/strategies_builtin/limit_up_board/migrations/20260508_004_lub_lhb_drop_pk.sql +53 -0
- deeptrade/strategies_builtin/limit_up_board/migrations/20260508_005_lub_config.sql +17 -0
- deeptrade/strategies_builtin/volume_anomaly/__init__.py +0 -0
- deeptrade/strategies_builtin/volume_anomaly/deeptrade_plugin.yaml +59 -0
- deeptrade/strategies_builtin/volume_anomaly/migrations/20260430_001_init.sql +94 -0
- deeptrade/strategies_builtin/volume_anomaly/migrations/20260601_001_realized_returns.sql +44 -0
- deeptrade/strategies_builtin/volume_anomaly/migrations/20260601_002_dimension_scores.sql +13 -0
- deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/__init__.py +0 -0
- deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/calendar.py +52 -0
- deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/cli.py +247 -0
- deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/data.py +2154 -0
- deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/pipeline.py +327 -0
- deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/plugin.py +22 -0
- deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/profiles.py +49 -0
- deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/prompts.py +187 -0
- deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/prompts_examples.py +84 -0
- deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/render.py +906 -0
- deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/runner.py +772 -0
- deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/runtime.py +90 -0
- deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/schemas.py +97 -0
- deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/stats.py +174 -0
- deeptrade/theme.py +48 -0
- deeptrade_quant-0.0.2.dist-info/METADATA +166 -0
- deeptrade_quant-0.0.2.dist-info/RECORD +83 -0
- deeptrade_quant-0.0.2.dist-info/WHEEL +4 -0
- deeptrade_quant-0.0.2.dist-info/entry_points.txt +2 -0
- deeptrade_quant-0.0.2.dist-info/licenses/LICENSE +21 -0
deeptrade/core/tushare_client.py

@@ -0,0 +1,824 @@
"""Tushare client with rate limiting, per-plugin caching, and intraday isolation.

Per-plugin scoping (data isolation model): every TushareClient instance is
bound to a single ``plugin_id``; ``tushare_sync_state``, ``tushare_calls``,
and ``tushare_cache_blob`` rows are all scoped by ``plugin_id``. Plugins do
NOT share cached payloads with each other — even if two plugins call the
same API for the same trade_date, they each maintain their own cache row.

The framework reserves the synthetic ``plugin_id == "__framework__"`` for
its own connectivity tests (``deeptrade config test``).

Cache class buckets:
- static              : api_name × '*'        ; 7d TTL
- trade_day_immutable : api_name × trade_date ; never refetch when ok
- trade_day_mutable   : api_name × trade_date ; allow T/T+1 refetch
- hot_or_anns         : api_name × trade_date ; configurable TTL
"""

from __future__ import annotations

import hashlib
import io
import json
import logging
import threading
import time
from abc import ABC, abstractmethod
from collections.abc import Callable
from dataclasses import dataclass
from datetime import datetime, timedelta
from typing import Any, Literal

import pandas as pd
from tenacity import (
    retry,
    retry_if_exception_type,
    stop_after_attempt,
    wait_exponential,
)

from deeptrade.core.db import Database

logger = logging.getLogger(__name__)


# ---------------------------------------------------------------------------
# Cache class taxonomy
# ---------------------------------------------------------------------------

CacheClass = Literal["static", "trade_day_immutable", "trade_day_mutable", "hot_or_anns"]

# Per-API cache class assignment (DESIGN §11.1 + §11.2 lists)
API_CACHE_CLASS: dict[str, CacheClass] = {
    # static
    "stock_basic": "static",
    "trade_cal": "static",
    # trade_day_immutable (settled at end of day, never revised)
    "daily": "trade_day_immutable",
    "limit_list_d": "trade_day_immutable",
    "limit_list_ths": "trade_day_immutable",
    "limit_step": "trade_day_immutable",
    "limit_cpt_list": "trade_day_immutable",
    "stock_st": "trade_day_immutable",
    "top_list": "trade_day_immutable",
    "top_inst": "trade_day_immutable",
    "stk_limit": "trade_day_immutable",
    "stk_auction_o": "trade_day_immutable",
    "suspend_d": "trade_day_immutable",
    "adj_factor": "trade_day_immutable",
    # trade_day_mutable (occasional T+1/T+2 corrections)
    "moneyflow": "trade_day_mutable",
    "moneyflow_ths": "trade_day_mutable",
    "daily_basic": "trade_day_mutable",
    # hot_or_anns (TTL-based)
    "ths_hot": "hot_or_anns",
    "dc_hot": "hot_or_anns",
    "anns_d": "hot_or_anns",
    "news": "hot_or_anns",
}

# Per-API "wide" fields list pushed to the transport so the on-disk cache
# always contains every column downstream code expects. Without this, tushare's
# default field subset for some APIs (notably stock_basic, which omits
# market/exchange/list_status) silently corrupts the cache and any READ-time
# field projection can never recover the missing columns.
# Add an entry whenever the default subset misses something the strategies need.
WIDE_FIELDS: dict[str, str] = {
    "stock_basic": (
        "ts_code,symbol,name,area,industry,fullname,enname,market,exchange,"
        "curr_type,list_status,list_date,delist_date,is_hs,act_name,act_ent_type"
    ),
}

# APIs whose intraday data is unstable; --allow-intraday triggers data_completeness='intraday'
INTRADAY_SENSITIVE_APIS: frozenset[str] = frozenset(
    {
        "limit_list_d",
        "limit_list_ths",
        "limit_step",
        "limit_cpt_list",
        "moneyflow",
        "moneyflow_ths",
        "daily",
        "daily_basic",
    }
)

# Default TTL for hot_or_anns class
DEFAULT_HOT_TTL_SECONDS = 6 * 3600

# Default TTL for static class
STATIC_TTL_SECONDS = 7 * 24 * 3600
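
# Editorial worked example (a sketch, not part of the packaged file). The
# taxonomy above is what TushareClient consults on every read; unlisted APIs
# default to "trade_day_immutable" inside call():
#
#   API_CACHE_CLASS.get("daily")                          -> "trade_day_immutable"
#   API_CACHE_CLASS.get("moneyflow")                      -> "trade_day_mutable"
#   API_CACHE_CLASS.get("news")                           -> "hot_or_anns"
#   API_CACHE_CLASS.get("unknown", "trade_day_immutable") -> "trade_day_immutable"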


# ---------------------------------------------------------------------------
# Errors
# ---------------------------------------------------------------------------


class TushareError(Exception):
    """Base error from TushareClient."""


class TushareUnauthorizedError(TushareError):
    """Tushare reports the user lacks permission for this API."""


class TushareRateLimitError(TushareError):
    """HTTP 429 / equivalent — caller should slow down."""


class TushareServerError(TushareError):
    """5xx / transient transport error — eligible for retry."""


# ---------------------------------------------------------------------------
# Transport abstraction
# ---------------------------------------------------------------------------


class TushareTransport(ABC):
    """Abstract carrier for Tushare API calls. Production = SDK; tests = fixtures."""

    @abstractmethod
    def call(self, api_name: str, params: dict[str, Any], fields: str | None) -> pd.DataFrame:
        """Execute a single API call. Raise the typed error subclass on failure."""


class TushareSDKTransport(TushareTransport):
    """Production transport — wraps tushare.pro_api()."""

    def __init__(self, token: str) -> None:
        import tushare as ts  # noqa: PLC0415 — defer import to avoid hard dep at module load

        self._pro = ts.pro_api(token)

    def call(self, api_name: str, params: dict[str, Any], fields: str | None) -> pd.DataFrame:
        try:
            method = getattr(self._pro, api_name)
        except AttributeError as e:
            raise TushareError(f"unknown tushare api: {api_name}") from e

        kwargs = dict(params)
        if fields:
            kwargs["fields"] = fields
        try:
            df = method(**kwargs)
        except Exception as e:  # noqa: BLE001 — translate SDK errors uniformly
            msg = str(e)
            low = msg.lower()
            if "权限" in msg or "permission" in low or "未开通" in msg or "no permission" in low:
                raise TushareUnauthorizedError(msg) from e
            if "频率" in msg or "rate" in low or "429" in msg:
                raise TushareRateLimitError(msg) from e
            if "5" in msg[:3] or "timeout" in low or "connection" in low:
                raise TushareServerError(msg) from e
            raise TushareError(msg) from e
        if df is None:
            return pd.DataFrame()
        return df


class FixtureTransport(TushareTransport):
    """Test transport — replays canned DataFrames keyed by (api, params)."""

    def __init__(self) -> None:
        self._fixtures: dict[str, pd.DataFrame | Exception] = {}
        self.calls: list[tuple[str, dict[str, Any]]] = []  # call audit log

    def register(
        self,
        api_name: str,
        result: pd.DataFrame | Exception,
        params: dict[str, Any] | None = None,
    ) -> None:
        key = self._key(api_name, params or {})
        self._fixtures[key] = result

    def call(self, api_name: str, params: dict[str, Any], fields: str | None) -> pd.DataFrame:
        self.calls.append((api_name, dict(params)))
        key = self._key(api_name, params)
        if key in self._fixtures:
            entry = self._fixtures[key]
        else:
            # fallback: any matching api_name (lets tests register without exact param match)
            for k, v in self._fixtures.items():
                if k.startswith(api_name + "|"):
                    entry = v
                    break
            else:
                raise TushareError(f"no fixture registered for {api_name} {params}")
        if isinstance(entry, Exception):
            raise entry
        return (
            entry.copy()
            if fields is None
            else entry[[c.strip() for c in fields.split(",") if c.strip() in entry.columns]].copy()
        )

    @staticmethod
    def _key(api_name: str, params: dict[str, Any]) -> str:
        body = json.dumps(params, sort_keys=True, default=str)
        return f"{api_name}|{body}"


# ---------------------------------------------------------------------------
# Token-bucket rate limiter
# ---------------------------------------------------------------------------


class _TokenBucket:
    def __init__(self, rps: float) -> None:
        self.rps = max(rps, 0.1)
        self._tokens = self.rps
        self._last = time.monotonic()
        self._lock = threading.Lock()

    def acquire(self) -> None:
        with self._lock:
            now = time.monotonic()
            self._tokens = min(self.rps, self._tokens + (now - self._last) * self.rps)
            self._last = now
            if self._tokens >= 1.0:
                self._tokens -= 1.0
                return
            need = (1.0 - self._tokens) / self.rps
        time.sleep(need)
        # reacquire
        with self._lock:
            self._tokens = max(0.0, self._tokens - 1.0)
            self._last = time.monotonic()

    def decay(self, factor: float = 0.5) -> None:
        with self._lock:
            self.rps = max(self.rps * factor, 0.1)
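
# Editorial note on the refill arithmetic (not part of the packaged file).
# The bucket refills continuously at `rps` tokens/second, capped at `rps`:
#
#   tokens' = min(rps, tokens + elapsed * rps)
#
# With rps=6 and an empty bucket, acquire() sleeps (1 - 0) / 6 ≈ 0.167 s before
# proceeding, i.e. roughly 6 calls/second in steady state; decay(0.5) halves
# rps after each rate-limit response, never dropping below 0.1.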


# ---------------------------------------------------------------------------
# Sync state record (mirrors tushare_sync_state row)
# ---------------------------------------------------------------------------


@dataclass
class SyncState:
    plugin_id: str
    api_name: str
    trade_date: str
    status: str  # ok | partial | failed | unauthorized
    row_count: int | None
    cache_class: CacheClass
    ttl_seconds: int | None
    data_completeness: str  # 'final' | 'intraday'
    synced_at: datetime


# Synthetic plugin_id for framework-level connectivity tests
# (deeptrade config test). Real plugin_ids cannot match this pattern (the
# Pydantic regex requires lowercase alnum + hyphen, no underscores).
FRAMEWORK_PLUGIN_ID: str = "__framework__"


# ---------------------------------------------------------------------------
# Client
# ---------------------------------------------------------------------------


class TushareClient:
    """Cache-aware Tushare client. Bound to a single ``plugin_id``.

    Args:
        db: Open Database instance (for sync_state / calls / cached frames).
        transport: TushareTransport (real or fixture).
        plugin_id: scopes every cached row / audit row / sync-state row.
            Use ``FRAMEWORK_PLUGIN_ID`` for framework-level probes.
        rps: initial token-bucket rate (decays on 429).
        intraday: if True, all writes for INTRADAY_SENSITIVE_APIS get
            data_completeness='intraday'; reads will only accept matching
            completeness.
        event_cb: optional callback for surfacing operationally-relevant
            tushare events (5xx fallback, etc.) to the caller. Signature
            ``event_cb(event_type, message, payload_dict)``. Kept as
            plain strings to avoid plugins_api imports.
    """

    def __init__(
        self,
        db: Database,
        transport: TushareTransport,
        *,
        plugin_id: str,
        rps: float = 6.0,
        intraday: bool = False,
        event_cb: Callable[[str, str, dict[str, Any]], None] | None = None,
    ) -> None:
        self._db = db
        self._transport = transport
        self._plugin_id = plugin_id
        self._bucket = _TokenBucket(rps)
        self._intraday = intraday
        self._event_cb = event_cb

    @property
    def plugin_id(self) -> str:
        return self._plugin_id

    @property
    def is_intraday(self) -> bool:
        return self._intraday

    @property
    def rps(self) -> float:
        return self._bucket.rps

    # --- B2.3 — materialize tushare frames into named business tables --------

    def materialize(
        self,
        table_name: str,
        df: pd.DataFrame,
        *,
        key_cols: list[str] | None = None,
    ) -> int:
        """Upsert ``df`` into the named DuckDB table (must already exist).

        Used by strategies to persist tushare returns into core shared tables
        (``stock_basic`` / ``daily`` / ``daily_basic``) and plugin tables
        (``lub_limit_list_d``, ``lub_limit_ths``, ...) — the addresses where
        DESIGN says the data should land, not just in ``tushare_cache_blob``.

        Strategy:
        - For idempotency, when ``key_cols`` is given, DELETE rows whose
          (key_cols) appear in ``df`` first, then INSERT.
        - Without ``key_cols``, INSERT only (caller responsibility).

        Returns the row count written.
        """
        if df is None or df.empty:
            return 0

        # Verify the table exists; if not, refuse — the strategy plugin (or core
        # migrations) should have created it.
        existing_tables = {
            r[0]
            for r in self._db.fetchall(
                "SELECT table_name FROM information_schema.tables WHERE table_schema='main'"
            )
        }
        if table_name not in existing_tables:
            raise TushareError(f"materialize target table {table_name!r} does not exist")

        # Discover destination columns to safely down-select df
        dest_cols = [
            r[0]
            for r in self._db.fetchall(
                "SELECT column_name FROM information_schema.columns "
                "WHERE table_name = ? AND table_schema='main' ORDER BY ordinal_position",
                (table_name,),
            )
        ]
        usable = [c for c in dest_cols if c in df.columns]
        if not usable:
            return 0

        df_proj = df[usable].copy()

        with self._db.transaction():
            if key_cols:
                # Build DELETE WHERE (k1, k2) IN (...)
                key_cols = [k for k in key_cols if k in df_proj.columns]
                if key_cols:
                    # Iterate rows; for small N this is fine. For large N, use
                    # temp-table-based anti-join (future optimization).
                    where = " AND ".join([f'"{k}" = ?' for k in key_cols])
                    for _, row in df_proj[key_cols].iterrows():
                        self._db.execute(
                            f'DELETE FROM "{table_name}" WHERE {where}',  # noqa: S608 — names from schema
                            tuple(row.tolist()),
                        )
            # Bulk INSERT via DuckDB's pandas integration
            self._db.conn.register("__mat_df", df_proj)
            try:
                col_list = ", ".join(f'"{c}"' for c in usable)
                self._db.execute(
                    f'INSERT INTO "{table_name}" ({col_list}) '  # noqa: S608
                    f"SELECT {col_list} FROM __mat_df"
                )
            finally:
                self._db.conn.unregister("__mat_df")
        return len(df_proj)
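
    # Editorial usage sketch (hypothetical call site; not part of the packaged
    # file). A strategy persisting one day's limit-up list into its plugin table:
    #
    #   df = client.call("limit_list_d", trade_date="20260427")
    #   written = client.materialize(
    #       "lub_limit_list_d", df, key_cols=["trade_date", "ts_code"]
    #   )
    #
    # Keying the delete on (trade_date, ts_code) makes a re-run of the same day
    # replace rows instead of duplicating them.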

    # --- public entry --------------------------------------------------

    def call(
        self,
        api_name: str,
        *,
        trade_date: str | None = None,
        params: dict[str, Any] | None = None,
        fields: str | None = None,
        force_sync: bool = False,
    ) -> pd.DataFrame:
        """Fetch from cache (if fresh) or transport. See module docstring."""
        params = dict(params or {})
        if trade_date is not None:
            params.setdefault("trade_date", trade_date)
        # F-C1 fix — discriminating cache_key_date that captures windows too,
        # so that daily(start=A,end=B) and daily(start=C,end=D) live in
        # different cache rows even when neither passes a single trade_date.
        cache_key_date = self._compute_cache_key_date(trade_date, params)
        cache_class = API_CACHE_CLASS.get(api_name, "trade_day_immutable")

        state = self._read_state(api_name, cache_key_date)
        if not force_sync and self._cache_hit(
            state, cache_class, api_name=api_name, trade_date=cache_key_date, params=params
        ):
            df = self._read_cached(api_name, cache_key_date, params, fields=None)
        else:
            # ⚠ Bug fix: always fetch the FULL payload from upstream, never let
            # `fields=` constrain what gets cached. Otherwise a caller asking for
            # `fields="ts_code"` would poison the cache with a 1-column frame
            # that all later callers receive.
            df = self._fetch_and_store(api_name, cache_key_date, params, cache_class)

        # Apply field projection at the read site (cache stays full).
        return self._project_fields(df, fields)

    @staticmethod
    def _compute_cache_key_date(trade_date: str | None, params: dict[str, Any]) -> str:
        """Pick a cache_key_date that uniquely partitions queries by date scope.

        Priority:
        1. explicit ``trade_date`` argument or ``params['trade_date']``
        2. ``params['start_date']:params['end_date']`` window key
        3. literal '*' (parameter-less APIs like stock_basic / trade_cal)

        Combined with ``params_hash`` in the payload table, this guarantees that
        e.g. ``daily(start=20260401,end=20260410)`` and
        ``daily(start=20260420,end=20260427)`` cannot collide.
        """
        if trade_date is not None:
            return str(trade_date)
        if "trade_date" in params:
            return str(params["trade_date"])
        start = params.get("start_date")
        end = params.get("end_date")
        if start is not None and end is not None:
            return f"{start}:{end}"
        if start is not None:
            return f"{start}:"
        if end is not None:
            return f":{end}"
        return "*"

    @staticmethod
    def _project_fields(df: pd.DataFrame, fields: str | None) -> pd.DataFrame:
        if fields is None or df is None or df.empty:
            return df
        wanted = [c.strip() for c in fields.split(",") if c.strip()]
        present = [c for c in wanted if c in df.columns]
        if not present:
            return df
        return df[present].copy()

    # --- cache decisions ----------------------------------------------

    def _cache_hit(
        self,
        state: SyncState | None,
        cache_class: CacheClass,
        *,
        api_name: str | None = None,
        trade_date: str | None = None,
        params: dict[str, Any] | None = None,
    ) -> bool:
        if state is None or state.status != "ok":
            return False
        # F4: daily-mode reader rejects intraday-cached data
        if not self._intraday and state.data_completeness == "intraday":
            return False
        # B2.4: state.ok with no payload row is NOT a hit (atomicity defense)
        if api_name is not None and trade_date is not None:
            if not self._payload_exists(api_name, trade_date, params or {}):
                return False
        # By cache class
        if cache_class == "static":
            return self._is_fresh_ttl(state, STATIC_TTL_SECONDS)
        if cache_class == "trade_day_immutable":
            return True
        if cache_class == "trade_day_mutable":
            return not self._is_T_or_T_plus_1(state.trade_date)
        if cache_class == "hot_or_anns":
            ttl = state.ttl_seconds or DEFAULT_HOT_TTL_SECONDS
            return self._is_fresh_ttl(state, ttl)
        return False
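
    # Editorial summary of the branches above (not part of the packaged file).
    # Given status == 'ok', a matching payload row, and the completeness gate
    # passed, the per-class verdicts are:
    #
    #   static              -> fresh iff synced_at is within STATIC_TTL_SECONDS (7d)
    #   trade_day_immutable -> always a hit
    #   trade_day_mutable   -> hit only once trade_date is older than T+1
    #   hot_or_anns         -> fresh iff synced_at is within ttl_seconds (default 6h)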

    def _payload_exists(self, api_name: str, trade_date: str, params: dict[str, Any]) -> bool:
        """STRICT payload presence check on (plugin_id, api_name, trade_date,
        params_hash)."""
        if not self._cache_table_exists():
            return False
        body = json.dumps(params, sort_keys=True, default=str)
        h = hashlib.sha256(body.encode("utf-8")).hexdigest()
        row = self._db.fetchone(
            "SELECT 1 FROM tushare_cache_blob "
            "WHERE plugin_id = ? AND api_name = ? AND trade_date = ? AND params_hash = ? "
            "LIMIT 1",
            (self._plugin_id, api_name, trade_date, h),
        )
        return row is not None

    @staticmethod
    def _is_fresh_ttl(state: SyncState, ttl_seconds: int) -> bool:
        return (datetime.now() - state.synced_at) < timedelta(seconds=ttl_seconds)

    @staticmethod
    def _is_T_or_T_plus_1(trade_date: str) -> bool:
        if not trade_date or trade_date == "*":
            return False
        try:
            d = datetime.strptime(trade_date, "%Y%m%d").date()
        except ValueError:
            return False
        today = datetime.now().date()
        return today - d <= timedelta(days=1)

    # --- fetch + store -------------------------------------------------

    def _fetch_and_store(
        self,
        api_name: str,
        cache_key_date: str,
        params: dict[str, Any],
        cache_class: CacheClass,
    ) -> pd.DataFrame:
        try:
            # NOTE: never pass `fields=` to the transport — caching must always
            # store the full payload so different callers requesting different
            # field projections all share one cache entry.
            df = self._fetch_with_retries(api_name, params)
        except TushareUnauthorizedError as e:
            self._write_state(
                api_name,
                cache_key_date,
                "unauthorized",
                row_count=None,
                cache_class=cache_class,
            )
            self._audit_call(api_name, params, rows=0, latency_ms=0)
            raise e
        except TushareServerError as e:
            # B1.4 — 5xx fallback: try local cached payload if state allows.
            existing = self._read_state(api_name, cache_key_date)
            if can_fallback(existing, cache_key_date, is_intraday_run=self._intraday):
                logger.warning(
                    "tushare 5xx for %s @ %s; falling back to cached payload",
                    api_name,
                    cache_key_date,
                )
                cached = self._read_cached(api_name, cache_key_date, params, fields=None)
                # F-L2 — surface fallback to runner / dashboard so users can see
                # that data is being served from cache instead of fresh.
                if self._event_cb is not None:
                    try:
                        self._event_cb(
                            "tushare.fallback",
                            f"tushare 5xx; serving cached payload for {api_name}",
                            {
                                "api_name": api_name,
                                "cache_key_date": cache_key_date,
                                "row_count": len(cached),
                            },
                        )
                    except Exception:  # noqa: BLE001 — never let observers crash a fetch
                        logger.exception("event_cb raised on TUSHARE_FALLBACK")
                # Don't change sync_state — it's still 'ok' with the original payload.
                return cached
            # No usable cache → propagate; caller decides terminate (required) vs degrade (optional)
            raise e

        completeness = self._completeness_for(api_name)
        ttl = DEFAULT_HOT_TTL_SECONDS if cache_class == "hot_or_anns" else None
        # B2.4 — atomic write of state + payload so a partial write can't yield
        # "state=ok but payload missing" stale cache hits.
        with self._db.transaction():
            self._write_state(
                api_name,
                cache_key_date,
                "ok",
                row_count=len(df),
                cache_class=cache_class,
                ttl_seconds=ttl,
                data_completeness=completeness,
            )
            self._write_cached(api_name, cache_key_date, params, df)
        return df

    @retry(
        retry=retry_if_exception_type((TushareRateLimitError, TushareServerError)),
        stop=stop_after_attempt(5),
        wait=wait_exponential(multiplier=1, min=1, max=15),
        reraise=True,
    )
    def _fetch_with_retries(self, api_name: str, params: dict[str, Any]) -> pd.DataFrame:
        """Fetch the widest payload we'd ever want for this API.

        For most APIs tushare returns every column when ``fields`` is omitted,
        but a few (notably ``stock_basic``) only return a narrow default subset.
        ``WIDE_FIELDS`` overrides ``fields=`` per-API so the cache row contains
        every column downstream callers need; ``call()``'s ``_project_fields``
        narrows it back at READ time.
        """
        self._bucket.acquire()
        t0 = time.monotonic()
        try:
            df = self._transport.call(api_name, params, fields=WIDE_FIELDS.get(api_name))
        except TushareRateLimitError:
            self._bucket.decay(0.5)
            raise
        latency_ms = int((time.monotonic() - t0) * 1000)
        self._audit_call(api_name, params, rows=len(df), latency_ms=latency_ms)
        return df

    def _completeness_for(self, api_name: str) -> str:
        if self._intraday and api_name in INTRADAY_SENSITIVE_APIS:
            return "intraday"
        return "final"

    # --- DB read/write helpers ---------------------------------------

    def _read_state(self, api_name: str, trade_date: str) -> SyncState | None:
        row = self._db.fetchone(
            "SELECT plugin_id, api_name, trade_date, status, row_count, cache_class, "
            "ttl_seconds, data_completeness, synced_at FROM tushare_sync_state "
            "WHERE plugin_id = ? AND api_name = ? AND trade_date = ?",
            (self._plugin_id, api_name, trade_date),
        )
        if row is None:
            return None
        return SyncState(
            plugin_id=row[0],
            api_name=row[1],
            trade_date=row[2],
            status=row[3],
            row_count=row[4],
            cache_class=row[5],
            ttl_seconds=row[6],
            data_completeness=row[7],
            synced_at=row[8]
            if isinstance(row[8], datetime)
            else datetime.fromisoformat(str(row[8])),
        )

    def _write_state(
        self,
        api_name: str,
        trade_date: str,
        status: str,
        *,
        row_count: int | None,
        cache_class: CacheClass,
        ttl_seconds: int | None = None,
        data_completeness: str = "final",
    ) -> None:
        with self._db.transaction():
            self._db.execute(
                "DELETE FROM tushare_sync_state "
                "WHERE plugin_id = ? AND api_name = ? AND trade_date = ?",
                (self._plugin_id, api_name, trade_date),
            )
            self._db.execute(
                "INSERT INTO tushare_sync_state(plugin_id, api_name, trade_date, status, "
                "row_count, cache_class, ttl_seconds, data_completeness, synced_at) "
                "VALUES (?, ?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP)",
                (
                    self._plugin_id,
                    api_name,
                    trade_date,
                    status,
                    row_count,
                    cache_class,
                    ttl_seconds,
                    data_completeness,
                ),
            )

    def _audit_call(
        self, api_name: str, params: dict[str, Any], *, rows: int, latency_ms: int
    ) -> None:
        body = json.dumps(params, sort_keys=True, default=str)
        h = hashlib.sha256(body.encode("utf-8")).hexdigest()
        self._db.execute(
            "INSERT INTO tushare_calls(plugin_id, api_name, params_hash, rows, latency_ms) "
            "VALUES (?, ?, ?, ?, ?)",
            (self._plugin_id, api_name, h, rows, latency_ms),
        )

    # ---- cached frame storage ---------------------------------------
    # We use a generic JSON column so V0.3 doesn't require per-API tables.
    # V0.7a will overlay strategy-specific lub_* tables on top of these.

    def _cache_table_exists(self) -> bool:
        rows = self._db.fetchall(
            "SELECT table_name FROM information_schema.tables "
            "WHERE table_schema='main' AND table_name='tushare_cache_blob'"
        )
        return bool(rows)

    def _ensure_cache_table(self) -> None:
        if self._cache_table_exists():
            return
        self._db.execute(
            "CREATE TABLE IF NOT EXISTS tushare_cache_blob ("
            " plugin_id VARCHAR NOT NULL,"
            " api_name VARCHAR NOT NULL,"
            " trade_date VARCHAR NOT NULL,"
            " params_hash VARCHAR NOT NULL,"
            " payload_json VARCHAR NOT NULL,"
            " cached_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,"
            " PRIMARY KEY (plugin_id, api_name, trade_date, params_hash)"
            ")"
        )

    def _write_cached(
        self,
        api_name: str,
        trade_date: str,
        params: dict[str, Any],
        df: pd.DataFrame,
    ) -> None:
        self._ensure_cache_table()
        body = json.dumps(params, sort_keys=True, default=str)
        h = hashlib.sha256(body.encode("utf-8")).hexdigest()
        payload = df.to_json(orient="records", date_format="iso")
        with self._db.transaction():
            self._db.execute(
                "DELETE FROM tushare_cache_blob "
                "WHERE plugin_id = ? AND api_name = ? AND trade_date = ? AND params_hash = ?",
                (self._plugin_id, api_name, trade_date, h),
            )
            self._db.execute(
                "INSERT INTO tushare_cache_blob(plugin_id, api_name, trade_date, params_hash, "
                "payload_json) VALUES (?, ?, ?, ?, ?)",
                (self._plugin_id, api_name, trade_date, h, payload),
            )

    def _read_cached(
        self,
        api_name: str,
        trade_date: str,
        params: dict[str, Any],
        fields: str | None,
    ) -> pd.DataFrame:
        if not self._cache_table_exists():
            return pd.DataFrame()
        body = json.dumps(params, sort_keys=True, default=str)
        h = hashlib.sha256(body.encode("utf-8")).hexdigest()
        row = self._db.fetchone(
            "SELECT payload_json FROM tushare_cache_blob "
            "WHERE plugin_id = ? AND api_name = ? AND trade_date = ? AND params_hash = ?",
            (self._plugin_id, api_name, trade_date, h),
        )
        if row is None:
            return pd.DataFrame()
        df = pd.read_json(io.StringIO(row[0]), orient="records")
        if fields:
            cols = [c.strip() for c in fields.split(",") if c.strip() in df.columns]
            df = df[cols]
        return df


# ---------------------------------------------------------------------------
# Fallback predicate (DESIGN §13.2 + S4)
# ---------------------------------------------------------------------------


def can_fallback(
    state: SyncState | None,
    target_trade_date: str,
    *,
    is_intraday_run: bool,
) -> bool:
    """Decide if a 5xx/timeout failure may use already-cached data.

    Conditions (all required):
    - state.status == 'ok'
    - state.trade_date == target_trade_date (no nearest-day approximation)
    - cache_class != trade_day_mutable when target is T or T+1
    - row_count >= 0 (zero rows ARE valid — S4 fix)
    - data_completeness == 'final' for daily-mode runs (F4)
    """
    if state is None or state.status != "ok":
        return False
    if state.trade_date != target_trade_date:
        return False
    if state.row_count is not None and state.row_count < 0:
        return False
    if (
        state.cache_class == "trade_day_mutable"
        and TushareClient._is_T_or_T_plus_1(target_trade_date)  # noqa: SLF001 — internal helper reuse
    ):
        return False
    if not is_intraday_run and state.data_completeness == "intraday":
        return False
    return True
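
Two usage sketches (editorial additions, not part of the wheel). The first exercises can_fallback alone, which needs no database; it checks the exact-date and zero-rows-are-valid rules documented above. The second walks the full read path through FixtureTransport, assuming the core migrations have already created tushare_sync_state / tushare_calls and that Database exposes the execute / fetchone / fetchall / transaction surface used above; the "limit-up-board" plugin id is illustrative.

from datetime import datetime

import pandas as pd

from deeptrade.core.tushare_client import (
    FixtureTransport,
    SyncState,
    TushareClient,
    can_fallback,
)

# 1) Pure predicate: zero cached rows still count as a usable fallback (S4),
#    but only for the exact trade_date that was cached.
state = SyncState(
    plugin_id="limit-up-board",
    api_name="daily",
    trade_date="20260427",
    status="ok",
    row_count=0,
    cache_class="trade_day_immutable",
    ttl_seconds=None,
    data_completeness="final",
    synced_at=datetime.now(),
)
assert can_fallback(state, "20260427", is_intraday_run=False)
assert not can_fallback(state, "20260428", is_intraday_run=False)

# 2) Full read path: the second call is served from tushare_cache_blob because
#    limit_list_d is trade_day_immutable, so the transport is hit only once.
db = ...  # an open deeptrade Database with core migrations applied (assumption)
transport = FixtureTransport()
transport.register(
    "limit_list_d",
    pd.DataFrame({"ts_code": ["000001.SZ"], "trade_date": ["20260427"]}),
    params={"trade_date": "20260427"},
)
client = TushareClient(db, transport, plugin_id="limit-up-board")

df1 = client.call("limit_list_d", trade_date="20260427")
df2 = client.call("limit_list_d", trade_date="20260427", fields="ts_code")
assert list(df2.columns) == ["ts_code"]   # projection happens at read time
assert len(transport.calls) == 1          # cache hit, no second upstream call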