deeptrade-quant 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. deeptrade/__init__.py +8 -0
  2. deeptrade/channels_builtin/__init__.py +0 -0
  3. deeptrade/channels_builtin/stdout/__init__.py +0 -0
  4. deeptrade/channels_builtin/stdout/deeptrade_plugin.yaml +25 -0
  5. deeptrade/channels_builtin/stdout/migrations/20260429_001_init.sql +13 -0
  6. deeptrade/channels_builtin/stdout/stdout_channel/__init__.py +0 -0
  7. deeptrade/channels_builtin/stdout/stdout_channel/channel.py +180 -0
  8. deeptrade/cli.py +214 -0
  9. deeptrade/cli_config.py +396 -0
  10. deeptrade/cli_data.py +33 -0
  11. deeptrade/cli_plugin.py +176 -0
  12. deeptrade/core/__init__.py +8 -0
  13. deeptrade/core/config.py +344 -0
  14. deeptrade/core/config_migrations.py +138 -0
  15. deeptrade/core/db.py +176 -0
  16. deeptrade/core/llm_client.py +591 -0
  17. deeptrade/core/llm_manager.py +174 -0
  18. deeptrade/core/logging_config.py +61 -0
  19. deeptrade/core/migrations/__init__.py +0 -0
  20. deeptrade/core/migrations/core/20260427_001_init.sql +121 -0
  21. deeptrade/core/migrations/core/20260501_002_drop_llm_calls_stage.sql +10 -0
  22. deeptrade/core/migrations/core/__init__.py +0 -0
  23. deeptrade/core/notifier.py +302 -0
  24. deeptrade/core/paths.py +49 -0
  25. deeptrade/core/plugin_manager.py +616 -0
  26. deeptrade/core/run_status.py +29 -0
  27. deeptrade/core/secrets.py +152 -0
  28. deeptrade/core/tushare_client.py +824 -0
  29. deeptrade/plugins_api/__init__.py +44 -0
  30. deeptrade/plugins_api/base.py +66 -0
  31. deeptrade/plugins_api/channel.py +42 -0
  32. deeptrade/plugins_api/events.py +61 -0
  33. deeptrade/plugins_api/llm.py +46 -0
  34. deeptrade/plugins_api/metadata.py +84 -0
  35. deeptrade/plugins_api/notify.py +67 -0
  36. deeptrade/strategies_builtin/__init__.py +0 -0
  37. deeptrade/strategies_builtin/limit_up_board/__init__.py +0 -0
  38. deeptrade/strategies_builtin/limit_up_board/deeptrade_plugin.yaml +101 -0
  39. deeptrade/strategies_builtin/limit_up_board/limit_up_board/__init__.py +0 -0
  40. deeptrade/strategies_builtin/limit_up_board/limit_up_board/calendar.py +65 -0
  41. deeptrade/strategies_builtin/limit_up_board/limit_up_board/cli.py +269 -0
  42. deeptrade/strategies_builtin/limit_up_board/limit_up_board/config.py +76 -0
  43. deeptrade/strategies_builtin/limit_up_board/limit_up_board/data.py +1191 -0
  44. deeptrade/strategies_builtin/limit_up_board/limit_up_board/pipeline.py +869 -0
  45. deeptrade/strategies_builtin/limit_up_board/limit_up_board/plugin.py +30 -0
  46. deeptrade/strategies_builtin/limit_up_board/limit_up_board/profiles.py +85 -0
  47. deeptrade/strategies_builtin/limit_up_board/limit_up_board/prompts.py +485 -0
  48. deeptrade/strategies_builtin/limit_up_board/limit_up_board/render.py +890 -0
  49. deeptrade/strategies_builtin/limit_up_board/limit_up_board/runner.py +1087 -0
  50. deeptrade/strategies_builtin/limit_up_board/limit_up_board/runtime.py +172 -0
  51. deeptrade/strategies_builtin/limit_up_board/limit_up_board/schemas.py +178 -0
  52. deeptrade/strategies_builtin/limit_up_board/migrations/20260430_001_init.sql +150 -0
  53. deeptrade/strategies_builtin/limit_up_board/migrations/20260501_002_lub_stage_results_llm_provider.sql +8 -0
  54. deeptrade/strategies_builtin/limit_up_board/migrations/20260508_001_lub_lhb_tables.sql +36 -0
  55. deeptrade/strategies_builtin/limit_up_board/migrations/20260508_002_lub_cyq_perf.sql +18 -0
  56. deeptrade/strategies_builtin/limit_up_board/migrations/20260508_003_lub_lhb_pk_fix.sql +46 -0
  57. deeptrade/strategies_builtin/limit_up_board/migrations/20260508_004_lub_lhb_drop_pk.sql +53 -0
  58. deeptrade/strategies_builtin/limit_up_board/migrations/20260508_005_lub_config.sql +17 -0
  59. deeptrade/strategies_builtin/volume_anomaly/__init__.py +0 -0
  60. deeptrade/strategies_builtin/volume_anomaly/deeptrade_plugin.yaml +59 -0
  61. deeptrade/strategies_builtin/volume_anomaly/migrations/20260430_001_init.sql +94 -0
  62. deeptrade/strategies_builtin/volume_anomaly/migrations/20260601_001_realized_returns.sql +44 -0
  63. deeptrade/strategies_builtin/volume_anomaly/migrations/20260601_002_dimension_scores.sql +13 -0
  64. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/__init__.py +0 -0
  65. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/calendar.py +52 -0
  66. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/cli.py +247 -0
  67. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/data.py +2154 -0
  68. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/pipeline.py +327 -0
  69. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/plugin.py +22 -0
  70. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/profiles.py +49 -0
  71. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/prompts.py +187 -0
  72. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/prompts_examples.py +84 -0
  73. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/render.py +906 -0
  74. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/runner.py +772 -0
  75. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/runtime.py +90 -0
  76. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/schemas.py +97 -0
  77. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/stats.py +174 -0
  78. deeptrade/theme.py +48 -0
  79. deeptrade_quant-0.0.2.dist-info/METADATA +166 -0
  80. deeptrade_quant-0.0.2.dist-info/RECORD +83 -0
  81. deeptrade_quant-0.0.2.dist-info/WHEEL +4 -0
  82. deeptrade_quant-0.0.2.dist-info/entry_points.txt +2 -0
  83. deeptrade_quant-0.0.2.dist-info/licenses/LICENSE +21 -0
deeptrade/core/tushare_client.py
@@ -0,0 +1,824 @@
+ """Tushare client with rate limiting, per-plugin caching, and intraday isolation.
+
+ Per-plugin scoping (data isolation model): every TushareClient instance is
+ bound to a single ``plugin_id``; ``tushare_sync_state``, ``tushare_calls``,
+ and ``tushare_cache_blob`` rows are all scoped by ``plugin_id``. Plugins do
+ NOT share cached payloads with each other — even if two plugins call the
+ same API for the same trade_date, they each maintain their own cache row.
+
+ The framework reserves the synthetic ``plugin_id == "__framework__"`` for
+ its own connectivity tests (``deeptrade config test``).
+
+ Cache class buckets:
+ - static              : api_name × '*'        ; 7d TTL
+ - trade_day_immutable : api_name × trade_date ; never refetch when ok
+ - trade_day_mutable   : api_name × trade_date ; allow T/T+1 refetch
+ - hot_or_anns         : api_name × trade_date ; configurable TTL
+ """
+
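The isolation model in this docstring keys every cached row by plugin. As a quick illustration (not part of the packaged file; the plugin ids below are made up), the composite key mirrors the PRIMARY KEY of `tushare_cache_blob` defined further down, so two plugins requesting the same API and date still occupy separate rows:

    import hashlib
    import json

    def cache_key(plugin_id: str, api_name: str, trade_date: str, params: dict) -> tuple:
        # Same params_hash recipe as _payload_exists() / _write_cached() below.
        params_hash = hashlib.sha256(
            json.dumps(params, sort_keys=True, default=str).encode("utf-8")
        ).hexdigest()
        return (plugin_id, api_name, trade_date, params_hash)

    a = cache_key("limit-up-board", "limit_list_d", "20260508", {"trade_date": "20260508"})
    b = cache_key("volume-anomaly", "limit_list_d", "20260508", {"trade_date": "20260508"})
    assert a != b  # same API, same date, but two distinct cache rows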
+ from __future__ import annotations
+
+ import hashlib
+ import io
+ import json
+ import logging
+ import threading
+ import time
+ from abc import ABC, abstractmethod
+ from collections.abc import Callable
+ from dataclasses import dataclass
+ from datetime import datetime, timedelta
+ from typing import Any, Literal
+
+ import pandas as pd
+ from tenacity import (
+     retry,
+     retry_if_exception_type,
+     stop_after_attempt,
+     wait_exponential,
+ )
+
+ from deeptrade.core.db import Database
+
+ logger = logging.getLogger(__name__)
+
+
+ # ---------------------------------------------------------------------------
+ # Cache class taxonomy
+ # ---------------------------------------------------------------------------
+
+ CacheClass = Literal["static", "trade_day_immutable", "trade_day_mutable", "hot_or_anns"]
+
+ # Per-API cache class assignment (DESIGN §11.1 + §11.2 lists)
+ API_CACHE_CLASS: dict[str, CacheClass] = {
+     # static
+     "stock_basic": "static",
+     "trade_cal": "static",
+     # trade_day_immutable (settled at end of day, never revised)
+     "daily": "trade_day_immutable",
+     "limit_list_d": "trade_day_immutable",
+     "limit_list_ths": "trade_day_immutable",
+     "limit_step": "trade_day_immutable",
+     "limit_cpt_list": "trade_day_immutable",
+     "stock_st": "trade_day_immutable",
+     "top_list": "trade_day_immutable",
+     "top_inst": "trade_day_immutable",
+     "stk_limit": "trade_day_immutable",
+     "stk_auction_o": "trade_day_immutable",
+     "suspend_d": "trade_day_immutable",
+     "adj_factor": "trade_day_immutable",
+     # trade_day_mutable (occasional T+1/T+2 corrections)
+     "moneyflow": "trade_day_mutable",
+     "moneyflow_ths": "trade_day_mutable",
+     "daily_basic": "trade_day_mutable",
+     # hot_or_anns (TTL-based)
+     "ths_hot": "hot_or_anns",
+     "dc_hot": "hot_or_anns",
+     "anns_d": "hot_or_anns",
+     "news": "hot_or_anns",
+ }
+
+ # Per-API "wide" fields list pushed to the transport so the on-disk cache
+ # always contains every column downstream code expects. Without this, tushare's
+ # default field subset for some APIs (notably stock_basic, which omits
+ # market/exchange/list_status) silently corrupts the cache and any READ-time
+ # field projection can never recover the missing columns.
+ # Add an entry whenever the default subset misses something the strategies need.
+ WIDE_FIELDS: dict[str, str] = {
+     "stock_basic": (
+         "ts_code,symbol,name,area,industry,fullname,enname,market,exchange,"
+         "curr_type,list_status,list_date,delist_date,is_hs,act_name,act_ent_type"
+     ),
+ }
+
+ # APIs whose intraday data is unstable; --allow-intraday triggers data_completeness='intraday'
+ INTRADAY_SENSITIVE_APIS: frozenset[str] = frozenset(
+     {
+         "limit_list_d",
+         "limit_list_ths",
+         "limit_step",
+         "limit_cpt_list",
+         "moneyflow",
+         "moneyflow_ths",
+         "daily",
+         "daily_basic",
+     }
+ )
+
+ # Default TTL for hot_or_anns class
+ DEFAULT_HOT_TTL_SECONDS = 6 * 3600
+
+ # Default TTL for static class
+ STATIC_TTL_SECONDS = 7 * 24 * 3600
+
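A small, hedged example of how these constants are meant to be consumed (the import path is inferred from this wheel's layout; `brand_new_api` is a hypothetical name): `call()` further down looks the API up in `API_CACHE_CLASS` and falls back to the most conservative bucket when the API is unknown.

    from deeptrade.core.tushare_client import (
        API_CACHE_CLASS,
        DEFAULT_HOT_TTL_SECONDS,
        STATIC_TTL_SECONDS,
    )

    assert API_CACHE_CLASS["stock_basic"] == "static"          # refreshed on the 7-day TTL
    assert API_CACHE_CLASS["ths_hot"] == "hot_or_anns"         # TTL-based, 6h by default
    assert API_CACHE_CLASS.get("brand_new_api", "trade_day_immutable") == "trade_day_immutable"
    assert STATIC_TTL_SECONDS == 7 * 24 * 3600
    assert DEFAULT_HOT_TTL_SECONDS == 6 * 3600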
+
+ # ---------------------------------------------------------------------------
+ # Errors
+ # ---------------------------------------------------------------------------
+
+
+ class TushareError(Exception):
+     """Base error from TushareClient."""
+
+
+ class TushareUnauthorizedError(TushareError):
+     """Tushare reports the user lacks permission for this API."""
+
+
+ class TushareRateLimitError(TushareError):
+     """HTTP 429 / equivalent — caller should slow down."""
+
+
+ class TushareServerError(TushareError):
+     """5xx / transient transport error — eligible for retry."""
+
+
+ # ---------------------------------------------------------------------------
+ # Transport abstraction
+ # ---------------------------------------------------------------------------
+
+
+ class TushareTransport(ABC):
+     """Abstract carrier for Tushare API calls. Production = SDK; tests = fixtures."""
+
+     @abstractmethod
+     def call(self, api_name: str, params: dict[str, Any], fields: str | None) -> pd.DataFrame:
+         """Execute a single API call. Raise the typed error subclass on failure."""
+
+
+ class TushareSDKTransport(TushareTransport):
+     """Production transport — wraps tushare.pro_api()."""
+
+     def __init__(self, token: str) -> None:
+         import tushare as ts  # noqa: PLC0415 — defer import to avoid hard dep at module load
+
+         self._pro = ts.pro_api(token)
+
+     def call(self, api_name: str, params: dict[str, Any], fields: str | None) -> pd.DataFrame:
+         try:
+             method = getattr(self._pro, api_name)
+         except AttributeError as e:
+             raise TushareError(f"unknown tushare api: {api_name}") from e
+
+         kwargs = dict(params)
+         if fields:
+             kwargs["fields"] = fields
+         try:
+             df = method(**kwargs)
+         except Exception as e:  # noqa: BLE001 — translate SDK errors uniformly
+             msg = str(e)
+             low = msg.lower()
+             if "权限" in msg or "permission" in low or "未开通" in msg or "no permission" in low:
+                 raise TushareUnauthorizedError(msg) from e
+             if "频率" in msg or "rate" in low or "429" in msg:
+                 raise TushareRateLimitError(msg) from e
+             if "5" in msg[:3] or "timeout" in low or "connection" in low:
+                 raise TushareServerError(msg) from e
+             raise TushareError(msg) from e
+         if df is None:
+             return pd.DataFrame()
+         return df
+
+
+ class FixtureTransport(TushareTransport):
+     """Test transport — replays canned DataFrames keyed by (api, params)."""
+
+     def __init__(self) -> None:
+         self._fixtures: dict[str, pd.DataFrame | Exception] = {}
+         self.calls: list[tuple[str, dict[str, Any]]] = []  # call audit log
+
+     def register(
+         self,
+         api_name: str,
+         result: pd.DataFrame | Exception,
+         params: dict[str, Any] | None = None,
+     ) -> None:
+         key = self._key(api_name, params or {})
+         self._fixtures[key] = result
+
+     def call(self, api_name: str, params: dict[str, Any], fields: str | None) -> pd.DataFrame:
+         self.calls.append((api_name, dict(params)))
+         key = self._key(api_name, params)
+         if key in self._fixtures:
+             entry = self._fixtures[key]
+         else:
+             # fallback: any matching api_name (lets tests register without exact param match)
+             for k, v in self._fixtures.items():
+                 if k.startswith(api_name + "|"):
+                     entry = v
+                     break
+             else:
+                 raise TushareError(f"no fixture registered for {api_name} {params}")
+         if isinstance(entry, Exception):
+             raise entry
+         return (
+             entry.copy()
+             if fields is None
+             else entry[[c.strip() for c in fields.split(",") if c.strip() in entry.columns]].copy()
+         )
+
+     @staticmethod
+     def _key(api_name: str, params: dict[str, Any]) -> str:
+         body = json.dumps(params, sort_keys=True, default=str)
+         return f"{api_name}|{body}"
+
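For readers unfamiliar with the fixture pattern, here is a sketch of how a test might drive `FixtureTransport` (not part of the packaged file; the columns in the fixture frame are illustrative). The registered frame is replayed for any `limit_list_d` call thanks to the api-name prefix fallback, and `fields` narrows columns on the way out:

    import pandas as pd
    from deeptrade.core.tushare_client import FixtureTransport

    transport = FixtureTransport()
    transport.register(
        "limit_list_d",
        pd.DataFrame({"ts_code": ["000001.SZ"], "trade_date": ["20260508"], "limit": ["U"]}),
    )

    full = transport.call("limit_list_d", {"trade_date": "20260508"}, fields=None)
    narrow = transport.call("limit_list_d", {"trade_date": "20260508"}, fields="ts_code")
    assert list(narrow.columns) == ["ts_code"]
    assert len(transport.calls) == 2  # both calls were recorded in the audit log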
+
+ # ---------------------------------------------------------------------------
+ # Token-bucket rate limiter
+ # ---------------------------------------------------------------------------
+
+
+ class _TokenBucket:
+     def __init__(self, rps: float) -> None:
+         self.rps = max(rps, 0.1)
+         self._tokens = self.rps
+         self._last = time.monotonic()
+         self._lock = threading.Lock()
+
+     def acquire(self) -> None:
+         with self._lock:
+             now = time.monotonic()
+             self._tokens = min(self.rps, self._tokens + (now - self._last) * self.rps)
+             self._last = now
+             if self._tokens >= 1.0:
+                 self._tokens -= 1.0
+                 return
+             need = (1.0 - self._tokens) / self.rps
+         time.sleep(need)
+         # reacquire
+         with self._lock:
+             self._tokens = max(0.0, self._tokens - 1.0)
+             self._last = time.monotonic()
+
+     def decay(self, factor: float = 0.5) -> None:
+         with self._lock:
+             self.rps = max(self.rps * factor, 0.1)
+
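A rough timing sketch of the limiter above (not part of the packaged file; `_TokenBucket` is a private helper, and the numbers are approximate by nature): the bucket starts full, so the first `rps` acquisitions are free, later ones sleep, and `decay()` halves the rate after a 429.

    import time
    from deeptrade.core.tushare_client import _TokenBucket

    bucket = _TokenBucket(rps=2.0)       # ~2 requests/second
    t0 = time.monotonic()
    for _ in range(5):
        bucket.acquire()
    elapsed = time.monotonic() - t0      # roughly (5 - 2) / 2 = 1.5s of sleeping
    bucket.decay()                       # called after TushareRateLimitError
    assert bucket.rps == 1.0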
+
+ # ---------------------------------------------------------------------------
+ # Sync state record (mirrors tushare_sync_state row)
+ # ---------------------------------------------------------------------------
+
+
+ @dataclass
+ class SyncState:
+     plugin_id: str
+     api_name: str
+     trade_date: str
+     status: str  # ok | partial | failed | unauthorized
+     row_count: int | None
+     cache_class: CacheClass
+     ttl_seconds: int | None
+     data_completeness: str  # 'final' | 'intraday'
+     synced_at: datetime
+
+
+ # Synthetic plugin_id for framework-level connectivity tests
+ # (deeptrade config test). Real plugin_ids cannot match this pattern (the
+ # Pydantic regex requires lowercase alnum + hyphen, no underscores).
+ FRAMEWORK_PLUGIN_ID: str = "__framework__"
+
+
+ # ---------------------------------------------------------------------------
+ # Client
+ # ---------------------------------------------------------------------------
+
+
+ class TushareClient:
+     """Cache-aware Tushare client. Bound to a single ``plugin_id``.
+
+     Args:
+         db: Open Database instance (for sync_state / calls / cached frames).
+         transport: TushareTransport (real or fixture).
+         plugin_id: scopes every cached row / audit row / sync-state row.
+             Use ``FRAMEWORK_PLUGIN_ID`` for framework-level probes.
+         rps: initial token-bucket rate (decays on 429).
+         intraday: if True, all writes for INTRADAY_SENSITIVE_APIS get
+             data_completeness='intraday'; reads will only accept matching
+             completeness.
+         event_cb: optional callback for surfacing operationally-relevant
+             tushare events (5xx fallback, etc.) to the caller. Signature
+             ``event_cb(event_type, message, payload_dict)``. Kept as
+             plain strings to avoid plugins_api imports.
+     """
+
+     def __init__(
+         self,
+         db: Database,
+         transport: TushareTransport,
+         *,
+         plugin_id: str,
+         rps: float = 6.0,
+         intraday: bool = False,
+         event_cb: Callable[[str, str, dict[str, Any]], None] | None = None,
+     ) -> None:
+         self._db = db
+         self._transport = transport
+         self._plugin_id = plugin_id
+         self._bucket = _TokenBucket(rps)
+         self._intraday = intraday
+         self._event_cb = event_cb
+
+     @property
+     def plugin_id(self) -> str:
+         return self._plugin_id
+
+     @property
+     def is_intraday(self) -> bool:
+         return self._intraday
+
+     @property
+     def rps(self) -> float:
+         return self._bucket.rps
+
+     # --- B2.3 — materialize tushare frames into named business tables --------
+
+     def materialize(
+         self,
+         table_name: str,
+         df: pd.DataFrame,
+         *,
+         key_cols: list[str] | None = None,
+     ) -> int:
+         """Upsert ``df`` into the named DuckDB table (must already exist).
+
+         Used by strategies to persist tushare returns into core shared tables
+         (``stock_basic`` / ``daily`` / ``daily_basic``) and plugin tables
+         (``lub_limit_list_d``, ``lub_limit_ths``, ...) — the addresses where
+         DESIGN says the data should land, not just in ``tushare_cache_blob``.
+
+         Strategy:
+         - For idempotency, when ``key_cols`` is given, DELETE rows whose
+           (key_cols) appear in ``df`` first, then INSERT.
+         - Without ``key_cols``, INSERT only (caller responsibility).
+
+         Returns the row count written.
+         """
+         if df is None or df.empty:
+             return 0
+
+         # Verify the table exists; if not, refuse — the strategy plugin (or core
+         # migrations) should have created it.
+         existing_tables = {
+             r[0]
+             for r in self._db.fetchall(
+                 "SELECT table_name FROM information_schema.tables WHERE table_schema='main'"
+             )
+         }
+         if table_name not in existing_tables:
+             raise TushareError(f"materialize target table {table_name!r} does not exist")
+
+         # Discover destination columns to safely down-select df
+         dest_cols = [
+             r[0]
+             for r in self._db.fetchall(
+                 "SELECT column_name FROM information_schema.columns "
+                 "WHERE table_name = ? AND table_schema='main' ORDER BY ordinal_position",
+                 (table_name,),
+             )
+         ]
+         usable = [c for c in dest_cols if c in df.columns]
+         if not usable:
+             return 0
+
+         df_proj = df[usable].copy()
+
+         with self._db.transaction():
+             if key_cols:
+                 # Build DELETE WHERE (k1, k2) IN (...)
+                 key_cols = [k for k in key_cols if k in df_proj.columns]
+                 if key_cols:
+                     # Iterate rows; for small N this is fine. For large N, use
+                     # temp-table-based anti-join (future optimization).
+                     where = " AND ".join([f'"{k}" = ?' for k in key_cols])
+                     for _, row in df_proj[key_cols].iterrows():
+                         self._db.execute(
+                             f'DELETE FROM "{table_name}" WHERE {where}',  # noqa: S608 — names from schema
+                             tuple(row.tolist()),
+                         )
+             # Bulk INSERT via DuckDB's pandas integration
+             self._db.conn.register("__mat_df", df_proj)
+             try:
+                 col_list = ", ".join(f'"{c}"' for c in usable)
+                 self._db.execute(
+                     f'INSERT INTO "{table_name}" ({col_list}) '  # noqa: S608
+                     f"SELECT {col_list} FROM __mat_df"
+                 )
+             finally:
+                 self._db.conn.unregister("__mat_df")
+         return len(df_proj)
+
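The delete-then-insert upsert used by `materialize()` can be seen in isolation with plain DuckDB and pandas. The sketch below is not part of the packaged file and the `daily` table here is a throwaway stand-in, but it follows the same steps: delete rows whose key columns appear in the incoming frame, then bulk-insert through a registered view.

    import duckdb
    import pandas as pd

    con = duckdb.connect()
    con.execute("CREATE TABLE daily (ts_code VARCHAR, trade_date VARCHAR, close DOUBLE)")
    con.execute("INSERT INTO daily VALUES ('000001.SZ', '20260508', 10.0)")

    df = pd.DataFrame({"ts_code": ["000001.SZ"], "trade_date": ["20260508"], "close": [10.5]})
    for _, row in df[["ts_code", "trade_date"]].iterrows():   # delete by key_cols first
        con.execute("DELETE FROM daily WHERE ts_code = ? AND trade_date = ?", tuple(row))
    con.register("__mat_df", df)                              # bulk insert via a registered view
    con.execute("INSERT INTO daily SELECT ts_code, trade_date, close FROM __mat_df")
    con.unregister("__mat_df")

    assert con.execute("SELECT close FROM daily").fetchone()[0] == 10.5  # replaced, not duplicated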
+     # --- public entry --------------------------------------------------
+
+     def call(
+         self,
+         api_name: str,
+         *,
+         trade_date: str | None = None,
+         params: dict[str, Any] | None = None,
+         fields: str | None = None,
+         force_sync: bool = False,
+     ) -> pd.DataFrame:
+         """Fetch from cache (if fresh) or transport. See module docstring."""
+         params = dict(params or {})
+         if trade_date is not None:
+             params.setdefault("trade_date", trade_date)
+         # F-C1 fix — discriminating cache_key_date that captures windows too,
+         # so that daily(start=A,end=B) and daily(start=C,end=D) live in
+         # different cache rows even when neither passes a single trade_date.
+         cache_key_date = self._compute_cache_key_date(trade_date, params)
+         cache_class = API_CACHE_CLASS.get(api_name, "trade_day_immutable")
+
+         state = self._read_state(api_name, cache_key_date)
+         if not force_sync and self._cache_hit(
+             state, cache_class, api_name=api_name, trade_date=cache_key_date, params=params
+         ):
+             df = self._read_cached(api_name, cache_key_date, params, fields=None)
+         else:
+             # ⚠ Bug fix: always fetch the FULL payload from upstream, never let
+             # `fields=` constrain what gets cached. Otherwise a caller asking for
+             # `fields="ts_code"` would poison the cache with a 1-column frame
+             # that all later callers receive.
+             df = self._fetch_and_store(api_name, cache_key_date, params, cache_class)
+
+         # Apply field projection at the read site (cache stays full).
+         return self._project_fields(df, fields)
+
+     @staticmethod
+     def _compute_cache_key_date(trade_date: str | None, params: dict[str, Any]) -> str:
+         """Pick a cache_key_date that uniquely partitions queries by date scope.
+
+         Priority:
+         1. explicit ``trade_date`` argument or ``params['trade_date']``
+         2. ``params['start_date']:params['end_date']`` window key
+         3. literal '*' (parameter-less APIs like stock_basic / trade_cal)
+
+         Combined with ``params_hash`` in the payload table, this guarantees that
+         e.g. ``daily(start=20260401,end=20260410)`` and
+         ``daily(start=20260420,end=20260427)`` cannot collide.
+         """
+         if trade_date is not None:
+             return str(trade_date)
+         if "trade_date" in params:
+             return str(params["trade_date"])
+         start = params.get("start_date")
+         end = params.get("end_date")
+         if start is not None and end is not None:
+             return f"{start}:{end}"
+         if start is not None:
+             return f"{start}:"
+         if end is not None:
+             return f":{end}"
+         return "*"
+
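Because `_compute_cache_key_date` is a staticmethod, the three key shapes it produces can be checked without a database or transport (illustrative only, not part of the packaged file):

    from deeptrade.core.tushare_client import TushareClient

    key = TushareClient._compute_cache_key_date
    assert key("20260508", {}) == "20260508"                    # explicit trade_date
    assert key(None, {"start_date": "20260401", "end_date": "20260410"}) == "20260401:20260410"
    assert key(None, {}) == "*"                                 # parameter-less APIs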
+     @staticmethod
+     def _project_fields(df: pd.DataFrame, fields: str | None) -> pd.DataFrame:
+         if fields is None or df is None or df.empty:
+             return df
+         wanted = [c.strip() for c in fields.split(",") if c.strip()]
+         present = [c for c in wanted if c in df.columns]
+         if not present:
+             return df
+         return df[present].copy()
+
+     # --- cache decisions ----------------------------------------------
+
+     def _cache_hit(
+         self,
+         state: SyncState | None,
+         cache_class: CacheClass,
+         *,
+         api_name: str | None = None,
+         trade_date: str | None = None,
+         params: dict[str, Any] | None = None,
+     ) -> bool:
+         if state is None or state.status != "ok":
+             return False
+         # F4: daily-mode reader rejects intraday-cached data
+         if not self._intraday and state.data_completeness == "intraday":
+             return False
+         # B2.4: state.ok with no payload row is NOT a hit (atomicity defense)
+         if api_name is not None and trade_date is not None:
+             if not self._payload_exists(api_name, trade_date, params or {}):
+                 return False
+         # By cache class
+         if cache_class == "static":
+             return self._is_fresh_ttl(state, STATIC_TTL_SECONDS)
+         if cache_class == "trade_day_immutable":
+             return True
+         if cache_class == "trade_day_mutable":
+             return not self._is_T_or_T_plus_1(state.trade_date)
+         if cache_class == "hot_or_anns":
+             ttl = state.ttl_seconds or DEFAULT_HOT_TTL_SECONDS
+             return self._is_fresh_ttl(state, ttl)
+         return False
+
+     def _payload_exists(self, api_name: str, trade_date: str, params: dict[str, Any]) -> bool:
+         """STRICT payload presence check on (plugin_id, api_name, trade_date,
+         params_hash)."""
+         if not self._cache_table_exists():
+             return False
+         body = json.dumps(params, sort_keys=True, default=str)
+         h = hashlib.sha256(body.encode("utf-8")).hexdigest()
+         row = self._db.fetchone(
+             "SELECT 1 FROM tushare_cache_blob "
+             "WHERE plugin_id = ? AND api_name = ? AND trade_date = ? AND params_hash = ? "
+             "LIMIT 1",
+             (self._plugin_id, api_name, trade_date, h),
+         )
+         return row is not None
+
+     @staticmethod
+     def _is_fresh_ttl(state: SyncState, ttl_seconds: int) -> bool:
+         return (datetime.now() - state.synced_at) < timedelta(seconds=ttl_seconds)
+
+     @staticmethod
+     def _is_T_or_T_plus_1(trade_date: str) -> bool:
+         if not trade_date or trade_date == "*":
+             return False
+         try:
+             d = datetime.strptime(trade_date, "%Y%m%d").date()
+         except ValueError:
+             return False
+         today = datetime.now().date()
+         return today - d <= timedelta(days=1)
+
+     # --- fetch + store -------------------------------------------------
+
+     def _fetch_and_store(
+         self,
+         api_name: str,
+         cache_key_date: str,
+         params: dict[str, Any],
+         cache_class: CacheClass,
+     ) -> pd.DataFrame:
+         try:
+             # NOTE: never pass `fields=` to the transport — caching must always
+             # store the full payload so different callers requesting different
+             # field projections all share one cache entry.
+             df = self._fetch_with_retries(api_name, params)
+         except TushareUnauthorizedError as e:
+             self._write_state(
+                 api_name,
+                 cache_key_date,
+                 "unauthorized",
+                 row_count=None,
+                 cache_class=cache_class,
+             )
+             self._audit_call(api_name, params, rows=0, latency_ms=0)
+             raise e
+         except TushareServerError as e:
+             # B1.4 — 5xx fallback: try local cached payload if state allows.
+             existing = self._read_state(api_name, cache_key_date)
+             if can_fallback(existing, cache_key_date, is_intraday_run=self._intraday):
+                 logger.warning(
+                     "tushare 5xx for %s @ %s; falling back to cached payload",
+                     api_name,
+                     cache_key_date,
+                 )
+                 cached = self._read_cached(api_name, cache_key_date, params, fields=None)
+                 # F-L2 — surface fallback to runner / dashboard so users can see
+                 # that data is being served from cache instead of fresh.
+                 if self._event_cb is not None:
+                     try:
+                         self._event_cb(
+                             "tushare.fallback",
+                             f"tushare 5xx; serving cached payload for {api_name}",
+                             {
+                                 "api_name": api_name,
+                                 "cache_key_date": cache_key_date,
+                                 "row_count": len(cached),
+                             },
+                         )
+                     except Exception:  # noqa: BLE001 — never let observers crash a fetch
+                         logger.exception("event_cb raised on TUSHARE_FALLBACK")
+                 # Don't change sync_state — it's still 'ok' with the original payload.
+                 return cached
+             # No usable cache → propagate; caller decides terminate (required) vs degrade (optional)
+             raise e
+
+         completeness = self._completeness_for(api_name)
+         ttl = DEFAULT_HOT_TTL_SECONDS if cache_class == "hot_or_anns" else None
+         # B2.4 — atomic write of state + payload so a partial write can't yield
+         # "state=ok but payload missing" stale cache hits.
+         with self._db.transaction():
+             self._write_state(
+                 api_name,
+                 cache_key_date,
+                 "ok",
+                 row_count=len(df),
+                 cache_class=cache_class,
+                 ttl_seconds=ttl,
+                 data_completeness=completeness,
+             )
+             self._write_cached(api_name, cache_key_date, params, df)
+         return df
+
+     @retry(
+         retry=retry_if_exception_type((TushareRateLimitError, TushareServerError)),
+         stop=stop_after_attempt(5),
+         wait=wait_exponential(multiplier=1, min=1, max=15),
+         reraise=True,
+     )
+     def _fetch_with_retries(self, api_name: str, params: dict[str, Any]) -> pd.DataFrame:
+         """Fetch the widest payload we'd ever want for this API.
+
+         For most APIs tushare returns every column when ``fields`` is omitted,
+         but a few (notably ``stock_basic``) only return a narrow default subset.
+         ``WIDE_FIELDS`` overrides ``fields=`` per-API so the cache row contains
+         every column downstream callers need; ``call()``'s ``_project_fields``
+         narrows it back at READ time.
+         """
+         self._bucket.acquire()
+         t0 = time.monotonic()
+         try:
+             df = self._transport.call(api_name, params, fields=WIDE_FIELDS.get(api_name))
+         except TushareRateLimitError:
+             self._bucket.decay(0.5)
+             raise
+         latency_ms = int((time.monotonic() - t0) * 1000)
+         self._audit_call(api_name, params, rows=len(df), latency_ms=latency_ms)
+         return df
+
+     def _completeness_for(self, api_name: str) -> str:
+         if self._intraday and api_name in INTRADAY_SENSITIVE_APIS:
+             return "intraday"
+         return "final"
+
+     # --- DB read/write helpers ---------------------------------------
+
+     def _read_state(self, api_name: str, trade_date: str) -> SyncState | None:
+         row = self._db.fetchone(
+             "SELECT plugin_id, api_name, trade_date, status, row_count, cache_class, "
+             "ttl_seconds, data_completeness, synced_at FROM tushare_sync_state "
+             "WHERE plugin_id = ? AND api_name = ? AND trade_date = ?",
+             (self._plugin_id, api_name, trade_date),
+         )
+         if row is None:
+             return None
+         return SyncState(
+             plugin_id=row[0],
+             api_name=row[1],
+             trade_date=row[2],
+             status=row[3],
+             row_count=row[4],
+             cache_class=row[5],
+             ttl_seconds=row[6],
+             data_completeness=row[7],
+             synced_at=row[8]
+             if isinstance(row[8], datetime)
+             else datetime.fromisoformat(str(row[8])),
+         )
+
+     def _write_state(
+         self,
+         api_name: str,
+         trade_date: str,
+         status: str,
+         *,
+         row_count: int | None,
+         cache_class: CacheClass,
+         ttl_seconds: int | None = None,
+         data_completeness: str = "final",
+     ) -> None:
+         with self._db.transaction():
+             self._db.execute(
+                 "DELETE FROM tushare_sync_state "
+                 "WHERE plugin_id = ? AND api_name = ? AND trade_date = ?",
+                 (self._plugin_id, api_name, trade_date),
+             )
+             self._db.execute(
+                 "INSERT INTO tushare_sync_state(plugin_id, api_name, trade_date, status, "
+                 "row_count, cache_class, ttl_seconds, data_completeness, synced_at) "
+                 "VALUES (?, ?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP)",
+                 (
+                     self._plugin_id,
+                     api_name,
+                     trade_date,
+                     status,
+                     row_count,
+                     cache_class,
+                     ttl_seconds,
+                     data_completeness,
+                 ),
+             )
+
+     def _audit_call(
+         self, api_name: str, params: dict[str, Any], *, rows: int, latency_ms: int
+     ) -> None:
+         body = json.dumps(params, sort_keys=True, default=str)
+         h = hashlib.sha256(body.encode("utf-8")).hexdigest()
+         self._db.execute(
+             "INSERT INTO tushare_calls(plugin_id, api_name, params_hash, rows, latency_ms) "
+             "VALUES (?, ?, ?, ?, ?)",
+             (self._plugin_id, api_name, h, rows, latency_ms),
+         )
+
+     # ---- cached frame storage ---------------------------------------
+     # We use a generic JSON column so V0.3 doesn't require per-API tables.
+     # V0.7a will overlay strategy-specific lub_* tables on top of these.
+
+     def _cache_table_exists(self) -> bool:
+         rows = self._db.fetchall(
+             "SELECT table_name FROM information_schema.tables "
+             "WHERE table_schema='main' AND table_name='tushare_cache_blob'"
+         )
+         return bool(rows)
+
+     def _ensure_cache_table(self) -> None:
+         if self._cache_table_exists():
+             return
+         self._db.execute(
+             "CREATE TABLE IF NOT EXISTS tushare_cache_blob ("
+             " plugin_id VARCHAR NOT NULL,"
+             " api_name VARCHAR NOT NULL,"
+             " trade_date VARCHAR NOT NULL,"
+             " params_hash VARCHAR NOT NULL,"
+             " payload_json VARCHAR NOT NULL,"
+             " cached_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,"
+             " PRIMARY KEY (plugin_id, api_name, trade_date, params_hash)"
+             ")"
+         )
+
+     def _write_cached(
+         self,
+         api_name: str,
+         trade_date: str,
+         params: dict[str, Any],
+         df: pd.DataFrame,
+     ) -> None:
+         self._ensure_cache_table()
+         body = json.dumps(params, sort_keys=True, default=str)
+         h = hashlib.sha256(body.encode("utf-8")).hexdigest()
+         payload = df.to_json(orient="records", date_format="iso")
+         with self._db.transaction():
+             self._db.execute(
+                 "DELETE FROM tushare_cache_blob "
+                 "WHERE plugin_id = ? AND api_name = ? AND trade_date = ? AND params_hash = ?",
+                 (self._plugin_id, api_name, trade_date, h),
+             )
+             self._db.execute(
+                 "INSERT INTO tushare_cache_blob(plugin_id, api_name, trade_date, params_hash, "
+                 "payload_json) VALUES (?, ?, ?, ?, ?)",
+                 (self._plugin_id, api_name, trade_date, h, payload),
+             )
+
+     def _read_cached(
+         self,
+         api_name: str,
+         trade_date: str,
+         params: dict[str, Any],
+         fields: str | None,
+     ) -> pd.DataFrame:
+         if not self._cache_table_exists():
+             return pd.DataFrame()
+         body = json.dumps(params, sort_keys=True, default=str)
+         h = hashlib.sha256(body.encode("utf-8")).hexdigest()
+         row = self._db.fetchone(
+             "SELECT payload_json FROM tushare_cache_blob "
+             "WHERE plugin_id = ? AND api_name = ? AND trade_date = ? AND params_hash = ?",
+             (self._plugin_id, api_name, trade_date, h),
+         )
+         if row is None:
+             return pd.DataFrame()
+         df = pd.read_json(io.StringIO(row[0]), orient="records")
+         if fields:
+             cols = [c.strip() for c in fields.split(",") if c.strip() in df.columns]
+             df = df[cols]
+         return df
+
+
+ # ---------------------------------------------------------------------------
+ # Fallback predicate (DESIGN §13.2 + S4)
+ # ---------------------------------------------------------------------------
+
+
+ def can_fallback(
+     state: SyncState | None,
+     target_trade_date: str,
+     *,
+     is_intraday_run: bool,
+ ) -> bool:
+     """Decide if a 5xx/timeout failure may use already-cached data.
+
+     Conditions (all required):
+     - state.status == 'ok'
+     - state.trade_date == target_trade_date (no nearest-day approximation)
+     - cache_class != trade_day_mutable when target is T or T+1
+     - row_count >= 0 (zero rows ARE valid — S4 fix)
+     - data_completeness == 'final' for daily-mode runs (F4)
+     """
+     if state is None or state.status != "ok":
+         return False
+     if state.trade_date != target_trade_date:
+         return False
+     if state.row_count is not None and state.row_count < 0:
+         return False
+     if (
+         state.cache_class == "trade_day_mutable"
+         and TushareClient._is_T_or_T_plus_1(target_trade_date)  # noqa: SLF001 — internal helper reuse
+     ):
+         return False
+     if not is_intraday_run and state.data_completeness == "intraday":
+         return False
+     return True
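To close, a small sketch of the fallback predicate in action (not part of the packaged file; the plugin id is made up). A settled, fully-cached day is eligible for 5xx fallback, while a date mismatch or a missing sync-state row is not:

    from datetime import datetime
    from deeptrade.core.tushare_client import SyncState, can_fallback

    state = SyncState(
        plugin_id="limit-up-board",        # illustrative plugin id
        api_name="limit_list_d",
        trade_date="20240105",             # an old, already-settled trade date
        status="ok",
        row_count=0,                       # zero rows are still a valid cached payload (S4)
        cache_class="trade_day_immutable",
        ttl_seconds=None,
        data_completeness="final",
        synced_at=datetime.now(),
    )
    assert can_fallback(state, "20240105", is_intraday_run=False) is True
    assert can_fallback(state, "20240106", is_intraday_run=False) is False  # no nearest-day approximation
    assert can_fallback(None, "20240105", is_intraday_run=False) is False   # no sync state at all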