deeptrade-quant 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. deeptrade/__init__.py +8 -0
  2. deeptrade/channels_builtin/__init__.py +0 -0
  3. deeptrade/channels_builtin/stdout/__init__.py +0 -0
  4. deeptrade/channels_builtin/stdout/deeptrade_plugin.yaml +25 -0
  5. deeptrade/channels_builtin/stdout/migrations/20260429_001_init.sql +13 -0
  6. deeptrade/channels_builtin/stdout/stdout_channel/__init__.py +0 -0
  7. deeptrade/channels_builtin/stdout/stdout_channel/channel.py +180 -0
  8. deeptrade/cli.py +214 -0
  9. deeptrade/cli_config.py +396 -0
  10. deeptrade/cli_data.py +33 -0
  11. deeptrade/cli_plugin.py +176 -0
  12. deeptrade/core/__init__.py +8 -0
  13. deeptrade/core/config.py +344 -0
  14. deeptrade/core/config_migrations.py +138 -0
  15. deeptrade/core/db.py +176 -0
  16. deeptrade/core/llm_client.py +591 -0
  17. deeptrade/core/llm_manager.py +174 -0
  18. deeptrade/core/logging_config.py +61 -0
  19. deeptrade/core/migrations/__init__.py +0 -0
  20. deeptrade/core/migrations/core/20260427_001_init.sql +121 -0
  21. deeptrade/core/migrations/core/20260501_002_drop_llm_calls_stage.sql +10 -0
  22. deeptrade/core/migrations/core/__init__.py +0 -0
  23. deeptrade/core/notifier.py +302 -0
  24. deeptrade/core/paths.py +49 -0
  25. deeptrade/core/plugin_manager.py +616 -0
  26. deeptrade/core/run_status.py +29 -0
  27. deeptrade/core/secrets.py +152 -0
  28. deeptrade/core/tushare_client.py +824 -0
  29. deeptrade/plugins_api/__init__.py +44 -0
  30. deeptrade/plugins_api/base.py +66 -0
  31. deeptrade/plugins_api/channel.py +42 -0
  32. deeptrade/plugins_api/events.py +61 -0
  33. deeptrade/plugins_api/llm.py +46 -0
  34. deeptrade/plugins_api/metadata.py +84 -0
  35. deeptrade/plugins_api/notify.py +67 -0
  36. deeptrade/strategies_builtin/__init__.py +0 -0
  37. deeptrade/strategies_builtin/limit_up_board/__init__.py +0 -0
  38. deeptrade/strategies_builtin/limit_up_board/deeptrade_plugin.yaml +101 -0
  39. deeptrade/strategies_builtin/limit_up_board/limit_up_board/__init__.py +0 -0
  40. deeptrade/strategies_builtin/limit_up_board/limit_up_board/calendar.py +65 -0
  41. deeptrade/strategies_builtin/limit_up_board/limit_up_board/cli.py +269 -0
  42. deeptrade/strategies_builtin/limit_up_board/limit_up_board/config.py +76 -0
  43. deeptrade/strategies_builtin/limit_up_board/limit_up_board/data.py +1191 -0
  44. deeptrade/strategies_builtin/limit_up_board/limit_up_board/pipeline.py +869 -0
  45. deeptrade/strategies_builtin/limit_up_board/limit_up_board/plugin.py +30 -0
  46. deeptrade/strategies_builtin/limit_up_board/limit_up_board/profiles.py +85 -0
  47. deeptrade/strategies_builtin/limit_up_board/limit_up_board/prompts.py +485 -0
  48. deeptrade/strategies_builtin/limit_up_board/limit_up_board/render.py +890 -0
  49. deeptrade/strategies_builtin/limit_up_board/limit_up_board/runner.py +1087 -0
  50. deeptrade/strategies_builtin/limit_up_board/limit_up_board/runtime.py +172 -0
  51. deeptrade/strategies_builtin/limit_up_board/limit_up_board/schemas.py +178 -0
  52. deeptrade/strategies_builtin/limit_up_board/migrations/20260430_001_init.sql +150 -0
  53. deeptrade/strategies_builtin/limit_up_board/migrations/20260501_002_lub_stage_results_llm_provider.sql +8 -0
  54. deeptrade/strategies_builtin/limit_up_board/migrations/20260508_001_lub_lhb_tables.sql +36 -0
  55. deeptrade/strategies_builtin/limit_up_board/migrations/20260508_002_lub_cyq_perf.sql +18 -0
  56. deeptrade/strategies_builtin/limit_up_board/migrations/20260508_003_lub_lhb_pk_fix.sql +46 -0
  57. deeptrade/strategies_builtin/limit_up_board/migrations/20260508_004_lub_lhb_drop_pk.sql +53 -0
  58. deeptrade/strategies_builtin/limit_up_board/migrations/20260508_005_lub_config.sql +17 -0
  59. deeptrade/strategies_builtin/volume_anomaly/__init__.py +0 -0
  60. deeptrade/strategies_builtin/volume_anomaly/deeptrade_plugin.yaml +59 -0
  61. deeptrade/strategies_builtin/volume_anomaly/migrations/20260430_001_init.sql +94 -0
  62. deeptrade/strategies_builtin/volume_anomaly/migrations/20260601_001_realized_returns.sql +44 -0
  63. deeptrade/strategies_builtin/volume_anomaly/migrations/20260601_002_dimension_scores.sql +13 -0
  64. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/__init__.py +0 -0
  65. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/calendar.py +52 -0
  66. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/cli.py +247 -0
  67. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/data.py +2154 -0
  68. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/pipeline.py +327 -0
  69. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/plugin.py +22 -0
  70. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/profiles.py +49 -0
  71. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/prompts.py +187 -0
  72. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/prompts_examples.py +84 -0
  73. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/render.py +906 -0
  74. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/runner.py +772 -0
  75. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/runtime.py +90 -0
  76. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/schemas.py +97 -0
  77. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/stats.py +174 -0
  78. deeptrade/theme.py +48 -0
  79. deeptrade_quant-0.0.2.dist-info/METADATA +166 -0
  80. deeptrade_quant-0.0.2.dist-info/RECORD +83 -0
  81. deeptrade_quant-0.0.2.dist-info/WHEEL +4 -0
  82. deeptrade_quant-0.0.2.dist-info/entry_points.txt +2 -0
  83. deeptrade_quant-0.0.2.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,2154 @@
1
+ """Data layer for the volume-anomaly strategy.
2
+
3
+ Two distinct data flows:
4
+ screen_anomalies(...) — apply local rules to find new anomaly hits on T
5
+ collect_analyze_bundle(...) — read watchlist + assemble per-stock context for LLM
6
+
7
+ Reuses limit_up_board's main_board_filter / FIELD_UNITS_RAW conventions where
8
+ sensible but does NOT import from limit_up_board (plugins are self-contained).
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import json
14
+ import logging
15
+ import math
16
+ from dataclasses import dataclass, field
17
+ from datetime import datetime, time, timedelta
18
+ from typing import Any
19
+
20
+ import pandas as pd
21
+
22
+ from deeptrade.core.tushare_client import (
23
+ TushareClient,
24
+ TushareUnauthorizedError,
25
+ )
26
+
27
+ from .calendar import TradeCalendar
28
+
29
+ logger = logging.getLogger(__name__)
30
+
31
+
32
+ # ---------------------------------------------------------------------------
33
+ # Step 0 — resolve trade date (mirrors limit_up_board behaviour)
34
+ # ---------------------------------------------------------------------------
35
+
36
+
37
def resolve_trade_date(
    now_dt: datetime,
    calendar: TradeCalendar,
    *,
    user_specified: str | None = None,
    allow_intraday: bool = False,
    close_after: time = time(18, 0),
) -> tuple[str, str]:
    """Choose the trade date T to work on and return ``(T, T+1)``.

    An explicit ``user_specified`` date is taken verbatim. Otherwise T is
    today's date when the calendar reports today as open and either the
    clock has reached ``close_after`` or ``allow_intraday`` is set; in every
    other case T is ``calendar.pretrade_date(today)``. T+1 is always
    ``calendar.next_open(T)``.
    """
    if user_specified:
        return user_specified, calendar.next_open(user_specified)

    today = now_dt.strftime("%Y%m%d")
    # Today only counts when the session is open AND it is either finished
    # (past close_after) or the caller explicitly accepts intraday data.
    today_usable = calendar.is_open(today) and (
        now_dt.time() >= close_after or allow_intraday
    )
    chosen = today if today_usable else calendar.pretrade_date(today)
    return chosen, calendar.next_open(chosen)
65
+
66
+
67
+ # ---------------------------------------------------------------------------
68
+ # Main board filter
69
+ # ---------------------------------------------------------------------------
70
+
71
+
72
def main_board_filter(stock_basic: pd.DataFrame) -> pd.DataFrame:
    """Return the listed Shanghai/Shenzhen main-board rows of ``stock_basic``.

    A row is kept when ``market`` equals "主板" and ``exchange`` is SSE or
    SZSE. When a ``list_status`` column is present, only rows with status
    "L" survive. The result is a fresh frame with a 0..n-1 index.

    Raises:
        ValueError: if the ``market`` or ``exchange`` column is missing.
    """
    required = ("market", "exchange")
    if any(col not in stock_basic.columns for col in required):
        raise ValueError("stock_basic missing market/exchange columns")

    on_main_board = stock_basic["market"] == "主板"
    on_sh_or_sz = stock_basic["exchange"].isin(["SSE", "SZSE"])
    kept = stock_basic[on_main_board & on_sh_or_sz].copy()

    if "list_status" in kept.columns:
        kept = kept[kept["list_status"] == "L"]
    return kept.reset_index(drop=True)
85
+
86
+
87
+ # ---------------------------------------------------------------------------
88
+ # Optional API wrapper (transient failure → empty df + reason string)
89
+ # ---------------------------------------------------------------------------
90
+
91
+
92
def _try_optional(
    tushare: TushareClient, api_name: str, **kwargs: Any
) -> tuple[pd.DataFrame, str | None]:
    """Invoke an optional tushare endpoint without letting known failures escape.

    Returns ``(frame, None)`` when ``tushare.call`` succeeds. Unauthorized,
    server and rate-limit errors are converted into ``(empty frame, reason)``
    where ``reason`` is "<kind>: <exception text>". Any other exception
    propagates unchanged.
    """
    from deeptrade.core.tushare_client import (  # noqa: PLC0415
        TushareRateLimitError,
        TushareServerError,
    )

    try:
        frame = tushare.call(api_name, **kwargs)
    except TushareUnauthorizedError as exc:
        reason = f"unauthorized: {exc}"
    except TushareServerError as exc:
        reason = f"server_error: {exc}"
    except TushareRateLimitError as exc:
        reason = f"rate_limited: {exc}"
    else:
        return frame, None
    return pd.DataFrame(), reason
109
+
110
+
111
+ # ---------------------------------------------------------------------------
112
+ # Unit normalizers (per-field; tushare units are heterogeneous)
113
+ # ---------------------------------------------------------------------------
114
+
115
+
116
FIELD_UNITS_RAW: dict[str, str] = {
    # daily.amount is in 千元 (thousand CNY); daily.vol is 手 (handled separately)
    "amount_daily": "千元",
    # daily_basic
    "circ_mv": "万元",
    "total_mv": "万元",
    "free_share": "万股",
    "float_share": "万股",
    "total_share": "万股",
    # moneyflow (every amount is in 万元, i.e. 1e4 CNY)
    "net_mf_amount": "万元",
    "buy_lg_amount": "万元",
    "buy_elg_amount": "万元",
    "buy_md_amount": "万元",
    "buy_sm_amount": "万元",
    "sell_lg_amount": "万元",
    "sell_elg_amount": "万元",
}


def normalize_to_yi(field: str, raw_value: float | None) -> float | None:
    """Convert a raw monetary value to 亿元 (1e8 CNY), rounded to 2 decimals.

    The unit of ``field`` is looked up in ``FIELD_UNITS_RAW``; fields that are
    not listed are treated as plain 元. Returns ``None`` when the value is
    None/NaN or when the field's unit is not a currency unit (e.g. 万股).
    """
    if raw_value is None or pd.isna(raw_value):
        return None
    # How many raw units make up one 亿元.
    units_per_yi = {"元": 1e8, "万元": 1e4, "千元": 1e5}
    divisor = units_per_yi.get(FIELD_UNITS_RAW.get(field, "元"))
    if divisor is None:
        return None
    return round(float(raw_value) / divisor, 2)
149
+
150
+
151
def round2(v: float | None) -> float | None:
    """Round ``v`` to two decimals as a float; None/NaN map to ``None``."""
    is_missing = v is None or pd.isna(v)
    return None if is_missing else round(float(v), 2)
155
+
156
+
157
+ def _opt_int(v: Any) -> int | None:
158
+ if v is None or pd.isna(v):
159
+ return None
160
+ return int(v)
161
+
162
+
163
+ def _normalize_id_cols(df: pd.DataFrame | None) -> pd.DataFrame | None:
164
+ """Coerce identifier columns to str so cross-frame sort/compare is stable.
165
+
166
+ The tushare-on-disk JSON cache widens "20260428" → 20260428 on round-trip;
167
+ if some rows come fresh from the SDK (str) and others from the cache (int),
168
+ pandas .sort_values()/comparisons raise:
169
+ TypeError: '<' not supported between instances of 'int' and 'str'
170
+ """
171
+ if df is None or df.empty:
172
+ return df
173
+ df = df.copy()
174
+ for col in ("trade_date", "ts_code", "cal_date"):
175
+ if col in df.columns:
176
+ df[col] = df[col].astype(str)
177
+ return df
178
+
179
+
180
+ def _shift_calendar_days(yyyymmdd: str, days: int) -> str:
181
+ """Naive ±days shift on YYYYMMDD (calendar days, not trade days)."""
182
+ d = datetime.strptime(yyyymmdd, "%Y%m%d") + timedelta(days=days)
183
+ return d.strftime("%Y%m%d")
184
+
185
+
186
+ def _calendar_days_between(earlier: str, later: str) -> int:
187
+ """Calendar-day diff (later - earlier) on YYYYMMDD strings; negative if reversed."""
188
+ d1 = datetime.strptime(earlier, "%Y%m%d")
189
+ d2 = datetime.strptime(later, "%Y%m%d")
190
+ return (d2 - d1).days
191
+
192
+
193
+ # ---------------------------------------------------------------------------
194
+ # SCREEN MODE — anomaly screening rules
195
+ # ---------------------------------------------------------------------------
196
+
197
+
198
# Default lookback window (kept module-level so analyze mode can re-use it
# without depending on the screen-only ScreenRules dataclass). It is also the
# default value of ScreenRules.lookback_trade_days.
RULE_LOOKBACK_TRADE_DAYS = 60  # ~3 months

# v0.4.0 P1-3 — T+N realized-return evaluation horizons. F6 decision: keep as a
# module-level constant rather than introducing a new `va_config` table.
EVALUATE_HORIZONS: tuple[int, ...] = (1, 3, 5, 10)
EVALUATE_DEFAULT_LOOKBACK_DAYS: int = 30
# Derived from EVALUATE_HORIZONS so it follows any change to the tuple above.
EVALUATE_MAX_HORIZON: int = max(EVALUATE_HORIZONS)
# NOTE(review): presumably the 5-/10-day evaluation window lengths; their use
# is not visible in this part of the file — confirm against the callers.
EVALUATE_WINDOW_5D = 5
EVALUATE_WINDOW_10D = 10
209
+
210
+
211
# v0.3.0 P0-2 — default circ_mv-bucketed turnover thresholds.
# Each tuple is (circ_mv_yi_max, turnover_min, turnover_max). The first bucket
# whose `max` is ≥ the candidate's circ_mv_yi (in 亿元, i.e. 1e8 CNY) wins;
# boundary values fall into the smaller bucket (E4 — `circ_mv_yi ≤ bucket_max`).
DEFAULT_TURNOVER_BUCKETS: list[tuple[float, float, float]] = [
    (50.0, 5.0, 15.0),  # ≤ 50亿 — micro cap
    (200.0, 3.5, 12.0),  # 50–200亿 — small/mid cap
    (1000.0, 2.5, 9.0),  # 200–1000亿 — mid cap
    (math.inf, 1.5, 6.0),  # > 1000亿 — large cap
]
221
+
222
+
223
+ def _bucket_label(bucket_max: float, prev_max: float) -> str:
224
+ """Render a human-readable bucket label like "≤50亿" / "50-200亿" / ">1000亿"."""
225
+ if prev_max <= 0:
226
+ return f"≤{int(bucket_max)}亿"
227
+ if math.isinf(bucket_max):
228
+ return f">{int(prev_max)}亿"
229
+ return f"{int(prev_max)}-{int(bucket_max)}亿"
230
+
231
+
232
+ def _resolve_turnover_bucket(
233
+ circ_mv_yi: float, buckets: list[tuple[float, float, float]]
234
+ ) -> tuple[int, str, float, float]:
235
+ """Return (idx, label, t_min, t_max) for the first bucket where circ_mv_yi ≤ max."""
236
+ prev_max = 0.0
237
+ for idx, (b_max, t_min, t_max) in enumerate(buckets):
238
+ if circ_mv_yi <= b_max:
239
+ return idx, _bucket_label(b_max, prev_max), t_min, t_max
240
+ prev_max = b_max
241
+ # Past the last bucket (only possible if last bucket isn't math.inf —
242
+ # ScreenRules.__post_init__ guards against that, but be defensive).
243
+ last_max, t_min, t_max = buckets[-1]
244
+ return len(buckets) - 1, _bucket_label(last_max, prev_max), t_min, t_max
245
+
246
+
247
@dataclass
class ScreenRules:
    """Thresholds that drive the anomaly screen; every knob is user-tunable.

    History of the defaults:
      * Plan A (v0.2) — turnover_max raised 7 → 10; it was empirically the
        dominant bottleneck on real funnel data.
      * Plan B (v0.2) — the volume rule became "max over a short window" OR
        "top-N over the long window", because a strict 60d max disqualifies
        any stock with a single heavy-volume day in the past 3 months.
      * Plan C (v0.2) — all knobs are collected by configure() at runtime.
    """

    pct_chg_min: float = 5.0
    pct_chg_max: float = 8.0
    body_ratio_min: float = 0.6
    turnover_min: float = 3.0
    turnover_max: float = 10.0  # Plan A — previously 7.0
    vol_ratio_5d_min: float = 2.0
    # Plan B — the volume rule passes when EITHER holds:
    #   (a) vol_t is the max over the last `vol_max_short_window` trade days
    #   (b) vol_t ranks within the top `vol_top_n_long` of `lookback_trade_days`
    vol_max_short_window: int = 30
    vol_top_n_long: int = 3
    lookback_trade_days: int = RULE_LOOKBACK_TRADE_DAYS
    # P0 H2 — share of `lookback_trade_days` a stock's history must cover
    # before the vol rule is evaluated; shorter histories are reported in
    # `insufficient_history` instead of silently passing through.
    min_history_coverage: float = 0.8
    # P2 L3 — adjust historical vol by adj_factor so vol_max comparisons stay
    # valid across split / bonus-share events. Falls back to raw vol (with a
    # diagnostic) when adj_factor is unavailable.
    vol_adjust: bool = True
    # v0.3.0 P0-1 — reject hits whose upper shadow exceeds this fraction of
    # the day's range (long upper wick). None turns the filter off.
    upper_shadow_ratio_max: float | None = 0.35
    # v0.3.0 P0-2 — per-circ_mv turnover bounds. Entries are
    # (circ_mv_yi_max, turnover_min, turnover_max); the first bucket with
    # circ_mv_yi ≤ max wins. None means "use the global turnover_min/max".
    turnover_buckets: list[tuple[float, float, float]] | None = field(
        default_factory=lambda: list(DEFAULT_TURNOVER_BUCKETS)
    )

    def __post_init__(self) -> None:
        """P1 L2 — reject impossible threshold combinations at construction.

        Runs for every construction path (defaults / from_dict / explicit),
        so a misconfigured run raises ValueError immediately instead of
        quietly yielding 0 hits. The checks run in a fixed order; the first
        violation wins.
        """
        self._check_t_day_thresholds()
        self._check_volume_windows()
        self._check_coverage_and_shadow()
        self._check_turnover_buckets()

    def _check_t_day_thresholds(self) -> None:
        """Validate pct_chg / turnover ranges, body ratio and 5d vol ratio."""
        pct_lo, pct_hi = self.pct_chg_min, self.pct_chg_max
        if not (0 <= pct_lo <= pct_hi):
            raise ValueError(
                f"invalid pct_chg range [{pct_lo}, {pct_hi}] "
                "(require 0 ≤ min ≤ max)"
            )
        to_lo, to_hi = self.turnover_min, self.turnover_max
        if not (0 <= to_lo <= to_hi):
            raise ValueError(
                f"invalid turnover range [{to_lo}, {to_hi}] "
                "(require 0 ≤ min ≤ max)"
            )
        if not (0.0 <= self.body_ratio_min <= 1.0):
            raise ValueError(
                f"body_ratio_min must be in [0, 1], got {self.body_ratio_min}"
            )
        if self.vol_ratio_5d_min < 0:
            raise ValueError(f"vol_ratio_5d_min must be ≥ 0, got {self.vol_ratio_5d_min}")

    def _check_volume_windows(self) -> None:
        """Validate the short window, top-N and lookback sizes."""
        short, lookback = self.vol_max_short_window, self.lookback_trade_days
        if short <= 0:
            raise ValueError(f"vol_max_short_window must be > 0, got {short}")
        if self.vol_top_n_long <= 0:
            raise ValueError(f"vol_top_n_long must be > 0, got {self.vol_top_n_long}")
        if lookback < 6:
            # 6 = the 5-day "previous" window plus the T row itself
            raise ValueError(
                f"lookback_trade_days must be ≥ 6 (5 prev + T), got {lookback}"
            )
        if short > lookback:
            raise ValueError(
                f"vol_max_short_window ({short}) must be ≤ "
                f"lookback_trade_days ({lookback})"
            )

    def _check_coverage_and_shadow(self) -> None:
        """Validate min_history_coverage and the optional upper-shadow cap."""
        if not (0.0 < self.min_history_coverage <= 1.0):
            raise ValueError(
                f"min_history_coverage must be in (0, 1], got {self.min_history_coverage}"
            )
        shadow_cap = self.upper_shadow_ratio_max
        if shadow_cap is not None and not (0.0 < shadow_cap <= 1.0):
            raise ValueError(
                f"upper_shadow_ratio_max must be in (0, 1] or None, "
                f"got {shadow_cap}"
            )

    def _check_turnover_buckets(self) -> None:
        """Validate bucket shape, strictly increasing maxima and turnover ranges."""
        if self.turnover_buckets is None:
            return
        if not self.turnover_buckets:
            raise ValueError("turnover_buckets, if set, must be non-empty")
        floor = float("-inf")
        for entry in self.turnover_buckets:
            if not isinstance(entry, tuple) or len(entry) != 3:
                raise ValueError(
                    f"each turnover_buckets entry must be a 3-tuple "
                    f"(circ_mv_yi_max, turnover_min, turnover_max); got {entry}"
                )
            b_max, t_min, t_max = entry
            if b_max <= floor:
                raise ValueError(
                    f"turnover_buckets circ_mv_yi_max must be strictly increasing; "
                    f"{floor} → {b_max}"
                )
            if not (0 <= t_min <= t_max):
                raise ValueError(
                    f"turnover_buckets entry has invalid turnover range "
                    f"[{t_min}, {t_max}] (require 0 ≤ min ≤ max)"
                )
            floor = b_max

    @classmethod
    def defaults(cls) -> ScreenRules:
        """Return an instance carrying every default threshold."""
        return cls()

    @classmethod
    def from_dict(cls, d: dict[str, Any] | None) -> ScreenRules:
        """Build from a partial dict (configure() output); missing keys → default."""
        if not d:
            return cls.defaults()

        base = cls.defaults()
        scalar_casts = (
            ("pct_chg_min", float),
            ("pct_chg_max", float),
            ("body_ratio_min", float),
            ("turnover_min", float),
            ("turnover_max", float),
            ("vol_ratio_5d_min", float),
            ("vol_max_short_window", int),
            ("vol_top_n_long", int),
            ("lookback_trade_days", int),
            ("min_history_coverage", float),
        )
        kwargs: dict[str, Any] = {}
        for name, cast in scalar_casts:
            raw_value = d.get(name)
            kwargs[name] = getattr(base, name) if raw_value is None else cast(raw_value)

        # vol_adjust is parsed by hand: bool("false") is True, so a plain
        # bool() cast would misread string input.
        flag = d.get("vol_adjust")
        if flag is not None:
            if isinstance(flag, str):
                kwargs["vol_adjust"] = flag.strip().lower() in {"1", "true", "t", "yes", "y"}
            else:
                kwargs["vol_adjust"] = bool(flag)

        # v0.3.0 P0-1 — explicit `null` disables the upper-shadow filter;
        # an absent key keeps the default (0.35).
        if "upper_shadow_ratio_max" in d:
            cap = d["upper_shadow_ratio_max"]
            kwargs["upper_shadow_ratio_max"] = None if cap is None else float(cap)

        # v0.3.0 P0-2 — buckets arrive as list-of-list (JSON has no tuple).
        # Explicit `null` → global turnover_min/max; absent key → default
        # DEFAULT_TURNOVER_BUCKETS.
        if "turnover_buckets" in d:
            raw_buckets = d["turnover_buckets"]
            kwargs["turnover_buckets"] = (
                None if raw_buckets is None else cls._parse_buckets(raw_buckets)
            )
        return cls(**kwargs)

    @staticmethod
    def _parse_buckets(raw: Any) -> list[tuple[float, float, float]]:
        """Turn JSON-style bucket rows into float 3-tuples.

        A `null` first element means "no upper bound" and becomes math.inf.
        """
        parsed: list[tuple[float, float, float]] = []
        for entry in raw:
            if len(entry) != 3:
                raise ValueError(
                    f"each turnover_buckets entry must have 3 elements, got {entry}"
                )
            b_max_raw, t_min, t_max = entry
            upper = math.inf if b_max_raw is None else float(b_max_raw)
            parsed.append((upper, float(t_min), float(t_max)))
        return parsed

    def as_dict(self) -> dict[str, Any]:
        """Serialize to a plain dict in the shape `from_dict` accepts."""
        from dataclasses import asdict as _asdict  # noqa: PLC0415

        payload = _asdict(self)
        if self.turnover_buckets is None:
            return payload
        # JSON has no `inf`: an unbounded bucket max is written as `null`,
        # which is what `from_dict` reads back as math.inf.
        rows = []
        for b_max, t_min, t_max in self.turnover_buckets:
            rows.append([None if math.isinf(b_max) else b_max, t_min, t_max])
        payload["turnover_buckets"] = rows
        return payload
434
+
435
+
436
@dataclass
class ScreenDiagnostics:
    """P0 — observable data-completeness counters surfaced in the report.

    Populated by `screen_anomalies` regardless of outcome so the user can
    verify for themselves that no silent degradation happened on this run.
    Every field has a default, so a bare `ScreenDiagnostics()` represents
    "nothing recorded yet".
    """

    # Step 1 — universe sizes: rows returned by stock_basic and the number of
    # distinct main-board ts_codes derived from it.
    stock_basic_rows: int = 0
    main_board_rows: int = 0
    # Step 2 — ST / suspension exclusion lists for the trade date.
    stock_st_count: int = 0
    stock_st_status: str = "ok"  # 'ok' | 'empty' (suspicious) | 'error: ...'
    suspend_d_count: int = 0
    suspend_d_status: str = "ok"  # replaced by the error reason when suspend_d fails
    # Step 3 — daily(T) row counts, before and after restricting to main board.
    daily_t_total_rows: int = 0
    daily_t_main_board_rows: int = 0  # ts_codes intersected with main_codes
    # Step 4 — daily_basic(T) coverage and candidates lacking turnover_rate.
    daily_basic_t_total_rows: int = 0
    daily_basic_t_main_board_rows: int = 0
    daily_basic_status: str = "ok"  # set to "empty" when the frame is unusable
    turnover_missing_codes: list[str] = field(default_factory=list)
    n_turnover_missing: int = 0
    # Step 5 (history window)
    history_window_planned_days: int = 0
    history_window_actual_days: int = 0
    history_window_missing_dates: list[str] = field(default_factory=list)
    history_min_required_days: int = 0
    insufficient_history: list[dict[str, Any]] = field(default_factory=list)
    # P2 L3 — adj_factor coverage; surfaces whether vol-adjust ran on full data,
    # degraded to raw vol for some codes, or was disabled.
    vol_adjust_enabled: bool = False
    vol_adjust_status: str = "disabled"  # 'ok' | 'disabled' | 'degraded: ...'
    adj_factor_planned_days: int = 0
    adj_factor_actual_days: int = 0
    adj_factor_missing_dates: list[str] = field(default_factory=list)
    adj_factor_missing_codes: list[str] = field(default_factory=list)
    # v0.3.0 P0-1 — upper-shadow filter; `enabled=False` when rules disable it.
    upper_shadow_filter_enabled: bool = False
    upper_shadow_filter_threshold: float | None = None
    n_after_upper_shadow: int = 0
    # v0.3.0 P0-2 — circ_mv-bucketed turnover bookkeeping.
    turnover_buckets_enabled: bool = False
    turnover_bucket_hits: dict[str, int] = field(default_factory=dict)
    n_missing_circ_mv: int = 0
    circ_mv_missing_codes: list[str] = field(default_factory=list)
484
+
485
+
486
@dataclass
class ScreenResult:
    """Outcome of a screen pass.

    The `n_*` fields are funnel counts: how many candidates remained after
    each successive filtering stage. When `screen_anomalies` returns early,
    the counts for stages that did not run are reported as 0.
    """

    trade_date: str
    n_main_board: int  # size of the main-board pool
    n_after_st_susp: int  # after removing ST and suspended codes
    n_after_t_day_rules: int  # pct_chg + body_ratio
    n_after_upper_shadow: int  # v0.3.0 P0-1
    n_after_turnover: int
    n_after_vol_rules: int  # vol_ratio_5d + dual vol rule
    # Rules and diagnostics that produced this result.
    rules: ScreenRules = field(default_factory=ScreenRules.defaults)
    diagnostics: ScreenDiagnostics = field(default_factory=ScreenDiagnostics)
    hits: list[dict[str, Any]] = field(default_factory=list)
    # Human-readable notes about missing or suspicious data sources.
    data_unavailable: list[str] = field(default_factory=list)
501
+
502
+
503
+ def screen_anomalies(
504
+ *,
505
+ tushare: TushareClient,
506
+ calendar: TradeCalendar,
507
+ trade_date: str,
508
+ rules: ScreenRules | None = None,
509
+ force_sync: bool = False,
510
+ ) -> ScreenResult:
511
+ """Apply the local screening rules and return matched candidates.
512
+
513
+ Pipeline (cheapest filter first):
514
+ 1. stock_basic → main board pool
515
+ 2. stock_st(T) → drop ST; suspend_d(T) → drop suspended
516
+ 3. daily(T) → keep阳线 + pct_chg in [pct_chg_min, pct_chg_max]
517
+ + body_ratio ≥ body_ratio_min
518
+ 4. daily_basic(T) → keep turnover_rate in [turnover_min, turnover_max]
519
+ 5. daily(N-trade-day window) → keep
520
+ (vol_t == max(vol_max_short_window) OR
521
+ vol_t in top vol_top_n_long over lookback_trade_days)
522
+ AND vol_t ≥ vol_ratio_5d_min × mean(prev 5d)
523
+ """
524
+ rules = rules or ScreenRules.defaults()
525
+ data_unavailable: list[str] = []
526
+ diag = ScreenDiagnostics()
527
+ # v0.3.0 P0-1 / P0-2 — surface whether each new filter is engaged this run.
528
+ diag.upper_shadow_filter_enabled = rules.upper_shadow_ratio_max is not None
529
+ diag.upper_shadow_filter_threshold = rules.upper_shadow_ratio_max
530
+ diag.turnover_buckets_enabled = rules.turnover_buckets is not None
531
+
532
+ # 1. main board pool
533
+ stock_basic = tushare.call("stock_basic", force_sync=force_sync)
534
+ diag.stock_basic_rows = int(len(stock_basic)) if stock_basic is not None else 0
535
+ main_pool = main_board_filter(stock_basic)
536
+ main_codes = set(main_pool["ts_code"].astype(str))
537
+ n_main = len(main_codes)
538
+ diag.main_board_rows = n_main
539
+
540
+ # 2a. ST exclusion (REQUIRED — propagate auth failure)
541
+ st_df = tushare.call("stock_st", trade_date=trade_date, force_sync=force_sync)
542
+ st_codes = set(st_df["ts_code"].astype(str)) if not st_df.empty else set()
543
+ diag.stock_st_count = len(st_codes)
544
+ if not st_codes:
545
+ # P0 M2 — A股每日 ST 数稳定在 100+;返空一定是数据异常,应警示
546
+ diag.stock_st_status = "empty (suspicious — verify data freshness)"
547
+ data_unavailable.append(
548
+ "stock_st(T) returned 0 ST codes — abnormal for A股, "
549
+ "ST stocks may have leaked into candidates; verify data freshness"
550
+ )
551
+
552
+ # 2b. suspended exclusion (OPTIONAL)
553
+ susp_df, susp_err = _try_optional(
554
+ tushare, "suspend_d", trade_date=trade_date, force_sync=force_sync
555
+ )
556
+ if susp_err:
557
+ data_unavailable.append(f"suspend_d ({susp_err})")
558
+ diag.suspend_d_status = susp_err
559
+ susp_codes = set(susp_df["ts_code"].astype(str)) if susp_df is not None and not susp_df.empty else set()
560
+ diag.suspend_d_count = len(susp_codes)
561
+
562
+ eligible = main_codes - st_codes - susp_codes
563
+ n_after_st = len(eligible)
564
+
565
+ # 3. T-day daily — single API call returns all stocks for that date
566
+ daily_t_full = tushare.call("daily", trade_date=trade_date, force_sync=force_sync)
567
+ daily_t_full = _normalize_id_cols(daily_t_full)
568
+ if daily_t_full is None or daily_t_full.empty:
569
+ data_unavailable.append("daily(T) returned empty")
570
+ return ScreenResult(
571
+ trade_date=trade_date,
572
+ n_main_board=n_main,
573
+ n_after_st_susp=n_after_st,
574
+ n_after_t_day_rules=0,
575
+ n_after_upper_shadow=0,
576
+ n_after_turnover=0,
577
+ n_after_vol_rules=0,
578
+ rules=rules,
579
+ diagnostics=diag,
580
+ data_unavailable=data_unavailable,
581
+ )
582
+ diag.daily_t_total_rows = int(len(daily_t_full))
583
+ diag.daily_t_main_board_rows = int(
584
+ daily_t_full["ts_code"].astype(str).isin(main_codes).sum()
585
+ )
586
+ daily_t = daily_t_full[daily_t_full["ts_code"].astype(str).isin(eligible)].copy()
587
+
588
+ # T-day阳线 + 实体占比 + 涨幅区间
589
+ daily_t["body"] = daily_t["close"] - daily_t["open"]
590
+ daily_t["range"] = (daily_t["high"] - daily_t["low"]).clip(lower=1e-9)
591
+ daily_t["body_ratio"] = daily_t["body"] / daily_t["range"]
592
+ # v0.3.0 P0-1 — upper shadow as a fraction of the day's range.
593
+ # = (high − max(open, close)) / range; pure upper wick → 1.0.
594
+ daily_t["upper_shadow_ratio"] = (
595
+ daily_t["high"] - daily_t[["open", "close"]].max(axis=1)
596
+ ) / daily_t["range"]
597
+ t_day_hits = daily_t[
598
+ (daily_t["close"] > daily_t["open"])
599
+ & (daily_t["body_ratio"] >= rules.body_ratio_min)
600
+ & (daily_t["pct_chg"] >= rules.pct_chg_min)
601
+ & (daily_t["pct_chg"] <= rules.pct_chg_max)
602
+ ].copy()
603
+ n_after_t_rules = len(t_day_hits)
604
+ if t_day_hits.empty:
605
+ return ScreenResult(
606
+ trade_date=trade_date,
607
+ n_main_board=n_main,
608
+ n_after_st_susp=n_after_st,
609
+ n_after_t_day_rules=0,
610
+ n_after_upper_shadow=0,
611
+ n_after_turnover=0,
612
+ n_after_vol_rules=0,
613
+ rules=rules,
614
+ diagnostics=diag,
615
+ data_unavailable=data_unavailable,
616
+ )
617
+
618
+ # v0.3.0 P0-1 — upper-shadow filter (skipped when threshold is None).
619
+ if rules.upper_shadow_ratio_max is not None:
620
+ t_day_hits = t_day_hits[
621
+ t_day_hits["upper_shadow_ratio"] <= rules.upper_shadow_ratio_max
622
+ ].copy()
623
+ n_after_upper_shadow = len(t_day_hits)
624
+ diag.n_after_upper_shadow = n_after_upper_shadow
625
+ if t_day_hits.empty:
626
+ return ScreenResult(
627
+ trade_date=trade_date,
628
+ n_main_board=n_main,
629
+ n_after_st_susp=n_after_st,
630
+ n_after_t_day_rules=n_after_t_rules,
631
+ n_after_upper_shadow=0,
632
+ n_after_turnover=0,
633
+ n_after_vol_rules=0,
634
+ rules=rules,
635
+ diagnostics=diag,
636
+ data_unavailable=data_unavailable,
637
+ )
638
+
639
+ # 4. daily_basic — turnover_rate (+ circ_mv for v0.3.0 bucketing) filter
640
+ db_t = tushare.call("daily_basic", trade_date=trade_date, force_sync=force_sync)
641
+ db_t = _normalize_id_cols(db_t)
642
+ db_lookup: dict[str, dict[str, Any]] = {}
643
+ if db_t is not None and not db_t.empty and "turnover_rate" in db_t.columns:
644
+ cols = ["turnover_rate"]
645
+ if "circ_mv" in db_t.columns:
646
+ cols.append("circ_mv")
647
+ db_lookup = db_t.set_index("ts_code")[cols].to_dict("index")
648
+ diag.daily_basic_t_total_rows = int(len(db_t))
649
+ diag.daily_basic_t_main_board_rows = int(
650
+ db_t["ts_code"].astype(str).isin(main_codes).sum()
651
+ )
652
+ else:
653
+ diag.daily_basic_status = "empty"
654
+ data_unavailable.append("daily_basic.turnover_rate (frame empty)")
655
+ t_day_hits["turnover_rate"] = t_day_hits["ts_code"].map(
656
+ lambda c: db_lookup.get(c, {}).get("turnover_rate")
657
+ )
658
+ # v0.3.0 P0-2 — circ_mv lookup (亿元 via normalize_to_yi).
659
+ t_day_hits["circ_mv_yi"] = t_day_hits["ts_code"].map(
660
+ lambda c: normalize_to_yi("circ_mv", db_lookup.get(c, {}).get("circ_mv"))
661
+ )
662
+
663
+ # P0 M1 — surface candidates whose turnover_rate lookup returned NaN.
664
+ # They will be silently dropped by the comparison below; we make that visible.
665
+ missing_mask = t_day_hits["turnover_rate"].isna()
666
+ n_missing_turnover = int(missing_mask.sum())
667
+ diag.n_turnover_missing = n_missing_turnover
668
+ if n_missing_turnover > 0:
669
+ miss_codes = t_day_hits.loc[missing_mask, "ts_code"].astype(str).tolist()
670
+ diag.turnover_missing_codes = miss_codes
671
+ sample = miss_codes[:5]
672
+ ellipsis = "..." if n_missing_turnover > 5 else ""
673
+ data_unavailable.append(
674
+ f"daily_basic.turnover_rate missing for {n_missing_turnover} candidates "
675
+ f"(silently dropped at turnover step): {sample}{ellipsis}"
676
+ )
677
+
678
+ # v0.3.0 P0-2 — bucket lookup. circ_mv missing → fall back to global thresholds.
679
+ buckets = rules.turnover_buckets
680
+ bucket_label_per_row: dict[Any, str | None] = {}
681
+ bucket_hit_counter: dict[str, int] = {}
682
+ circ_mv_missing_codes: list[str] = []
683
+
684
+ def _row_passes_turnover(row: Any) -> bool:
685
+ tr = row.turnover_rate
686
+ if pd.isna(tr):
687
+ return False
688
+ circ = row.circ_mv_yi
689
+ if buckets is None or circ is None or pd.isna(circ):
690
+ t_min, t_max = rules.turnover_min, rules.turnover_max
691
+ label = None
692
+ if buckets is not None and (circ is None or pd.isna(circ)):
693
+ circ_mv_missing_codes.append(str(row.ts_code))
694
+ else:
695
+ _, label, t_min, t_max = _resolve_turnover_bucket(float(circ), buckets)
696
+ bucket_label_per_row[row.Index] = label
697
+ return t_min <= tr <= t_max
698
+
699
+ # We need pandas Index access — use `itertuples(index=True)` and rebuild filter mask.
700
+ keep_mask = []
701
+ for row in t_day_hits.itertuples(index=True):
702
+ keep_mask.append(_row_passes_turnover(row))
703
+ turnover_hits = t_day_hits.loc[keep_mask].copy()
704
+ turnover_hits["turnover_bucket"] = turnover_hits.index.map(
705
+ lambda i: bucket_label_per_row.get(i)
706
+ )
707
+ # Tally bucket distribution among rows that PASSED the filter.
708
+ for label in turnover_hits["turnover_bucket"].tolist():
709
+ if label is None:
710
+ bucket_hit_counter["fallback (no circ_mv)"] = (
711
+ bucket_hit_counter.get("fallback (no circ_mv)", 0) + 1
712
+ )
713
+ else:
714
+ bucket_hit_counter[label] = bucket_hit_counter.get(label, 0) + 1
715
+ diag.turnover_bucket_hits = bucket_hit_counter
716
+ diag.n_missing_circ_mv = len(circ_mv_missing_codes)
717
+ diag.circ_mv_missing_codes = circ_mv_missing_codes
718
+ if circ_mv_missing_codes:
719
+ sample = circ_mv_missing_codes[:5]
720
+ ellipsis = "..." if len(circ_mv_missing_codes) > 5 else ""
721
+ data_unavailable.append(
722
+ f"daily_basic.circ_mv missing for {len(circ_mv_missing_codes)} candidates "
723
+ f"(fell back to global turnover thresholds): {sample}{ellipsis}"
724
+ )
725
+
726
+ n_after_turnover = len(turnover_hits)
727
+ if turnover_hits.empty:
728
+ return ScreenResult(
729
+ trade_date=trade_date,
730
+ n_main_board=n_main,
731
+ n_after_st_susp=n_after_st,
732
+ n_after_t_day_rules=n_after_t_rules,
733
+ n_after_upper_shadow=n_after_upper_shadow,
734
+ n_after_turnover=0,
735
+ n_after_vol_rules=0,
736
+ rules=rules,
737
+ diagnostics=diag,
738
+ data_unavailable=data_unavailable,
739
+ )
740
+
741
+ # 5. N-trade-day vol history for surviving codes (Plan B dual rule)
742
+ survivor_codes = set(turnover_hits["ts_code"].astype(str))
743
+ history_dates = _last_n_trade_dates(calendar, trade_date, rules.lookback_trade_days)
744
+ diag.history_window_planned_days = len(history_dates)
745
+
746
+ # P0 H1 — capture which planned dates returned empty (silent skip → visible).
747
+ history_df, missing_history_dates = _fetch_daily_history_by_date(
748
+ tushare, history_dates, survivor_codes, force_sync=force_sync
749
+ )
750
+ diag.history_window_actual_days = len(history_dates) - len(missing_history_dates)
751
+ diag.history_window_missing_dates = missing_history_dates
752
+ if missing_history_dates:
753
+ sample = missing_history_dates[:5]
754
+ ellipsis = "..." if len(missing_history_dates) > 5 else ""
755
+ data_unavailable.append(
756
+ f"daily history missing on {len(missing_history_dates)}/"
757
+ f"{len(history_dates)} planned days "
758
+ f"(vol_max comparison weakened): {sample}{ellipsis}"
759
+ )
760
+
761
+ # P2 L3 — fetch adj_factor over the same window so vol_max comparisons
762
+ # stay valid across splits/送转. Falls back to raw vol when unavailable.
763
+ adj_factor_lookup, adj_factor_T_lookup = _build_adj_factor_lookups(
764
+ tushare,
765
+ history_dates,
766
+ survivor_codes,
767
+ trade_date=trade_date,
768
+ rules=rules,
769
+ diag=diag,
770
+ data_unavailable=data_unavailable,
771
+ force_sync=force_sync,
772
+ )
773
+
774
+ # P1 L1 — pre-compute the strict 5 trade-dates immediately preceding T.
775
+ # `prior.tail(5)` was permissive: it would happily take any 5 surviving rows,
776
+ # so a stock with gaps could end up averaging vol over a span > 5 trade days.
777
+ expected_prev5_dates = (
778
+ history_dates[-6:-1] if len(history_dates) >= 6 else history_dates[:-1]
779
+ )
780
+ expected_prev5_set = set(expected_prev5_dates)
781
+
782
+ # P0 H2 — enforce minimum history coverage; record stocks that fail
783
+ required_days = max(6, int(rules.lookback_trade_days * rules.min_history_coverage))
784
+ diag.history_min_required_days = required_days
785
+ insufficient_history: list[dict[str, Any]] = []
786
+
787
+ final_hits: list[dict[str, Any]] = []
788
+ industry_lookup = main_pool.set_index("ts_code")[["name", "industry"]].to_dict(orient="index")
789
+ for row in turnover_hits.itertuples(index=False):
790
+ code = str(row.ts_code)
791
+ h = history_df[history_df["ts_code"].astype(str) == code].sort_values("trade_date")
792
+ if len(h) < required_days:
793
+ insufficient_history.append(
794
+ {
795
+ "ts_code": code,
796
+ "name": industry_lookup.get(code, {}).get("name"),
797
+ "available_days": int(len(h)),
798
+ "required_days": required_days,
799
+ "lookback_window": rules.lookback_trade_days,
800
+ }
801
+ )
802
+ continue
803
+ # Identify T row + prev 5 days (excluding T)
804
+ t_row = h[h["trade_date"].astype(str) == trade_date]
805
+ prior = h[h["trade_date"].astype(str) < trade_date]
806
+ if t_row.empty or len(prior) < 5:
807
+ insufficient_history.append(
808
+ {
809
+ "ts_code": code,
810
+ "name": industry_lookup.get(code, {}).get("name"),
811
+ "available_days": int(len(h)),
812
+ "required_days": required_days,
813
+ "lookback_window": rules.lookback_trade_days,
814
+ "reason": "missing T-row or <5 prior days",
815
+ }
816
+ )
817
+ continue
818
+
819
+ # P1 L1 — strict prev-5 trade-day filter: require all 5 calendar
820
+ # positions (history_dates[-6:-1]) to be present, else surface as
821
+ # insufficient_history rather than averaging over a sparse span.
822
+ prior_5d_strict = prior[prior["trade_date"].astype(str).isin(expected_prev5_set)]
823
+ if len(prior_5d_strict) < 5:
824
+ insufficient_history.append(
825
+ {
826
+ "ts_code": code,
827
+ "name": industry_lookup.get(code, {}).get("name"),
828
+ "available_days": int(len(h)),
829
+ "required_days": required_days,
830
+ "lookback_window": rules.lookback_trade_days,
831
+ "reason": (
832
+ f"missing prev-5d trade dates "
833
+ f"(have {len(prior_5d_strict)}/5 of {sorted(expected_prev5_set)})"
834
+ ),
835
+ }
836
+ )
837
+ continue
838
+
839
+ # P2 L3 — adj_factor-aware vol values. When vol_adjust is enabled and
840
+ # f_T is available, compute forward-adjusted vol so that a 1:N split
841
+ # between d and T inflates pre-split vol by N (= adj_T / adj_d) and
842
+ # historical vol stays comparable to T-day vol. Falls back to raw vol
843
+ # silently per-row when adj_factor is missing for that row.
844
+ f_T = adj_factor_T_lookup.get(code)
845
+ if rules.vol_adjust and f_T is not None and f_T > 0:
846
+ def _adj(d: str, raw: float) -> float:
847
+ f_d = adj_factor_lookup.get((code, d))
848
+ if f_d is None or f_d <= 0:
849
+ return raw
850
+ return raw * (f_T / f_d)
851
+ vol_t = float(t_row.iloc[0]["vol"]) # at T, f_d == f_T → no change
852
+ vols_long = [
853
+ _adj(str(td), float(v))
854
+ for td, v in zip(
855
+ h["trade_date"].astype(str).tolist(),
856
+ h["vol"].astype(float).tolist(),
857
+ strict=False,
858
+ )
859
+ ]
860
+ short_h = h.tail(rules.vol_max_short_window)
861
+ vols_short = [
862
+ _adj(str(td), float(v))
863
+ for td, v in zip(
864
+ short_h["trade_date"].astype(str).tolist(),
865
+ short_h["vol"].astype(float).tolist(),
866
+ strict=False,
867
+ )
868
+ ]
869
+ vol_mean_prev5 = float(
870
+ pd.Series(
871
+ [
872
+ _adj(str(td), float(v))
873
+ for td, v in zip(
874
+ prior_5d_strict["trade_date"].astype(str).tolist(),
875
+ prior_5d_strict["vol"].astype(float).tolist(),
876
+ strict=False,
877
+ )
878
+ ]
879
+ ).mean()
880
+ )
881
+ else:
882
+ vol_t = float(t_row.iloc[0]["vol"])
883
+ vols_long = h["vol"].astype(float).tolist()
884
+ vols_short = [
885
+ float(v) for v in h.tail(rules.vol_max_short_window)["vol"].tolist()
886
+ ]
887
+ vol_mean_prev5 = float(prior_5d_strict["vol"].astype(float).mean())
888
+ vol_max_long = max(vols_long)
889
+ vol_max_short = max(vols_short)
890
+
891
+ # Plan B — vol passes if either condition holds
892
+ short_window_max_pass = vol_t >= vol_max_short - 1e-9
893
+ days_with_higher_vol = sum(1 for v in vols_long if v > vol_t + 1e-9)
894
+ long_window_top_n_pass = days_with_higher_vol < rules.vol_top_n_long
895
+ if not (short_window_max_pass or long_window_top_n_pass):
896
+ continue
897
+
898
+ # vol_ratio_5d ≥ rules.vol_ratio_5d_min
899
+ if vol_mean_prev5 <= 0:
900
+ continue
901
+ vol_ratio_5d = vol_t / vol_mean_prev5
902
+ if vol_ratio_5d < rules.vol_ratio_5d_min:
903
+ continue
904
+
905
+ meta = industry_lookup.get(code, {})
906
+ final_hits.append(
907
+ {
908
+ "ts_code": code,
909
+ "name": meta.get("name"),
910
+ "industry": meta.get("industry"),
911
+ "trade_date": trade_date,
912
+ "pct_chg": round2(row.pct_chg),
913
+ "open": round2(row.open),
914
+ "high": round2(row.high),
915
+ "low": round2(row.low),
916
+ "close": round2(row.close),
917
+ "vol": round2(row.vol),
918
+ "amount": round2(row.amount),
919
+ "body_ratio": round2(row.body_ratio),
920
+ "upper_shadow_ratio": round2(getattr(row, "upper_shadow_ratio", None)),
921
+ "turnover_rate": round2(row.turnover_rate),
922
+ "circ_mv_yi": round2(getattr(row, "circ_mv_yi", None)),
923
+ "turnover_bucket": getattr(row, "turnover_bucket", None),
924
+ "vol_ratio_5d": round2(vol_ratio_5d),
925
+ "vol_rank_in_long_window": days_with_higher_vol + 1,
926
+ "max_vol_short_window": round2(vol_max_short),
927
+ "max_vol_long_window": round2(vol_max_long),
928
+ "history_days_used": int(len(h)),
929
+ # Legacy-named column populated by upsert_watchlist /
930
+ # append_anomaly_history. Holds the long-window max regardless
931
+ # of the actual lookback_trade_days setting.
932
+ "max_vol_60d": round2(vol_max_long),
933
+ }
934
+ )
935
+
936
+ diag.insufficient_history = insufficient_history
937
+ if insufficient_history:
938
+ sample = [r["ts_code"] for r in insufficient_history[:5]]
939
+ ellipsis = "..." if len(insufficient_history) > 5 else ""
940
+ data_unavailable.append(
941
+ f"insufficient history (<{required_days} of {rules.lookback_trade_days} days) "
942
+ f"for {len(insufficient_history)} candidates (excluded from vol rule): "
943
+ f"{sample}{ellipsis}"
944
+ )
945
+
946
+ return ScreenResult(
947
+ trade_date=trade_date,
948
+ n_main_board=n_main,
949
+ n_after_st_susp=n_after_st,
950
+ n_after_t_day_rules=n_after_t_rules,
951
+ n_after_upper_shadow=n_after_upper_shadow,
952
+ n_after_turnover=n_after_turnover,
953
+ n_after_vol_rules=len(final_hits),
954
+ rules=rules,
955
+ diagnostics=diag,
956
+ hits=final_hits,
957
+ data_unavailable=data_unavailable,
958
+ )
959
+
960
+
961
def _last_n_trade_dates(calendar: TradeCalendar, end_date: str, n: int) -> list[str]:
    """Return up to ``n`` trade dates ending at ``end_date``, sorted ascending.

    ``end_date`` is included only when ``calendar.is_open(end_date)`` is true;
    the remaining dates are collected by walking backwards with
    ``calendar.pretrade_date``.

    Args:
        calendar: Object exposing ``is_open(date)`` and ``pretrade_date(date)``.
        end_date: Last date of the window (inclusive when it is an open day).
        n: Number of trade dates wanted. Non-positive values yield ``[]``.

    Returns:
        A list of exactly ``n`` date strings (or ``[]`` when ``n <= 0``).
    """
    # Without this guard an open ``end_date`` was returned even for n == 0,
    # i.e. the result could be longer than the requested window.
    if n <= 0:
        return []

    dates: list[str] = []
    cursor = end_date
    if calendar.is_open(cursor):
        dates.append(cursor)
    while len(dates) < n:
        cursor = calendar.pretrade_date(cursor)
        dates.append(cursor)
    # Dates were gathered newest-first; callers slice from the tail, so hand
    # them back oldest-first.
    dates.sort()
    return dates
972
+
973
+
974
def _fetch_daily_history_by_date(
    tushare: TushareClient,
    trade_dates: list[str],
    candidate_codes: set[str],
    *,
    force_sync: bool = False,
) -> tuple[pd.DataFrame, list[str]]:
    """Pull the ``daily`` frame for every date in ``trade_dates`` and stack them.

    One ``tushare.call("daily", trade_date=..., force_sync=...)`` is issued per
    date. Each non-empty frame is passed through ``_normalize_id_cols`` and,
    when ``candidate_codes`` is non-empty, reduced to rows whose ``ts_code`` is
    in that set.

    Args:
        tushare: Client used for the per-date ``daily`` calls.
        trade_dates: Dates to fetch, in the order they should be requested.
        candidate_codes: Codes to keep; an empty set disables the filter.
        force_sync: Forwarded unchanged to ``tushare.call``.

    Returns:
        ``(frame, missing_dates)``. ``frame`` is the concatenation (fresh
        index) of all fetched frames, or an empty ``DataFrame`` when nothing
        was fetched. ``missing_dates`` lists every date whose frame was
        ``None`` or empty, either as returned or after normalisation. A date
        whose frame only becomes empty through the candidate filter is NOT
        reported as missing.
    """
    collected: list[pd.DataFrame] = []
    empty_days: list[str] = []

    for day in trade_dates:
        raw = tushare.call("daily", trade_date=day, force_sync=force_sync)
        # Only normalise frames that actually carry rows.
        frame = None if raw is None or raw.empty else _normalize_id_cols(raw)
        if frame is None or frame.empty:
            empty_days.append(day)
            continue
        collected.append(
            frame[frame["ts_code"].isin(candidate_codes)] if candidate_codes else frame
        )

    if not collected:
        return pd.DataFrame(), empty_days
    return pd.concat(collected, ignore_index=True), empty_days
1009
+
1010
+
1011
def _build_adj_factor_lookups(
    tushare: TushareClient,
    history_dates: list[str],
    survivor_codes: set[str],
    *,
    trade_date: str,
    rules: ScreenRules,
    diag: ScreenDiagnostics,
    data_unavailable: list[str],
    force_sync: bool = False,
) -> tuple[dict[tuple[str, str], float], dict[str, float]]:
    """Build the adj_factor lookup tables used for volume adjustment.

    Returns two dicts: ``(code, trade_date) -> adj_factor`` for every usable
    row fetched over ``history_dates``, and ``code -> adj_factor`` restricted
    to ``trade_date`` (the T-day factor). Both are empty when
    ``rules.vol_adjust`` is falsy or when no adj_factor data came back.

    Side effects on ``diag``:
        vol_adjust_enabled        False when the rule is off, True otherwise
        vol_adjust_status         "disabled" | "ok" | "degraded: ..."
        adj_factor_planned_days   len(history_dates) (only when enabled)
        adj_factor_actual_days    planned days minus days with no data
        adj_factor_missing_dates  dates for which the fetch came back empty
        adj_factor_missing_codes  survivor codes lacking a T-day factor
                                  (only set once data was available)

    Side effects on ``data_unavailable``: at most one human-readable message
    is appended. When both T-day codes and historical days are missing, only
    the T-day message is emitted.
    """
    if not rules.vol_adjust:
        diag.vol_adjust_enabled = False
        diag.vol_adjust_status = "disabled"
        return {}, {}

    planned = len(history_dates)
    diag.vol_adjust_enabled = True
    diag.adj_factor_planned_days = planned
    adj_df, gap_dates = _fetch_adj_factor_history_by_date(
        tushare, history_dates, survivor_codes, force_sync=force_sync
    )
    diag.adj_factor_actual_days = planned - len(gap_dates)
    diag.adj_factor_missing_dates = gap_dates

    if adj_df.empty or "adj_factor" not in adj_df.columns:
        diag.vol_adjust_status = "degraded: adj_factor unavailable (raw vol used)"
        data_unavailable.append(
            "adj_factor unavailable for the entire window — vol-adjust disabled, "
            "raw vol used (splits/送转 in lookback may understate historical vol)"
        )
        return {}, {}

    # (code, date) -> factor; rows whose factor is not a real number are dropped.
    factors: dict[tuple[str, str], float] = {}
    for rec in adj_df.itertuples(index=False):
        try:
            value = float(rec.adj_factor)
        except (TypeError, ValueError):
            continue
        if not pd.isna(value):
            factors[(str(rec.ts_code), str(rec.trade_date))] = value

    # code -> factor on the screening day itself.
    t_key = str(trade_date)
    factors_at_t: dict[str, float] = {
        code: value for (code, day), value in factors.items() if day == t_key
    }

    codes_without_t = sorted(survivor_codes - set(factors_at_t))
    diag.adj_factor_missing_codes = codes_without_t

    if codes_without_t:
        # A missing T-day factor takes precedence over missing history days.
        sample = codes_without_t[:5]
        ellipsis = "..." if len(codes_without_t) > 5 else ""
        diag.vol_adjust_status = (
            f"degraded: T-day adj_factor missing for {len(codes_without_t)} code(s)"
        )
        data_unavailable.append(
            f"adj_factor(T) missing for {len(codes_without_t)} candidate(s) — "
            f"those codes use raw vol: {sample}{ellipsis}"
        )
    elif gap_dates:
        diag.vol_adjust_status = (
            f"degraded: {len(gap_dates)} historical day(s) missing adj_factor"
        )
        sample = gap_dates[:5]
        ellipsis = "..." if len(gap_dates) > 5 else ""
        data_unavailable.append(
            f"adj_factor missing on {len(gap_dates)}/{len(history_dates)} "
            f"days (raw vol used for those rows): {sample}{ellipsis}"
        )
    else:
        diag.vol_adjust_status = "ok"

    return factors, factors_at_t
1101
+
1102
+
1103
def _fetch_adj_factor_history_by_date(
    tushare: TushareClient,
    trade_dates: list[str],
    candidate_codes: set[str],
    *,
    force_sync: bool = False,
) -> tuple[pd.DataFrame, list[str]]:
    """Pull the ``adj_factor`` frame for every date in ``trade_dates`` and stack them.

    Each date is fetched through ``_try_optional(tushare, "adj_factor", ...)``.
    The error half of its result is discarded, so a failed call is treated
    exactly like an empty one: the date is recorded as missing.

    Args:
        tushare: Client handed to ``_try_optional``.
        trade_dates: Dates to fetch, in request order.
        candidate_codes: Codes to keep; an empty set disables the filter.
        force_sync: Forwarded unchanged to ``_try_optional``.

    Returns:
        ``(frame, missing_dates)``. ``frame`` is the concatenation (fresh
        index) of the fetched frames after ``_normalize_id_cols`` and the
        optional ``ts_code`` filter, or an empty ``DataFrame`` when nothing
        was fetched. ``missing_dates`` lists the dates whose frame was
        ``None`` or empty before or after normalisation.
    """
    pieces: list[pd.DataFrame] = []
    gaps: list[str] = []

    for day in trade_dates:
        fetched, _ignored_error = _try_optional(
            tushare, "adj_factor", trade_date=day, force_sync=force_sync
        )
        usable = None if fetched is None or fetched.empty else _normalize_id_cols(fetched)
        if usable is None or usable.empty:
            gaps.append(day)
            continue
        if candidate_codes:
            usable = usable[usable["ts_code"].isin(candidate_codes)]
        pieces.append(usable)

    stacked = pd.concat(pieces, ignore_index=True) if pieces else pd.DataFrame()
    return stacked, gaps
1135
+
1136
+
1137
+ # ---------------------------------------------------------------------------
1138
+ # Watchlist persistence
1139
+ # ---------------------------------------------------------------------------
1140
+
1141
+
1142
def upsert_watchlist(db: Any, hits: list[dict[str, Any]], trade_date: str) -> tuple[int, int]:
    """Insert new hits into ``va_watchlist`` and refresh rows that already exist.

    For a code already present, only the ``name`` / ``industry`` / ``last_*``
    columns are rewritten; ``tracked_since`` is deliberately left untouched so
    that a stock which re-triggers keeps its original tracking start (the
    tracked-days metric depends on it). A new code is inserted with both
    ``tracked_since`` and ``last_screened`` set to ``trade_date``.

    A code that occurs more than once in ``hits`` is inserted once and then
    updated for every later occurrence, instead of being inserted again.

    Args:
        db: Handle exposing ``fetchall(sql)`` and ``execute(sql, params)``.
        hits: Screening results; each must carry ``"ts_code"``. Other keys are
            read with ``dict.get`` and default to ``None``.
        trade_date: Screening date stored in ``last_screened`` (and in
            ``tracked_since`` for new rows).

    Returns:
        ``(n_new, n_updated)`` — number of INSERT and UPDATE statements issued.
    """
    if not hits:
        return 0, 0

    # Only the codes matter for the insert-vs-update decision.
    known_codes = {
        row[0] for row in db.fetchall("SELECT ts_code, tracked_since FROM va_watchlist")
    }

    n_new = 0
    n_updated = 0
    for h in hits:
        code = h["ts_code"]
        # Metric columns appear in the same order in both statements.
        metrics = (
            h.get("pct_chg"),
            h.get("close"),
            h.get("vol"),
            h.get("amount"),
            h.get("body_ratio"),
            h.get("turnover_rate"),
            h.get("vol_ratio_5d"),
            h.get("max_vol_60d"),
        )
        if code in known_codes:
            db.execute(
                "UPDATE va_watchlist SET name=?, industry=?, last_screened=?, "
                "last_pct_chg=?, last_close=?, last_vol=?, last_amount=?, "
                "last_body_ratio=?, last_turnover_rate=?, last_vol_ratio_5d=?, "
                "last_max_vol_60d=? WHERE ts_code=?",
                (h.get("name"), h.get("industry"), trade_date, *metrics, code),
            )
            n_updated += 1
        else:
            db.execute(
                "INSERT INTO va_watchlist(ts_code, name, industry, tracked_since, "
                "last_screened, last_pct_chg, last_close, last_vol, last_amount, "
                "last_body_ratio, last_turnover_rate, last_vol_ratio_5d, last_max_vol_60d) "
                "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
                (code, h.get("name"), h.get("industry"), trade_date, trade_date, *metrics),
            )
            # Remember the insert so a repeated code in this batch is updated
            # rather than inserted a second time.
            known_codes.add(code)
            n_new += 1
    return n_new, n_updated
1205
+
1206
+
1207
def append_anomaly_history(db: Any, hits: list[dict[str, Any]]) -> None:
    """Write every hit to ``va_anomaly_history``, replacing any earlier row.

    For each hit the row keyed by ``(trade_date, ts_code)`` is deleted and
    then inserted afresh, which gives replace-on-conflict behaviour using two
    plain statements. (The original author attributes this to older DuckDB
    versions lacking ON CONFLICT for composite keys — not verifiable here.)

    Args:
        db: Handle exposing ``execute(sql, params)``.
        hits: Hit dicts; ``"trade_date"`` and ``"ts_code"`` are required, the
            remaining columns are read with ``dict.get``. The whole dict is
            also stored as JSON in ``raw_metrics_json``.
    """
    if not hits:
        return

    # Columns filled via dict.get, in INSERT order after the two key columns.
    optional_fields = (
        "name",
        "industry",
        "pct_chg",
        "close",
        "open",
        "high",
        "low",
        "vol",
        "amount",
        "body_ratio",
        "turnover_rate",
        "vol_ratio_5d",
        "max_vol_60d",
    )
    delete_sql = "DELETE FROM va_anomaly_history WHERE trade_date=? AND ts_code=?"
    insert_sql = (
        "INSERT INTO va_anomaly_history(trade_date, ts_code, name, industry, "
        "pct_chg, close, open, high, low, vol, amount, body_ratio, turnover_rate, "
        "vol_ratio_5d, max_vol_60d, raw_metrics_json) "
        "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
    )

    for hit in hits:
        key = (hit["trade_date"], hit["ts_code"])
        db.execute(delete_sql, key)
        db.execute(
            insert_sql,
            (
                *key,
                *(hit.get(column) for column in optional_fields),
                json.dumps(hit, ensure_ascii=False),
            ),
        )
1244
+
1245
+
1246
+ def fetch_watchlist(db: Any) -> list[dict[str, Any]]:
1247
+ """Read all watchlist rows as dicts."""
1248
+ rows = db.fetchall(
1249
+ "SELECT ts_code, name, industry, tracked_since, last_screened, last_pct_chg, "
1250
+ "last_close, last_vol, last_amount, last_body_ratio, last_turnover_rate, "
1251
+ "last_vol_ratio_5d, last_max_vol_60d FROM va_watchlist ORDER BY tracked_since"
1252
+ )
1253
+ cols = [
1254
+ "ts_code",
1255
+ "name",
1256
+ "industry",
1257
+ "tracked_since",
1258
+ "last_screened",
1259
+ "last_pct_chg",
1260
+ "last_close",
1261
+ "last_vol",
1262
+ "last_amount",
1263
+ "last_body_ratio",
1264
+ "last_turnover_rate",
1265
+ "last_vol_ratio_5d",
1266
+ "last_max_vol_60d",
1267
+ ]
1268
+ return [dict(zip(cols, r, strict=False)) for r in rows]
1269
+
1270
+
1271
def prune_watchlist(db: Any, *, min_tracked_calendar_days: int, today: str) -> list[dict[str, Any]]:
    """Delete watchlist rows that have been tracked for at least N calendar days.

    Args:
        db: Handle passed to ``fetch_watchlist`` and used for the deletes.
        min_tracked_calendar_days: Age threshold (inclusive).
        today: Reference date handed to ``_calendar_days_between``.

    Returns:
        The removed rows as dicts, each extended with a ``"tracked_days"`` key
        holding its computed age.
    """
    expired: list[dict[str, Any]] = []
    for entry in fetch_watchlist(db):
        age_days = _calendar_days_between(entry["tracked_since"], today)
        if age_days >= min_tracked_calendar_days:
            entry["tracked_days"] = age_days
            expired.append(entry)

    # One parameterised DELETE per code rather than a stitched IN (...) clause.
    for code in [entry["ts_code"] for entry in expired]:
        db.execute("DELETE FROM va_watchlist WHERE ts_code=?", (code,))
    return expired
1286
+
1287
+
1288
+ # ---------------------------------------------------------------------------
1289
+ # ANALYZE MODE — assemble per-stock context for LLM
1290
+ # ---------------------------------------------------------------------------
1291
+
1292
+
1293
# v0.3.0 P0-3 — VCP feature windows (kept module-level so callers / tests can
# read them without instantiating AnalyzeBundle). All values are counts of
# history rows, i.e. trade days.
ATR_WINDOW = 10  # 10-day ATR window (simple-average TR)
ATR_QUANTILE_LOOKBACK = 60  # rank current ATR within the trailing 60-day series
BBW_WINDOW = 20  # 20-day Bollinger band width
BBW_COMPRESSION_LOOKBACK = 60  # current BBW vs trailing 60-day mean BBW
# v0.3.0 P0-4 — resistance windows (E3-A: only `low_120d`, no `low_250d`).
RESIST_120D = 120
RESIST_250D = 250
# Default extended history window for analyze mode (E2-A: single 250d fetch
# is sliced internally for verbatim / VCP / resistance consumers).
# Aliased to RESIST_250D so the fetch always covers the longest window above.
DEFAULT_EXTENDED_LOOKBACK_TRADE_DAYS = RESIST_250D
1305
+
1306
+
1307
def _compute_atr_series(history: list[dict[str, Any]]) -> list[float | None]:
    """Compute the trailing simple-average True Range for every row.

        TR_t  = max(high_t - low_t, |high_t - close_{t-1}|, |low_t - close_{t-1}|)
        ATR_t = mean(TR over the last ATR_WINDOW rows, row t included)

    Args:
        history: Rows in chronological order, each read via ``.get`` for
            ``"high"``, ``"low"`` and ``"close"``.

    Returns:
        A list aligned with ``history``. An entry is ``None`` when the window
        does not yet fit, or when any True Range inside it is unavailable
        (first row, or a missing / non-numeric / NaN input).
    """
    n = len(history)
    if n < 2:
        return [None] * n

    def _true_range(today: dict[str, Any], yesterday: dict[str, Any]) -> float | None:
        # True Range of one row, or None when any of its three inputs is unusable.
        raw = (today.get("high"), today.get("low"), yesterday.get("close"))
        if any(value is None for value in raw):
            return None
        try:
            high, low, prev_close = (float(value) for value in raw)
        except (TypeError, ValueError):
            return None
        if pd.isna(high) or pd.isna(low) or pd.isna(prev_close):
            return None
        return max(high - low, abs(high - prev_close), abs(low - prev_close))

    # The first row has no previous close, hence no True Range.
    true_ranges: list[float | None] = [None]
    true_ranges.extend(_true_range(history[i], history[i - 1]) for i in range(1, n))

    atr: list[float | None] = []
    for end in range(n):
        begin = end - ATR_WINDOW + 1
        # A window that would start before row 0 is treated as incomplete.
        window = true_ranges[begin : end + 1] if begin >= 0 else [None]
        if all(tr is not None for tr in window):
            atr.append(sum(window) / ATR_WINDOW)  # type: ignore[arg-type]
        else:
            atr.append(None)
    return atr
1349
+
1350
+
1351
def _compute_bbw_series(history: list[dict[str, Any]]) -> list[float | None]:
    """Compute the Bollinger Band Width, in percent of the moving average, per row.

        BBW = 4 * stdev(close over BBW_WINDOW) / mean(close over BBW_WINDOW) * 100

    The factor 4 is the distance between the upper band (MA + 2 sigma) and the
    lower band (MA - 2 sigma). The standard deviation is the population form
    (divides by the window length).

    Args:
        history: Rows in chronological order, each read via ``.get("close")``.

    Returns:
        A list aligned with ``history``. An entry is ``None`` until a full
        window is available, when any close in the window is missing,
        non-numeric or NaN, or when the window mean is not positive.
    """

    def _as_close(row: dict[str, Any]) -> float | None:
        # Close price as a float, or None when absent / unparseable / NaN.
        raw = row.get("close")
        if raw is None:
            return None
        try:
            value = float(raw)
        except (TypeError, ValueError):
            return None
        return None if pd.isna(value) else value

    closes = [_as_close(row) for row in history]

    widths: list[float | None] = []
    for end in range(len(closes)):
        begin = end - BBW_WINDOW + 1
        # A window that would start before row 0 is treated as incomplete.
        window = closes[begin : end + 1] if begin >= 0 else [None]
        if any(close is None for close in window):
            widths.append(None)
            continue
        mean = sum(window) / BBW_WINDOW  # type: ignore[arg-type]
        if mean <= 0:
            widths.append(None)
            continue
        variance = sum((close - mean) ** 2 for close in window) / BBW_WINDOW  # type: ignore[operator]
        widths.append(4 * math.sqrt(variance) / mean * 100)
    return widths
1396
+
1397
+
1398
def _quantile_in_window(
    series: list[float | None], idx: int, lookback: int
) -> float | None:
    """Rank ``series[idx]`` within its trailing window as a value in [0, 1].

    The window is the last ``lookback`` positions ending at ``idx``
    (inclusive), with ``None`` entries discarded. 0 means the current value
    is the window minimum, 1 the window maximum.

    Args:
        series: Values aligned to history rows; ``None`` marks a gap.
        idx: Position of the value to rank.
        lookback: Window length in positions.

    Returns:
        ``(count of values <= current - 1) / (window size - 1)``; ``0.0`` for
        a single-value window; ``None`` when ``idx`` is out of range, the
        current value is ``None``, or the window holds fewer than ``lookback``
        non-None values.
    """
    if not 0 <= idx < len(series):
        return None
    current = series[idx]
    if current is None:
        return None

    first = max(0, idx - lookback + 1)
    observed = [value for value in series[first : idx + 1] if value is not None]
    size = len(observed)
    if size < lookback:
        return None
    if size <= 1:
        # A lone observation has no spread to rank against.
        return 0.0

    # The current value is part of the window, so the count is at least 1.
    at_or_below = sum(value <= current for value in observed)
    return (at_or_below - 1) / (size - 1)
1416
+
1417
+
1418
@dataclass
class AnalyzeBundle:
    """Container for everything the trend-analysis LLM stage needs.

    Only ``trade_date`` and ``next_trade_date`` are required; every other
    field defaults to an empty container (or a fixed string) so a bundle can
    be returned as-is when there is nothing to analyse.
    """

    # Analysis day (T) the bundle was assembled for.
    trade_date: str
    # Trade day following T, as supplied by the caller.
    next_trade_date: str
    # Per-stock context dicts — presumably one per watchlist entry; confirm against the builder.
    candidates: list[dict[str, Any]] = field(default_factory=list)
    # Market-wide summary values; schema is not visible in this block.
    market_summary: dict[str, Any] = field(default_factory=dict)
    # Which source produced sector_strength_data; defaults to the industry fallback.
    sector_strength_source: str = "industry_fallback"
    # Sector-strength payload matching sector_strength_source.
    sector_strength_data: dict[str, Any] = field(default_factory=dict)
    # Human-readable notes about inputs that could not be fetched.
    data_unavailable: list[str] = field(default_factory=list)
1429
+
1430
+
1431
# v0.5.0 P1-1 — RPS / market-relative alpha configuration
DEFAULT_BASELINE_INDEX_CODE = "000300.SH"  # default baseline index for alpha (CSI 300)
ALPHA_LEADING_THRESHOLD = 5.0  # alpha_20d_pct > +5 → leading
ALPHA_LAGGING_THRESHOLD = -5.0  # alpha_20d_pct < -5 → lagging
1435
+
1436
+
1437
+ def collect_analyze_bundle(
1438
+ *,
1439
+ tushare: TushareClient,
1440
+ db: Any,
1441
+ calendar: TradeCalendar,
1442
+ trade_date: str,
1443
+ next_trade_date: str,
1444
+ history_lookback: int = DEFAULT_EXTENDED_LOOKBACK_TRADE_DAYS,
1445
+ moneyflow_lookback: int = 5,
1446
+ baseline_index_code: str = DEFAULT_BASELINE_INDEX_CODE,
1447
+ force_sync: bool = False,
1448
+ ) -> AnalyzeBundle:
1449
+ """Read watchlist + pull historical windows + assemble compact LLM context.
1450
+
1451
+ Per the design spec:
1452
+ * 60-trade-day window for OHLCV → compressed into MA/aggregate features
1453
+ * 5-day moneyflow → compressed into trend + cumulative net flow
1454
+ * 60-day limit_list_d → flag历史涨停 (optional)
1455
+ * sector_strength: limit_cpt_list (tier 1) / industry aggregation fallback
1456
+ * tracked_days: calendar days since first added to watchlist
1457
+ """
1458
+ bundle = AnalyzeBundle(trade_date=trade_date, next_trade_date=next_trade_date)
1459
+ data_unavailable: list[str] = []
1460
+
1461
+ watchlist = fetch_watchlist(db)
1462
+ if not watchlist:
1463
+ return bundle
1464
+
1465
+ candidate_codes = {w["ts_code"] for w in watchlist}
1466
+
1467
+ # -------- historical OHLCV (extended trade-day window, batch by date) ---
1468
+ history_dates = _last_n_trade_dates(calendar, trade_date, history_lookback)
1469
+ daily_df, missing_history_dates = _fetch_daily_history_by_date(
1470
+ tushare, history_dates, candidate_codes, force_sync=force_sync
1471
+ )
1472
+ if daily_df.empty:
1473
+ data_unavailable.append(
1474
+ f"daily({history_lookback}d-window) returned empty"
1475
+ )
1476
+ elif missing_history_dates:
1477
+ sample = missing_history_dates[:5]
1478
+ ellipsis = "..." if len(missing_history_dates) > 5 else ""
1479
+ data_unavailable.append(
1480
+ f"daily history missing on {len(missing_history_dates)}/"
1481
+ f"{len(history_dates)} planned days: {sample}{ellipsis}"
1482
+ )
1483
+
1484
+ # -------- baseline index daily (v0.5.0 P1-1 — alpha computation) --------
1485
+ # F1: 沪深 300; G1: 250d window matched to per-stock daily history.
1486
+ # G8: failures emit a WARN-level mention into data_unavailable; the runner
1487
+ # surfaces it as an EventLevel.WARN LOG instead of silently degrading.
1488
+ baseline_close_by_date: dict[str, float] = {}
1489
+ if history_dates:
1490
+ idx_df, idx_err = _try_optional(
1491
+ tushare,
1492
+ "index_daily",
1493
+ params={
1494
+ "ts_code": baseline_index_code,
1495
+ "start_date": history_dates[0],
1496
+ "end_date": history_dates[-1],
1497
+ },
1498
+ force_sync=force_sync,
1499
+ )
1500
+ if idx_err:
1501
+ data_unavailable.append(
1502
+ f"index_daily ({idx_err}) — alpha 字段降级为 None;"
1503
+ f"如需启用 alpha,请确认 Tushare 账户已开通 index_daily 权限"
1504
+ )
1505
+ else:
1506
+ idx_df = _normalize_id_cols(idx_df)
1507
+ if idx_df is not None and not idx_df.empty and "close" in idx_df.columns:
1508
+ for r in idx_df[["trade_date", "close"]].itertuples(index=False):
1509
+ if r.close is not None:
1510
+ baseline_close_by_date[str(r.trade_date)] = float(r.close)
1511
+ else:
1512
+ data_unavailable.append(
1513
+ f"index_daily({baseline_index_code}) returned empty — alpha 字段降级为 None"
1514
+ )
1515
+
1516
+ # -------- daily_basic on T (turnover, circ_mv, pe, pb) -------------------
1517
+ db_basic_t = tushare.call("daily_basic", trade_date=trade_date, force_sync=force_sync)
1518
+ db_basic_lookup: dict[str, dict[str, Any]] = {}
1519
+ if not db_basic_t.empty:
1520
+ for r in db_basic_t.itertuples(index=False):
1521
+ db_basic_lookup[str(r.ts_code)] = {
1522
+ "turnover_rate": getattr(r, "turnover_rate", None),
1523
+ "volume_ratio": getattr(r, "volume_ratio", None),
1524
+ "pe": getattr(r, "pe", None),
1525
+ "pb": getattr(r, "pb", None),
1526
+ "circ_mv": getattr(r, "circ_mv", None),
1527
+ "total_mv": getattr(r, "total_mv", None),
1528
+ }
1529
+ else:
1530
+ data_unavailable.append("daily_basic(T)")
1531
+
1532
+ # -------- moneyflow (5-day per stock, optional) -------------------------
1533
+ mf_start = _shift_calendar_days(trade_date, -(moneyflow_lookback + 7))
1534
+ mf_df, mf_err = _try_optional(
1535
+ tushare,
1536
+ "moneyflow",
1537
+ params={"start_date": mf_start, "end_date": trade_date},
1538
+ force_sync=force_sync,
1539
+ )
1540
+ if mf_err:
1541
+ data_unavailable.append(f"moneyflow ({mf_err})")
1542
+ mf_df = _normalize_id_cols(mf_df)
1543
+ if mf_df is not None and not mf_df.empty:
1544
+ mf_df = mf_df[mf_df["ts_code"].isin(candidate_codes)]
1545
+
1546
+ # -------- limit_list_d 60-day (flag stocks with prior涨停) ---------------
1547
+ lu_start = history_dates[0] if history_dates else trade_date
1548
+ lu_df, lu_err = _try_optional(
1549
+ tushare,
1550
+ "limit_list_d",
1551
+ params={"start_date": lu_start, "end_date": trade_date, "limit_type": "U"},
1552
+ force_sync=force_sync,
1553
+ )
1554
+ if lu_err:
1555
+ data_unavailable.append(f"limit_list_d ({lu_err})")
1556
+ lu_by_code: dict[str, list[str]] = {}
1557
+ if lu_df is not None and not lu_df.empty:
1558
+ for r in lu_df.itertuples(index=False):
1559
+ lu_by_code.setdefault(str(r.ts_code), []).append(str(r.trade_date))
1560
+
1561
+ # -------- sector strength (tier 1: limit_cpt_list, fallback: industry agg)
1562
+ cpt_df, cpt_err = _try_optional(
1563
+ tushare, "limit_cpt_list", trade_date=trade_date, force_sync=force_sync
1564
+ )
1565
+ if cpt_err:
1566
+ data_unavailable.append(f"limit_cpt_list ({cpt_err})")
1567
+ if cpt_df is not None and not cpt_df.empty:
1568
+ bundle.sector_strength_source = "limit_cpt_list"
1569
+ top = cpt_df.sort_values("rank").head(10) if "rank" in cpt_df.columns else cpt_df.head(10)
1570
+ bundle.sector_strength_data = {"top_sectors": top.to_dict(orient="records")}
1571
+ else:
1572
+ # Industry fallback: aggregate watchlist by industry
1573
+ agg: dict[str, int] = {}
1574
+ for w in watchlist:
1575
+ ind = w.get("industry") or "未分类"
1576
+ agg[ind] = agg.get(ind, 0) + 1
1577
+ bundle.sector_strength_source = "industry_fallback"
1578
+ bundle.sector_strength_data = {
1579
+ "top_sectors": [
1580
+ {"sector": k, "watchlist_count": v}
1581
+ for k, v in sorted(agg.items(), key=lambda kv: kv[1], reverse=True)[:10]
1582
+ ]
1583
+ }
1584
+
1585
+ # -------- per-stock context assembly ------------------------------------
1586
+ daily_by_code = _index_daily_by_code(daily_df)
1587
+ mf_by_code = _index_moneyflow_by_code(mf_df)
1588
+ candidates: list[dict[str, Any]] = []
1589
+ for w in watchlist:
1590
+ code = w["ts_code"]
1591
+ history = daily_by_code.get(code, [])
1592
+ if not history:
1593
+ # No data for this stock — still include it so LLM sees missing_data
1594
+ candidates.append(
1595
+ {
1596
+ "candidate_id": code,
1597
+ "ts_code": code,
1598
+ "name": w.get("name"),
1599
+ "industry": w.get("industry"),
1600
+ "tracked_since": w.get("tracked_since"),
1601
+ "tracked_days": _calendar_days_between(w["tracked_since"], trade_date),
1602
+ "_missing_history": True,
1603
+ }
1604
+ )
1605
+ continue
1606
+
1607
+ rec = _build_candidate_row(
1608
+ watchlist_row=w,
1609
+ trade_date=trade_date,
1610
+ history=history,
1611
+ daily_basic=db_basic_lookup.get(code, {}),
1612
+ moneyflow_5d=mf_by_code.get(code, [])[-moneyflow_lookback:],
1613
+ limit_up_dates=sorted(lu_by_code.get(code, [])),
1614
+ baseline_index_code=baseline_index_code,
1615
+ baseline_close_by_date=baseline_close_by_date,
1616
+ )
1617
+ candidates.append(rec)
1618
+
1619
+ # -------- market summary -------------------------------------------------
1620
+ bundle.market_summary = {
1621
+ "watchlist_total": len(watchlist),
1622
+ "history_lookback_trade_days": history_lookback,
1623
+ "moneyflow_lookback_days": moneyflow_lookback,
1624
+ }
1625
+ bundle.candidates = candidates
1626
+ bundle.data_unavailable = data_unavailable
1627
+ return bundle
1628
+
1629
+
1630
+ def _index_daily_by_code(df: pd.DataFrame | None) -> dict[str, list[dict[str, Any]]]:
1631
+ if df is None or df.empty or "ts_code" not in df.columns:
1632
+ return {}
1633
+ df = _normalize_id_cols(df)
1634
+ if df is None:
1635
+ return {}
1636
+ df = df.sort_values("trade_date") if "trade_date" in df.columns else df
1637
+ out: dict[str, list[dict[str, Any]]] = {}
1638
+ for code, group in df.groupby("ts_code"):
1639
+ out[str(code)] = group.to_dict(orient="records")
1640
+ return out
1641
+
1642
+
1643
+ def _index_moneyflow_by_code(df: pd.DataFrame | None) -> dict[str, list[dict[str, Any]]]:
1644
+ if df is None or df.empty or "ts_code" not in df.columns:
1645
+ return {}
1646
+ df = _normalize_id_cols(df)
1647
+ if df is None:
1648
+ return {}
1649
+ df = df.sort_values("trade_date") if "trade_date" in df.columns else df
1650
+ out: dict[str, list[dict[str, Any]]] = {}
1651
+ for code, group in df.groupby("ts_code"):
1652
+ out[str(code)] = group.to_dict(orient="records")
1653
+ return out
1654
+
1655
+
1656
+ def _compute_alpha_pct(
1657
+ history: list[dict[str, Any]],
1658
+ baseline_close_by_date: dict[str, float],
1659
+ n: int,
1660
+ ) -> float | None:
1661
+ """alpha_n = stock_pct_chg_n − baseline_pct_chg_n (over the last n trade days).
1662
+
1663
+ Both legs use simple compounded close-to-close return. Returns None when
1664
+ either leg can't be computed (insufficient history / baseline data missing
1665
+ on the required dates).
1666
+ """
1667
+ if len(history) <= n:
1668
+ return None
1669
+ end_row = history[-1]
1670
+ start_row = history[-1 - n]
1671
+ end_close = end_row.get("close")
1672
+ start_close = start_row.get("close")
1673
+ if end_close is None or start_close is None or start_close <= 0:
1674
+ return None
1675
+ end_date = str(end_row.get("trade_date") or "")
1676
+ start_date = str(start_row.get("trade_date") or "")
1677
+ base_end = baseline_close_by_date.get(end_date)
1678
+ base_start = baseline_close_by_date.get(start_date)
1679
+ if base_end is None or base_start is None or base_start <= 0:
1680
+ return None
1681
+ stock_ret = (float(end_close) / float(start_close) - 1.0) * 100.0
1682
+ base_ret = (float(base_end) / float(base_start) - 1.0) * 100.0
1683
+ return round(stock_ret - base_ret, 2)
1684
+
1685
+
1686
+ def _classify_rel_strength(alpha_20d: float | None) -> str | None:
1687
+ if alpha_20d is None:
1688
+ return None
1689
+ if alpha_20d > ALPHA_LEADING_THRESHOLD:
1690
+ return "leading"
1691
+ if alpha_20d < ALPHA_LAGGING_THRESHOLD:
1692
+ return "lagging"
1693
+ return "in_line"
1694
+
1695
+
1696
def _build_candidate_row(
    *,
    watchlist_row: dict[str, Any],
    trade_date: str,
    history: list[dict[str, Any]],
    daily_basic: dict[str, Any],
    moneyflow_5d: list[dict[str, Any]],
    limit_up_dates: list[str],
    baseline_index_code: str = DEFAULT_BASELINE_INDEX_CODE,
    baseline_close_by_date: dict[str, float] | None = None,
) -> dict[str, Any]:
    """Compress (up to) 250-day history → moving averages + base/washout +
    VCP / resistance features.

    Reduces token usage by emitting compact scalars; the recent 5 OHLCV rows
    are still passed verbatim for form reference. v0.3.0 (PR-2):
      * input window widened from 60 → 250 trading days (E2-A) and sliced
        internally — 60d for MAs / aggregates, full window for VCP and
        120d / 250d resistance.
      * new fields: atr_10d_pct / atr_10d_quantile_in_60d / bbw_20d /
        bbw_compression_ratio (P0-3) and high_120d / high_250d / low_120d /
        dist_to_120d_high_pct / dist_to_250d_high_pct / is_above_120d_high /
        is_above_250d_high / pos_in_120d_range (P0-4).

    Args:
        watchlist_row: Watchlist record; ``ts_code`` and ``tracked_since`` are
            required, the ``last_*`` snapshot fields are optional.
        trade_date: The T day being analysed (compared as a string against
            row ``trade_date`` values and limit-up dates).
        history: Daily rows for this stock, oldest first.
        daily_basic: T-day ``daily_basic`` fields for this stock (may be empty).
        moneyflow_5d: Most recent moneyflow rows for this stock, oldest first.
        limit_up_dates: Sorted limit-up dates for this stock.
        baseline_index_code: Index code echoed into the output row.
        baseline_close_by_date: ``{trade_date: close}`` of the baseline index;
            ``None`` is treated as an empty mapping (alpha fields become None).

    Returns:
        A flat dict of features for one candidate. When ``history`` has no
        usable close, a short stub flagged with ``_missing_history`` is
        returned instead.
    """
    closes = [float(r["close"]) for r in history if r.get("close") is not None]
    if not closes:
        return {
            "candidate_id": watchlist_row["ts_code"],
            "ts_code": watchlist_row["ts_code"],
            "name": watchlist_row.get("name"),
            # Emitted for parity with the full row below.
            "industry": watchlist_row.get("industry"),
            "tracked_since": watchlist_row["tracked_since"],
            "tracked_days": _calendar_days_between(watchlist_row["tracked_since"], trade_date),
            "_missing_history": True,
        }

    # The 60d "compressed feature" slice — preserve pre-v0.3.0 semantics for
    # ma60 / high_60d / low_60d / pct_chg_60d when the input history is now
    # 250d wide.
    closes_60 = closes[-60:]
    last_close = closes[-1]

    def _ma(n: int) -> float | None:
        # Simple moving average of the last n closes; None when history is short.
        if len(closes) < n:
            return None
        return round(sum(closes[-n:]) / n, 3)

    ma5, ma10, ma20, ma60 = _ma(5), _ma(10), _ma(20), _ma(60)
    above_ma60 = ma60 is not None and last_close > ma60
    above_ma20 = ma20 is not None and last_close > ma20

    # 60d aggregates (over the most-recent 60 closes)
    high_60d = round(max(closes_60), 3)
    low_60d = round(min(closes_60), 3)
    range_pct_60d = round((high_60d - low_60d) / max(low_60d, 1e-9) * 100, 2)
    pct_chg_60d = (
        round((last_close / closes_60[0] - 1) * 100, 2)
        if len(closes_60) >= 60 and closes_60[0] > 0
        else None
    )

    # v0.3.0 P0-3 — VCP volatility-contraction indicators.
    # NOTE(review): both series are indexed with len(history) - 1, so they are
    # assumed to be aligned one-to-one with `history` — confirm in the helpers.
    atr_series = _compute_atr_series(history)
    bbw_series = _compute_bbw_series(history)
    last_idx = len(history) - 1
    atr_now = atr_series[last_idx] if atr_series else None
    bbw_now = bbw_series[last_idx] if bbw_series else None
    atr_10d_pct: float | None = None
    if atr_now is not None and last_close > 0:
        atr_10d_pct = round(atr_now / last_close * 100, 3)
    atr_10d_quantile_in_60d = _quantile_in_window(
        atr_series, last_idx, ATR_QUANTILE_LOOKBACK
    )
    if atr_10d_quantile_in_60d is not None:
        atr_10d_quantile_in_60d = round(atr_10d_quantile_in_60d, 3)
    bbw_20d = round(bbw_now, 3) if bbw_now is not None else None
    bbw_compression_ratio: float | None = None
    if bbw_now is not None:
        # The trailing window includes the current value; the ratio is only
        # emitted when every slot in that window is populated.
        prior = [b for b in bbw_series[-BBW_COMPRESSION_LOOKBACK:] if b is not None]
        if len(prior) >= BBW_COMPRESSION_LOOKBACK:
            mean_prior = sum(prior) / len(prior)
            if mean_prior > 0:
                bbw_compression_ratio = round(bbw_now / mean_prior, 3)

    # v0.3.0 P0-4 — distance to 120d / 250d resistance (closes-based to match
    # high_60d). Compute the raw (unrounded) extremes for comparison against
    # last_close so boundary cases like "last close IS the 60d high" don't flip
    # on rounding; round only the emitted scalar field.
    def _window_extremes(n: int) -> tuple[float | None, float | None]:
        if len(closes) < n:
            return None, None
        sl = closes[-n:]
        return max(sl), min(sl)

    high_120d_raw, low_120d_raw = _window_extremes(RESIST_120D)
    high_250d_raw, _ = _window_extremes(RESIST_250D)
    high_120d = round(high_120d_raw, 3) if high_120d_raw is not None else None
    high_250d = round(high_250d_raw, 3) if high_250d_raw is not None else None
    low_120d = round(low_120d_raw, 3) if low_120d_raw is not None else None
    dist_to_120d_high_pct = (
        round((last_close - high_120d_raw) / high_120d_raw * 100, 2)
        if high_120d_raw not in (None, 0)
        else None
    )
    dist_to_250d_high_pct = (
        round((last_close - high_250d_raw) / high_250d_raw * 100, 2)
        if high_250d_raw not in (None, 0)
        else None
    )
    is_above_120d_high = high_120d_raw is not None and last_close > high_120d_raw
    is_above_250d_high = high_250d_raw is not None and last_close > high_250d_raw
    pos_in_120d_range: float | None = None
    if (
        high_120d_raw is not None
        and low_120d_raw is not None
        and high_120d_raw > low_120d_raw
    ):
        pos_in_120d_range = round(
            (last_close - low_120d_raw) / (high_120d_raw - low_120d_raw), 3
        )

    # v0.5.0 P1-1 — RPS / alpha relative to the broad market. F10 — 5d / 20d / 60d.
    baseline_close_by_date = baseline_close_by_date or {}
    alpha_5d_pct = _compute_alpha_pct(history, baseline_close_by_date, 5)
    alpha_20d_pct = _compute_alpha_pct(history, baseline_close_by_date, 20)
    alpha_60d_pct = _compute_alpha_pct(history, baseline_close_by_date, 60)
    rel_strength_label = _classify_rel_strength(alpha_20d_pct)

    # Base / washout features — locate the anomaly day (T) and the platform
    # before it. T is the first row whose trade_date equals `trade_date`,
    # falling back to the last row when no row matches.
    t_idx = next(
        (i for i, r in enumerate(history) if str(r.get("trade_date")) == trade_date),
        len(history) - 1,
    )
    # v0.3.0 PR-2 — keep the base/washout window at 60d even though `history`
    # is now up to 250d, so `base_*` field semantics stay backward-compatible.
    base_window_size = 60
    if t_idx > 0:
        base_start = max(0, t_idx - base_window_size)
        base_window_pre_t = history[base_start:t_idx]
    else:
        base_window_pre_t = []

    # base_days = consecutive days before T where pct_chg is moderate (|pct_chg| < 4)
    base_days = 0
    for r in reversed(base_window_pre_t):
        if abs(float(r.get("pct_chg") or 0)) < 4.0:
            base_days += 1
        else:
            break

    # Drawdown within base window: (max_close - min_close) / max_close * 100
    base_closes = [float(r["close"]) for r in base_window_pre_t if r.get("close") is not None]
    base_max_drawdown_pct = None
    base_avg_vol = None
    base_vol_shrink_ratio = None
    # Never populated in this function; emitted as None to keep the field present.
    base_avg_turnover_rate = None
    if base_closes:
        bmax = max(base_closes)
        bmin = min(base_closes)
        if bmax > 0:
            base_max_drawdown_pct = round((bmax - bmin) / bmax * 100, 2)
        base_vols_pre = [float(r["vol"]) for r in base_window_pre_t if r.get("vol") is not None]
        if base_vols_pre:
            base_avg_vol = round(sum(base_vols_pre) / len(base_vols_pre), 2)
            # Compare the mean volume of the later half of the consolidation
            # against the earlier half — a ratio below 1 indicates shrinkage.
            if len(base_vols_pre) >= 10:
                half = len(base_vols_pre) // 2
                early = sum(base_vols_pre[:half]) / max(half, 1)
                late = sum(base_vols_pre[half:]) / max(len(base_vols_pre) - half, 1)
                if early > 0:
                    base_vol_shrink_ratio = round(late / early, 2)

    # days_since_last_limit_up — strictly before T
    prior_limit_ups = [d for d in limit_up_dates if d < trade_date]
    days_since_last_limit_up: int | None = None
    if prior_limit_ups:
        days_since_last_limit_up = _calendar_days_between(prior_limit_ups[-1], trade_date)

    # Recent 5 days OHLCV (verbatim, for form reference)
    recent5 = [
        {
            "date": str(r.get("trade_date")),
            "open": round2(r.get("open")),
            "high": round2(r.get("high")),
            "low": round2(r.get("low")),
            "close": round2(r.get("close")),
            "pct_chg": round2(r.get("pct_chg")),
            "vol": _opt_int(r.get("vol")),
        }
        for r in history[-5:]
    ]

    # Moneyflow summary
    mf_summary: dict[str, Any] = {}
    if moneyflow_5d:
        net_amounts = [float(r.get("net_mf_amount") or 0) for r in moneyflow_5d]
        elg_amounts = [float(r.get("buy_elg_amount") or 0) for r in moneyflow_5d]
        lg_amounts = [float(r.get("buy_lg_amount") or 0) for r in moneyflow_5d]
        # Divide by 1e4: amounts are taken to be in units of 10k CNY and are
        # reported in units of 100M CNY (per the original unit comment).
        cum_net_yi = round(sum(net_amounts) / 1e4, 3)
        cum_elg_lg_yi = round(sum(elg_amounts + lg_amounts) / 1e4, 3)
        # trend: compare the mean of the last 3 rows with the mean of the first 2
        trend = "flat"
        if len(net_amounts) >= 5:
            first2 = sum(net_amounts[:2]) / 2
            last3 = sum(net_amounts[-3:]) / 3
            # ±20% band around the early mean. Scaling a negative mean by 1.2
            # moves the bound the wrong way (a deeper outflow would read as
            # "rising"), so the factors are swapped for a net outflow.
            if first2 >= 0:
                rising_above, falling_below = first2 * 1.2, first2 * 0.8
            else:
                rising_above, falling_below = first2 * 0.8, first2 * 1.2
            if last3 > rising_above:
                trend = "rising"
            elif last3 < falling_below:
                trend = "falling"
        mf_summary = {
            "cum_net_mf_yi": cum_net_yi,
            "cum_elg_plus_lg_buy_yi": cum_elg_lg_yi,
            "net_mf_trend": trend,
            "rows_used": len(moneyflow_5d),
        }
    else:
        mf_summary = {"rows_used": 0}

    tracked_days = _calendar_days_between(watchlist_row["tracked_since"], trade_date)
    return {
        "candidate_id": watchlist_row["ts_code"],
        "ts_code": watchlist_row["ts_code"],
        "name": watchlist_row.get("name"),
        "industry": watchlist_row.get("industry"),
        "tracked_since": watchlist_row["tracked_since"],
        "tracked_days": tracked_days,
        # T-day snapshot (from watchlist row — the anomaly-day metrics)
        "anomaly_day": watchlist_row.get("last_screened"),
        "anomaly_pct_chg": watchlist_row.get("last_pct_chg"),
        "anomaly_body_ratio": watchlist_row.get("last_body_ratio"),
        "anomaly_turnover_rate": watchlist_row.get("last_turnover_rate"),
        "anomaly_vol_ratio_5d": watchlist_row.get("last_vol_ratio_5d"),
        # Latest market data
        "last_close": round2(last_close),
        "ma5": ma5,
        "ma10": ma10,
        "ma20": ma20,
        "ma60": ma60,
        "above_ma20": above_ma20,
        "above_ma60": above_ma60,
        "high_60d": high_60d,
        "low_60d": low_60d,
        "range_pct_60d": range_pct_60d,
        "pct_chg_60d": pct_chg_60d,
        # v0.3.0 P0-3 — VCP volatility contraction
        "atr_10d_pct": atr_10d_pct,
        "atr_10d_quantile_in_60d": atr_10d_quantile_in_60d,
        "bbw_20d": bbw_20d,
        "bbw_compression_ratio": bbw_compression_ratio,
        # v0.5.0 P1-1 — RPS / alpha relative to the broad market
        "alpha_5d_pct": alpha_5d_pct,
        "alpha_20d_pct": alpha_20d_pct,
        "alpha_60d_pct": alpha_60d_pct,
        "baseline_index_code": baseline_index_code,
        "rel_strength_label": rel_strength_label,
        # v0.3.0 P0-4 — 120d/250d resistance (E3-A: low_250d / pos_in_250d_range
        # are intentionally not added)
        "high_120d": high_120d,
        "high_250d": high_250d,
        "low_120d": low_120d,
        "dist_to_120d_high_pct": dist_to_120d_high_pct,
        "dist_to_250d_high_pct": dist_to_250d_high_pct,
        "is_above_120d_high": is_above_120d_high,
        "is_above_250d_high": is_above_250d_high,
        "pos_in_120d_range": pos_in_120d_range,
        # Washout / base features (dimension #7 of the user's requirements)
        "base_days": base_days,
        "base_max_drawdown_pct": base_max_drawdown_pct,
        "base_avg_vol": base_avg_vol,
        "base_vol_shrink_ratio": base_vol_shrink_ratio,
        "base_avg_turnover_rate": base_avg_turnover_rate,
        "days_since_last_limit_up": days_since_last_limit_up,
        "prior_limit_up_count_60d": len(prior_limit_ups),
        # Latest daily_basic
        "turnover_rate_t": round2(daily_basic.get("turnover_rate")),
        "volume_ratio_t": round2(daily_basic.get("volume_ratio")),
        "pe_t": round2(daily_basic.get("pe")),
        "pb_t": round2(daily_basic.get("pb")),
        "circ_mv_yi": normalize_to_yi("circ_mv", daily_basic.get("circ_mv")),
        "total_mv_yi": normalize_to_yi("total_mv", daily_basic.get("total_mv")),
        # Recent 5 OHLCV verbatim
        "recent_5d": recent5,
        # Moneyflow summary (5d)
        "moneyflow_5d_summary": mf_summary,
    }
1981
+
1982
+
1983
+ # ---------------------------------------------------------------------------
1984
+ # EVALUATE MODE — T+N realized-return computation (v0.4.0 P1-3)
1985
+ # ---------------------------------------------------------------------------
1986
+
1987
+
1988
def _resolve_horizon_dates(
    calendar: TradeCalendar,
    anomaly_date: str,
    horizons: tuple[int, ...] = EVALUATE_HORIZONS,
) -> dict[int, str]:
    """For each horizon n, resolve the trade date that is n trade days AFTER
    ``anomaly_date`` by stepping ``calendar.next_open`` n times.

    Args:
        calendar: Trade calendar; only ``next_open(date)`` is used.
        anomaly_date: The T day to start counting from.
        horizons: Trade-day offsets to resolve.

    Returns:
        ``{n: date}`` for every requested horizon ``n >= 1``. An empty
        ``horizons`` yields ``{}``.

    Raises:
        Whatever ``calendar.next_open`` raises propagates unchanged.
        NOTE(review): the previous docstring said ``ValueError`` is raised when
        the calendar has no future trade days — confirm against ``next_open``.
    """
    if not horizons:
        # max() raises ValueError on an empty sequence; with nothing requested
        # there is nothing to resolve.
        return {}

    wanted = set(horizons)
    out: dict[int, str] = {}
    cursor = anomaly_date
    for advanced in range(1, max(wanted) + 1):
        cursor = calendar.next_open(cursor)
        if advanced in wanted:
            out[advanced] = cursor
    return out
2010
+
2011
+
2012
+ def _compute_realized_returns(
2013
+ *,
2014
+ t_close: float | None,
2015
+ horizon_closes: dict[int, float | None],
2016
+ window_5d_closes: list[float | None],
2017
+ window_10d_closes: list[float | None],
2018
+ ) -> dict[str, float | None]:
2019
+ """Convert raw OHLCV inputs into the realised-return scalar metrics
2020
+ persisted in ``va_realized_returns``.
2021
+
2022
+ Args:
2023
+ t_close: T-day close (basis for all percentage calcs).
2024
+ horizon_closes: ``{1: c1, 3: c3, 5: c5, 10: c10}``. Any missing horizon
2025
+ value is OK — it surfaces as ``None`` in the result.
2026
+ window_5d_closes: ordered closes for T+1..T+5 (length ≤ 5; may
2027
+ contain ``None`` for suspended days).
2028
+ window_10d_closes: same idea for T+1..T+10.
2029
+
2030
+ Output keys: ``ret_t1`` ``ret_t3`` ``ret_t5`` ``ret_t10`` ``max_close_5d``
2031
+ ``max_close_10d`` ``max_ret_5d`` ``max_ret_10d`` ``max_dd_5d``.
2032
+ """
2033
+
2034
+ def _pct(num: float | None) -> float | None:
2035
+ if num is None or t_close is None or t_close <= 0:
2036
+ return None
2037
+ return round((num / t_close - 1) * 100, 2)
2038
+
2039
+ out: dict[str, float | None] = {
2040
+ "ret_t1": _pct(horizon_closes.get(1)),
2041
+ "ret_t3": _pct(horizon_closes.get(3)),
2042
+ "ret_t5": _pct(horizon_closes.get(5)),
2043
+ "ret_t10": _pct(horizon_closes.get(10)),
2044
+ }
2045
+ valid_5 = [c for c in window_5d_closes if c is not None]
2046
+ valid_10 = [c for c in window_10d_closes if c is not None]
2047
+ out["max_close_5d"] = round(max(valid_5), 3) if valid_5 else None
2048
+ out["max_close_10d"] = round(max(valid_10), 3) if valid_10 else None
2049
+ out["max_ret_5d"] = _pct(out["max_close_5d"])
2050
+ out["max_ret_10d"] = _pct(out["max_close_10d"])
2051
+ # G2 决策: max_dd from T = (min(close[T+1..T+5]) - t_close) / t_close × 100
2052
+ out["max_dd_5d"] = (
2053
+ _pct(min(valid_5)) if valid_5 else None
2054
+ )
2055
+ return out
2056
+
2057
+
2058
+ def _classify_data_status(
2059
+ *,
2060
+ horizon_closes: dict[int, float | None],
2061
+ horizons: tuple[int, ...],
2062
+ today: str,
2063
+ horizon_dates: dict[int, str],
2064
+ ) -> str:
2065
+ """Determine ``data_status`` per the v3 G5 rule:
2066
+
2067
+ * ``pending`` — T+1 trade_date is still in the future (no horizon column
2068
+ can possibly be filled yet).
2069
+ * ``partial`` — max_horizon trade_date is in the future, OR any reachable
2070
+ horizon row has missing close (suspension / data gap).
2071
+ * ``complete`` — max_horizon is in the past AND every horizon was filled.
2072
+ """
2073
+ if not horizon_dates:
2074
+ return "pending"
2075
+ h1_date = horizon_dates.get(min(horizons))
2076
+ if h1_date is not None and h1_date > today:
2077
+ return "pending"
2078
+ max_n = max(horizons)
2079
+ max_date = horizon_dates.get(max_n)
2080
+ max_reached = max_date is not None and max_date <= today
2081
+ all_filled = all(horizon_closes.get(n) is not None for n in horizons)
2082
+ if max_reached and all_filled:
2083
+ return "complete"
2084
+ return "partial"
2085
+
2086
+
2087
def fetch_anomaly_dates_within_lookback(
    db: Any, *, today: str, lookback_days: int
) -> list[tuple[str, str]]:
    """List ``(anomaly_date, ts_code)`` pairs from ``va_anomaly_history``.

    The cutoff is ``today`` shifted back by ``lookback_days`` via
    ``_shift_calendar_days``; rows whose ``trade_date`` column is on or after
    that cutoff are returned, ordered by ``trade_date`` then ``ts_code``. The
    ``trade_date`` column is what gets reported as the anomaly date.

    Args:
        db: Handle exposing ``fetchall(sql, params)``.
        today: Reference date.
        lookback_days: Size of the trailing window (coerced with ``int``).

    Returns:
        Pairs of stringified ``(trade_date, ts_code)``.
    """
    earliest = _shift_calendar_days(today, -int(lookback_days))
    query = (
        "SELECT trade_date, ts_code FROM va_anomaly_history "
        "WHERE trade_date >= ? ORDER BY trade_date, ts_code"
    )
    pairs: list[tuple[str, str]] = []
    for record in db.fetchall(query, (earliest,)):
        pairs.append((str(record[0]), str(record[1])))
    return pairs
2100
+
2101
+
2102
+ def upsert_realized_return(
2103
+ db: Any,
2104
+ *,
2105
+ anomaly_date: str,
2106
+ ts_code: str,
2107
+ t_close: float | None,
2108
+ horizon_closes: dict[int, float | None],
2109
+ metrics: dict[str, float | None],
2110
+ data_status: str,
2111
+ ) -> None:
2112
+ """UPSERT one row into ``va_realized_returns``."""
2113
+ db.execute(
2114
+ "DELETE FROM va_realized_returns WHERE anomaly_date=? AND ts_code=?",
2115
+ (anomaly_date, ts_code),
2116
+ )
2117
+ db.execute(
2118
+ "INSERT INTO va_realized_returns(anomaly_date, ts_code, t_close, "
2119
+ "t1_close, t3_close, t5_close, t10_close, "
2120
+ "ret_t1, ret_t3, ret_t5, ret_t10, "
2121
+ "max_close_5d, max_close_10d, max_ret_5d, max_ret_10d, max_dd_5d, "
2122
+ "data_status) "
2123
+ "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
2124
+ (
2125
+ anomaly_date,
2126
+ ts_code,
2127
+ t_close,
2128
+ horizon_closes.get(1),
2129
+ horizon_closes.get(3),
2130
+ horizon_closes.get(5),
2131
+ horizon_closes.get(10),
2132
+ metrics.get("ret_t1"),
2133
+ metrics.get("ret_t3"),
2134
+ metrics.get("ret_t5"),
2135
+ metrics.get("ret_t10"),
2136
+ metrics.get("max_close_5d"),
2137
+ metrics.get("max_close_10d"),
2138
+ metrics.get("max_ret_5d"),
2139
+ metrics.get("max_ret_10d"),
2140
+ metrics.get("max_dd_5d"),
2141
+ data_status,
2142
+ ),
2143
+ )
2144
+
2145
+
2146
def fetch_completed_realized_keys(db: Any) -> set[tuple[str, str]]:
    """Collect the keys of every ``va_realized_returns`` row marked complete.

    Callers use the result to skip already-finished rows on later evaluate
    runs (idempotency).

    Args:
        db: Handle exposing ``fetchall(sql)``.

    Returns:
        Set of stringified ``(anomaly_date, ts_code)`` pairs whose
        ``data_status`` is ``'complete'``.
    """
    query = (
        "SELECT anomaly_date, ts_code FROM va_realized_returns "
        "WHERE data_status = 'complete'"
    )
    done: set[tuple[str, str]] = set()
    for record in db.fetchall(query):
        done.add((str(record[0]), str(record[1])))
    return done