deeptrade-quant 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. deeptrade/__init__.py +8 -0
  2. deeptrade/channels_builtin/__init__.py +0 -0
  3. deeptrade/channels_builtin/stdout/__init__.py +0 -0
  4. deeptrade/channels_builtin/stdout/deeptrade_plugin.yaml +25 -0
  5. deeptrade/channels_builtin/stdout/migrations/20260429_001_init.sql +13 -0
  6. deeptrade/channels_builtin/stdout/stdout_channel/__init__.py +0 -0
  7. deeptrade/channels_builtin/stdout/stdout_channel/channel.py +180 -0
  8. deeptrade/cli.py +214 -0
  9. deeptrade/cli_config.py +396 -0
  10. deeptrade/cli_data.py +33 -0
  11. deeptrade/cli_plugin.py +176 -0
  12. deeptrade/core/__init__.py +8 -0
  13. deeptrade/core/config.py +344 -0
  14. deeptrade/core/config_migrations.py +138 -0
  15. deeptrade/core/db.py +176 -0
  16. deeptrade/core/llm_client.py +591 -0
  17. deeptrade/core/llm_manager.py +174 -0
  18. deeptrade/core/logging_config.py +61 -0
  19. deeptrade/core/migrations/__init__.py +0 -0
  20. deeptrade/core/migrations/core/20260427_001_init.sql +121 -0
  21. deeptrade/core/migrations/core/20260501_002_drop_llm_calls_stage.sql +10 -0
  22. deeptrade/core/migrations/core/__init__.py +0 -0
  23. deeptrade/core/notifier.py +302 -0
  24. deeptrade/core/paths.py +49 -0
  25. deeptrade/core/plugin_manager.py +616 -0
  26. deeptrade/core/run_status.py +29 -0
  27. deeptrade/core/secrets.py +152 -0
  28. deeptrade/core/tushare_client.py +824 -0
  29. deeptrade/plugins_api/__init__.py +44 -0
  30. deeptrade/plugins_api/base.py +66 -0
  31. deeptrade/plugins_api/channel.py +42 -0
  32. deeptrade/plugins_api/events.py +61 -0
  33. deeptrade/plugins_api/llm.py +46 -0
  34. deeptrade/plugins_api/metadata.py +84 -0
  35. deeptrade/plugins_api/notify.py +67 -0
  36. deeptrade/strategies_builtin/__init__.py +0 -0
  37. deeptrade/strategies_builtin/limit_up_board/__init__.py +0 -0
  38. deeptrade/strategies_builtin/limit_up_board/deeptrade_plugin.yaml +101 -0
  39. deeptrade/strategies_builtin/limit_up_board/limit_up_board/__init__.py +0 -0
  40. deeptrade/strategies_builtin/limit_up_board/limit_up_board/calendar.py +65 -0
  41. deeptrade/strategies_builtin/limit_up_board/limit_up_board/cli.py +269 -0
  42. deeptrade/strategies_builtin/limit_up_board/limit_up_board/config.py +76 -0
  43. deeptrade/strategies_builtin/limit_up_board/limit_up_board/data.py +1191 -0
  44. deeptrade/strategies_builtin/limit_up_board/limit_up_board/pipeline.py +869 -0
  45. deeptrade/strategies_builtin/limit_up_board/limit_up_board/plugin.py +30 -0
  46. deeptrade/strategies_builtin/limit_up_board/limit_up_board/profiles.py +85 -0
  47. deeptrade/strategies_builtin/limit_up_board/limit_up_board/prompts.py +485 -0
  48. deeptrade/strategies_builtin/limit_up_board/limit_up_board/render.py +890 -0
  49. deeptrade/strategies_builtin/limit_up_board/limit_up_board/runner.py +1087 -0
  50. deeptrade/strategies_builtin/limit_up_board/limit_up_board/runtime.py +172 -0
  51. deeptrade/strategies_builtin/limit_up_board/limit_up_board/schemas.py +178 -0
  52. deeptrade/strategies_builtin/limit_up_board/migrations/20260430_001_init.sql +150 -0
  53. deeptrade/strategies_builtin/limit_up_board/migrations/20260501_002_lub_stage_results_llm_provider.sql +8 -0
  54. deeptrade/strategies_builtin/limit_up_board/migrations/20260508_001_lub_lhb_tables.sql +36 -0
  55. deeptrade/strategies_builtin/limit_up_board/migrations/20260508_002_lub_cyq_perf.sql +18 -0
  56. deeptrade/strategies_builtin/limit_up_board/migrations/20260508_003_lub_lhb_pk_fix.sql +46 -0
  57. deeptrade/strategies_builtin/limit_up_board/migrations/20260508_004_lub_lhb_drop_pk.sql +53 -0
  58. deeptrade/strategies_builtin/limit_up_board/migrations/20260508_005_lub_config.sql +17 -0
  59. deeptrade/strategies_builtin/volume_anomaly/__init__.py +0 -0
  60. deeptrade/strategies_builtin/volume_anomaly/deeptrade_plugin.yaml +59 -0
  61. deeptrade/strategies_builtin/volume_anomaly/migrations/20260430_001_init.sql +94 -0
  62. deeptrade/strategies_builtin/volume_anomaly/migrations/20260601_001_realized_returns.sql +44 -0
  63. deeptrade/strategies_builtin/volume_anomaly/migrations/20260601_002_dimension_scores.sql +13 -0
  64. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/__init__.py +0 -0
  65. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/calendar.py +52 -0
  66. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/cli.py +247 -0
  67. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/data.py +2154 -0
  68. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/pipeline.py +327 -0
  69. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/plugin.py +22 -0
  70. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/profiles.py +49 -0
  71. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/prompts.py +187 -0
  72. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/prompts_examples.py +84 -0
  73. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/render.py +906 -0
  74. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/runner.py +772 -0
  75. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/runtime.py +90 -0
  76. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/schemas.py +97 -0
  77. deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/stats.py +174 -0
  78. deeptrade/theme.py +48 -0
  79. deeptrade_quant-0.0.2.dist-info/METADATA +166 -0
  80. deeptrade_quant-0.0.2.dist-info/RECORD +83 -0
  81. deeptrade_quant-0.0.2.dist-info/WHEEL +4 -0
  82. deeptrade_quant-0.0.2.dist-info/entry_points.txt +2 -0
  83. deeptrade_quant-0.0.2.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,1191 @@
1
+ """Data layer for the limit-up-board strategy.
2
+
3
+ DESIGN §12.2 (T-resolution) + §11.3 (sector_strength fallback chain) + S2 (close_after config) +
4
+ S4 (zero candidates legal) + Q2 (main board only) + C5 (raw units in DB, normalized in prompt).
5
+
6
+ Key public entry points:
7
+ resolve_trade_date(...) — Step 0
8
+ collect_round1(...) — Step 1 (returns candidates + market summary +
9
+ sector_strength + data_unavailable)
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import logging
15
+ from dataclasses import dataclass, field
16
+ from datetime import datetime, time
17
+ from typing import Any, Literal
18
+
19
+ import pandas as pd
20
+
21
+ from deeptrade.core.tushare_client import (
22
+ TushareClient,
23
+ TushareUnauthorizedError,
24
+ )
25
+
26
+ from .calendar import TradeCalendar
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
+
31
+ # ---------------------------------------------------------------------------
32
+ # Step 0 — resolve trade date
33
+ # ---------------------------------------------------------------------------
34
+
35
+
36
def resolve_trade_date(
    now_dt: datetime,
    calendar: TradeCalendar,
    *,
    user_specified: str | None = None,
    allow_intraday: bool = False,
    close_after: time = time(18, 0),
) -> tuple[str, str]:
    """Resolve the analysis date T and the following trade day T+1 (DESIGN §12.2).

    Args:
        now_dt: Current wall-clock time; its date is formatted as YYYYMMDD and
            its time-of-day is compared against ``close_after``.
        calendar: Object providing ``is_open``, ``pretrade_date`` and
            ``next_open`` lookups on YYYYMMDD strings.
        user_specified: When truthy, used verbatim as T (no calendar check).
        allow_intraday: Accept today as T even before ``close_after``.
        close_after: Time of day from which today's session counts as closed.

    Returns:
        ``(T, T+1)`` where T+1 is ``calendar.next_open(T)``.

    Without an explicit date, T is today only when today is an open day AND
    (the clock has reached ``close_after`` OR ``allow_intraday`` is set);
    in every other case T is ``calendar.pretrade_date(today)``.
    """
    if user_specified:
        return user_specified, calendar.next_open(user_specified)

    today_str = now_dt.strftime("%Y%m%d")
    # Short-circuit keeps the clock comparison off the path for non-trading days.
    today_usable = calendar.is_open(today_str) and (
        now_dt.time() >= close_after or allow_intraday
    )
    # Intraday-without-opt-in and weekend/holiday both walk back one trade day.
    resolved = today_str if today_usable else calendar.pretrade_date(today_str)
    return resolved, calendar.next_open(resolved)
70
+
71
+
72
+ # ---------------------------------------------------------------------------
73
+ # Filters: main board / ST / suspended
74
+ # ---------------------------------------------------------------------------
75
+
76
+
77
def main_board_filter(stock_basic: pd.DataFrame) -> pd.DataFrame:
    """Restrict ``stock_basic`` to listed Shanghai/Shenzhen main-board rows (Q2 fix).

    A row is kept when ``market`` equals the Chinese label '主板' and
    ``exchange`` is ``SSE`` or ``SZSE``. When a ``list_status`` column exists,
    only rows with status ``"L"`` survive. Everything else (ChiNext, STAR,
    BSE, CDR, ...) falls out because it does not carry that label/exchange pair.

    Raises:
        ValueError: if either the ``market`` or ``exchange`` column is absent.

    Returns:
        A copy of the matching rows with a fresh 0..n-1 index.
    """
    required = ("market", "exchange")
    if any(col not in stock_basic.columns for col in required):
        raise ValueError("stock_basic missing market/exchange columns")

    is_main_board = stock_basic["market"].eq("主板")
    on_sse_or_szse = stock_basic["exchange"].isin(["SSE", "SZSE"])
    kept = stock_basic.loc[is_main_board & on_sse_or_szse].copy()

    if "list_status" in kept.columns:
        kept = kept.loc[kept["list_status"] == "L"]
    return kept.reset_index(drop=True)
91
+
92
+
93
def exclude_st(df: pd.DataFrame, st_codes: set[str]) -> pd.DataFrame:
    """Remove every row whose ``ts_code`` appears in the ST / *ST code set.

    An empty frame is handed back untouched (same object); otherwise the
    surviving rows are returned with a fresh 0..n-1 index.
    """
    if df.empty:
        return df
    flagged = df["ts_code"].isin(st_codes)
    return df.loc[~flagged].reset_index(drop=True)
98
+
99
+
100
def exclude_suspended(df: pd.DataFrame, suspended_codes: set[str]) -> pd.DataFrame:
    """Remove every row whose ``ts_code`` is in the set of codes suspended on T.

    An empty frame is handed back untouched (same object); otherwise the
    surviving rows are returned with a fresh 0..n-1 index.
    """
    if df.empty:
        return df
    halted = df["ts_code"].isin(suspended_codes)
    return df.loc[~halted].reset_index(drop=True)
105
+
106
+
107
+ def _apply_market_filter(
108
+ candidates_df: pd.DataFrame,
109
+ *,
110
+ max_float_mv_yi: float,
111
+ max_close_yuan: float,
112
+ ) -> tuple[pd.DataFrame, dict[str, Any]]:
113
+ """v0.4 — keep only rows whose 流通市值 < ``max_float_mv_yi`` (亿) AND
114
+ close < ``max_close_yuan`` (元). Null in either field → dropped (conservative;
115
+ we cannot validate "small cap / low price" claims without the data).
116
+
117
+ Returns ``(filtered_df, summary)`` where summary is the candidate_filter_summary
118
+ payload that gets stored in ``bundle.market_summary``.
119
+ """
120
+ n_before = int(len(candidates_df))
121
+ summary: dict[str, Any] = {
122
+ "before": n_before,
123
+ "after": n_before,
124
+ "max_float_mv_yi": max_float_mv_yi,
125
+ "max_close_yuan": max_close_yuan,
126
+ }
127
+ if n_before == 0:
128
+ return candidates_df, summary
129
+ fm_yi = pd.to_numeric(candidates_df.get("float_mv"), errors="coerce") / 1e8
130
+ cl = pd.to_numeric(candidates_df.get("close"), errors="coerce")
131
+ mask = (
132
+ fm_yi.notna()
133
+ & cl.notna()
134
+ & (fm_yi < max_float_mv_yi)
135
+ & (cl < max_close_yuan)
136
+ )
137
+ filtered = candidates_df[mask].reset_index(drop=True)
138
+ summary["after"] = int(len(filtered))
139
+ return filtered, summary
140
+
141
+
142
+ # ---------------------------------------------------------------------------
143
+ # Sector strength resolver — three-tier fallback (F2 fix + §11.3)
144
+ # ---------------------------------------------------------------------------
145
+
146
+
147
SectorStrengthSource = Literal["limit_cpt_list", "lu_desc_aggregation", "industry_fallback"]


@dataclass
class SectorStrength:
    """Sector heat / leadership data fed into the prompt.

    `source` is exposed verbatim to the LLM via ``sector_strength_source`` so
    the model can downweight confidence when it sees a fallback label.
    """

    # Which tier of the fallback chain produced ``data``.
    source: SectorStrengthSource
    # Payload; always carries a "top_sectors" list of row dicts.
    data: dict[str, Any]


def _has_columns(frame: pd.DataFrame | None, *columns: str) -> bool:
    """True when ``frame`` is a non-empty DataFrame containing every named column."""
    return (
        frame is not None
        and not frame.empty
        and all(col in frame.columns for col in columns)
    )


def _top_groups_by_count(
    frame: pd.DataFrame, group_col: str, limit: int = 10
) -> list[dict[str, Any]]:
    """Count ``ts_code`` per ``group_col`` value; return the ``limit`` biggest groups as records."""
    ranked = (
        frame.groupby(group_col, dropna=True)
        .agg(up_nums=("ts_code", "count"))
        .reset_index()
        .sort_values("up_nums", ascending=False)
        .head(limit)
    )
    return ranked.to_dict(orient="records")


def resolve_sector_strength(
    *,
    candidates: pd.DataFrame,
    limit_cpt_list: pd.DataFrame | None,
    limit_list_ths: pd.DataFrame | None,
) -> SectorStrength:
    """Pick the best available sector data, walking a three-tier fallback chain.

    Priority: ``limit_cpt_list`` ranking > ``limit_list_ths.lu_desc``
    aggregation > ``candidates.industry`` aggregation. A tier is used only when
    its frame is non-empty and has the columns it needs; tier 2 is additionally
    skipped when the aggregation yields no groups (e.g. every ``lu_desc`` is
    null), so a lower tier still gets a chance to contribute.

    Args:
        candidates: Candidate rows; tier 3 groups them by ``industry``.
        limit_cpt_list: Concept ranking frame, or None/empty when unavailable.
        limit_list_ths: Limit-up reason frame, or None/empty when unavailable.

    Returns:
        A ``SectorStrength`` whose ``data["top_sectors"]`` holds at most 10
        records. When nothing is usable the result is an ``industry_fallback``
        with an empty list.
    """
    # Tier 1: official concept rankings (best rank first, top 10).
    if limit_cpt_list is not None and not limit_cpt_list.empty:
        if "rank" in limit_cpt_list.columns:
            # Rank values may arrive as strings; sort by their numeric value so
            # "10" does not sort ahead of "2". The stored values are untouched.
            ordered = limit_cpt_list.sort_values(
                "rank", key=lambda col: pd.to_numeric(col, errors="coerce")
            )
        else:
            # No rank column: keep the upstream row order rather than failing.
            ordered = limit_cpt_list
        return SectorStrength(
            source="limit_cpt_list",
            data={
                "top_sectors": ordered.head(10).to_dict(orient="records"),
                "candidates_with_sector_tag": [],  # joined externally if needed
            },
        )

    # Tier 2: aggregate the THS limit-up reason column.
    if _has_columns(limit_list_ths, "lu_desc", "ts_code"):
        top_reasons = _top_groups_by_count(limit_list_ths, "lu_desc")
        if top_reasons:
            return SectorStrength(
                source="lu_desc_aggregation",
                data={"top_sectors": top_reasons},
            )

    # Tier 3: aggregate candidates by industry (last resort).
    if _has_columns(candidates, "industry", "ts_code"):
        return SectorStrength(
            source="industry_fallback",
            data={"top_sectors": _top_groups_by_count(candidates, "industry")},
        )

    return SectorStrength(source="industry_fallback", data={"top_sectors": []})
214
+
215
+
216
+ # ---------------------------------------------------------------------------
217
+ # Normalizers (C5 fix: prompt uses normalized units; DB keeps raw)
218
+ # B3.1 (M6) fix: tushare fields have HETEROGENEOUS raw units; a simple
219
+ # `value / 1e8` is wrong for moneyflow.* (which is 万元) and daily_basic.circ_mv
220
+ # (also 万元). FIELD_UNITS_RAW is the source of truth.
221
+ # ---------------------------------------------------------------------------
222
+
223
+
224
# Raw unit of each tushare field, grouped by the API the field comes from.
# Any field not listed here is treated as "元" by the normalizers.
FIELD_UNITS_RAW: dict[str, str] = {
    # limit_list_d — amounts and market values in 元
    **dict.fromkeys(
        ("fd_amount", "limit_amount", "amount", "float_mv", "total_mv"), "元"
    ),
    # top_list — 元
    "net_amount": "元",
    # daily_basic — mixed: the market value is 万元, share counts are 万股
    "circ_mv": "万元",
    **dict.fromkeys(("free_share", "float_share", "total_share"), "万股"),
    # moneyflow — every amount is 万元
    **dict.fromkeys(
        (
            "net_mf_amount",
            "buy_lg_amount",
            "buy_elg_amount",
            "buy_md_amount",
            "buy_sm_amount",
            "sell_lg_amount",
            "sell_elg_amount",
        ),
        "万元",
    ),
    # daily — 千元 for amount, 手 for vol. No entry is declared for it:
    # "amount" above carries the limit_list_d meaning (元), so callers that need
    # daily.amount must disambiguate by API context themselves.
}
252
+
253
+
254
# B1 — substring hints for well-known A-share hot-money trading seats.
# Each hint is matched verbatim against top_inst.exalter; on a hit it is the
# actual exalter string (never the hint label) that goes into lhb_famous_seats,
# which keeps the hint list itself out of the LLM prompt (DESIGN §12 R3 spirit).
FAMOUS_SEATS_HINTS: tuple[str, ...] = (
    # Bare branch names (no broker prefix)
    "拉萨团结路",
    "拉萨东环路",
    "拉萨金融城南环路",
    "宁波桑田路",
    "宁波解放南路",
    "深圳益田路荣超商务中心",
    # Broker-qualified branch names
    "中信证券上海溧阳路",
    "华泰证券厦门厦禾路",
    "国泰君安上海江苏路",
    "国泰君安顺德大良",
    "财通证券杭州体育场路",
    "光大证券宁波解放南路",
    "东方财富证券拉萨",
    "国金证券上海互联网金融",
    "招商证券深圳深南大道",
)
275
+
276
+
277
def normalize_to_yi(field: str, raw_value: float | None) -> float | None:
    """Express ``raw_value`` in 亿, using the unit declared for ``field``.

    The unit is looked up in ``FIELD_UNITS_RAW`` (fields not listed count as
    "元"). Returns None for a None/NaN input and for units that are not a
    currency amount (anything other than 元 / 万元 / 千元); otherwise the
    converted value rounded to two decimals.
    """
    if raw_value is None or pd.isna(raw_value):
        return None
    # How many raw units make up one 亿.
    per_yi = {"元": 1e8, "万元": 1e4, "千元": 1e5}
    divisor = per_yi.get(FIELD_UNITS_RAW.get(field, "元"))
    if divisor is None:
        return None
    return round(float(raw_value) / divisor, 2)
291
+
292
+
293
def normalize_to_wan(field: str, raw_value: float | None) -> float | None:
    """Express ``raw_value`` in 万, using the unit declared for ``field``.

    The unit is looked up in ``FIELD_UNITS_RAW`` (fields not listed count as
    "元"). Returns None for a None/NaN input and for units that are not a
    currency amount (anything other than 元 / 万元 / 千元); otherwise the
    converted value rounded to two decimals.
    """
    if raw_value is None or pd.isna(raw_value):
        return None
    unit = FIELD_UNITS_RAW.get(field, "元")
    if unit == "元":
        factor = 1e4
    elif unit == "万元":
        factor = 1.0
    elif unit == "千元":
        # 1 万元 = 10 千元, so divide by 10. (Dividing by 0.1 would multiply
        # the value by 10 and overstate it a hundredfold.)
        factor = 10.0
    else:
        return None
    return round(float(raw_value) / factor, 2)
307
+
308
+
309
def yi(value: float | None) -> float | None:
    """Legacy 元 → 亿 conversion (assumes the raw unit is 元), rounded to 2 decimals.

    None/NaN yields None. Prefer ``normalize_to_yi(field, value)``, which
    honours the per-field unit.
    """
    is_missing = value is None or pd.isna(value)
    return None if is_missing else round(float(value) / 1e8, 2)
314
+
315
+
316
def wan(value: float | None) -> float | None:
    """Legacy 元 → 万 conversion (assumes the raw unit is 元), rounded to 2 decimals.

    None/NaN yields None. Prefer ``normalize_to_wan(field, value)``, which
    honours the per-field unit.
    """
    is_missing = value is None or pd.isna(value)
    return None if is_missing else round(float(value) / 1e4, 2)
321
+
322
+
323
def round2(value: float | None) -> float | None:
    """Round ``value`` to two decimals as a float; None/NaN yields None."""
    is_missing = value is None or pd.isna(value)
    return None if is_missing else round(float(value), 2)
327
+
328
+
329
+ # ---------------------------------------------------------------------------
330
+ # Round-1 collection
331
+ # ---------------------------------------------------------------------------
332
+
333
+
334
def _default_sector_strength() -> SectorStrength:
    """Placeholder sector data: the last-resort label with no sectors."""
    return SectorStrength(source="industry_fallback", data={"top_sectors": []})


@dataclass
class Round1Bundle:
    """Input package for the R1 LLM stage.

    Only the two dates are mandatory; every other field starts out empty and
    is filled in by the collection step.
    """

    # Analysis date T and the following trade day T+1.
    trade_date: str
    next_trade_date: str
    # One dict per surviving candidate.
    candidates: list[dict[str, Any]] = field(default_factory=list)
    # Market-wide summary sections, keyed by section name.
    market_summary: dict[str, Any] = field(default_factory=dict)
    # Sector data; defaults to an empty "industry_fallback" placeholder.
    sector_strength: SectorStrength = field(default_factory=_default_sector_strength)
    # Notes naming data sources that could not be fetched or persisted.
    data_unavailable: list[str] = field(default_factory=list)
346
+
347
+
348
def collect_round1(
    *,
    tushare: TushareClient,
    trade_date: str,
    next_trade_date: str,
    prev_trade_date: str | None = None,
    daily_lookback: int = 30,
    moneyflow_lookback: int = 5,
    max_float_mv_yi: float = 100.0,
    max_close_yuan: float = 15.0,
    force_sync: bool = False,
) -> Round1Bundle:
    """Assemble the R1 input bundle.

    The flow:
      1. stock_basic (static) → main_board_filter()
      2. limit_list_d(T, limit='U') → inner-join with the main-board pool;
         zero candidates is a LEGAL outcome (S4) and returns early
      2b. v0.4 — drop candidates whose float market value ≥ ``max_float_mv_yi``
          or whose close ≥ ``max_close_yuan``; null in either field → drop
          (conservative; thresholds owned by ``LubConfig``)
      3. top_list / top_inst / cyq_perf / stock_st(T) (REQUIRED) and
         suspend_d(T) (optional) → drop ST and suspended codes
      4. limit_list_ths(T) (optional) → lu_desc, tag, suc_rate
      5. limit_cpt_list(T) (optional) → sector strength tier 1
      6. limit_step(T) (REQUIRED) → global ladder distribution, plus the
         yesterday context when ``prev_trade_date`` is given
      7. daily / daily_basic / moneyflow over a look-back window (B1.2)
      8. Build normalized prompt rows per candidate, then persist the raw
         frames to the lub_* business tables

    Required calls go through ``tushare.call`` directly, so whatever they raise
    propagates to the caller. Optional calls go through ``_try_optional`` and
    are recorded in ``bundle.data_unavailable`` instead. Those notes are kept
    on every return path, including the early "no candidates left" exits.

    Args:
        tushare: Client used for every fetch and for persistence.
        trade_date: Analysis date T (YYYYMMDD).
        next_trade_date: T+1, stored on the bundle.
        prev_trade_date: T-1; when None the yesterday context is skipped.
        daily_lookback: Look-back for daily / daily_basic; the query window is
            twice this many calendar days.
        moneyflow_lookback: Look-back for moneyflow; the query window is this
            many calendar days plus 5.
        max_float_mv_yi: Exclusive float-market-value cap in 亿 (step 2b).
        max_close_yuan: Exclusive close-price cap in 元 (step 2b).
        force_sync: Forwarded to every tushare call.

    Returns:
        The populated ``Round1Bundle``.
    """
    bundle = Round1Bundle(trade_date=trade_date, next_trade_date=next_trade_date)
    # Work on the bundle's own list so that notes gathered before an early
    # return (e.g. a suspend_d failure followed by "no candidates left") are
    # not lost.
    data_unavailable = bundle.data_unavailable

    # 1. main board pool
    stock_basic = tushare.call("stock_basic", force_sync=force_sync)
    main_pool = main_board_filter(stock_basic)

    # 2. limit-up rows (limit='U'); we filter by limit afterward in case the
    #    transport returns the full list_d.
    limit_list_d = tushare.call(
        "limit_list_d",
        trade_date=trade_date,
        params={"limit_type": "U"},
        force_sync=force_sync,
    )
    if "limit" in limit_list_d.columns:
        limit_list_d = limit_list_d[limit_list_d["limit"] == "U"]

    # join on ts_code
    if limit_list_d.empty:
        bundle.candidates = []
        return bundle  # zero candidates: legal end state (S4)
    candidates_df = limit_list_d.merge(
        main_pool[["ts_code", "market", "exchange", "industry", "list_date"]].rename(
            columns={"industry": "industry_basic"}
        ),
        on="ts_code",
        how="inner",
    )
    if candidates_df.empty:
        bundle.candidates = []
        return bundle

    # 2b. v0.4 — float-market-value / close-price caps (null → filtered out).
    candidates_df, market_filter_summary = _apply_market_filter(
        candidates_df,
        max_float_mv_yi=max_float_mv_yi,
        max_close_yuan=max_close_yuan,
    )
    bundle.market_summary["candidate_filter_summary"] = market_filter_summary
    if candidates_df.empty:
        bundle.candidates = []
        return bundle

    # B1 — LHB (top_list / top_inst) — REQUIRED. Unauthorized must propagate.
    # A candidate that is not on the list gets null lhb_* fields (a legitimate
    # fact); that does not count as data_unavailable.
    top_list_df = tushare.call("top_list", trade_date=trade_date, force_sync=force_sync)
    top_inst_df = tushare.call("top_inst", trade_date=trade_date, force_sync=force_sync)

    # B2 — cyq_perf (chip distribution) — REQUIRED.
    # A candidate with no record in the response gets the cyq field names
    # written into its missing_data (filled in automatically for the LLM).
    cyq_perf_df = tushare.call("cyq_perf", trade_date=trade_date, force_sync=force_sync)

    # 3a. ST exclusion — REQUIRED. Unauthorized must propagate to the runner.
    #     Per DESIGN §11.1 + B1.3 fix: stock_st is in metadata.required → cannot
    #     be silently skipped; runner will mark the run failed.
    st_df = tushare.call("stock_st", trade_date=trade_date, force_sync=force_sync)
    st_codes = set(st_df["ts_code"].astype(str)) if not st_df.empty else set()
    candidates_df = exclude_st(candidates_df, st_codes)

    # 3b. Suspended exclusion — OPTIONAL. F-H3: catch all transient errors.
    susp_df, susp_err = _try_optional(
        tushare, "suspend_d", trade_date=trade_date, force_sync=force_sync
    )
    if susp_err:
        data_unavailable.append(f"suspend_d ({susp_err})")
        susp_codes: set[str] = set()
    else:
        susp_codes = set(susp_df["ts_code"].astype(str)) if not susp_df.empty else set()
    candidates_df = exclude_suspended(candidates_df, susp_codes)

    if candidates_df.empty:
        bundle.candidates = []
        return bundle

    # 4. THS limit-up list (optional). F-H3: catch all transient errors.
    ths_df, ths_err = _try_optional(
        tushare,
        "limit_list_ths",
        trade_date=trade_date,
        params={"limit_type": "U"},
        force_sync=force_sync,
    )
    if ths_err:
        data_unavailable.append(f"limit_list_ths ({ths_err})")

    # 5. concept ranking (optional). F-H3: same.
    cpt_df, cpt_err = _try_optional(
        tushare, "limit_cpt_list", trade_date=trade_date, force_sync=force_sync
    )
    if cpt_err:
        data_unavailable.append(f"limit_cpt_list ({cpt_err})")

    sector = resolve_sector_strength(
        candidates=candidates_df,
        limit_cpt_list=cpt_df,
        limit_list_ths=ths_df,
    )
    bundle.sector_strength = sector

    # 6. limit_step (required) — for global ladder distribution
    step_df = tushare.call("limit_step", trade_date=trade_date, force_sync=force_sync)
    today_step = _summarize_limit_step(step_df)
    # update() (not reassign) to preserve candidate_filter_summary set in step 2b.
    bundle.market_summary.update(
        {
            "limit_up_count": int(len(candidates_df)),
            "limit_step_distribution": today_step,
        }
    )
    # A2 — yesterday context (limit_step_trend / yesterday_failure_rate /
    # yesterday_winners_today). Best-effort; sub-fetch failures are reported as
    # notes rather than failing the run.
    if prev_trade_date is not None:
        yctx, yctx_err = _collect_yesterday_context(
            tushare,
            trade_date=trade_date,
            prev_trade_date=prev_trade_date,
            today_step=today_step,
            force_sync=force_sync,
        )
        bundle.market_summary.update(yctx)
        if yctx_err:
            data_unavailable.extend(yctx_err)

    # 7. B1.2 — REQUIRED histories: daily / daily_basic / moneyflow over a window.
    #    Queried by trade_date range so each ts_code's history is one slice of
    #    the result. Buffer ×2 (calendar-day basis) covers weekends/holidays so
    #    even a 30-day lookback (= ma20 + up_count_30d) reliably yields ≥30
    #    trade rows.
    candidate_codes = set(candidates_df["ts_code"].astype(str))
    start_date = _shift_date(trade_date, -(daily_lookback * 2))
    daily_df = _fetch_history_window(
        tushare, "daily", start_date, trade_date, candidate_codes, force_sync=force_sync
    )
    daily_basic_df = _fetch_history_window(
        tushare,
        "daily_basic",
        start_date,
        trade_date,
        candidate_codes,
        force_sync=force_sync,
    )
    # NOTE(review): this window uses a fixed +5 calendar-day buffer rather than
    # the ×2 buffer above — confirm it still spans enough trade days across
    # long holidays.
    mf_start = _shift_date(trade_date, -(moneyflow_lookback + 5))
    moneyflow_df = _fetch_history_window(
        tushare,
        "moneyflow",
        mf_start,
        trade_date,
        candidate_codes,
        force_sync=force_sync,
    )

    # 8. Build normalized rows
    bundle.candidates = _build_candidate_rows(
        candidates_df,
        ths_df,
        daily_df=daily_df,
        daily_basic_df=daily_basic_df,
        moneyflow_df=moneyflow_df,
        top_list_df=top_list_df,
        top_inst_df=top_inst_df,
        cyq_perf_df=cyq_perf_df,
        daily_lookback=daily_lookback,
        moneyflow_lookback=moneyflow_lookback,
    )

    # B2.3 + F-M4 — Persist to business tables (DuckDB is the persistence layer
    # per DESIGN). Errors don't fail the run (cache_blob still holds the data),
    # but they DO surface via data_unavailable so users see them in the report.
    materialize_errors = _materialize_business_tables(
        tushare,
        stock_basic=stock_basic,
        limit_list_d=limit_list_d,
        ths_df=ths_df,
        daily_df=daily_df,
        daily_basic_df=daily_basic_df,
        moneyflow_df=moneyflow_df,
        top_list_df=top_list_df,
        top_inst_df=top_inst_df,
        cyq_perf_df=cyq_perf_df,
    )
    if materialize_errors:
        data_unavailable.extend(materialize_errors)
    return bundle
563
+
564
+
565
+ def _materialize_business_tables(
566
+ tushare: TushareClient,
567
+ *,
568
+ stock_basic: pd.DataFrame,
569
+ limit_list_d: pd.DataFrame,
570
+ ths_df: pd.DataFrame | None,
571
+ daily_df: pd.DataFrame | None,
572
+ daily_basic_df: pd.DataFrame | None,
573
+ moneyflow_df: pd.DataFrame | None,
574
+ top_list_df: pd.DataFrame | None = None,
575
+ top_inst_df: pd.DataFrame | None = None,
576
+ cyq_perf_df: pd.DataFrame | None = None,
577
+ ) -> list[str]:
578
+ """B2.3 + F-M4 — write tushare frames into the named business tables.
579
+
580
+ Returns a list of error strings for any tables that failed to materialize.
581
+ Caller surfaces these via data_unavailable / events instead of silent log.
582
+ """
583
+ errors: list[str] = []
584
+
585
+ def _safe(table: str, df: pd.DataFrame, key_cols: list[str]) -> None:
586
+ if df is None or df.empty:
587
+ return
588
+ try:
589
+ tushare.materialize(table, df, key_cols=key_cols)
590
+ except Exception as e: # noqa: BLE001
591
+ msg = f"materialize:{table} ({type(e).__name__}: {e})"
592
+ logger.warning(msg)
593
+ errors.append(msg)
594
+
595
+ # All tables live under the lub_* prefix — this plugin owns its own
596
+ # copy of every tushare-derived business table (Plan A pure isolation).
597
+ _safe("lub_stock_basic", stock_basic, ["ts_code"])
598
+ _safe("lub_limit_list_d", limit_list_d, ["trade_date", "ts_code", "limit"])
599
+ _safe(
600
+ "lub_limit_ths",
601
+ ths_df if ths_df is not None else pd.DataFrame(),
602
+ ["trade_date", "ts_code", "limit_type"],
603
+ )
604
+ _safe(
605
+ "lub_daily",
606
+ daily_df if daily_df is not None else pd.DataFrame(),
607
+ ["ts_code", "trade_date"],
608
+ )
609
+ _safe(
610
+ "lub_daily_basic",
611
+ daily_basic_df if daily_basic_df is not None else pd.DataFrame(),
612
+ ["ts_code", "trade_date"],
613
+ )
614
+ _safe(
615
+ "lub_moneyflow",
616
+ moneyflow_df if moneyflow_df is not None else pd.DataFrame(),
617
+ ["ts_code", "trade_date"],
618
+ )
619
+ _safe(
620
+ "lub_top_list",
621
+ top_list_df if top_list_df is not None else pd.DataFrame(),
622
+ ["trade_date", "ts_code", "reason"],
623
+ )
624
+ _safe(
625
+ "lub_top_inst",
626
+ top_inst_df if top_inst_df is not None else pd.DataFrame(),
627
+ ["trade_date", "ts_code", "exalter", "side", "reason"],
628
+ )
629
+ _safe(
630
+ "lub_cyq_perf",
631
+ cyq_perf_df if cyq_perf_df is not None else pd.DataFrame(),
632
+ ["trade_date", "ts_code"],
633
+ )
634
+ return errors
635
+
636
+
637
+ def _shift_date(yyyymmdd: str, days: int) -> str:
638
+ """Naive ±days shift on YYYYMMDD (calendar days, not trade days). Adequate for
639
+ setting a tushare query upper bound; result is filtered by trade_cal anyway."""
640
+ from datetime import datetime as _dt
641
+ from datetime import timedelta as _td
642
+
643
+ d = _dt.strptime(yyyymmdd, "%Y%m%d") + _td(days=days)
644
+ return d.strftime("%Y%m%d")
645
+
646
+
647
+ def _fetch_history_window(
648
+ tushare: TushareClient,
649
+ api_name: str,
650
+ start_date: str,
651
+ end_date: str,
652
+ candidate_codes: set[str],
653
+ *,
654
+ force_sync: bool = False,
655
+ ) -> pd.DataFrame:
656
+ """Fetch (api_name) for [start_date, end_date]; filter to candidates."""
657
+ # tushare daily/daily_basic/moneyflow accept start_date/end_date for batch fetch.
658
+ df = tushare.call(
659
+ api_name,
660
+ params={"start_date": start_date, "end_date": end_date},
661
+ force_sync=force_sync,
662
+ )
663
+ if df is None or df.empty:
664
+ return pd.DataFrame()
665
+ if "ts_code" in df.columns and candidate_codes:
666
+ df = df[df["ts_code"].astype(str).isin(candidate_codes)]
667
+ return df.reset_index(drop=True)
668
+
669
+
670
+ # ---------------------------------------------------------------------------
671
+ # F-H3 — optional API wrapper
672
+ # ---------------------------------------------------------------------------
673
+
674
+
675
def _try_optional(
    tushare: TushareClient, api_name: str, **kwargs: Any
) -> tuple[pd.DataFrame, str | None]:
    """Call an OPTIONAL tushare API, converting known failures into a message.

    ``kwargs`` are forwarded unchanged to ``tushare.call``. On success the
    result is ``(frame, None)``. When the call raises
    ``TushareUnauthorizedError``, ``TushareServerError`` or
    ``TushareRateLimitError`` the result is ``(empty DataFrame,
    "<label>: <exception text>")`` with label ``unauthorized`` /
    ``server_error`` / ``rate_limited``. Any other exception propagates.

    Required APIs must NOT go through this wrapper — their failures are
    supposed to reach the caller.
    """
    from deeptrade.core.tushare_client import (  # noqa: PLC0415
        TushareRateLimitError,
        TushareServerError,
    )

    # Checked in this order, so the first matching class decides the label.
    labelled_errors = (
        (TushareUnauthorizedError, "unauthorized"),
        (TushareServerError, "server_error"),
        (TushareRateLimitError, "rate_limited"),
    )

    try:
        frame = tushare.call(api_name, **kwargs)
    except (TushareUnauthorizedError, TushareServerError, TushareRateLimitError) as exc:
        label = next(tag for kind, tag in labelled_errors if isinstance(exc, kind))
        return pd.DataFrame(), f"{label}: {exc}"
    return frame, None
696
+
697
+
698
+ # ---------------------------------------------------------------------------
699
+ # A2 — yesterday-context (market sentiment three-pack)
700
+ # ---------------------------------------------------------------------------
701
+
702
+
703
def _collect_yesterday_context(
    tushare: TushareClient,
    *,
    trade_date: str,
    prev_trade_date: str,
    today_step: dict[str, int],
    force_sync: bool = False,
) -> tuple[dict[str, Any], list[str]]:
    """Build the market-sentiment patch from T-1 limit data and T daily data.

    Three best-effort fetches are made through ``_try_optional``:
    ``limit_step`` and ``limit_list_d`` for ``prev_trade_date``, and ``daily``
    for ``trade_date``. A failed fetch contributes an empty frame to the
    derivations and one note to the error list; it never raises from here.

    Returns:
        ``(patch, errors)``. ``patch`` has the keys
        ``limit_step_distribution_prev``, ``limit_step_trend``,
        ``yesterday_failure_rate`` and ``yesterday_winners_today``;
        ``errors`` holds one ``"<source> (<reason>)"`` note per failed fetch.
    """
    failures: list[str] = []

    def _fetch(api_name: str, day: str, tag: str) -> pd.DataFrame:
        # One optional call; a failure is noted under ``tag`` and yields
        # whatever empty frame the wrapper returned.
        frame, problem = _try_optional(
            tushare, api_name, trade_date=day, force_sync=force_sync
        )
        if problem:
            failures.append(f"{tag} ({problem})")
        return frame

    prev_step = _summarize_limit_step(
        _fetch("limit_step", prev_trade_date, "limit_step[T-1]")
    )
    prev_limit_df = _fetch("limit_list_d", prev_trade_date, "limit_list_d[T-1]")
    today_daily_df = _fetch("daily", trade_date, "daily[T]")

    patch: dict[str, Any] = {
        "limit_step_distribution_prev": prev_step,
        "limit_step_trend": _limit_step_trend(today_step, prev_step),
        "yesterday_failure_rate": _yesterday_failure_rate(prev_trade_date, prev_limit_df),
        "yesterday_winners_today": _yesterday_winners_today(
            prev_trade_date, prev_limit_df, today_daily_df
        ),
    }
    return patch, failures
745
+
746
+
747
def _max_height(step: dict[str, int]) -> int:
    """Return the largest key of ``step`` that parses as an integer.

    Keys that ``int()`` rejects are ignored. Returns 0 when ``step`` is empty
    or when no key is parseable.
    """
    if not step:
        return 0

    def _parse(key: Any) -> int | None:
        try:
            return int(key)
        except (TypeError, ValueError):
            return None

    heights = [h for h in map(_parse, step) if h is not None]
    return max(heights, default=0)
757
+
758
+
759
def _limit_step_trend(today: dict[str, int], prev: dict[str, int]) -> dict[str, Any]:
    """Compare today's board-height distribution with the previous day's.

    Two deltas are computed: the change in maximum height (via
    ``_max_height``) and the change in the sum of all counts.

    The ``interpretation`` field is:
        * ``"spectrum_lifting"`` when both deltas are positive;
        * ``"spectrum_collapsing"`` when the height delta is negative or the
          count delta is below -10;
        * ``"stable"`` otherwise.

    Returns:
        Dict with ``max_height``, ``max_height_prev``, ``high_board_delta``,
        ``total_limit_up_delta`` and ``interpretation``.
    """
    peak_today, peak_prev = _max_height(today), _max_height(prev)
    height_change = peak_today - peak_prev
    count_change = sum(today.values()) - sum(prev.values())

    label = "stable"
    if height_change > 0 and count_change > 0:
        label = "spectrum_lifting"
    elif height_change < 0 or count_change < -10:
        label = "spectrum_collapsing"

    return {
        "max_height": peak_today,
        "max_height_prev": peak_prev,
        "high_board_delta": height_change,
        "total_limit_up_delta": count_change,
        "interpretation": label,
    }
779
+
780
+
781
def _yesterday_failure_rate(
    prev_trade_date: str, ll_prev_df: pd.DataFrame | None
) -> dict[str, Any]:
    """Summarise the share of ``"Z"`` rows among ``"U"`` + ``"Z"`` rows on T-1.

    The ``limit`` column of ``ll_prev_df`` is counted for the values ``"U"``
    and ``"Z"``; ``rate_pct`` is ``Z / (U + Z) * 100`` rounded to two places.
    NOTE(review): presumably U = closed at limit-up and Z = limit-up that
    broke open, per the upstream limit_list_d data — confirm.

    ``interpretation`` is ``"high"`` for a rate of 25 or more, ``"low"`` for 10
    or less, ``"moderate"`` in between, and None when there is no rate.

    Returns:
        Dict with ``trade_date_prev``, ``u_count``, ``z_count``, ``rate_pct``
        and ``interpretation``. When the frame is None, empty or has no
        ``limit`` column, the counts are 0 and the other two fields are None.
    """
    summary: dict[str, Any] = {
        "trade_date_prev": prev_trade_date,
        "u_count": 0,
        "z_count": 0,
        "rate_pct": None,
        "interpretation": None,
    }
    if ll_prev_df is None or ll_prev_df.empty or "limit" not in ll_prev_df.columns:
        return summary

    flags = ll_prev_df["limit"]
    u_count = int((flags == "U").sum())
    z_count = int((flags == "Z").sum())
    summary["u_count"] = u_count
    summary["z_count"] = z_count

    attempts = u_count + z_count
    if attempts > 0:
        pct = round(z_count / attempts * 100, 2)
        summary["rate_pct"] = pct
        if pct >= 25:
            summary["interpretation"] = "high"
        elif pct <= 10:
            summary["interpretation"] = "low"
        else:
            summary["interpretation"] = "moderate"
    return summary
811
+
812
+
813
def _yesterday_winners_today(
    prev_trade_date: str,
    ll_prev_df: pd.DataFrame | None,
    daily_t_df: pd.DataFrame | None,
) -> dict[str, Any]:
    """Measure how the T-1 ``limit == "U"`` stocks performed on day T.

    Winners are the rows of ``ll_prev_df`` whose ``limit`` equals ``"U"``.
    Their ``pct_chg`` on day T is looked up in ``daily_t_df`` by ``ts_code``.

    Args:
        prev_trade_date: The T-1 date, echoed back as ``trade_date_prev``.
        ll_prev_df: T-1 limit list; needs ``limit`` and ``ts_code`` columns.
        daily_t_df: Day-T daily bars; needs ``ts_code`` and ``pct_chg``.

    Returns:
        Dict with ``trade_date_prev``, ``n_winners``, ``n_continued_today``
        (pct_chg >= 9.8), ``continuation_rate_pct`` (continued / winners),
        ``n_negative_today`` (pct_chg < -2), ``avg_pct_chg_today`` and
        ``interpretation``:

        * ``"strong_money_effect"`` — continuation >= 50 and average >= 3;
        * ``"weak_money_effect"`` — continuation <= 25 or average <= 0;
        * ``"neutral"`` — anything else.

        When an input is None/empty, a required column is missing, or no
        winner appears in ``daily_t_df``, the derived fields are 0/None
        (``n_winners`` is still reported whenever it could be counted). A
        missing column degrades instead of raising KeyError so that a partial
        upstream response cannot abort the whole context build.
    """

    def _degraded(n_winners: int) -> dict[str, Any]:
        # Shape returned whenever the day-T follow-through cannot be computed.
        return {
            "trade_date_prev": prev_trade_date,
            "n_winners": n_winners,
            "n_continued_today": 0,
            "continuation_rate_pct": None,
            "n_negative_today": 0,
            "avg_pct_chg_today": None,
            "interpretation": None,
        }

    if ll_prev_df is None or ll_prev_df.empty or "limit" not in ll_prev_df.columns:
        return _degraded(0)

    winners = ll_prev_df[ll_prev_df["limit"] == "U"]
    n_winners = int(len(winners))
    if n_winners == 0 or daily_t_df is None or daily_t_df.empty:
        return _degraded(n_winners)
    # Without these columns the join or the statistics below are impossible.
    if "ts_code" not in winners.columns or not {"ts_code", "pct_chg"}.issubset(
        daily_t_df.columns
    ):
        return _degraded(n_winners)

    winner_codes = set(winners["ts_code"].astype(str))
    today_rows = daily_t_df[daily_t_df["ts_code"].astype(str).isin(winner_codes)]
    if today_rows.empty:
        return _degraded(n_winners)

    pct = today_rows["pct_chg"].dropna()
    n_continued = int((pct >= 9.8).sum())
    n_negative = int((pct < -2).sum())
    avg = round(float(pct.mean()), 2) if not pct.empty else None
    # n_winners > 0 is guaranteed by the guard above, so the rate always exists.
    cont_rate = round(n_continued / n_winners * 100, 2)

    interp: str | None
    if avg is None:
        interp = None
    elif cont_rate >= 50 and avg >= 3:
        interp = "strong_money_effect"
    elif cont_rate <= 25 or avg <= 0:
        interp = "weak_money_effect"
    else:
        interp = "neutral"

    return {
        "trade_date_prev": prev_trade_date,
        "n_winners": n_winners,
        "n_continued_today": n_continued,
        "continuation_rate_pct": cont_rate,
        "n_negative_today": n_negative,
        "avg_pct_chg_today": avg,
        "interpretation": interp,
    }
876
+
877
+
878
def _summarize_limit_step(step_df: pd.DataFrame) -> dict[str, int]:
    """Count ``limit_step`` rows per value of the ``nums`` column.

    Returns:
        ``{str(nums): row_count}``. An empty dict is returned when the frame
        is None, empty, or has no ``nums`` column.
    """
    usable = step_df is not None and not step_df.empty and "nums" in step_df.columns
    if not usable:
        return {}
    per_height = step_df.groupby("nums").size().to_dict()
    return {str(height): int(count) for height, count in per_height.items()}
886
+
887
+
888
+ # ---------------------------------------------------------------------------
889
+ # A1 derived factors (Phase A — pure compute, no new APIs)
890
+ # ---------------------------------------------------------------------------
891
+
892
+
893
def _amplitude_pct(daily_t_row: dict[str, Any] | None) -> float | None:
    """Return ``(high - low) / pre_close * 100`` rounded to two decimals.

    The three inputs are read from ``daily_t_row`` and coerced with
    ``_to_float``. Returns None when the row is missing/empty, when ``high``
    or ``low`` is unavailable, or when ``pre_close`` is unavailable or zero.
    """
    if not daily_t_row:
        return None
    high, low, pre_close = (
        _to_float(daily_t_row.get(field)) for field in ("high", "low", "pre_close")
    )
    # `not pre_close` rejects both None and 0, which would divide by zero.
    if high is None or low is None or not pre_close:
        return None
    return round((high - low) / pre_close * 100, 2)
902
+
903
+
904
def _fd_amount_ratio(fd_amount: float | None, amount: float | None) -> float | None:
    """Return ``fd_amount`` as a percentage of ``amount``, to two decimals.

    Both arguments are coerced with ``_to_float``. Returns None when
    ``fd_amount`` is unavailable or when ``amount`` is unavailable or zero.
    """
    numerator = _to_float(fd_amount)
    denominator = _to_float(amount)
    if numerator is not None and denominator:
        return round(numerator / denominator * 100, 2)
    return None
910
+
911
+
912
def _to_float(v: Any) -> float | None:
    """Coerce ``v`` to ``float``, returning None for anything unusable.

    None is returned when ``v`` is None, when ``float(v)`` raises TypeError or
    ValueError (non-numeric text, lists, ``pd.NA``, ``pd.NaT``, ...), and when
    the converted value is NaN.

    The NaN test runs on the converted scalar rather than on the raw input:
    ``pd.isna`` applied to a list or array returns an array whose truth value
    is ambiguous and would raise outside the ``try``, and a textual ``"nan"``
    would otherwise slip through as a NaN float.
    """
    if v is None:
        return None
    try:
        number = float(v)
    except (TypeError, ValueError):
        return None
    return None if pd.isna(number) else number
919
+
920
+
921
def _ma_metrics(closes: list[float]) -> dict[str, float | bool | None]:
    """Compute 5/10/20-period simple moving averages from trailing closes.

    Args:
        closes: Closing prices in ascending date order; the last element is
            treated as the latest close.

    Returns:
        Dict with ``ma5``, ``ma10``, ``ma20`` (each rounded to two decimals, or
        None when fewer closes than the window are available) and
        ``ma_bull_aligned``, which is ``latest > ma5 > ma10 > ma20`` when all
        three averages exist and None otherwise.
    """

    def _mean_of_last(window: int) -> float | None:
        if len(closes) < window:
            return None
        return round(sum(closes[-window:]) / window, 2)

    if closes:
        ma5, ma10, ma20 = (_mean_of_last(w) for w in (5, 10, 20))
    else:
        ma5 = ma10 = ma20 = None

    aligned: bool | None = None
    if ma5 is not None and ma10 is not None and ma20 is not None:
        aligned = bool(closes[-1] > ma5 > ma10 > ma20)

    return {
        "ma5": ma5,
        "ma10": ma10,
        "ma20": ma20,
        "ma_bull_aligned": aligned,
    }
948
+
949
+
950
def _up_count_30d(d_hist: list[dict[str, Any]]) -> int | None:
    """Count rows among the last 30 of ``d_hist`` with ``pct_chg`` >= 9.8.

    A missing or falsy ``pct_chg`` is treated as 0. Returns None when fewer
    than 30 rows are available. (The 9.8 cut-off is the original author's
    threshold for a limit-up day on 10% boards.)
    """
    if len(d_hist) < 30:
        return None
    hits = 0
    for row in d_hist[-30:]:
        change = row.get("pct_chg") or 0
        if change >= 9.8:
            hits += 1
    return hits
956
+
957
+
958
def _trailing_closes(d_hist: list[dict[str, Any]]) -> list[float]:
    """Extract the ``close`` of every row as floats, preserving row order.

    Rows whose ``close`` is absent, None or NaN are skipped.
    """
    raw_closes = (row.get("close") for row in d_hist)
    return [
        float(close)
        for close in raw_closes
        if close is not None and not pd.isna(close)
    ]
966
+
967
+
968
def _build_cyq_lookup(cyq_df: pd.DataFrame | None) -> dict[str, dict[str, Any]]:
    """Map each ``ts_code`` in ``cyq_df`` to its derived chip-distribution fields.

    For every row the columns ``weight_avg``, ``winner_rate``, ``cost_5pct``
    and ``cost_95pct`` are read (missing columns count as None) and coerced
    with ``_to_float``.

    Returns:
        ``{ts_code: {"cyq_winner_pct", "cyq_avg_cost_yuan",
        "cyq_top10_concentration"}}``. The first two are the winner rate and
        weighted average cost rounded to two decimals; the third comes from
        ``_cyq_concentration``. An empty dict is returned when the frame is
        None, empty, or lacks a ``ts_code`` column. If a code occurs on
        several rows, the last row wins.
    """
    if cyq_df is None or cyq_df.empty or "ts_code" not in cyq_df.columns:
        return {}

    def _round2_or_none(value: float | None) -> float | None:
        return None if value is None else round(value, 2)

    lookup: dict[str, dict[str, Any]] = {}
    for record in cyq_df.itertuples(index=False):
        avg_cost = _to_float(getattr(record, "weight_avg", None))
        win_rate = _to_float(getattr(record, "winner_rate", None))
        low_cost = _to_float(getattr(record, "cost_5pct", None))
        high_cost = _to_float(getattr(record, "cost_95pct", None))
        lookup[str(record.ts_code)] = {
            "cyq_winner_pct": _round2_or_none(win_rate),
            "cyq_avg_cost_yuan": _round2_or_none(avg_cost),
            "cyq_top10_concentration": _cyq_concentration(low_cost, high_cost, avg_cost),
        }
    return lookup
985
+
986
+
987
def _cyq_concentration(
    cost_5: float | None, cost_95: float | None, weight_avg: float | None
) -> float | None:
    """Score chip concentration on a 0-100 scale; higher means tighter.

    The score is ``100 - (cost_95 - cost_5) / weight_avg * 100``, clamped to
    ``[0, 100]`` and rounded to two decimals. For example, a 5%-95% cost spread
    equal to 30% of ``weight_avg`` scores 70.

    Returns None when either cost bound is None or when ``weight_avg`` is
    None or zero.
    """
    if cost_5 is None or cost_95 is None or not weight_avg:
        return None
    relative_spread = (cost_95 - cost_5) / weight_avg * 100
    score = 100.0 - relative_spread
    # Clamp from above first, then from below.
    capped = min(100.0, score)
    floored = max(0.0, capped)
    return round(floored, 2)
999
+
1000
+
1001
def _close_to_avg_cost_pct(
    close: float | None, weight_avg: float | None
) -> float | None:
    """Return how far ``close`` sits from ``weight_avg``, as a percentage.

    Computed as ``(close - weight_avg) / weight_avg * 100`` rounded to two
    decimals. Returns None when ``close`` is None or when ``weight_avg`` is
    None or zero.
    """
    if close is not None and weight_avg:
        return round((close - weight_avg) / weight_avg * 100, 2)
    return None
1007
+
1008
+
1009
def _famous_seats_hits(seats: list[str]) -> list[str]:
    """Return the distinct seat names that contain any famous-seat hint.

    Matching is a case-insensitive substring test against every entry of
    ``FAMOUS_SEATS_HINTS``. Non-string entries are ignored, duplicates are
    collapsed, and the order of first appearance is preserved.
    """
    needles = tuple(hint.lower() for hint in FAMOUS_SEATS_HINTS)
    # dict.fromkeys de-duplicates while keeping first-seen order.
    distinct_names = dict.fromkeys(s for s in seats if isinstance(s, str))
    return [
        name
        for name in distinct_names
        if any(needle in name.lower() for needle in needles)
    ]
1023
+
1024
+
1025
def _build_lhb_rollup(
    top_list_df: pd.DataFrame | None,
    top_inst_df: pd.DataFrame | None,
) -> dict[str, dict[str, Any]]:
    """Roll up top_list / top_inst into per-ts_code ``lhb_*`` fields.

    Args:
        top_list_df: Rows with ``ts_code`` and (optionally) ``net_amount``.
        top_inst_df: Rows with ``ts_code`` and (optionally) ``exalter``.

    Returns:
        ``{ts_code: {...}}`` where an entry may contain:

        * ``lhb_net_buy_yi`` — ``net_amount`` passed through
          ``normalize_to_yi`` (the last row for a code wins);
        * ``lhb_inst_count`` — number of distinct ``exalter`` values;
        * ``lhb_famous_seats`` — result of ``_famous_seats_hits`` on them.

        A frame that is None, empty, or lacks ``ts_code`` contributes
        nothing. Codes absent from the result did not appear in either frame
        (callers render their ``lhb_*`` fields as null).
    """
    rollup: dict[str, dict[str, Any]] = {}

    if top_list_df is not None and not top_list_df.empty and "ts_code" in top_list_df.columns:
        for row in top_list_df.itertuples(index=False):
            net = normalize_to_yi("net_amount", getattr(row, "net_amount", None))
            rollup.setdefault(str(row.ts_code), {})["lhb_net_buy_yi"] = net

    if top_inst_df is not None and not top_inst_df.empty and "ts_code" in top_inst_df.columns:
        has_seats = "exalter" in top_inst_df.columns
        for ts, group in top_inst_df.groupby("ts_code"):
            # Drop missing names before stringifying: str(NaN)/str(None) would
            # otherwise be counted as a real seat called "nan"/"None".
            seats = [str(e) for e in group["exalter"].dropna()] if has_seats else []
            entry = rollup.setdefault(str(ts), {})
            entry["lhb_inst_count"] = len(set(seats))
            entry["lhb_famous_seats"] = _famous_seats_hits(seats)

    return rollup
1051
+
1052
+
1053
def _build_candidate_rows(
    candidates_df: pd.DataFrame,
    ths_df: pd.DataFrame | None,
    *,
    daily_df: pd.DataFrame | None = None,
    daily_basic_df: pd.DataFrame | None = None,
    moneyflow_df: pd.DataFrame | None = None,
    top_list_df: pd.DataFrame | None = None,
    top_inst_df: pd.DataFrame | None = None,
    cyq_perf_df: pd.DataFrame | None = None,
    daily_lookback: int = 30,
    moneyflow_lookback: int = 5,
) -> list[dict[str, Any]]:
    """Turn each candidate row into a dict of raw, normalized and derived fields.

    Every record starts with the candidate's own columns (identity, limit-up
    timing, prices, and amounts converted through ``normalize_to_yi``). The
    optional frames then enrich it, keyed by ``ts_code``:

    * ``ths_df`` — adds ``lu_desc``, ``tag``, ``limit_up_suc_rate`` and
      ``free_float_yi`` when the code is present.
    * ``daily_df`` — adds ``prev_daily`` (last ``daily_lookback`` rows of
      date/close/pct_chg/vol) plus ``amplitude_pct``, ``ma5``/``ma10``/
      ``ma20``/``ma_bull_aligned`` and ``up_count_30d``; the derived fields
      are always present and are None without history.
    * ``daily_basic_df`` — adds ``turnover_rate``, ``volume_ratio`` and
      ``circ_mv_yi`` from the latest row when history exists.
    * ``moneyflow_df`` — adds ``prev_moneyflow`` (last ``moneyflow_lookback``
      rows) when history exists.
    * ``top_list_df`` / ``top_inst_df`` — ``lhb_*`` fields, always present.
    * ``cyq_perf_df`` — ``cyq_*`` fields, always present.

    Returns:
        One dict per row of ``candidates_df``, in the same order.
    """
    has_ths = ths_df is not None and not ths_df.empty
    ths_by_code = ths_df.set_index("ts_code").to_dict(orient="index") if has_ths else {}

    daily_hist_by_code = _index_by_code(daily_df)
    basic_hist_by_code = _index_by_code(daily_basic_df)
    flow_hist_by_code = _index_by_code(moneyflow_df)
    lhb_by_code = _build_lhb_rollup(top_list_df, top_inst_df)
    cyq_by_code = _build_cyq_lookup(cyq_perf_df)

    records: list[dict[str, Any]] = []
    for cand in candidates_df.itertuples(index=False):
        code = str(cand.ts_code)
        raw_fd_amount = getattr(cand, "fd_amount", None)
        raw_amount = getattr(cand, "amount", None)
        industry = getattr(cand, "industry_basic", None) or getattr(cand, "industry", None)

        record: dict[str, Any] = {
            "candidate_id": code,
            "ts_code": code,
            "name": getattr(cand, "name", None),
            "industry": industry,
            "first_time": getattr(cand, "first_time", None),
            "last_time": getattr(cand, "last_time", None),
            "open_times": _opt_int(getattr(cand, "open_times", None)),
            "limit_times": _opt_int(getattr(cand, "limit_times", None)),
            "up_stat": getattr(cand, "up_stat", None),
            "pct_chg": round2(getattr(cand, "pct_chg", None)),
            "close_yuan": round2(getattr(cand, "close", None)),
            "turnover_ratio": round2(getattr(cand, "turnover_ratio", None)),
            "fd_amount_yi": normalize_to_yi("fd_amount", raw_fd_amount),
            "limit_amount_yi": normalize_to_yi(
                "limit_amount", getattr(cand, "limit_amount", None)
            ),
            "amount_yi": normalize_to_yi("amount", raw_amount),
            "total_mv_yi": normalize_to_yi("total_mv", getattr(cand, "total_mv", None)),
            "float_mv_yi": normalize_to_yi("float_mv", getattr(cand, "float_mv", None)),
            "fd_amount_ratio": _fd_amount_ratio(raw_fd_amount, raw_amount),
        }

        ths_row = ths_by_code.get(code)
        if ths_row is not None:
            record["lu_desc"] = ths_row.get("lu_desc")
            record["tag"] = ths_row.get("tag")
            record["limit_up_suc_rate"] = round2(ths_row.get("limit_up_suc_rate"))
            record["free_float_yi"] = normalize_to_yi(
                "free_float", ths_row.get("free_float")
            )

        # Daily history and the factors derived from it.
        daily_hist = daily_hist_by_code.get(code, [])
        if daily_hist:
            record["prev_daily"] = [
                {
                    "date": bar.get("trade_date"),
                    "close": round2(bar.get("close")),
                    "pct_chg": round2(bar.get("pct_chg")),
                    "vol": _opt_int(bar.get("vol")),
                }
                for bar in daily_hist[-daily_lookback:]
            ]
            # The most recent bar is used for the amplitude.
            record["amplitude_pct"] = _amplitude_pct(daily_hist[-1])
            record.update(_ma_metrics(_trailing_closes(daily_hist)))
            record["up_count_30d"] = _up_count_30d(daily_hist)
        else:
            record.update(
                {
                    "amplitude_pct": None,
                    "ma5": None,
                    "ma10": None,
                    "ma20": None,
                    "ma_bull_aligned": None,
                    "up_count_30d": None,
                }
            )

        basic_hist = basic_hist_by_code.get(code, [])
        if basic_hist:
            newest_basic = basic_hist[-1]
            record["turnover_rate"] = round2(newest_basic.get("turnover_rate"))
            record["volume_ratio"] = round2(newest_basic.get("volume_ratio"))
            record["circ_mv_yi"] = normalize_to_yi("circ_mv", newest_basic.get("circ_mv"))

        flow_hist = flow_hist_by_code.get(code, [])
        if flow_hist:
            record["prev_moneyflow"] = [
                {
                    "date": flow.get("trade_date"),
                    "net_mf_yi": normalize_to_yi("net_mf_amount", flow.get("net_mf_amount")),
                    "buy_lg_yi": normalize_to_yi("buy_lg_amount", flow.get("buy_lg_amount")),
                    "buy_elg_yi": normalize_to_yi("buy_elg_amount", flow.get("buy_elg_amount")),
                }
                for flow in flow_hist[-moneyflow_lookback:]
            ]

        # LHB roll-up: null when the candidate is not on the day's top list.
        # That is a legitimate fact rather than missing data; the downstream
        # LLM reads the null as "not listed".
        lhb_entry = lhb_by_code.get(code, {})
        record["lhb_net_buy_yi"] = lhb_entry.get("lhb_net_buy_yi")
        record["lhb_inst_count"] = lhb_entry.get("lhb_inst_count")
        record["lhb_famous_seats"] = lhb_entry.get("lhb_famous_seats") or []

        # cyq_perf: null when there is no row for this code (the prompt rule
        # has the LLM list cyq_* under the candidate's missing_data).
        cyq_entry = cyq_by_code.get(code, {})
        avg_cost = cyq_entry.get("cyq_avg_cost_yuan")
        record["cyq_winner_pct"] = cyq_entry.get("cyq_winner_pct")
        record["cyq_top10_concentration"] = cyq_entry.get("cyq_top10_concentration")
        record["cyq_avg_cost_yuan"] = avg_cost
        record["cyq_close_to_avg_cost_pct"] = _close_to_avg_cost_pct(
            _to_float(getattr(cand, "close", None)),
            avg_cost,
        )

        records.append(record)
    return records
1174
+
1175
+
1176
def _index_by_code(df: pd.DataFrame | None) -> dict[str, list[dict[str, Any]]]:
    """Group ``df`` by ``ts_code`` into lists of row dicts.

    When a ``trade_date`` column exists the frame is sorted by it first, so
    each list is in ascending ``trade_date`` order. Returns an empty dict when
    the frame is None, empty, or has no ``ts_code`` column.
    """
    if df is None or df.empty or "ts_code" not in df.columns:
        return {}
    ordered = df.sort_values("trade_date") if "trade_date" in df.columns else df
    return {
        str(code): rows.to_dict(orient="records")
        for code, rows in ordered.groupby("ts_code")
    }
1186
+
1187
+
1188
def _opt_int(v: Any) -> int | None:
    """Coerce ``v`` to ``int``, returning None when that is not possible.

    Integers and floats convert as ``int(v)`` (floats truncate toward zero).
    Numeric text such as ``"3.0"`` is accepted via ``int(float(v))``. None,
    NaN/NA values, non-numeric text, infinities and non-scalars all yield
    None rather than raising, so one malformed cell cannot abort the
    construction of a whole record.
    """
    if v is None:
        return None
    try:
        # pd.isna sits inside the try: on a list/array it returns an array
        # whose truth value raises ValueError.
        if pd.isna(v):
            return None
        return int(v)
    except (TypeError, ValueError, OverflowError):
        pass
    # Second chance for numeric text that int() rejects, e.g. "3.0".
    try:
        return int(float(v))
    except (TypeError, ValueError, OverflowError):
        return None