deeptrade-quant 0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deeptrade/__init__.py +8 -0
- deeptrade/channels_builtin/__init__.py +0 -0
- deeptrade/channels_builtin/stdout/__init__.py +0 -0
- deeptrade/channels_builtin/stdout/deeptrade_plugin.yaml +25 -0
- deeptrade/channels_builtin/stdout/migrations/20260429_001_init.sql +13 -0
- deeptrade/channels_builtin/stdout/stdout_channel/__init__.py +0 -0
- deeptrade/channels_builtin/stdout/stdout_channel/channel.py +180 -0
- deeptrade/cli.py +214 -0
- deeptrade/cli_config.py +396 -0
- deeptrade/cli_data.py +33 -0
- deeptrade/cli_plugin.py +176 -0
- deeptrade/core/__init__.py +8 -0
- deeptrade/core/config.py +344 -0
- deeptrade/core/config_migrations.py +138 -0
- deeptrade/core/db.py +176 -0
- deeptrade/core/llm_client.py +591 -0
- deeptrade/core/llm_manager.py +174 -0
- deeptrade/core/logging_config.py +61 -0
- deeptrade/core/migrations/__init__.py +0 -0
- deeptrade/core/migrations/core/20260427_001_init.sql +121 -0
- deeptrade/core/migrations/core/20260501_002_drop_llm_calls_stage.sql +10 -0
- deeptrade/core/migrations/core/__init__.py +0 -0
- deeptrade/core/notifier.py +302 -0
- deeptrade/core/paths.py +49 -0
- deeptrade/core/plugin_manager.py +616 -0
- deeptrade/core/run_status.py +29 -0
- deeptrade/core/secrets.py +152 -0
- deeptrade/core/tushare_client.py +824 -0
- deeptrade/plugins_api/__init__.py +44 -0
- deeptrade/plugins_api/base.py +66 -0
- deeptrade/plugins_api/channel.py +42 -0
- deeptrade/plugins_api/events.py +61 -0
- deeptrade/plugins_api/llm.py +46 -0
- deeptrade/plugins_api/metadata.py +84 -0
- deeptrade/plugins_api/notify.py +67 -0
- deeptrade/strategies_builtin/__init__.py +0 -0
- deeptrade/strategies_builtin/limit_up_board/__init__.py +0 -0
- deeptrade/strategies_builtin/limit_up_board/deeptrade_plugin.yaml +101 -0
- deeptrade/strategies_builtin/limit_up_board/limit_up_board/__init__.py +0 -0
- deeptrade/strategies_builtin/limit_up_board/limit_up_board/calendar.py +65 -0
- deeptrade/strategies_builtin/limit_up_board/limit_up_board/cli.py +269 -0
- deeptrade/strategies_builtin/limit_up_board/limit_up_board/config.py +76 -0
- deeptrade/strategies_builtin/limit_up_board/limit_up_board/data.py +1191 -0
- deeptrade/strategies_builtin/limit_up_board/limit_up_board/pipeline.py +869 -0
- deeptrade/strategies_builtin/limit_up_board/limit_up_board/plugin.py +30 -0
- deeptrade/strategies_builtin/limit_up_board/limit_up_board/profiles.py +85 -0
- deeptrade/strategies_builtin/limit_up_board/limit_up_board/prompts.py +485 -0
- deeptrade/strategies_builtin/limit_up_board/limit_up_board/render.py +890 -0
- deeptrade/strategies_builtin/limit_up_board/limit_up_board/runner.py +1087 -0
- deeptrade/strategies_builtin/limit_up_board/limit_up_board/runtime.py +172 -0
- deeptrade/strategies_builtin/limit_up_board/limit_up_board/schemas.py +178 -0
- deeptrade/strategies_builtin/limit_up_board/migrations/20260430_001_init.sql +150 -0
- deeptrade/strategies_builtin/limit_up_board/migrations/20260501_002_lub_stage_results_llm_provider.sql +8 -0
- deeptrade/strategies_builtin/limit_up_board/migrations/20260508_001_lub_lhb_tables.sql +36 -0
- deeptrade/strategies_builtin/limit_up_board/migrations/20260508_002_lub_cyq_perf.sql +18 -0
- deeptrade/strategies_builtin/limit_up_board/migrations/20260508_003_lub_lhb_pk_fix.sql +46 -0
- deeptrade/strategies_builtin/limit_up_board/migrations/20260508_004_lub_lhb_drop_pk.sql +53 -0
- deeptrade/strategies_builtin/limit_up_board/migrations/20260508_005_lub_config.sql +17 -0
- deeptrade/strategies_builtin/volume_anomaly/__init__.py +0 -0
- deeptrade/strategies_builtin/volume_anomaly/deeptrade_plugin.yaml +59 -0
- deeptrade/strategies_builtin/volume_anomaly/migrations/20260430_001_init.sql +94 -0
- deeptrade/strategies_builtin/volume_anomaly/migrations/20260601_001_realized_returns.sql +44 -0
- deeptrade/strategies_builtin/volume_anomaly/migrations/20260601_002_dimension_scores.sql +13 -0
- deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/__init__.py +0 -0
- deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/calendar.py +52 -0
- deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/cli.py +247 -0
- deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/data.py +2154 -0
- deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/pipeline.py +327 -0
- deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/plugin.py +22 -0
- deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/profiles.py +49 -0
- deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/prompts.py +187 -0
- deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/prompts_examples.py +84 -0
- deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/render.py +906 -0
- deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/runner.py +772 -0
- deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/runtime.py +90 -0
- deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/schemas.py +97 -0
- deeptrade/strategies_builtin/volume_anomaly/volume_anomaly/stats.py +174 -0
- deeptrade/theme.py +48 -0
- deeptrade_quant-0.0.2.dist-info/METADATA +166 -0
- deeptrade_quant-0.0.2.dist-info/RECORD +83 -0
- deeptrade_quant-0.0.2.dist-info/WHEEL +4 -0
- deeptrade_quant-0.0.2.dist-info/entry_points.txt +2 -0
- deeptrade_quant-0.0.2.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,2154 @@
|
|
|
1
|
+
"""Data layer for the volume-anomaly strategy.
|
|
2
|
+
|
|
3
|
+
Two distinct data flows:
|
|
4
|
+
screen_anomalies(...) — apply local rules to find new anomaly hits on T
|
|
5
|
+
collect_analyze_bundle(...) — read watchlist + assemble per-stock context for LLM
|
|
6
|
+
|
|
7
|
+
Reuses limit_up_board's main_board_filter / FIELD_UNITS_RAW conventions where
|
|
8
|
+
sensible but does NOT import from limit_up_board (plugins are self-contained).
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import json
|
|
14
|
+
import logging
|
|
15
|
+
import math
|
|
16
|
+
from dataclasses import dataclass, field
|
|
17
|
+
from datetime import datetime, time, timedelta
|
|
18
|
+
from typing import Any
|
|
19
|
+
|
|
20
|
+
import pandas as pd
|
|
21
|
+
|
|
22
|
+
from deeptrade.core.tushare_client import (
|
|
23
|
+
TushareClient,
|
|
24
|
+
TushareUnauthorizedError,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
from .calendar import TradeCalendar
|
|
28
|
+
|
|
29
|
+
logger = logging.getLogger(__name__)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# ---------------------------------------------------------------------------
|
|
33
|
+
# Step 0 — resolve trade date (mirrors limit_up_board behaviour)
|
|
34
|
+
# ---------------------------------------------------------------------------
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def resolve_trade_date(
    now_dt: datetime,
    calendar: TradeCalendar,
    *,
    user_specified: str | None = None,
    allow_intraday: bool = False,
    close_after: time = time(18, 0),
) -> tuple[str, str]:
    """Return ``(T, T+1)``: the trade date to work on and the next open day.

    Selection of T:
      * a truthy ``user_specified`` is used verbatim;
      * otherwise today (``now_dt`` formatted as YYYYMMDD) is used when the
        calendar reports it open AND either ``now_dt.time() >= close_after``
        or ``allow_intraday`` is set;
      * in every other case (today closed, or open but still before
        ``close_after`` without ``allow_intraday``) T is
        ``calendar.pretrade_date(today)``.

    T+1 is always ``calendar.next_open(T)``.
    """
    if user_specified:
        return user_specified, calendar.next_open(user_specified)

    today = now_dt.strftime("%Y%m%d")
    # Both "today is closed" and "today is open but not yet usable" fall back
    # to the previous trade date, so a single decision is enough.
    use_today = calendar.is_open(today) and (
        now_dt.time() >= close_after or allow_intraday
    )
    trade_day = today if use_today else calendar.pretrade_date(today)
    return trade_day, calendar.next_open(trade_day)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
# ---------------------------------------------------------------------------
|
|
68
|
+
# Main board filter
|
|
69
|
+
# ---------------------------------------------------------------------------
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def main_board_filter(stock_basic: pd.DataFrame) -> pd.DataFrame:
    """Return only listed Shanghai/Shenzhen main-board rows of *stock_basic*.

    A row is kept when ``market == "主板"`` and ``exchange`` is ``"SSE"`` or
    ``"SZSE"``; if a ``list_status`` column exists, only ``"L"`` rows survive.
    Rows with any other market/exchange value are dropped (per the original
    note this is meant to exclude ChiNext 300xxx, STAR 688xxx, BSE 8xxxxx and
    CDR listings). The result is a copy with a fresh 0..n-1 index.

    Raises:
        ValueError: if the ``market`` or ``exchange`` column is missing.
    """
    required = ("market", "exchange")
    if any(col not in stock_basic.columns for col in required):
        raise ValueError("stock_basic missing market/exchange columns")

    on_main_board = stock_basic["market"] == "主板"
    on_sse_or_szse = stock_basic["exchange"].isin(["SSE", "SZSE"])
    kept = stock_basic[on_main_board & on_sse_or_szse].copy()

    if "list_status" in kept.columns:
        kept = kept[kept["list_status"] == "L"]
    return kept.reset_index(drop=True)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
# ---------------------------------------------------------------------------
|
|
88
|
+
# Optional API wrapper (transient failure → empty df + reason string)
|
|
89
|
+
# ---------------------------------------------------------------------------
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _try_optional(
    tushare: TushareClient, api_name: str, **kwargs: Any
) -> tuple[pd.DataFrame, str | None]:
    """Call an optional tushare API without letting known failures propagate.

    Returns ``(frame, None)`` when ``tushare.call(api_name, **kwargs)``
    succeeds. When it raises ``TushareUnauthorizedError``,
    ``TushareServerError`` or ``TushareRateLimitError`` the result is an empty
    DataFrame plus a reason string prefixed with ``unauthorized:``,
    ``server_error:`` or ``rate_limited:`` respectively. Any other exception
    is not caught here.
    """
    from deeptrade.core.tushare_client import (  # noqa: PLC0415
        TushareRateLimitError,
        TushareServerError,
    )

    try:
        frame = tushare.call(api_name, **kwargs)
    except TushareUnauthorizedError as exc:
        failure = f"unauthorized: {exc}"
    except TushareServerError as exc:
        failure = f"server_error: {exc}"
    except TushareRateLimitError as exc:
        failure = f"rate_limited: {exc}"
    else:
        return frame, None
    return pd.DataFrame(), failure
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
# ---------------------------------------------------------------------------
|
|
112
|
+
# Unit normalizers (per-field; tushare units are heterogeneous)
|
|
113
|
+
# ---------------------------------------------------------------------------
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
FIELD_UNITS_RAW: dict[str, str] = {
    # daily.amount is in thousand CNY (千元); daily.vol is in lots (手) and
    # is handled separately
    "amount_daily": "千元",
    # daily_basic
    "circ_mv": "万元",
    "total_mv": "万元",
    "free_share": "万股",
    "float_share": "万股",
    "total_share": "万股",
    # moneyflow (all amounts in 10k CNY, 万元)
    "net_mf_amount": "万元",
    "buy_lg_amount": "万元",
    "buy_elg_amount": "万元",
    "buy_md_amount": "万元",
    "buy_sm_amount": "万元",
    "sell_lg_amount": "万元",
    "sell_elg_amount": "万元",
}


def normalize_to_yi(field: str, raw_value: float | None) -> float | None:
    """Convert a raw monetary value to 亿元 (1e8 CNY), rounded to 2 decimals.

    The unit of *field* is looked up in ``FIELD_UNITS_RAW``; fields not listed
    there are treated as plain 元. Returns ``None`` when *raw_value* is
    ``None``/NaN or when the field's unit is not a money unit this function
    knows how to scale (e.g. the share-count unit 万股).
    """
    if raw_value is None or pd.isna(raw_value):
        return None

    # How many raw units make up one 亿元.
    units_per_yi = {"元": 1e8, "万元": 1e4, "千元": 1e5}
    divisor = units_per_yi.get(FIELD_UNITS_RAW.get(field, "元"))
    if divisor is None:
        return None
    return round(float(raw_value) / divisor, 2)
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def round2(v: float | None) -> float | None:
    """Round *v* to two decimals; ``None`` and NaN come back as ``None``."""
    is_missing = v is None or pd.isna(v)
    return None if is_missing else round(float(v), 2)
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def _opt_int(v: Any) -> int | None:
    """Return ``int(v)``, or ``None`` when *v* is ``None`` or NaN."""
    is_missing = v is None or pd.isna(v)
    return None if is_missing else int(v)
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def _normalize_id_cols(df: pd.DataFrame | None) -> pd.DataFrame | None:
    """Return a copy of *df* whose identifier columns are cast to ``str``.

    The on-disk tushare JSON cache widens "20260428" to the integer 20260428
    on round-trip. When fresh SDK rows (str) are mixed with cached rows (int),
    pandas sorting/comparison fails with
    ``TypeError: '<' not supported between instances of 'int' and 'str'``.
    Casting ``trade_date``, ``ts_code`` and ``cal_date`` (whichever are
    present) to str avoids that.

    ``None`` and empty frames are returned unchanged (same object, no copy).
    """
    if df is None or df.empty:
        return df

    normalized = df.copy()
    present = [
        name
        for name in ("trade_date", "ts_code", "cal_date")
        if name in normalized.columns
    ]
    for name in present:
        normalized[name] = normalized[name].astype(str)
    return normalized
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def _shift_calendar_days(yyyymmdd: str, days: int) -> str:
    """Move a YYYYMMDD date by *days* calendar days (not trade days).

    *days* may be negative; the result is again a YYYYMMDD string.
    """
    fmt = "%Y%m%d"
    shifted = datetime.strptime(yyyymmdd, fmt) + timedelta(days=days)
    return shifted.strftime(fmt)
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def _calendar_days_between(earlier: str, later: str) -> int:
    """Return ``later - earlier`` in calendar days for two YYYYMMDD strings.

    The result is negative when *later* actually precedes *earlier*.
    """
    start, end = (datetime.strptime(stamp, "%Y%m%d") for stamp in (earlier, later))
    return (end - start).days
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
# ---------------------------------------------------------------------------
|
|
194
|
+
# SCREEN MODE — anomaly screening rules
|
|
195
|
+
# ---------------------------------------------------------------------------
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
# Default lookback window (kept module-level so analyze mode can re-use it
# without depending on the screen-only ScreenRules dataclass).
RULE_LOOKBACK_TRADE_DAYS: int = 60  # ~3 months

# v0.4.0 P1-3 — T+N realized-return evaluation horizons. F6 decision: keep as a
# module-level constant rather than introducing a new `va_config` table.
EVALUATE_HORIZONS: tuple[int, ...] = (1, 3, 5, 10)
EVALUATE_DEFAULT_LOOKBACK_DAYS: int = 30
# Largest horizon in EVALUATE_HORIZONS (10 with the tuple above).
EVALUATE_MAX_HORIZON: int = max(EVALUATE_HORIZONS)
EVALUATE_WINDOW_5D: int = 5
EVALUATE_WINDOW_10D: int = 10


# v0.3.0 P0-2 — default circ_mv-bucketed turnover thresholds.
# Each tuple is (circ_mv_yi_max, turnover_min, turnover_max). The first bucket
# whose `max` is ≥ the candidate's circ_mv_yi (in 亿元, i.e. 1e8 CNY) wins;
# boundary values fall into the smaller bucket (E4 — `circ_mv_yi ≤ bucket_max`).
DEFAULT_TURNOVER_BUCKETS: list[tuple[float, float, float]] = [
    (50.0, 5.0, 15.0),  # circ_mv ≤ 50 (×1e8 CNY) — micro cap
    (200.0, 3.5, 12.0),  # 50–200 (×1e8 CNY) — small/mid cap
    (1000.0, 2.5, 9.0),  # 200–1000 (×1e8 CNY) — mid cap
    (math.inf, 1.5, 6.0),  # > 1000 (×1e8 CNY) — large cap
]
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def _bucket_label(bucket_max: float, prev_max: float) -> str:
    """Render a human-readable bucket label like "≤50亿" / "50-200亿" / ">1000亿".

    Args:
        bucket_max: upper bound of the bucket (``math.inf`` for open-ended).
        prev_max: upper bound of the preceding bucket; ``<= 0`` means this is
            the first bucket.

    An open-ended bucket is always rendered as ``">{lower}亿"``, including the
    case where it is also the first bucket (``">0亿"``).
    """
    # The unbounded case must be tested first: int(math.inf) raises
    # OverflowError, so an open-ended *first* bucket must never reach the
    # "≤{int(bucket_max)}" branch below.
    if math.isinf(bucket_max):
        return f">{int(max(prev_max, 0.0))}亿"
    if prev_max <= 0:
        return f"≤{int(bucket_max)}亿"
    return f"{int(prev_max)}-{int(bucket_max)}亿"


def _resolve_turnover_bucket(
    circ_mv_yi: float, buckets: list[tuple[float, float, float]]
) -> tuple[int, str, float, float]:
    """Return (idx, label, t_min, t_max) for the first bucket where circ_mv_yi ≤ max.

    Each bucket is ``(circ_mv_yi_max, turnover_min, turnover_max)``. When no
    bucket matches — the last bucket is finite and ``circ_mv_yi`` exceeds it,
    or ``circ_mv_yi`` is NaN (every ``<=`` comparison is False) — the value is
    assigned to the last bucket and that bucket's own label is returned.

    Raises:
        IndexError: if *buckets* is empty.
    """
    lower_bound = 0.0
    for idx, (b_max, t_min, t_max) in enumerate(buckets):
        if circ_mv_yi <= b_max:
            return idx, _bucket_label(b_max, lower_bound), t_min, t_max
        lower_bound = b_max

    # Fallback: clamp into the last bucket. Nothing visible here forces the
    # last bucket to be math.inf, so this path is reachable. Label it with the
    # last bucket's real lower bound (the previous bucket's max); reusing the
    # loop variable would yield "200-200亿" or overflow on int(math.inf).
    last_idx = len(buckets) - 1
    last_max, t_min, t_max = buckets[-1]
    last_lower = buckets[last_idx - 1][0] if last_idx > 0 else 0.0
    return last_idx, _bucket_label(last_max, last_lower), t_min, t_max
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
@dataclass
class ScreenRules:
    """Tunable thresholds for the anomaly screen.

    History of the defaults:

    * Plan A (v0.2) — ``turnover_max`` raised from 7 to 10; empirically it was
      the dominant bottleneck on real funnel data.
    * Plan B (v0.2) — the volume rule was split into "short-window must be
      max" OR "long-window top-N", because a strict 60-day max disqualifies
      any stock that happened to have a single heavy-volume day in the past
      three months.
    * Plan C (v0.2) — all knobs are collected by ``configure()`` at runtime.

    Every instance is validated in ``__post_init__``.
    """

    pct_chg_min: float = 5.0
    pct_chg_max: float = 8.0
    body_ratio_min: float = 0.6
    turnover_min: float = 3.0
    turnover_max: float = 10.0  # Plan A — previously 7.0
    vol_ratio_5d_min: float = 2.0
    # Plan B — the volume test passes when EITHER
    #   (a) vol_t == max(vol over the last `vol_max_short_window` trade days), OR
    #   (b) vol_t ranks within the top `vol_top_n_long` over `lookback_trade_days`.
    vol_max_short_window: int = 30
    vol_top_n_long: int = 3
    lookback_trade_days: int = RULE_LOOKBACK_TRADE_DAYS
    # P0 H2 — minimum fraction of `lookback_trade_days` that must exist in a
    # stock's history before the volume rule is evaluated. Stocks below it are
    # reported in `insufficient_history` instead of silently passing through.
    min_history_coverage: float = 0.8
    # P2 L3 — forward-adjust historical volume with adj_factor so vol_max
    # comparisons stay valid across splits / bonus-share events. Falls back to
    # raw volume (with a diagnostic) when adj_factor is unavailable.
    vol_adjust: bool = True
    # v0.3.0 P0-1 — drop hits whose upper shadow exceeds this fraction of the
    # day's range (long-upper-shadow "lightning rod" candles). None disables
    # the filter entirely.
    upper_shadow_ratio_max: float | None = 0.35
    # v0.3.0 P0-2 — circ_mv-bucketed (turnover_min, turnover_max). Entries are
    # (circ_mv_yi_max, turnover_min, turnover_max); the first bucket with
    # circ_mv_yi ≤ max wins. None falls back to the global turnover_min/max.
    turnover_buckets: list[tuple[float, float, float]] | None = field(
        default_factory=lambda: list(DEFAULT_TURNOVER_BUCKETS)
    )

    def __post_init__(self) -> None:
        """Fail loudly on impossible threshold combinations (P1 L2).

        Runs on every construction path (defaults, ``from_dict``, explicit
        arguments), so a misconfigured run raises immediately instead of
        silently producing zero hits.

        Raises:
            ValueError: on the first constraint that is violated.
        """
        # The checks are written as `not (lo <= x <= hi)` on purpose: a NaN
        # makes the chained comparison False and is therefore rejected.
        pct_ok = 0 <= self.pct_chg_min <= self.pct_chg_max
        if not pct_ok:
            raise ValueError(
                f"invalid pct_chg range [{self.pct_chg_min}, {self.pct_chg_max}] "
                "(require 0 ≤ min ≤ max)"
            )

        turnover_ok = 0 <= self.turnover_min <= self.turnover_max
        if not turnover_ok:
            raise ValueError(
                f"invalid turnover range [{self.turnover_min}, {self.turnover_max}] "
                "(require 0 ≤ min ≤ max)"
            )

        body_ok = 0.0 <= self.body_ratio_min <= 1.0
        if not body_ok:
            raise ValueError(
                f"body_ratio_min must be in [0, 1], got {self.body_ratio_min}"
            )

        if self.vol_ratio_5d_min < 0:
            raise ValueError(f"vol_ratio_5d_min must be ≥ 0, got {self.vol_ratio_5d_min}")

        short_window = self.vol_max_short_window
        if short_window <= 0:
            raise ValueError(f"vol_max_short_window must be > 0, got {short_window}")

        if self.vol_top_n_long <= 0:
            raise ValueError(f"vol_top_n_long must be > 0, got {self.vol_top_n_long}")

        lookback = self.lookback_trade_days
        # 6 = the 5-day window of previous sessions plus the T row itself.
        if lookback < 6:
            raise ValueError(
                f"lookback_trade_days must be ≥ 6 (5 prev + T), got {lookback}"
            )

        if short_window > lookback:
            raise ValueError(
                f"vol_max_short_window ({short_window}) must be ≤ "
                f"lookback_trade_days ({lookback})"
            )

        coverage_ok = 0.0 < self.min_history_coverage <= 1.0
        if not coverage_ok:
            raise ValueError(
                f"min_history_coverage must be in (0, 1], got {self.min_history_coverage}"
            )

        shadow_cap = self.upper_shadow_ratio_max
        if shadow_cap is not None and not (0.0 < shadow_cap <= 1.0):
            raise ValueError(
                f"upper_shadow_ratio_max must be in (0, 1] or None, "
                f"got {shadow_cap}"
            )

        if self.turnover_buckets is not None:
            self._validate_turnover_buckets()

    def _validate_turnover_buckets(self) -> None:
        """Check a non-None ``turnover_buckets`` list; raise ValueError if malformed.

        Requires a non-empty list of 3-tuples whose first element is strictly
        increasing and whose turnover range satisfies 0 ≤ min ≤ max.
        """
        if not self.turnover_buckets:
            raise ValueError("turnover_buckets, if set, must be non-empty")

        previous_upper = float("-inf")
        for entry in self.turnover_buckets:
            if not (isinstance(entry, tuple) and len(entry) == 3):
                raise ValueError(
                    f"each turnover_buckets entry must be a 3-tuple "
                    f"(circ_mv_yi_max, turnover_min, turnover_max); got {entry}"
                )
            upper, low, high = entry
            if upper <= previous_upper:
                raise ValueError(
                    f"turnover_buckets circ_mv_yi_max must be strictly increasing; "
                    f"{previous_upper} → {upper}"
                )
            if not (0 <= low <= high):
                raise ValueError(
                    f"turnover_buckets entry has invalid turnover range "
                    f"[{low}, {high}] (require 0 ≤ min ≤ max)"
                )
            previous_upper = upper

    @classmethod
    def defaults(cls) -> ScreenRules:
        """Return an instance built entirely from the field defaults."""
        return cls()

    @classmethod
    def from_dict(cls, d: dict[str, Any] | None) -> ScreenRules:
        """Build rules from a partial dict (``configure()`` output).

        Missing keys, and numeric keys whose value is ``None``, take the
        default. Special cases:

        * ``vol_adjust`` — strings are parsed explicitly ("1", "true", "t",
          "yes", "y", case-insensitive, mean True) rather than via
          ``bool(str)``, which is True for any non-empty string.
        * ``upper_shadow_ratio_max`` — an explicit ``None`` disables the
          filter; a missing key keeps the default.
        * ``turnover_buckets`` — accepts a list of 3-element sequences (JSON
          has no tuple); an explicit ``None`` falls back to the global
          turnover_min/max; a ``None`` first element means "no upper bound".

        Raises:
            ValueError: for a bucket entry that does not have 3 elements, or
                from ``__post_init__`` validation.
        """
        if not d:
            return cls.defaults()

        baseline = cls.defaults()
        numeric_fields: tuple[tuple[str, type], ...] = (
            ("pct_chg_min", float),
            ("pct_chg_max", float),
            ("body_ratio_min", float),
            ("turnover_min", float),
            ("turnover_max", float),
            ("vol_ratio_5d_min", float),
            ("vol_max_short_window", int),
            ("vol_top_n_long", int),
            ("lookback_trade_days", int),
            ("min_history_coverage", float),
        )
        params: dict[str, Any] = {}
        for key, cast in numeric_fields:
            supplied = d.get(key)
            params[key] = getattr(baseline, key) if supplied is None else cast(supplied)

        flag = d.get("vol_adjust")
        if flag is not None:
            params["vol_adjust"] = (
                flag.strip().lower() in {"1", "true", "t", "yes", "y"}
                if isinstance(flag, str)
                else bool(flag)
            )

        if "upper_shadow_ratio_max" in d:
            shadow = d["upper_shadow_ratio_max"]
            params["upper_shadow_ratio_max"] = None if shadow is None else float(shadow)

        if "turnover_buckets" in d:
            raw_buckets = d["turnover_buckets"]
            if raw_buckets is None:
                params["turnover_buckets"] = None
            else:
                buckets: list[tuple[float, float, float]] = []
                for item in raw_buckets:
                    if len(item) != 3:
                        raise ValueError(
                            f"each turnover_buckets entry must have 3 elements, got {item}"
                        )
                    upper_raw, low, high = item
                    upper = math.inf if upper_raw is None else float(upper_raw)
                    buckets.append((upper, float(low), float(high)))
                params["turnover_buckets"] = buckets

        return cls(**params)

    def as_dict(self) -> dict[str, Any]:
        """Return the rules as a plain dict in the form ``from_dict`` accepts.

        JSON has no ``inf``, so an unbounded bucket maximum is emitted as
        ``None`` and each bucket becomes a 3-element list.
        """
        from dataclasses import asdict as _asdict  # noqa: PLC0415

        payload = _asdict(self)
        buckets = self.turnover_buckets
        if buckets is None:
            return payload
        payload["turnover_buckets"] = [
            [None if math.isinf(upper) else upper, low, high]
            for upper, low, high in buckets
        ]
        return payload
|
|
434
|
+
|
|
435
|
+
|
|
436
|
+
@dataclass
class ScreenDiagnostics:
    """P0 — observable data-completeness counters surfaced in the report.

    Populated by `screen_anomalies` regardless of outcome so the user can
    verify for themselves that no silent degradation happened on this run.
    Every field has a default, so a fresh instance starts with all counters
    at zero, list/dict fields empty and status fields at their defaults.
    """

    # Step 1 — stock_basic / main-board pool sizes
    stock_basic_rows: int = 0
    main_board_rows: int = 0
    # Step 2 — ST and suspension lookups
    stock_st_count: int = 0
    stock_st_status: str = "ok"  # 'ok' | 'empty' (suspicious) | 'error: ...'
    suspend_d_count: int = 0
    suspend_d_status: str = "ok"
    # Step 3 — daily(T)
    daily_t_total_rows: int = 0
    daily_t_main_board_rows: int = 0  # ts_codes intersected with main_codes
    # Step 4 — daily_basic(T)
    daily_basic_t_total_rows: int = 0
    daily_basic_t_main_board_rows: int = 0
    daily_basic_status: str = "ok"
    turnover_missing_codes: list[str] = field(default_factory=list)
    n_turnover_missing: int = 0
    # Step 5 (history window)
    history_window_planned_days: int = 0
    history_window_actual_days: int = 0
    history_window_missing_dates: list[str] = field(default_factory=list)
    history_min_required_days: int = 0
    insufficient_history: list[dict[str, Any]] = field(default_factory=list)
    # P2 L3 — adj_factor coverage; surfaces whether vol-adjust ran on full data,
    # degraded to raw vol for some codes, or was disabled.
    vol_adjust_enabled: bool = False
    vol_adjust_status: str = "disabled"  # 'ok' | 'disabled' | 'degraded: ...'
    adj_factor_planned_days: int = 0
    adj_factor_actual_days: int = 0
    adj_factor_missing_dates: list[str] = field(default_factory=list)
    adj_factor_missing_codes: list[str] = field(default_factory=list)
    # v0.3.0 P0-1 — upper-shadow filter; `enabled=False` when rules disable it.
    upper_shadow_filter_enabled: bool = False
    upper_shadow_filter_threshold: float | None = None
    n_after_upper_shadow: int = 0
    # v0.3.0 P0-2 — circ_mv-bucketed turnover bookkeeping.
    turnover_buckets_enabled: bool = False
    # NOTE(review): presumably keyed by bucket label with a hit count per
    # bucket — confirm against the code that fills it.
    turnover_bucket_hits: dict[str, int] = field(default_factory=dict)
    n_missing_circ_mv: int = 0
    circ_mv_missing_codes: list[str] = field(default_factory=list)
|
|
484
|
+
|
|
485
|
+
|
|
486
|
+
@dataclass
class ScreenResult:
    """Outcome of a screen pass.

    The first seven fields are required; the rest default to fresh
    ``ScreenRules`` / ``ScreenDiagnostics`` instances and empty lists.
    NOTE(review): the ``n_*`` fields look like the candidate count left after
    each successive filter stage — confirm against ``screen_anomalies``.
    """

    trade_date: str
    n_main_board: int
    n_after_st_susp: int
    n_after_t_day_rules: int  # pct_chg + body_ratio
    n_after_upper_shadow: int  # v0.3.0 P0-1
    n_after_turnover: int
    n_after_vol_rules: int  # vol_ratio_5d + dual vol rule
    rules: ScreenRules = field(default_factory=ScreenRules.defaults)
    diagnostics: ScreenDiagnostics = field(default_factory=ScreenDiagnostics)
    hits: list[dict[str, Any]] = field(default_factory=list)
    data_unavailable: list[str] = field(default_factory=list)
|
|
501
|
+
|
|
502
|
+
|
|
503
|
+
def screen_anomalies(
    *,
    tushare: TushareClient,
    calendar: TradeCalendar,
    trade_date: str,
    rules: ScreenRules | None = None,
    force_sync: bool = False,
) -> ScreenResult:
    """Apply the local screening rules and return matched candidates.

    Pipeline (cheapest filter first):
        1. stock_basic → main board pool
        2. stock_st(T) → drop ST; suspend_d(T) → drop suspended
        3. daily(T) → keep bullish candles (close > open)
           + pct_chg in [pct_chg_min, pct_chg_max]
           + body_ratio ≥ body_ratio_min
           + (optional) upper_shadow_ratio ≤ upper_shadow_ratio_max
        4. daily_basic(T) → keep turnover_rate in [turnover_min, turnover_max],
           or in the per-bucket range when ``rules.turnover_buckets`` is set and
           circ_mv is available for the row
        5. daily(N-trade-day window) → keep
               (vol_t == max(vol_max_short_window) OR
                vol_t in top vol_top_n_long over lookback_trade_days)
               AND vol_t ≥ vol_ratio_5d_min × mean(prev 5d)

    Args:
        tushare: Client whose ``call(api_name, ...)`` supplies every dataset.
        calendar: Trade calendar used to derive the lookback window ending at T.
        trade_date: The trade date (T) being screened.
        rules: Screening thresholds; ``ScreenRules.defaults()`` when omitted.
        force_sync: Forwarded unchanged to every tushare call.

    Returns:
        A ``ScreenResult`` with per-stage counters, the surviving hits, the
        populated diagnostics object and any data-availability notes. The
        function returns early (with zeroed downstream counters) as soon as a
        stage leaves no candidates.

    Exceptions raised by the direct ``tushare.call`` requests are not caught
    here; only the ``suspend_d`` request is routed through ``_try_optional``.
    """
    rules = rules or ScreenRules.defaults()
    data_unavailable: list[str] = []
    diag = ScreenDiagnostics()
    # v0.3.0 P0-1 / P0-2 — surface whether each new filter is engaged this run.
    diag.upper_shadow_filter_enabled = rules.upper_shadow_ratio_max is not None
    diag.upper_shadow_filter_threshold = rules.upper_shadow_ratio_max
    diag.turnover_buckets_enabled = rules.turnover_buckets is not None

    # 1. main board pool
    stock_basic = tushare.call("stock_basic", force_sync=force_sync)
    diag.stock_basic_rows = int(len(stock_basic)) if stock_basic is not None else 0
    main_pool = main_board_filter(stock_basic)
    main_codes = set(main_pool["ts_code"].astype(str))
    n_main = len(main_codes)
    diag.main_board_rows = n_main

    # 2a. ST exclusion (REQUIRED — propagate auth failure)
    st_df = tushare.call("stock_st", trade_date=trade_date, force_sync=force_sync)
    st_codes = set(st_df["ts_code"].astype(str)) if not st_df.empty else set()
    diag.stock_st_count = len(st_codes)
    if not st_codes:
        # P0 M2 — the A-share market consistently has 100+ ST stocks per day;
        # an empty result must be a data anomaly and should be flagged.
        diag.stock_st_status = "empty (suspicious — verify data freshness)"
        data_unavailable.append(
            "stock_st(T) returned 0 ST codes — abnormal for A股, "
            "ST stocks may have leaked into candidates; verify data freshness"
        )

    # 2b. suspended exclusion (OPTIONAL)
    susp_df, susp_err = _try_optional(
        tushare, "suspend_d", trade_date=trade_date, force_sync=force_sync
    )
    if susp_err:
        data_unavailable.append(f"suspend_d ({susp_err})")
        diag.suspend_d_status = susp_err
    susp_codes = set(susp_df["ts_code"].astype(str)) if susp_df is not None and not susp_df.empty else set()
    diag.suspend_d_count = len(susp_codes)

    eligible = main_codes - st_codes - susp_codes
    n_after_st = len(eligible)

    # 3. T-day daily — single API call returns all stocks for that date
    daily_t_full = tushare.call("daily", trade_date=trade_date, force_sync=force_sync)
    daily_t_full = _normalize_id_cols(daily_t_full)
    if daily_t_full is None or daily_t_full.empty:
        data_unavailable.append("daily(T) returned empty")
        return ScreenResult(
            trade_date=trade_date,
            n_main_board=n_main,
            n_after_st_susp=n_after_st,
            n_after_t_day_rules=0,
            n_after_upper_shadow=0,
            n_after_turnover=0,
            n_after_vol_rules=0,
            rules=rules,
            diagnostics=diag,
            data_unavailable=data_unavailable,
        )
    diag.daily_t_total_rows = int(len(daily_t_full))
    diag.daily_t_main_board_rows = int(
        daily_t_full["ts_code"].astype(str).isin(main_codes).sum()
    )
    daily_t = daily_t_full[daily_t_full["ts_code"].astype(str).isin(eligible)].copy()

    # T-day bullish candle + body ratio + pct-change range
    daily_t["body"] = daily_t["close"] - daily_t["open"]
    # Clipping keeps the divisor strictly positive when high == low.
    daily_t["range"] = (daily_t["high"] - daily_t["low"]).clip(lower=1e-9)
    daily_t["body_ratio"] = daily_t["body"] / daily_t["range"]
    # v0.3.0 P0-1 — upper shadow as a fraction of the day's range.
    # = (high − max(open, close)) / range; pure upper wick → 1.0.
    daily_t["upper_shadow_ratio"] = (
        daily_t["high"] - daily_t[["open", "close"]].max(axis=1)
    ) / daily_t["range"]
    t_day_hits = daily_t[
        (daily_t["close"] > daily_t["open"])
        & (daily_t["body_ratio"] >= rules.body_ratio_min)
        & (daily_t["pct_chg"] >= rules.pct_chg_min)
        & (daily_t["pct_chg"] <= rules.pct_chg_max)
    ].copy()
    n_after_t_rules = len(t_day_hits)
    if t_day_hits.empty:
        return ScreenResult(
            trade_date=trade_date,
            n_main_board=n_main,
            n_after_st_susp=n_after_st,
            n_after_t_day_rules=0,
            n_after_upper_shadow=0,
            n_after_turnover=0,
            n_after_vol_rules=0,
            rules=rules,
            diagnostics=diag,
            data_unavailable=data_unavailable,
        )

    # v0.3.0 P0-1 — upper-shadow filter (skipped when threshold is None).
    if rules.upper_shadow_ratio_max is not None:
        t_day_hits = t_day_hits[
            t_day_hits["upper_shadow_ratio"] <= rules.upper_shadow_ratio_max
        ].copy()
    # Recorded even when the filter is skipped, in which case it equals
    # n_after_t_rules.
    n_after_upper_shadow = len(t_day_hits)
    diag.n_after_upper_shadow = n_after_upper_shadow
    if t_day_hits.empty:
        return ScreenResult(
            trade_date=trade_date,
            n_main_board=n_main,
            n_after_st_susp=n_after_st,
            n_after_t_day_rules=n_after_t_rules,
            n_after_upper_shadow=0,
            n_after_turnover=0,
            n_after_vol_rules=0,
            rules=rules,
            diagnostics=diag,
            data_unavailable=data_unavailable,
        )

    # 4. daily_basic — turnover_rate (+ circ_mv for v0.3.0 bucketing) filter
    db_t = tushare.call("daily_basic", trade_date=trade_date, force_sync=force_sync)
    db_t = _normalize_id_cols(db_t)
    # ts_code → {"turnover_rate": ..., "circ_mv": ...}; stays empty when the
    # frame is unusable, so every lookup below yields None.
    db_lookup: dict[str, dict[str, Any]] = {}
    if db_t is not None and not db_t.empty and "turnover_rate" in db_t.columns:
        cols = ["turnover_rate"]
        if "circ_mv" in db_t.columns:
            cols.append("circ_mv")
        db_lookup = db_t.set_index("ts_code")[cols].to_dict("index")
        diag.daily_basic_t_total_rows = int(len(db_t))
        diag.daily_basic_t_main_board_rows = int(
            db_t["ts_code"].astype(str).isin(main_codes).sum()
        )
    else:
        diag.daily_basic_status = "empty"
        data_unavailable.append("daily_basic.turnover_rate (frame empty)")
    t_day_hits["turnover_rate"] = t_day_hits["ts_code"].map(
        lambda c: db_lookup.get(c, {}).get("turnover_rate")
    )
    # v0.3.0 P0-2 — circ_mv lookup (units of 100 million yuan via normalize_to_yi).
    t_day_hits["circ_mv_yi"] = t_day_hits["ts_code"].map(
        lambda c: normalize_to_yi("circ_mv", db_lookup.get(c, {}).get("circ_mv"))
    )

    # P0 M1 — surface candidates whose turnover_rate lookup returned NaN.
    # They will be silently dropped by the comparison below; we make that visible.
    missing_mask = t_day_hits["turnover_rate"].isna()
    n_missing_turnover = int(missing_mask.sum())
    diag.n_turnover_missing = n_missing_turnover
    if n_missing_turnover > 0:
        miss_codes = t_day_hits.loc[missing_mask, "ts_code"].astype(str).tolist()
        diag.turnover_missing_codes = miss_codes
        sample = miss_codes[:5]
        ellipsis = "..." if n_missing_turnover > 5 else ""
        data_unavailable.append(
            f"daily_basic.turnover_rate missing for {n_missing_turnover} candidates "
            f"(silently dropped at turnover step): {sample}{ellipsis}"
        )

    # v0.3.0 P0-2 — bucket lookup. circ_mv missing → fall back to global thresholds.
    buckets = rules.turnover_buckets
    bucket_label_per_row: dict[Any, str | None] = {}
    bucket_hit_counter: dict[str, int] = {}
    circ_mv_missing_codes: list[str] = []

    def _row_passes_turnover(row: Any) -> bool:
        """Return True when the row's turnover_rate lies in its applicable range.

        Side effects: records the row's bucket label in ``bucket_label_per_row``
        (None when the global thresholds were used) and, when bucketing is
        enabled but circ_mv is missing, appends the code to
        ``circ_mv_missing_codes``. Rows with a missing turnover_rate are
        rejected before either side effect happens.
        """
        tr = row.turnover_rate
        if pd.isna(tr):
            return False
        circ = row.circ_mv_yi
        if buckets is None or circ is None or pd.isna(circ):
            t_min, t_max = rules.turnover_min, rules.turnover_max
            label = None
            if buckets is not None and (circ is None or pd.isna(circ)):
                circ_mv_missing_codes.append(str(row.ts_code))
        else:
            _, label, t_min, t_max = _resolve_turnover_bucket(float(circ), buckets)
        bucket_label_per_row[row.Index] = label
        return t_min <= tr <= t_max

    # We need pandas Index access — use `itertuples(index=True)` and rebuild filter mask.
    keep_mask = []
    for row in t_day_hits.itertuples(index=True):
        keep_mask.append(_row_passes_turnover(row))
    turnover_hits = t_day_hits.loc[keep_mask].copy()
    turnover_hits["turnover_bucket"] = turnover_hits.index.map(
        lambda i: bucket_label_per_row.get(i)
    )
    # Tally bucket distribution among rows that PASSED the filter.
    # NOTE(review): the label is also None when bucketing is disabled, so in
    # that case every passing row is counted under "fallback (no circ_mv)".
    for label in turnover_hits["turnover_bucket"].tolist():
        if label is None:
            bucket_hit_counter["fallback (no circ_mv)"] = (
                bucket_hit_counter.get("fallback (no circ_mv)", 0) + 1
            )
        else:
            bucket_hit_counter[label] = bucket_hit_counter.get(label, 0) + 1
    diag.turnover_bucket_hits = bucket_hit_counter
    diag.n_missing_circ_mv = len(circ_mv_missing_codes)
    diag.circ_mv_missing_codes = circ_mv_missing_codes
    if circ_mv_missing_codes:
        sample = circ_mv_missing_codes[:5]
        ellipsis = "..." if len(circ_mv_missing_codes) > 5 else ""
        data_unavailable.append(
            f"daily_basic.circ_mv missing for {len(circ_mv_missing_codes)} candidates "
            f"(fell back to global turnover thresholds): {sample}{ellipsis}"
        )

    n_after_turnover = len(turnover_hits)
    if turnover_hits.empty:
        return ScreenResult(
            trade_date=trade_date,
            n_main_board=n_main,
            n_after_st_susp=n_after_st,
            n_after_t_day_rules=n_after_t_rules,
            n_after_upper_shadow=n_after_upper_shadow,
            n_after_turnover=0,
            n_after_vol_rules=0,
            rules=rules,
            diagnostics=diag,
            data_unavailable=data_unavailable,
        )

    # 5. N-trade-day vol history for surviving codes (Plan B dual rule)
    survivor_codes = set(turnover_hits["ts_code"].astype(str))
    history_dates = _last_n_trade_dates(calendar, trade_date, rules.lookback_trade_days)
    diag.history_window_planned_days = len(history_dates)

    # P0 H1 — capture which planned dates returned empty (silent skip → visible).
    # NOTE(review): when every planned date comes back empty,
    # _fetch_daily_history_by_date returns a column-less DataFrame and the
    # history_df["ts_code"] lookup in the loop below would raise KeyError —
    # confirm this cannot happen in practice.
    history_df, missing_history_dates = _fetch_daily_history_by_date(
        tushare, history_dates, survivor_codes, force_sync=force_sync
    )
    diag.history_window_actual_days = len(history_dates) - len(missing_history_dates)
    diag.history_window_missing_dates = missing_history_dates
    if missing_history_dates:
        sample = missing_history_dates[:5]
        ellipsis = "..." if len(missing_history_dates) > 5 else ""
        data_unavailable.append(
            f"daily history missing on {len(missing_history_dates)}/"
            f"{len(history_dates)} planned days "
            f"(vol_max comparison weakened): {sample}{ellipsis}"
        )

    # P2 L3 — fetch adj_factor over the same window so vol_max comparisons
    # stay valid across splits/bonus-share issues. Falls back to raw vol when
    # unavailable.
    adj_factor_lookup, adj_factor_T_lookup = _build_adj_factor_lookups(
        tushare,
        history_dates,
        survivor_codes,
        trade_date=trade_date,
        rules=rules,
        diag=diag,
        data_unavailable=data_unavailable,
        force_sync=force_sync,
    )

    # P1 L1 — pre-compute the strict 5 trade-dates immediately preceding T.
    # `prior.tail(5)` was permissive: it would happily take any 5 surviving rows,
    # so a stock with gaps could end up averaging vol over a span > 5 trade days.
    # history_dates is sorted ascending, so the slice drops its last entry
    # (T, when T is an open day) and keeps the five entries before it.
    expected_prev5_dates = (
        history_dates[-6:-1] if len(history_dates) >= 6 else history_dates[:-1]
    )
    expected_prev5_set = set(expected_prev5_dates)

    # P0 H2 — enforce minimum history coverage; record stocks that fail
    required_days = max(6, int(rules.lookback_trade_days * rules.min_history_coverage))
    diag.history_min_required_days = required_days
    insufficient_history: list[dict[str, Any]] = []

    final_hits: list[dict[str, Any]] = []
    industry_lookup = main_pool.set_index("ts_code")[["name", "industry"]].to_dict(orient="index")
    for row in turnover_hits.itertuples(index=False):
        code = str(row.ts_code)
        h = history_df[history_df["ts_code"].astype(str) == code].sort_values("trade_date")
        if len(h) < required_days:
            # Unlike the two records further down, this one carries no "reason" key.
            insufficient_history.append(
                {
                    "ts_code": code,
                    "name": industry_lookup.get(code, {}).get("name"),
                    "available_days": int(len(h)),
                    "required_days": required_days,
                    "lookback_window": rules.lookback_trade_days,
                }
            )
            continue
        # Identify T row + prev 5 days (excluding T)
        t_row = h[h["trade_date"].astype(str) == trade_date]
        prior = h[h["trade_date"].astype(str) < trade_date]
        if t_row.empty or len(prior) < 5:
            insufficient_history.append(
                {
                    "ts_code": code,
                    "name": industry_lookup.get(code, {}).get("name"),
                    "available_days": int(len(h)),
                    "required_days": required_days,
                    "lookback_window": rules.lookback_trade_days,
                    "reason": "missing T-row or <5 prior days",
                }
            )
            continue

        # P1 L1 — strict prev-5 trade-day filter: require all 5 calendar
        # positions (history_dates[-6:-1]) to be present, else surface as
        # insufficient_history rather than averaging over a sparse span.
        prior_5d_strict = prior[prior["trade_date"].astype(str).isin(expected_prev5_set)]
        if len(prior_5d_strict) < 5:
            insufficient_history.append(
                {
                    "ts_code": code,
                    "name": industry_lookup.get(code, {}).get("name"),
                    "available_days": int(len(h)),
                    "required_days": required_days,
                    "lookback_window": rules.lookback_trade_days,
                    "reason": (
                        f"missing prev-5d trade dates "
                        f"(have {len(prior_5d_strict)}/5 of {sorted(expected_prev5_set)})"
                    ),
                }
            )
            continue

        # P2 L3 — adj_factor-aware vol values. When vol_adjust is enabled and
        # f_T is available, compute forward-adjusted vol so that a 1:N split
        # between d and T inflates pre-split vol by N (= adj_T / adj_d) and
        # historical vol stays comparable to T-day vol. Falls back to raw vol
        # silently per-row when adj_factor is missing for that row.
        f_T = adj_factor_T_lookup.get(code)
        if rules.vol_adjust and f_T is not None and f_T > 0:
            def _adj(d: str, raw: float) -> float:
                """Scale ``raw`` volume of date ``d`` by f_T / f_d; raw if f_d is unusable."""
                f_d = adj_factor_lookup.get((code, d))
                if f_d is None or f_d <= 0:
                    return raw
                return raw * (f_T / f_d)
            vol_t = float(t_row.iloc[0]["vol"])  # at T, f_d == f_T → no change
            vols_long = [
                _adj(str(td), float(v))
                for td, v in zip(
                    h["trade_date"].astype(str).tolist(),
                    h["vol"].astype(float).tolist(),
                    strict=False,
                )
            ]
            short_h = h.tail(rules.vol_max_short_window)
            vols_short = [
                _adj(str(td), float(v))
                for td, v in zip(
                    short_h["trade_date"].astype(str).tolist(),
                    short_h["vol"].astype(float).tolist(),
                    strict=False,
                )
            ]
            vol_mean_prev5 = float(
                pd.Series(
                    [
                        _adj(str(td), float(v))
                        for td, v in zip(
                            prior_5d_strict["trade_date"].astype(str).tolist(),
                            prior_5d_strict["vol"].astype(float).tolist(),
                            strict=False,
                        )
                    ]
                ).mean()
            )
        else:
            vol_t = float(t_row.iloc[0]["vol"])
            vols_long = h["vol"].astype(float).tolist()
            vols_short = [
                float(v) for v in h.tail(rules.vol_max_short_window)["vol"].tolist()
            ]
            vol_mean_prev5 = float(prior_5d_strict["vol"].astype(float).mean())
        # Both windows are slices of `h`, which contains the T row, so T's own
        # volume takes part in each maximum.
        vol_max_long = max(vols_long)
        vol_max_short = max(vols_short)

        # Plan B — vol passes if either condition holds
        # The 1e-9 slack keeps float rounding from failing an exact-equality case.
        short_window_max_pass = vol_t >= vol_max_short - 1e-9
        days_with_higher_vol = sum(1 for v in vols_long if v > vol_t + 1e-9)
        long_window_top_n_pass = days_with_higher_vol < rules.vol_top_n_long
        if not (short_window_max_pass or long_window_top_n_pass):
            continue

        # vol_ratio_5d ≥ rules.vol_ratio_5d_min
        if vol_mean_prev5 <= 0:
            continue
        vol_ratio_5d = vol_t / vol_mean_prev5
        if vol_ratio_5d < rules.vol_ratio_5d_min:
            continue

        meta = industry_lookup.get(code, {})
        final_hits.append(
            {
                "ts_code": code,
                "name": meta.get("name"),
                "industry": meta.get("industry"),
                "trade_date": trade_date,
                "pct_chg": round2(row.pct_chg),
                "open": round2(row.open),
                "high": round2(row.high),
                "low": round2(row.low),
                "close": round2(row.close),
                "vol": round2(row.vol),
                "amount": round2(row.amount),
                "body_ratio": round2(row.body_ratio),
                "upper_shadow_ratio": round2(getattr(row, "upper_shadow_ratio", None)),
                "turnover_rate": round2(row.turnover_rate),
                "circ_mv_yi": round2(getattr(row, "circ_mv_yi", None)),
                "turnover_bucket": getattr(row, "turnover_bucket", None),
                "vol_ratio_5d": round2(vol_ratio_5d),
                "vol_rank_in_long_window": days_with_higher_vol + 1,
                "max_vol_short_window": round2(vol_max_short),
                "max_vol_long_window": round2(vol_max_long),
                "history_days_used": int(len(h)),
                # Legacy-named column populated by upsert_watchlist /
                # append_anomaly_history. Holds the long-window max regardless
                # of the actual lookback_trade_days setting.
                "max_vol_60d": round2(vol_max_long),
            }
        )

    diag.insufficient_history = insufficient_history
    if insufficient_history:
        sample = [r["ts_code"] for r in insufficient_history[:5]]
        ellipsis = "..." if len(insufficient_history) > 5 else ""
        data_unavailable.append(
            f"insufficient history (<{required_days} of {rules.lookback_trade_days} days) "
            f"for {len(insufficient_history)} candidates (excluded from vol rule): "
            f"{sample}{ellipsis}"
        )

    return ScreenResult(
        trade_date=trade_date,
        n_main_board=n_main,
        n_after_st_susp=n_after_st,
        n_after_t_day_rules=n_after_t_rules,
        n_after_upper_shadow=n_after_upper_shadow,
        n_after_turnover=n_after_turnover,
        n_after_vol_rules=len(final_hits),
        rules=rules,
        diagnostics=diag,
        hits=final_hits,
        data_unavailable=data_unavailable,
    )
|
|
959
|
+
|
|
960
|
+
|
|
961
|
+
def _last_n_trade_dates(calendar: TradeCalendar, end_date: str, n: int) -> list[str]:
    """Return the last `n` open trade dates ending at (and including) end_date.

    Args:
        calendar: Object providing ``is_open(date)`` and ``pretrade_date(date)``.
        end_date: Last date of the window. It is included only when the
            calendar reports it as open; otherwise the window ends at the
            preceding trade date.
        n: Number of trade dates wanted. Values <= 0 yield an empty list.

    Returns:
        The collected dates sorted ascending, so the most recent one is last.
    """
    # Without this guard an open end_date was returned even when n == 0,
    # because the seed date was appended before the length check.
    if n <= 0:
        return []

    dates: list[str] = [end_date] if calendar.is_open(end_date) else []
    cursor = end_date
    # Walk backwards one trade date at a time until the window is full.
    while len(dates) < n:
        cursor = calendar.pretrade_date(cursor)
        dates.append(cursor)
    dates.sort()
    return dates
|
|
972
|
+
|
|
973
|
+
|
|
974
|
+
def _fetch_daily_history_by_date(
    tushare: TushareClient,
    trade_dates: list[str],
    candidate_codes: set[str],
    *,
    force_sync: bool = False,
) -> tuple[pd.DataFrame, list[str]]:
    """Collect ``daily`` rows for every date in ``trade_dates`` into one frame.

    One ``tushare.call("daily", trade_date=...)`` request is issued per date.
    Each non-empty frame is passed through ``_normalize_id_cols`` and, when
    ``candidate_codes`` is non-empty, narrowed to those codes on the client
    side. An empty ``candidate_codes`` means no narrowing is applied.

    Args:
        tushare: Client used for the per-day requests.
        trade_dates: Dates to request, in the order they should be fetched.
        candidate_codes: ``ts_code`` values to keep.
        force_sync: Forwarded unchanged to every request.

    Returns:
        ``(frame, missing_dates)``. ``frame`` is the concatenation of all usable
        per-day frames (a column-less empty DataFrame when none were usable).
        ``missing_dates`` lists each requested date whose frame was None or
        empty, either as returned or after normalisation, so the caller can
        report that later comparisons ran on incomplete data.
    """
    collected: list[pd.DataFrame] = []
    skipped: list[str] = []

    for day in trade_dates:
        raw = tushare.call("daily", trade_date=day, force_sync=force_sync)
        # Normalise only frames that carry rows; anything else counts as missing.
        day_df = None if raw is None or raw.empty else _normalize_id_cols(raw)
        if day_df is None or day_df.empty:
            skipped.append(day)
            continue
        if candidate_codes:
            day_df = day_df[day_df["ts_code"].isin(candidate_codes)]
        collected.append(day_df)

    if not collected:
        return pd.DataFrame(), skipped
    return pd.concat(collected, ignore_index=True), skipped
|
|
1009
|
+
|
|
1010
|
+
|
|
1011
|
+
def _build_adj_factor_lookups(
    tushare: TushareClient,
    history_dates: list[str],
    survivor_codes: set[str],
    *,
    trade_date: str,
    rules: ScreenRules,
    diag: ScreenDiagnostics,
    data_unavailable: list[str],
    force_sync: bool = False,
) -> tuple[dict[tuple[str, str], float], dict[str, float]]:
    """Fetch adj_factor for the screening window and build (code, date)→f and code→f_T lookups.

    The two returned dicts let the per-stock loop compute forward-adjusted vol
    in O(1) per row without re-filtering the frame each iteration.

    Args:
        tushare: Client used for the per-day adj_factor requests.
        history_dates: Trade dates of the screening window.
        survivor_codes: Codes still in the candidate pool.
        trade_date: The T day; its factors populate the second lookup.
        rules: Only ``rules.vol_adjust`` is read here.
        diag: Diagnostics object, mutated in place (fields listed below).
        data_unavailable: Note list, appended to in place when coverage is
            incomplete.
        force_sync: Forwarded unchanged to the fetch helper.

    Returns:
        ``(pair_lookup, f_T_lookup)``. Both are empty when vol-adjust is
        disabled or when no adj_factor data could be obtained at all.

    Diagnostics fields populated (P2 L3):
        diag.vol_adjust_enabled — whether the rule was on at all
        diag.vol_adjust_status — 'ok' | 'disabled' | 'degraded: ...'
        diag.adj_factor_planned_days — len(history_dates) when enabled
        diag.adj_factor_actual_days — successful per-day fetches
        diag.adj_factor_missing_dates — list of date strings that returned empty
        diag.adj_factor_missing_codes — codes whose T-day adj_factor was missing
            (forces fallback to raw vol for that code)
    """
    if not rules.vol_adjust:
        diag.vol_adjust_enabled = False
        diag.vol_adjust_status = "disabled"
        return {}, {}

    diag.vol_adjust_enabled = True
    diag.adj_factor_planned_days = len(history_dates)
    adj_df, missing_adj_dates = _fetch_adj_factor_history_by_date(
        tushare, history_dates, survivor_codes, force_sync=force_sync
    )
    diag.adj_factor_actual_days = len(history_dates) - len(missing_adj_dates)
    diag.adj_factor_missing_dates = missing_adj_dates

    if adj_df.empty or "adj_factor" not in adj_df.columns:
        diag.vol_adjust_status = "degraded: adj_factor unavailable (raw vol used)"
        data_unavailable.append(
            "adj_factor unavailable for the entire window — vol-adjust disabled, "
            "raw vol used (splits/送转 in lookback may understate historical vol)"
        )
        return {}, {}

    # (code, date) → adj_factor; rows whose factor is not a usable number are skipped.
    pair_lookup: dict[tuple[str, str], float] = {}
    for r in adj_df.itertuples(index=False):
        try:
            f = float(r.adj_factor)
        except (TypeError, ValueError):
            continue
        if pd.isna(f):
            continue
        pair_lookup[(str(r.ts_code), str(r.trade_date))] = f

    # code → adj_factor at T
    t_key = str(trade_date)
    f_T_lookup: dict[str, float] = {
        code: f for (code, d), f in pair_lookup.items() if d == t_key
    }

    missing_t_codes = sorted(survivor_codes - f_T_lookup.keys())
    diag.adj_factor_missing_codes = missing_t_codes

    # Report every degradation that applies. Previously the two cases were
    # mutually exclusive, so when T-day factors were missing for some codes the
    # missing historical days were never surfaced in data_unavailable.
    problems: list[str] = []
    if missing_t_codes:
        sample = missing_t_codes[:5]
        ellipsis = "..." if len(missing_t_codes) > 5 else ""
        problems.append(f"T-day adj_factor missing for {len(missing_t_codes)} code(s)")
        data_unavailable.append(
            f"adj_factor(T) missing for {len(missing_t_codes)} candidate(s) — "
            f"those codes use raw vol: {sample}{ellipsis}"
        )
    if missing_adj_dates:
        sample = missing_adj_dates[:5]
        ellipsis = "..." if len(missing_adj_dates) > 5 else ""
        problems.append(f"{len(missing_adj_dates)} historical day(s) missing adj_factor")
        data_unavailable.append(
            f"adj_factor missing on {len(missing_adj_dates)}/{len(history_dates)} "
            f"days (raw vol used for those rows): {sample}{ellipsis}"
        )
    # With a single problem the status text is identical to the earlier wording.
    diag.vol_adjust_status = ("degraded: " + "; ".join(problems)) if problems else "ok"

    return pair_lookup, f_T_lookup
|
|
1101
|
+
|
|
1102
|
+
|
|
1103
|
+
def _fetch_adj_factor_history_by_date(
    tushare: TushareClient,
    trade_dates: list[str],
    candidate_codes: set[str],
    *,
    force_sync: bool = False,
) -> tuple[pd.DataFrame, list[str]]:
    """Collect ``adj_factor`` rows for every date in ``trade_dates`` into one frame.

    Follows the same per-day batch pattern as the daily-history fetch, except
    that each request goes through ``_try_optional``. The error value that
    helper returns is discarded: a day for which no usable frame comes back is
    simply reported as missing.

    Args:
        tushare: Client handed to ``_try_optional`` for each request.
        trade_dates: Dates to request, in the order they should be fetched.
        candidate_codes: ``ts_code`` values to keep; when empty, no narrowing
            is applied.
        force_sync: Forwarded unchanged to every request.

    Returns:
        ``(frame, missing_dates)``. ``frame`` is the concatenation of all usable
        per-day frames (a column-less empty DataFrame when none were usable).
        ``missing_dates`` lists each requested date whose frame was None or
        empty, either as returned or after ``_normalize_id_cols``, so the
        diagnostic (P2 L3) can record whether vol-adjust ran on complete data.
    """
    kept: list[pd.DataFrame] = []
    absent: list[str] = []

    for day in trade_dates:
        fetched, _ = _try_optional(tushare, "adj_factor", trade_date=day, force_sync=force_sync)
        usable = fetched is not None and not fetched.empty
        if usable:
            # Re-check after normalisation, which may itself yield nothing usable.
            fetched = _normalize_id_cols(fetched)
            usable = fetched is not None and not fetched.empty
        if not usable:
            absent.append(day)
            continue
        if candidate_codes:
            fetched = fetched[fetched["ts_code"].isin(candidate_codes)]
        kept.append(fetched)

    if not kept:
        return pd.DataFrame(), absent
    return pd.concat(kept, ignore_index=True), absent
|
|
1135
|
+
|
|
1136
|
+
|
|
1137
|
+
# ---------------------------------------------------------------------------
|
|
1138
|
+
# Watchlist persistence
|
|
1139
|
+
# ---------------------------------------------------------------------------
|
|
1140
|
+
|
|
1141
|
+
|
|
1142
|
+
def upsert_watchlist(db: Any, hits: list[dict[str, Any]], trade_date: str) -> tuple[int, int]:
    """Insert new hits / update existing rows. Returns (n_new, n_updated).

    Original tracked_since is PRESERVED on duplicate hits — that's the whole
    point of the tracked-days metric (a stock that re-triggers shouldn't reset
    its tracking start).

    Args:
        db: Object exposing ``fetchall(sql)`` and ``execute(sql, params)``.
        hits: Hit dicts; each must contain ``"ts_code"``. All other columns are
            read with ``dict.get`` and stored as NULL when absent.
        trade_date: Stored as ``last_screened`` and, for new rows, also as
            ``tracked_since``.

    Returns:
        ``(n_new, n_updated)`` — number of INSERT and UPDATE statements issued.

    Raises:
        KeyError: If a hit has no ``"ts_code"`` key.
    """
    if not hits:
        return 0, 0

    # Only membership is needed: tracked_since is never rewritten on update.
    known_codes = {
        row[0] for row in db.fetchall("SELECT ts_code, tracked_since FROM va_watchlist")
    }
    update_sql = (
        "UPDATE va_watchlist SET name=?, industry=?, last_screened=?, "
        "last_pct_chg=?, last_close=?, last_vol=?, last_amount=?, "
        "last_body_ratio=?, last_turnover_rate=?, last_vol_ratio_5d=?, "
        "last_max_vol_60d=? WHERE ts_code=?"
    )
    insert_sql = (
        "INSERT INTO va_watchlist(ts_code, name, industry, tracked_since, "
        "last_screened, last_pct_chg, last_close, last_vol, last_amount, "
        "last_body_ratio, last_turnover_rate, last_vol_ratio_5d, last_max_vol_60d) "
        "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
    )

    n_new = 0
    n_updated = 0
    for h in hits:
        code = h["ts_code"]
        # The metric columns appear in the same order in both statements.
        metrics = (
            h.get("pct_chg"),
            h.get("close"),
            h.get("vol"),
            h.get("amount"),
            h.get("body_ratio"),
            h.get("turnover_rate"),
            h.get("vol_ratio_5d"),
            h.get("max_vol_60d"),
        )
        if code in known_codes:
            db.execute(
                update_sql,
                (h.get("name"), h.get("industry"), trade_date, *metrics, code),
            )
            n_updated += 1
        else:
            db.execute(
                insert_sql,
                (code, h.get("name"), h.get("industry"), trade_date, trade_date, *metrics),
            )
            # Remember the new row so a repeated code later in this batch is
            # updated instead of inserted a second time.
            known_codes.add(code)
            n_new += 1
    return n_new, n_updated
|
|
1205
|
+
|
|
1206
|
+
|
|
1207
|
+
def append_anomaly_history(db: Any, hits: list[dict[str, Any]]) -> None:
    """Write each hit into the ``va_anomaly_history`` audit table.

    For every hit the existing row with the same ``(trade_date, ts_code)`` is
    deleted first and the new row inserted afterwards, which emulates
    INSERT OR REPLACE without relying on ON CONFLICT support for composite
    primary keys. The complete hit dict is additionally stored as JSON in
    ``raw_metrics_json``.

    Args:
        db: Handle exposing ``execute(sql, params)``.
        hits: Hit dicts; ``trade_date`` and ``ts_code`` are required keys,
            every other column is read with ``dict.get`` (missing → ``None``).
    """
    delete_sql = "DELETE FROM va_anomaly_history WHERE trade_date=? AND ts_code=?"
    insert_sql = (
        "INSERT INTO va_anomaly_history(trade_date, ts_code, name, industry, "
        "pct_chg, close, open, high, low, vol, amount, body_ratio, turnover_rate, "
        "vol_ratio_5d, max_vol_60d, raw_metrics_json) "
        "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
    )
    # Optional columns, in the same order as the INSERT column list.
    optional_fields = (
        "name",
        "industry",
        "pct_chg",
        "close",
        "open",
        "high",
        "low",
        "vol",
        "amount",
        "body_ratio",
        "turnover_rate",
        "vol_ratio_5d",
        "max_vol_60d",
    )
    # ``or ()`` keeps the "nothing to do" behaviour for an empty / falsy input.
    for hit in hits or ():
        key = (hit["trade_date"], hit["ts_code"])
        db.execute(delete_sql, key)
        db.execute(
            insert_sql,
            (
                *key,
                *(hit.get(column) for column in optional_fields),
                json.dumps(hit, ensure_ascii=False),
            ),
        )
|
|
1244
|
+
|
|
1245
|
+
|
|
1246
|
+
def fetch_watchlist(db: Any) -> list[dict[str, Any]]:
|
|
1247
|
+
"""Read all watchlist rows as dicts."""
|
|
1248
|
+
rows = db.fetchall(
|
|
1249
|
+
"SELECT ts_code, name, industry, tracked_since, last_screened, last_pct_chg, "
|
|
1250
|
+
"last_close, last_vol, last_amount, last_body_ratio, last_turnover_rate, "
|
|
1251
|
+
"last_vol_ratio_5d, last_max_vol_60d FROM va_watchlist ORDER BY tracked_since"
|
|
1252
|
+
)
|
|
1253
|
+
cols = [
|
|
1254
|
+
"ts_code",
|
|
1255
|
+
"name",
|
|
1256
|
+
"industry",
|
|
1257
|
+
"tracked_since",
|
|
1258
|
+
"last_screened",
|
|
1259
|
+
"last_pct_chg",
|
|
1260
|
+
"last_close",
|
|
1261
|
+
"last_vol",
|
|
1262
|
+
"last_amount",
|
|
1263
|
+
"last_body_ratio",
|
|
1264
|
+
"last_turnover_rate",
|
|
1265
|
+
"last_vol_ratio_5d",
|
|
1266
|
+
"last_max_vol_60d",
|
|
1267
|
+
]
|
|
1268
|
+
return [dict(zip(cols, r, strict=False)) for r in rows]
|
|
1269
|
+
|
|
1270
|
+
|
|
1271
|
+
def prune_watchlist(db: Any, *, min_tracked_calendar_days: int, today: str) -> list[dict[str, Any]]:
    """Delete watchlist rows tracked for at least ``min_tracked_calendar_days``.

    The age of a row is ``_calendar_days_between(tracked_since, today)``.
    Every removed row is returned with an extra ``tracked_days`` key holding
    that age; rows younger than the threshold are left in the table.
    """
    aged_rows = [
        (row, _calendar_days_between(row["tracked_since"], today))
        for row in fetch_watchlist(db)
    ]
    removed: list[dict[str, Any]] = []
    for row, age in aged_rows:
        if age >= min_tracked_calendar_days:
            row["tracked_days"] = age
            removed.append(row)
    # One parameterized DELETE per code rather than a stitched IN (...) clause.
    for row in removed:
        db.execute("DELETE FROM va_watchlist WHERE ts_code=?", (row["ts_code"],))
    return removed
|
|
1286
|
+
|
|
1287
|
+
|
|
1288
|
+
# ---------------------------------------------------------------------------
|
|
1289
|
+
# ANALYZE MODE — assemble per-stock context for LLM
|
|
1290
|
+
# ---------------------------------------------------------------------------
|
|
1291
|
+
|
|
1292
|
+
|
|
1293
|
+
# v0.3.0 P0-3 — VCP feature windows (kept module-level so callers / tests can
# read them without instantiating AnalyzeBundle). All values are counts of
# history rows (one row per trade day).
ATR_WINDOW = 10  # 10-day ATR window (simple-average TR)
ATR_QUANTILE_LOOKBACK = 60  # rank current ATR within the trailing 60-day series
BBW_WINDOW = 20  # 20-day Bollinger band width
BBW_COMPRESSION_LOOKBACK = 60  # current BBW vs trailing 60-day mean BBW
# v0.3.0 P0-4 — resistance windows (E3-A: only `low_120d`, no `low_250d`).
# Used as the number of most-recent closes scanned for the high / low.
RESIST_120D = 120
RESIST_250D = 250
# Default extended history window for analyze mode (E2-A: single 250d fetch
# is sliced internally for verbatim / VCP / resistance consumers).
DEFAULT_EXTENDED_LOOKBACK_TRADE_DAYS = RESIST_250D
|
|
1305
|
+
|
|
1306
|
+
|
|
1307
|
+
def _compute_atr_series(history: list[dict[str, Any]]) -> list[float | None]:
|
|
1308
|
+
"""Per-row trailing-10-day simple-average True Range.
|
|
1309
|
+
|
|
1310
|
+
TR_t = max(high_t − low_t, |high_t − close_{t-1}|, |low_t − close_{t-1}|)
|
|
1311
|
+
ATR_10_t = mean(TR over the trailing 10 days, t inclusive)
|
|
1312
|
+
|
|
1313
|
+
Returns a list aligned to ``history``. Entries are ``None`` until enough
|
|
1314
|
+
rows are available or whenever any input value is missing in the window.
|
|
1315
|
+
"""
|
|
1316
|
+
n = len(history)
|
|
1317
|
+
if n < 2:
|
|
1318
|
+
return [None] * n
|
|
1319
|
+
trs: list[float | None] = [None]
|
|
1320
|
+
for i in range(1, n):
|
|
1321
|
+
h = history[i].get("high")
|
|
1322
|
+
low = history[i].get("low")
|
|
1323
|
+
c_prev = history[i - 1].get("close")
|
|
1324
|
+
if h is None or low is None or c_prev is None:
|
|
1325
|
+
trs.append(None)
|
|
1326
|
+
continue
|
|
1327
|
+
try:
|
|
1328
|
+
h_f, low_f, cp_f = float(h), float(low), float(c_prev)
|
|
1329
|
+
except (TypeError, ValueError):
|
|
1330
|
+
trs.append(None)
|
|
1331
|
+
continue
|
|
1332
|
+
if any(pd.isna(v) for v in (h_f, low_f, cp_f)):
|
|
1333
|
+
trs.append(None)
|
|
1334
|
+
continue
|
|
1335
|
+
trs.append(max(h_f - low_f, abs(h_f - cp_f), abs(low_f - cp_f)))
|
|
1336
|
+
|
|
1337
|
+
out: list[float | None] = []
|
|
1338
|
+
for i in range(n):
|
|
1339
|
+
start = i - ATR_WINDOW + 1
|
|
1340
|
+
if start < 0:
|
|
1341
|
+
out.append(None)
|
|
1342
|
+
continue
|
|
1343
|
+
slice_ = trs[start : i + 1]
|
|
1344
|
+
if any(t is None for t in slice_):
|
|
1345
|
+
out.append(None)
|
|
1346
|
+
continue
|
|
1347
|
+
out.append(sum(slice_) / ATR_WINDOW) # type: ignore[arg-type]
|
|
1348
|
+
return out
|
|
1349
|
+
|
|
1350
|
+
|
|
1351
|
+
def _compute_bbw_series(history: list[dict[str, Any]]) -> list[float | None]:
|
|
1352
|
+
"""Per-row 20-day Bollinger Band Width as a percentage of the 20-day MA.
|
|
1353
|
+
|
|
1354
|
+
BBW = 4 × stdev(close_20) / mean(close_20) × 100
|
|
1355
|
+
|
|
1356
|
+
The factor 4 = upper(MA + 2σ) − lower(MA − 2σ) → 4σ. Returns ``None`` until
|
|
1357
|
+
20 rows are available, or whenever any close in the window is missing /
|
|
1358
|
+
the rolling mean is non-positive.
|
|
1359
|
+
"""
|
|
1360
|
+
n = len(history)
|
|
1361
|
+
closes: list[float | None] = []
|
|
1362
|
+
for r in history:
|
|
1363
|
+
c = r.get("close")
|
|
1364
|
+
if c is None:
|
|
1365
|
+
closes.append(None)
|
|
1366
|
+
continue
|
|
1367
|
+
try:
|
|
1368
|
+
f = float(c)
|
|
1369
|
+
except (TypeError, ValueError):
|
|
1370
|
+
closes.append(None)
|
|
1371
|
+
continue
|
|
1372
|
+
closes.append(None if pd.isna(f) else f)
|
|
1373
|
+
|
|
1374
|
+
out: list[float | None] = []
|
|
1375
|
+
for i in range(n):
|
|
1376
|
+
start = i - BBW_WINDOW + 1
|
|
1377
|
+
if start < 0:
|
|
1378
|
+
out.append(None)
|
|
1379
|
+
continue
|
|
1380
|
+
slice_ = closes[start : i + 1]
|
|
1381
|
+
if any(c is None for c in slice_):
|
|
1382
|
+
out.append(None)
|
|
1383
|
+
continue
|
|
1384
|
+
floats: list[float] = [c for c in slice_ if c is not None]
|
|
1385
|
+
mean = sum(floats) / BBW_WINDOW
|
|
1386
|
+
if mean <= 0:
|
|
1387
|
+
out.append(None)
|
|
1388
|
+
continue
|
|
1389
|
+
# Population std — same family as Bollinger's original (close enough
|
|
1390
|
+
# for our discrimination purposes; the choice is uniform across the
|
|
1391
|
+
# series so trend comparisons are unbiased).
|
|
1392
|
+
var = sum((c - mean) ** 2 for c in floats) / BBW_WINDOW
|
|
1393
|
+
std = math.sqrt(var)
|
|
1394
|
+
out.append(4 * std / mean * 100)
|
|
1395
|
+
return out
|
|
1396
|
+
|
|
1397
|
+
|
|
1398
|
+
def _quantile_in_window(
|
|
1399
|
+
series: list[float | None], idx: int, lookback: int
|
|
1400
|
+
) -> float | None:
|
|
1401
|
+
"""Return the [0, 1] quantile of ``series[idx]`` within the trailing
|
|
1402
|
+
``lookback`` non-None values (idx inclusive). 0 = historical min,
|
|
1403
|
+
1 = historical max. ``None`` when fewer than ``lookback`` non-None values
|
|
1404
|
+
in the window or the current value itself is None."""
|
|
1405
|
+
if idx < 0 or idx >= len(series):
|
|
1406
|
+
return None
|
|
1407
|
+
cur = series[idx]
|
|
1408
|
+
if cur is None:
|
|
1409
|
+
return None
|
|
1410
|
+
start = max(0, idx - lookback + 1)
|
|
1411
|
+
window = [v for v in series[start : idx + 1] if v is not None]
|
|
1412
|
+
if len(window) < lookback:
|
|
1413
|
+
return None
|
|
1414
|
+
less_or_eq = sum(1 for v in window if v <= cur)
|
|
1415
|
+
return (less_or_eq - 1) / (len(window) - 1) if len(window) > 1 else 0.0
|
|
1416
|
+
|
|
1417
|
+
|
|
1418
|
+
@dataclass
class AnalyzeBundle:
    """Everything the trend-analysis LLM stage needs."""

    # Trade date being analyzed (T) and the following trade date, as supplied
    # by the caller.
    trade_date: str
    next_trade_date: str
    # One context dict per watchlist stock.
    candidates: list[dict[str, Any]] = field(default_factory=list)
    # Run-level summary values (watchlist size, lookback settings).
    market_summary: dict[str, Any] = field(default_factory=dict)
    # Which source produced sector_strength_data: "limit_cpt_list" or
    # "industry_fallback".
    sector_strength_source: str = "industry_fallback"
    # Sector payload; collect_analyze_bundle stores a "top_sectors" list here.
    sector_strength_data: dict[str, Any] = field(default_factory=dict)
    # Human-readable notes about data sources that failed or returned empty.
    data_unavailable: list[str] = field(default_factory=list)
|
|
1429
|
+
|
|
1430
|
+
|
|
1431
|
+
# v0.5.0 P1-1 — RPS / market-relative alpha configuration
DEFAULT_BASELINE_INDEX_CODE = "000300.SH"  # baseline index used for alpha
ALPHA_LEADING_THRESHOLD = 5.0  # alpha_20d_pct > +5 → leading
ALPHA_LAGGING_THRESHOLD = -5.0  # alpha_20d_pct < -5 → lagging
|
|
1435
|
+
|
|
1436
|
+
|
|
1437
|
+
def collect_analyze_bundle(
    *,
    tushare: TushareClient,
    db: Any,
    calendar: TradeCalendar,
    trade_date: str,
    next_trade_date: str,
    history_lookback: int = DEFAULT_EXTENDED_LOOKBACK_TRADE_DAYS,
    moneyflow_lookback: int = 5,
    baseline_index_code: str = DEFAULT_BASELINE_INDEX_CODE,
    force_sync: bool = False,
) -> AnalyzeBundle:
    """Read watchlist + pull historical windows + assemble compact LLM context.

    Returns a bundle with no candidates straight away when the watchlist is
    empty. Otherwise the following inputs are gathered:
      * daily OHLCV for the last ``history_lookback`` trade days (default
        250), later compressed into per-stock features
      * baseline index closes (``baseline_index_code``) over the same date
        span, used for the alpha fields
      * daily_basic on ``trade_date`` (turnover, valuation, market value)
      * moneyflow starting ``moneyflow_lookback + 7`` calendar days before
        ``trade_date``; the last ``moneyflow_lookback`` rows per stock are used
      * limit_list_d (limit_type "U") from the first history date up to
        ``trade_date``, to flag stocks with prior limit-ups (optional)
      * sector_strength: limit_cpt_list (tier 1) / industry aggregation fallback
      * tracked_days: calendar days since first added to watchlist

    Errors reported by ``_try_optional`` and empty results are recorded as
    notes in ``bundle.data_unavailable``.

    Args:
        tushare: Client used for every data pull.
        db: Database handle passed to ``fetch_watchlist``.
        calendar: Trade calendar used to derive the history dates.
        trade_date: The trade date being analyzed (T).
        next_trade_date: Stored on the bundle unchanged.
        history_lookback: Number of trade days of daily history to request.
        moneyflow_lookback: Number of most-recent moneyflow rows per stock.
        baseline_index_code: Index code used as the alpha baseline.
        force_sync: Forwarded to every data call.

    Returns:
        The populated ``AnalyzeBundle``.
    """
    bundle = AnalyzeBundle(trade_date=trade_date, next_trade_date=next_trade_date)
    data_unavailable: list[str] = []

    watchlist = fetch_watchlist(db)
    if not watchlist:
        return bundle

    candidate_codes = {w["ts_code"] for w in watchlist}

    # -------- historical OHLCV (extended trade-day window, batch by date) ---
    history_dates = _last_n_trade_dates(calendar, trade_date, history_lookback)
    daily_df, missing_history_dates = _fetch_daily_history_by_date(
        tushare, history_dates, candidate_codes, force_sync=force_sync
    )
    if daily_df.empty:
        data_unavailable.append(
            f"daily({history_lookback}d-window) returned empty"
        )
    elif missing_history_dates:
        # Report at most five of the missing dates to keep the note short.
        sample = missing_history_dates[:5]
        ellipsis = "..." if len(missing_history_dates) > 5 else ""
        data_unavailable.append(
            f"daily history missing on {len(missing_history_dates)}/"
            f"{len(history_dates)} planned days: {sample}{ellipsis}"
        )

    # -------- baseline index daily (v0.5.0 P1-1 — alpha computation) --------
    # F1: CSI 300 (沪深 300); G1: 250d window matched to per-stock daily history.
    # G8: failures emit a WARN-level mention into data_unavailable; the runner
    # surfaces it as an EventLevel.WARN LOG instead of silently degrading.
    baseline_close_by_date: dict[str, float] = {}
    if history_dates:
        idx_df, idx_err = _try_optional(
            tushare,
            "index_daily",
            params={
                "ts_code": baseline_index_code,
                "start_date": history_dates[0],
                "end_date": history_dates[-1],
            },
            force_sync=force_sync,
        )
        if idx_err:
            data_unavailable.append(
                f"index_daily ({idx_err}) — alpha 字段降级为 None;"
                f"如需启用 alpha,请确认 Tushare 账户已开通 index_daily 权限"
            )
        else:
            idx_df = _normalize_id_cols(idx_df)
            if idx_df is not None and not idx_df.empty and "close" in idx_df.columns:
                for r in idx_df[["trade_date", "close"]].itertuples(index=False):
                    # NOTE(review): a missing value in a float column is NaN,
                    # not None, so NaN closes likely pass this check — confirm
                    # that downstream alpha code tolerates them.
                    if r.close is not None:
                        baseline_close_by_date[str(r.trade_date)] = float(r.close)
            else:
                data_unavailable.append(
                    f"index_daily({baseline_index_code}) returned empty — alpha 字段降级为 None"
                )

    # -------- daily_basic on T (turnover, circ_mv, pe, pb) -------------------
    db_basic_t = tushare.call("daily_basic", trade_date=trade_date, force_sync=force_sync)
    db_basic_lookup: dict[str, dict[str, Any]] = {}
    if not db_basic_t.empty:
        for r in db_basic_t.itertuples(index=False):
            # getattr with a default tolerates columns absent from the frame.
            db_basic_lookup[str(r.ts_code)] = {
                "turnover_rate": getattr(r, "turnover_rate", None),
                "volume_ratio": getattr(r, "volume_ratio", None),
                "pe": getattr(r, "pe", None),
                "pb": getattr(r, "pb", None),
                "circ_mv": getattr(r, "circ_mv", None),
                "total_mv": getattr(r, "total_mv", None),
            }
    else:
        data_unavailable.append("daily_basic(T)")

    # -------- moneyflow (5-day per stock, optional) -------------------------
    # Request 7 extra calendar days; presumably padding so that weekends /
    # holidays still leave `moneyflow_lookback` trade days — TODO confirm.
    mf_start = _shift_calendar_days(trade_date, -(moneyflow_lookback + 7))
    mf_df, mf_err = _try_optional(
        tushare,
        "moneyflow",
        params={"start_date": mf_start, "end_date": trade_date},
        force_sync=force_sync,
    )
    if mf_err:
        data_unavailable.append(f"moneyflow ({mf_err})")
    mf_df = _normalize_id_cols(mf_df)
    if mf_df is not None and not mf_df.empty:
        mf_df = mf_df[mf_df["ts_code"].isin(candidate_codes)]

    # -------- limit_list_d over the history window (flag prior limit-ups) ---
    lu_start = history_dates[0] if history_dates else trade_date
    lu_df, lu_err = _try_optional(
        tushare,
        "limit_list_d",
        params={"start_date": lu_start, "end_date": trade_date, "limit_type": "U"},
        force_sync=force_sync,
    )
    if lu_err:
        data_unavailable.append(f"limit_list_d ({lu_err})")
    lu_by_code: dict[str, list[str]] = {}
    if lu_df is not None and not lu_df.empty:
        for r in lu_df.itertuples(index=False):
            lu_by_code.setdefault(str(r.ts_code), []).append(str(r.trade_date))

    # -------- sector strength (tier 1: limit_cpt_list, fallback: industry agg)
    cpt_df, cpt_err = _try_optional(
        tushare, "limit_cpt_list", trade_date=trade_date, force_sync=force_sync
    )
    if cpt_err:
        data_unavailable.append(f"limit_cpt_list ({cpt_err})")
    if cpt_df is not None and not cpt_df.empty:
        bundle.sector_strength_source = "limit_cpt_list"
        # Keep the ten best-ranked rows when a "rank" column exists, otherwise
        # the first ten rows as returned.
        top = cpt_df.sort_values("rank").head(10) if "rank" in cpt_df.columns else cpt_df.head(10)
        bundle.sector_strength_data = {"top_sectors": top.to_dict(orient="records")}
    else:
        # Industry fallback: aggregate watchlist by industry
        agg: dict[str, int] = {}
        for w in watchlist:
            # "未分类" ("uncategorized") buckets rows without an industry.
            ind = w.get("industry") or "未分类"
            agg[ind] = agg.get(ind, 0) + 1
        bundle.sector_strength_source = "industry_fallback"
        bundle.sector_strength_data = {
            "top_sectors": [
                {"sector": k, "watchlist_count": v}
                for k, v in sorted(agg.items(), key=lambda kv: kv[1], reverse=True)[:10]
            ]
        }

    # -------- per-stock context assembly ------------------------------------
    daily_by_code = _index_daily_by_code(daily_df)
    mf_by_code = _index_moneyflow_by_code(mf_df)
    candidates: list[dict[str, Any]] = []
    for w in watchlist:
        code = w["ts_code"]
        history = daily_by_code.get(code, [])
        if not history:
            # No data for this stock — still include it so LLM sees missing_data
            candidates.append(
                {
                    "candidate_id": code,
                    "ts_code": code,
                    "name": w.get("name"),
                    "industry": w.get("industry"),
                    "tracked_since": w.get("tracked_since"),
                    "tracked_days": _calendar_days_between(w["tracked_since"], trade_date),
                    "_missing_history": True,
                }
            )
            continue

        rec = _build_candidate_row(
            watchlist_row=w,
            trade_date=trade_date,
            history=history,
            daily_basic=db_basic_lookup.get(code, {}),
            moneyflow_5d=mf_by_code.get(code, [])[-moneyflow_lookback:],
            limit_up_dates=sorted(lu_by_code.get(code, [])),
            baseline_index_code=baseline_index_code,
            baseline_close_by_date=baseline_close_by_date,
        )
        candidates.append(rec)

    # -------- market summary -------------------------------------------------
    bundle.market_summary = {
        "watchlist_total": len(watchlist),
        "history_lookback_trade_days": history_lookback,
        "moneyflow_lookback_days": moneyflow_lookback,
    }
    bundle.candidates = candidates
    bundle.data_unavailable = data_unavailable
    return bundle
|
|
1628
|
+
|
|
1629
|
+
|
|
1630
|
+
def _index_daily_by_code(df: pd.DataFrame | None) -> dict[str, list[dict[str, Any]]]:
|
|
1631
|
+
if df is None or df.empty or "ts_code" not in df.columns:
|
|
1632
|
+
return {}
|
|
1633
|
+
df = _normalize_id_cols(df)
|
|
1634
|
+
if df is None:
|
|
1635
|
+
return {}
|
|
1636
|
+
df = df.sort_values("trade_date") if "trade_date" in df.columns else df
|
|
1637
|
+
out: dict[str, list[dict[str, Any]]] = {}
|
|
1638
|
+
for code, group in df.groupby("ts_code"):
|
|
1639
|
+
out[str(code)] = group.to_dict(orient="records")
|
|
1640
|
+
return out
|
|
1641
|
+
|
|
1642
|
+
|
|
1643
|
+
def _index_moneyflow_by_code(df: pd.DataFrame | None) -> dict[str, list[dict[str, Any]]]:
|
|
1644
|
+
if df is None or df.empty or "ts_code" not in df.columns:
|
|
1645
|
+
return {}
|
|
1646
|
+
df = _normalize_id_cols(df)
|
|
1647
|
+
if df is None:
|
|
1648
|
+
return {}
|
|
1649
|
+
df = df.sort_values("trade_date") if "trade_date" in df.columns else df
|
|
1650
|
+
out: dict[str, list[dict[str, Any]]] = {}
|
|
1651
|
+
for code, group in df.groupby("ts_code"):
|
|
1652
|
+
out[str(code)] = group.to_dict(orient="records")
|
|
1653
|
+
return out
|
|
1654
|
+
|
|
1655
|
+
|
|
1656
|
+
def _compute_alpha_pct(
|
|
1657
|
+
history: list[dict[str, Any]],
|
|
1658
|
+
baseline_close_by_date: dict[str, float],
|
|
1659
|
+
n: int,
|
|
1660
|
+
) -> float | None:
|
|
1661
|
+
"""alpha_n = stock_pct_chg_n − baseline_pct_chg_n (over the last n trade days).
|
|
1662
|
+
|
|
1663
|
+
Both legs use simple compounded close-to-close return. Returns None when
|
|
1664
|
+
either leg can't be computed (insufficient history / baseline data missing
|
|
1665
|
+
on the required dates).
|
|
1666
|
+
"""
|
|
1667
|
+
if len(history) <= n:
|
|
1668
|
+
return None
|
|
1669
|
+
end_row = history[-1]
|
|
1670
|
+
start_row = history[-1 - n]
|
|
1671
|
+
end_close = end_row.get("close")
|
|
1672
|
+
start_close = start_row.get("close")
|
|
1673
|
+
if end_close is None or start_close is None or start_close <= 0:
|
|
1674
|
+
return None
|
|
1675
|
+
end_date = str(end_row.get("trade_date") or "")
|
|
1676
|
+
start_date = str(start_row.get("trade_date") or "")
|
|
1677
|
+
base_end = baseline_close_by_date.get(end_date)
|
|
1678
|
+
base_start = baseline_close_by_date.get(start_date)
|
|
1679
|
+
if base_end is None or base_start is None or base_start <= 0:
|
|
1680
|
+
return None
|
|
1681
|
+
stock_ret = (float(end_close) / float(start_close) - 1.0) * 100.0
|
|
1682
|
+
base_ret = (float(base_end) / float(base_start) - 1.0) * 100.0
|
|
1683
|
+
return round(stock_ret - base_ret, 2)
|
|
1684
|
+
|
|
1685
|
+
|
|
1686
|
+
def _classify_rel_strength(alpha_20d: float | None) -> str | None:
|
|
1687
|
+
if alpha_20d is None:
|
|
1688
|
+
return None
|
|
1689
|
+
if alpha_20d > ALPHA_LEADING_THRESHOLD:
|
|
1690
|
+
return "leading"
|
|
1691
|
+
if alpha_20d < ALPHA_LAGGING_THRESHOLD:
|
|
1692
|
+
return "lagging"
|
|
1693
|
+
return "in_line"
|
|
1694
|
+
|
|
1695
|
+
|
|
1696
|
+
def _build_candidate_row(
|
|
1697
|
+
*,
|
|
1698
|
+
watchlist_row: dict[str, Any],
|
|
1699
|
+
trade_date: str,
|
|
1700
|
+
history: list[dict[str, Any]],
|
|
1701
|
+
daily_basic: dict[str, Any],
|
|
1702
|
+
moneyflow_5d: list[dict[str, Any]],
|
|
1703
|
+
limit_up_dates: list[str],
|
|
1704
|
+
baseline_index_code: str = DEFAULT_BASELINE_INDEX_CODE,
|
|
1705
|
+
baseline_close_by_date: dict[str, float] | None = None,
|
|
1706
|
+
) -> dict[str, Any]:
|
|
1707
|
+
"""Compress (up to) 250-day history → moving averages + base/washout +
|
|
1708
|
+
VCP / resistance features.
|
|
1709
|
+
|
|
1710
|
+
Reduces token usage by emitting compact scalars; the recent 5 OHLCV rows
|
|
1711
|
+
are still passed verbatim for form reference. v0.3.0 (PR-2):
|
|
1712
|
+
* input window widened from 60 → 250 trading days (E2-A) and sliced
|
|
1713
|
+
internally — 60d for MAs / aggregates, full window for VCP and
|
|
1714
|
+
120d / 250d resistance.
|
|
1715
|
+
* new fields: atr_10d_pct / atr_10d_quantile_in_60d / bbw_20d /
|
|
1716
|
+
bbw_compression_ratio (P0-3) and high_120d / high_250d / low_120d /
|
|
1717
|
+
dist_to_120d_high_pct / dist_to_250d_high_pct / is_above_120d_high /
|
|
1718
|
+
is_above_250d_high / pos_in_120d_range (P0-4).
|
|
1719
|
+
"""
|
|
1720
|
+
closes = [float(r["close"]) for r in history if r.get("close") is not None]
|
|
1721
|
+
if not closes:
|
|
1722
|
+
return {
|
|
1723
|
+
"candidate_id": watchlist_row["ts_code"],
|
|
1724
|
+
"ts_code": watchlist_row["ts_code"],
|
|
1725
|
+
"name": watchlist_row.get("name"),
|
|
1726
|
+
"tracked_since": watchlist_row["tracked_since"],
|
|
1727
|
+
"tracked_days": _calendar_days_between(watchlist_row["tracked_since"], trade_date),
|
|
1728
|
+
"_missing_history": True,
|
|
1729
|
+
}
|
|
1730
|
+
|
|
1731
|
+
# The 60d "compressed feature" slice — preserve pre-v0.3.0 semantics for
|
|
1732
|
+
# ma60 / high_60d / low_60d / pct_chg_60d when the input history is now
|
|
1733
|
+
# 250d wide.
|
|
1734
|
+
closes_60 = closes[-60:]
|
|
1735
|
+
last_close = closes[-1]
|
|
1736
|
+
|
|
1737
|
+
def _ma(n: int) -> float | None:
|
|
1738
|
+
if len(closes) < n:
|
|
1739
|
+
return None
|
|
1740
|
+
return round(sum(closes[-n:]) / n, 3)
|
|
1741
|
+
|
|
1742
|
+
ma5, ma10, ma20, ma60 = _ma(5), _ma(10), _ma(20), _ma(60)
|
|
1743
|
+
above_ma60 = ma60 is not None and last_close > ma60
|
|
1744
|
+
above_ma20 = ma20 is not None and last_close > ma20
|
|
1745
|
+
|
|
1746
|
+
# 60d aggregates (over the most-recent 60 closes)
|
|
1747
|
+
high_60d = round(max(closes_60), 3)
|
|
1748
|
+
low_60d = round(min(closes_60), 3)
|
|
1749
|
+
range_pct_60d = round((high_60d - low_60d) / max(low_60d, 1e-9) * 100, 2)
|
|
1750
|
+
pct_chg_60d = (
|
|
1751
|
+
round((last_close / closes_60[0] - 1) * 100, 2)
|
|
1752
|
+
if len(closes_60) >= 60 and closes_60[0] > 0
|
|
1753
|
+
else None
|
|
1754
|
+
)
|
|
1755
|
+
|
|
1756
|
+
# v0.3.0 P0-3 — VCP波动率收敛指标
|
|
1757
|
+
atr_series = _compute_atr_series(history)
|
|
1758
|
+
bbw_series = _compute_bbw_series(history)
|
|
1759
|
+
last_idx = len(history) - 1
|
|
1760
|
+
atr_now = atr_series[last_idx] if atr_series else None
|
|
1761
|
+
bbw_now = bbw_series[last_idx] if bbw_series else None
|
|
1762
|
+
atr_10d_pct: float | None = None
|
|
1763
|
+
if atr_now is not None and last_close > 0:
|
|
1764
|
+
atr_10d_pct = round(atr_now / last_close * 100, 3)
|
|
1765
|
+
atr_10d_quantile_in_60d = _quantile_in_window(
|
|
1766
|
+
atr_series, last_idx, ATR_QUANTILE_LOOKBACK
|
|
1767
|
+
)
|
|
1768
|
+
if atr_10d_quantile_in_60d is not None:
|
|
1769
|
+
atr_10d_quantile_in_60d = round(atr_10d_quantile_in_60d, 3)
|
|
1770
|
+
bbw_20d = round(bbw_now, 3) if bbw_now is not None else None
|
|
1771
|
+
bbw_compression_ratio: float | None = None
|
|
1772
|
+
if bbw_now is not None:
|
|
1773
|
+
prior = [b for b in bbw_series[-BBW_COMPRESSION_LOOKBACK:] if b is not None]
|
|
1774
|
+
if len(prior) >= BBW_COMPRESSION_LOOKBACK:
|
|
1775
|
+
mean_prior = sum(prior) / len(prior)
|
|
1776
|
+
if mean_prior > 0:
|
|
1777
|
+
bbw_compression_ratio = round(bbw_now / mean_prior, 3)
|
|
1778
|
+
|
|
1779
|
+
# v0.3.0 P0-4 — 120d / 250d 阻力位距离 (closes-based to match high_60d).
|
|
1780
|
+
# Compute the raw (unrounded) extremes for comparison against last_close so
|
|
1781
|
+
# boundary cases like "last close IS the 60d high" don't flip on rounding;
|
|
1782
|
+
# round only the emitted scalar field.
|
|
1783
|
+
def _window_extremes(n: int) -> tuple[float | None, float | None]:
|
|
1784
|
+
if len(closes) < n:
|
|
1785
|
+
return None, None
|
|
1786
|
+
sl = closes[-n:]
|
|
1787
|
+
return max(sl), min(sl)
|
|
1788
|
+
|
|
1789
|
+
high_120d_raw, low_120d_raw = _window_extremes(RESIST_120D)
|
|
1790
|
+
high_250d_raw, _ = _window_extremes(RESIST_250D)
|
|
1791
|
+
high_120d = round(high_120d_raw, 3) if high_120d_raw is not None else None
|
|
1792
|
+
high_250d = round(high_250d_raw, 3) if high_250d_raw is not None else None
|
|
1793
|
+
low_120d = round(low_120d_raw, 3) if low_120d_raw is not None else None
|
|
1794
|
+
dist_to_120d_high_pct = (
|
|
1795
|
+
round((last_close - high_120d_raw) / high_120d_raw * 100, 2)
|
|
1796
|
+
if high_120d_raw not in (None, 0)
|
|
1797
|
+
else None
|
|
1798
|
+
)
|
|
1799
|
+
dist_to_250d_high_pct = (
|
|
1800
|
+
round((last_close - high_250d_raw) / high_250d_raw * 100, 2)
|
|
1801
|
+
if high_250d_raw not in (None, 0)
|
|
1802
|
+
else None
|
|
1803
|
+
)
|
|
1804
|
+
is_above_120d_high = high_120d_raw is not None and last_close > high_120d_raw
|
|
1805
|
+
is_above_250d_high = high_250d_raw is not None and last_close > high_250d_raw
|
|
1806
|
+
pos_in_120d_range: float | None = None
|
|
1807
|
+
if (
|
|
1808
|
+
high_120d_raw is not None
|
|
1809
|
+
and low_120d_raw is not None
|
|
1810
|
+
and high_120d_raw > low_120d_raw
|
|
1811
|
+
):
|
|
1812
|
+
pos_in_120d_range = round(
|
|
1813
|
+
(last_close - low_120d_raw) / (high_120d_raw - low_120d_raw), 3
|
|
1814
|
+
)
|
|
1815
|
+
|
|
1816
|
+
# v0.5.0 P1-1 — RPS / 大盘相对 alpha. F10 — 5d / 20d / 60d。
|
|
1817
|
+
baseline_close_by_date = baseline_close_by_date or {}
|
|
1818
|
+
alpha_5d_pct = _compute_alpha_pct(history, baseline_close_by_date, 5)
|
|
1819
|
+
alpha_20d_pct = _compute_alpha_pct(history, baseline_close_by_date, 20)
|
|
1820
|
+
alpha_60d_pct = _compute_alpha_pct(history, baseline_close_by_date, 60)
|
|
1821
|
+
rel_strength_label = _classify_rel_strength(alpha_20d_pct)
|
|
1822
|
+
|
|
1823
|
+
# Base / washout features — find the最近 anomaly day (T) and the platform before it
|
|
1824
|
+
# The异动 day is `trade_date` itself (or the最近 row matching T). The "base" is
|
|
1825
|
+
# the period between the previous notable up-move and T.
|
|
1826
|
+
t_idx = next(
|
|
1827
|
+
(i for i, r in enumerate(history) if str(r.get("trade_date")) == trade_date),
|
|
1828
|
+
len(history) - 1,
|
|
1829
|
+
)
|
|
1830
|
+
# v0.3.0 PR-2 — keep the base/washout window at 60d even though `history`
|
|
1831
|
+
# is now up to 250d, so `base_*` field semantics stay backward-compatible.
|
|
1832
|
+
base_window_size = 60
|
|
1833
|
+
if t_idx > 0:
|
|
1834
|
+
base_start = max(0, t_idx - base_window_size)
|
|
1835
|
+
base_window_pre_t = history[base_start:t_idx]
|
|
1836
|
+
else:
|
|
1837
|
+
base_window_pre_t = []
|
|
1838
|
+
|
|
1839
|
+
# base_days = consecutive days before T where pct_chg is moderate (|pct_chg| < 4)
|
|
1840
|
+
base_days = 0
|
|
1841
|
+
for r in reversed(base_window_pre_t):
|
|
1842
|
+
if abs(float(r.get("pct_chg") or 0)) < 4.0:
|
|
1843
|
+
base_days += 1
|
|
1844
|
+
else:
|
|
1845
|
+
break
|
|
1846
|
+
|
|
1847
|
+
# Drawdown within base window: (max_close - min_close) / max_close * 100
|
|
1848
|
+
base_closes = [float(r["close"]) for r in base_window_pre_t if r.get("close") is not None]
|
|
1849
|
+
base_max_drawdown_pct = None
|
|
1850
|
+
base_avg_vol = None
|
|
1851
|
+
base_vol_shrink_ratio = None
|
|
1852
|
+
base_avg_turnover_rate = None
|
|
1853
|
+
if base_closes:
|
|
1854
|
+
bmax = max(base_closes)
|
|
1855
|
+
bmin = min(base_closes)
|
|
1856
|
+
if bmax > 0:
|
|
1857
|
+
base_max_drawdown_pct = round((bmax - bmin) / bmax * 100, 2)
|
|
1858
|
+
base_vols_pre = [float(r["vol"]) for r in base_window_pre_t if r.get("vol") is not None]
|
|
1859
|
+
if base_vols_pre:
|
|
1860
|
+
base_avg_vol = round(sum(base_vols_pre) / len(base_vols_pre), 2)
|
|
1861
|
+
# Compare平均 of整理后期 vs 整理前期 — shrinkage indicator
|
|
1862
|
+
if len(base_vols_pre) >= 10:
|
|
1863
|
+
half = len(base_vols_pre) // 2
|
|
1864
|
+
early = sum(base_vols_pre[:half]) / max(half, 1)
|
|
1865
|
+
late = sum(base_vols_pre[half:]) / max(len(base_vols_pre) - half, 1)
|
|
1866
|
+
if early > 0:
|
|
1867
|
+
base_vol_shrink_ratio = round(late / early, 2)
|
|
1868
|
+
|
|
1869
|
+
# days_since_last_limit_up — strictly before T
|
|
1870
|
+
prior_limit_ups = [d for d in limit_up_dates if d < trade_date]
|
|
1871
|
+
days_since_last_limit_up: int | None = None
|
|
1872
|
+
if prior_limit_ups:
|
|
1873
|
+
days_since_last_limit_up = _calendar_days_between(prior_limit_ups[-1], trade_date)
|
|
1874
|
+
|
|
1875
|
+
# Recent 5 days OHLCV (verbatim, for form reference)
|
|
1876
|
+
recent5 = [
|
|
1877
|
+
{
|
|
1878
|
+
"date": str(r.get("trade_date")),
|
|
1879
|
+
"open": round2(r.get("open")),
|
|
1880
|
+
"high": round2(r.get("high")),
|
|
1881
|
+
"low": round2(r.get("low")),
|
|
1882
|
+
"close": round2(r.get("close")),
|
|
1883
|
+
"pct_chg": round2(r.get("pct_chg")),
|
|
1884
|
+
"vol": _opt_int(r.get("vol")),
|
|
1885
|
+
}
|
|
1886
|
+
for r in history[-5:]
|
|
1887
|
+
]
|
|
1888
|
+
|
|
1889
|
+
# Moneyflow summary
|
|
1890
|
+
mf_summary: dict[str, Any] = {}
|
|
1891
|
+
if moneyflow_5d:
|
|
1892
|
+
net_amounts = [float(r.get("net_mf_amount") or 0) for r in moneyflow_5d]
|
|
1893
|
+
elg_amounts = [float(r.get("buy_elg_amount") or 0) for r in moneyflow_5d]
|
|
1894
|
+
lg_amounts = [float(r.get("buy_lg_amount") or 0) for r in moneyflow_5d]
|
|
1895
|
+
cum_net_yi = round(sum(net_amounts) / 1e4, 3) # 万元 → 亿
|
|
1896
|
+
cum_elg_lg_yi = round(sum(elg_amounts + lg_amounts) / 1e4, 3)
|
|
1897
|
+
# trend: increasing if last3 > first2 mean
|
|
1898
|
+
trend = "flat"
|
|
1899
|
+
if len(net_amounts) >= 5:
|
|
1900
|
+
first2 = sum(net_amounts[:2]) / 2
|
|
1901
|
+
last3 = sum(net_amounts[-3:]) / 3
|
|
1902
|
+
if last3 > first2 * 1.2:
|
|
1903
|
+
trend = "rising"
|
|
1904
|
+
elif last3 < first2 * 0.8:
|
|
1905
|
+
trend = "falling"
|
|
1906
|
+
mf_summary = {
|
|
1907
|
+
"cum_net_mf_yi": cum_net_yi,
|
|
1908
|
+
"cum_elg_plus_lg_buy_yi": cum_elg_lg_yi,
|
|
1909
|
+
"net_mf_trend": trend,
|
|
1910
|
+
"rows_used": len(moneyflow_5d),
|
|
1911
|
+
}
|
|
1912
|
+
else:
|
|
1913
|
+
mf_summary = {"rows_used": 0}
|
|
1914
|
+
|
|
1915
|
+
tracked_days = _calendar_days_between(watchlist_row["tracked_since"], trade_date)
|
|
1916
|
+
return {
|
|
1917
|
+
"candidate_id": watchlist_row["ts_code"],
|
|
1918
|
+
"ts_code": watchlist_row["ts_code"],
|
|
1919
|
+
"name": watchlist_row.get("name"),
|
|
1920
|
+
"industry": watchlist_row.get("industry"),
|
|
1921
|
+
"tracked_since": watchlist_row["tracked_since"],
|
|
1922
|
+
"tracked_days": tracked_days,
|
|
1923
|
+
# T-day snapshot (from watchlist row — the异动 day metrics)
|
|
1924
|
+
"anomaly_day": watchlist_row.get("last_screened"),
|
|
1925
|
+
"anomaly_pct_chg": watchlist_row.get("last_pct_chg"),
|
|
1926
|
+
"anomaly_body_ratio": watchlist_row.get("last_body_ratio"),
|
|
1927
|
+
"anomaly_turnover_rate": watchlist_row.get("last_turnover_rate"),
|
|
1928
|
+
"anomaly_vol_ratio_5d": watchlist_row.get("last_vol_ratio_5d"),
|
|
1929
|
+
# Latest market data
|
|
1930
|
+
"last_close": round2(last_close),
|
|
1931
|
+
"ma5": ma5,
|
|
1932
|
+
"ma10": ma10,
|
|
1933
|
+
"ma20": ma20,
|
|
1934
|
+
"ma60": ma60,
|
|
1935
|
+
"above_ma20": above_ma20,
|
|
1936
|
+
"above_ma60": above_ma60,
|
|
1937
|
+
"high_60d": high_60d,
|
|
1938
|
+
"low_60d": low_60d,
|
|
1939
|
+
"range_pct_60d": range_pct_60d,
|
|
1940
|
+
"pct_chg_60d": pct_chg_60d,
|
|
1941
|
+
# v0.3.0 P0-3 — VCP波动率收敛
|
|
1942
|
+
"atr_10d_pct": atr_10d_pct,
|
|
1943
|
+
"atr_10d_quantile_in_60d": atr_10d_quantile_in_60d,
|
|
1944
|
+
"bbw_20d": bbw_20d,
|
|
1945
|
+
"bbw_compression_ratio": bbw_compression_ratio,
|
|
1946
|
+
# v0.5.0 P1-1 — RPS / 大盘相对 alpha
|
|
1947
|
+
"alpha_5d_pct": alpha_5d_pct,
|
|
1948
|
+
"alpha_20d_pct": alpha_20d_pct,
|
|
1949
|
+
"alpha_60d_pct": alpha_60d_pct,
|
|
1950
|
+
"baseline_index_code": baseline_index_code,
|
|
1951
|
+
"rel_strength_label": rel_strength_label,
|
|
1952
|
+
# v0.3.0 P0-4 — 120d/250d 阻力位 (E3-A:不补 low_250d / pos_in_250d_range)
|
|
1953
|
+
"high_120d": high_120d,
|
|
1954
|
+
"high_250d": high_250d,
|
|
1955
|
+
"low_120d": low_120d,
|
|
1956
|
+
"dist_to_120d_high_pct": dist_to_120d_high_pct,
|
|
1957
|
+
"dist_to_250d_high_pct": dist_to_250d_high_pct,
|
|
1958
|
+
"is_above_120d_high": is_above_120d_high,
|
|
1959
|
+
"is_above_250d_high": is_above_250d_high,
|
|
1960
|
+
"pos_in_120d_range": pos_in_120d_range,
|
|
1961
|
+
# Washout / base features (the user's要求 #7 维度)
|
|
1962
|
+
"base_days": base_days,
|
|
1963
|
+
"base_max_drawdown_pct": base_max_drawdown_pct,
|
|
1964
|
+
"base_avg_vol": base_avg_vol,
|
|
1965
|
+
"base_vol_shrink_ratio": base_vol_shrink_ratio,
|
|
1966
|
+
"base_avg_turnover_rate": base_avg_turnover_rate,
|
|
1967
|
+
"days_since_last_limit_up": days_since_last_limit_up,
|
|
1968
|
+
"prior_limit_up_count_60d": len(prior_limit_ups),
|
|
1969
|
+
# Latest daily_basic
|
|
1970
|
+
"turnover_rate_t": round2(daily_basic.get("turnover_rate")),
|
|
1971
|
+
"volume_ratio_t": round2(daily_basic.get("volume_ratio")),
|
|
1972
|
+
"pe_t": round2(daily_basic.get("pe")),
|
|
1973
|
+
"pb_t": round2(daily_basic.get("pb")),
|
|
1974
|
+
"circ_mv_yi": normalize_to_yi("circ_mv", daily_basic.get("circ_mv")),
|
|
1975
|
+
"total_mv_yi": normalize_to_yi("total_mv", daily_basic.get("total_mv")),
|
|
1976
|
+
# Recent 5 OHLCV verbatim
|
|
1977
|
+
"recent_5d": recent5,
|
|
1978
|
+
# Moneyflow摘要 (5d)
|
|
1979
|
+
"moneyflow_5d_summary": mf_summary,
|
|
1980
|
+
}
|
|
1981
|
+
|
|
1982
|
+
|
|
1983
|
+
# ---------------------------------------------------------------------------
|
|
1984
|
+
# EVALUATE MODE — T+N realized-return computation (v0.4.0 P1-3)
|
|
1985
|
+
# ---------------------------------------------------------------------------
|
|
1986
|
+
|
|
1987
|
+
|
|
1988
|
+
def _resolve_horizon_dates(
    calendar: TradeCalendar,
    anomaly_date: str,
    horizons: tuple[int, ...] = EVALUATE_HORIZONS,
) -> dict[int, str]:
    """Map each horizon ``n`` to the trade date ``n`` open days after ``anomaly_date``.

    The calendar is walked one open day at a time through
    ``calendar.next_open``; which days count as open is decided entirely by
    that method.

    Args:
        calendar: Trade calendar; only its ``next_open(date)`` method is used.
        anomaly_date: The T day. It is handed to ``next_open`` unchanged and
            never appears in the result itself.
        horizons: Trade-day offsets to resolve. Offsets below 1 are never
            reached by the walk and are therefore absent from the result.

    Returns:
        ``{n: trade_date}`` for every reachable horizon, inserted in
        ascending ``n`` order. An empty ``horizons`` yields ``{}`` instead of
        failing inside ``max()``.

    Raises:
        Any exception raised by ``calendar.next_open`` propagates unchanged
        (presumably ``ValueError`` when the calendar has no further trade
        days — confirm against ``TradeCalendar``).
    """
    wanted = set(horizons)
    if not wanted:
        # Nothing requested: max() on an empty collection would raise.
        return {}

    resolved: dict[int, str] = {}
    cursor = anomaly_date
    # One next_open() call per trade day up to the farthest horizon; the
    # intermediate days are needed to advance the cursor even when unwanted.
    for step in range(1, max(wanted) + 1):
        cursor = calendar.next_open(cursor)
        if step in wanted:
            resolved[step] = cursor
    return resolved
|
|
2010
|
+
|
|
2011
|
+
|
|
2012
|
+
def _compute_realized_returns(
|
|
2013
|
+
*,
|
|
2014
|
+
t_close: float | None,
|
|
2015
|
+
horizon_closes: dict[int, float | None],
|
|
2016
|
+
window_5d_closes: list[float | None],
|
|
2017
|
+
window_10d_closes: list[float | None],
|
|
2018
|
+
) -> dict[str, float | None]:
|
|
2019
|
+
"""Convert raw OHLCV inputs into the realised-return scalar metrics
|
|
2020
|
+
persisted in ``va_realized_returns``.
|
|
2021
|
+
|
|
2022
|
+
Args:
|
|
2023
|
+
t_close: T-day close (basis for all percentage calcs).
|
|
2024
|
+
horizon_closes: ``{1: c1, 3: c3, 5: c5, 10: c10}``. Any missing horizon
|
|
2025
|
+
value is OK — it surfaces as ``None`` in the result.
|
|
2026
|
+
window_5d_closes: ordered closes for T+1..T+5 (length ≤ 5; may
|
|
2027
|
+
contain ``None`` for suspended days).
|
|
2028
|
+
window_10d_closes: same idea for T+1..T+10.
|
|
2029
|
+
|
|
2030
|
+
Output keys: ``ret_t1`` ``ret_t3`` ``ret_t5`` ``ret_t10`` ``max_close_5d``
|
|
2031
|
+
``max_close_10d`` ``max_ret_5d`` ``max_ret_10d`` ``max_dd_5d``.
|
|
2032
|
+
"""
|
|
2033
|
+
|
|
2034
|
+
def _pct(num: float | None) -> float | None:
|
|
2035
|
+
if num is None or t_close is None or t_close <= 0:
|
|
2036
|
+
return None
|
|
2037
|
+
return round((num / t_close - 1) * 100, 2)
|
|
2038
|
+
|
|
2039
|
+
out: dict[str, float | None] = {
|
|
2040
|
+
"ret_t1": _pct(horizon_closes.get(1)),
|
|
2041
|
+
"ret_t3": _pct(horizon_closes.get(3)),
|
|
2042
|
+
"ret_t5": _pct(horizon_closes.get(5)),
|
|
2043
|
+
"ret_t10": _pct(horizon_closes.get(10)),
|
|
2044
|
+
}
|
|
2045
|
+
valid_5 = [c for c in window_5d_closes if c is not None]
|
|
2046
|
+
valid_10 = [c for c in window_10d_closes if c is not None]
|
|
2047
|
+
out["max_close_5d"] = round(max(valid_5), 3) if valid_5 else None
|
|
2048
|
+
out["max_close_10d"] = round(max(valid_10), 3) if valid_10 else None
|
|
2049
|
+
out["max_ret_5d"] = _pct(out["max_close_5d"])
|
|
2050
|
+
out["max_ret_10d"] = _pct(out["max_close_10d"])
|
|
2051
|
+
# G2 决策: max_dd from T = (min(close[T+1..T+5]) - t_close) / t_close × 100
|
|
2052
|
+
out["max_dd_5d"] = (
|
|
2053
|
+
_pct(min(valid_5)) if valid_5 else None
|
|
2054
|
+
)
|
|
2055
|
+
return out
|
|
2056
|
+
|
|
2057
|
+
|
|
2058
|
+
def _classify_data_status(
|
|
2059
|
+
*,
|
|
2060
|
+
horizon_closes: dict[int, float | None],
|
|
2061
|
+
horizons: tuple[int, ...],
|
|
2062
|
+
today: str,
|
|
2063
|
+
horizon_dates: dict[int, str],
|
|
2064
|
+
) -> str:
|
|
2065
|
+
"""Determine ``data_status`` per the v3 G5 rule:
|
|
2066
|
+
|
|
2067
|
+
* ``pending`` — T+1 trade_date is still in the future (no horizon column
|
|
2068
|
+
can possibly be filled yet).
|
|
2069
|
+
* ``partial`` — max_horizon trade_date is in the future, OR any reachable
|
|
2070
|
+
horizon row has missing close (suspension / data gap).
|
|
2071
|
+
* ``complete`` — max_horizon is in the past AND every horizon was filled.
|
|
2072
|
+
"""
|
|
2073
|
+
if not horizon_dates:
|
|
2074
|
+
return "pending"
|
|
2075
|
+
h1_date = horizon_dates.get(min(horizons))
|
|
2076
|
+
if h1_date is not None and h1_date > today:
|
|
2077
|
+
return "pending"
|
|
2078
|
+
max_n = max(horizons)
|
|
2079
|
+
max_date = horizon_dates.get(max_n)
|
|
2080
|
+
max_reached = max_date is not None and max_date <= today
|
|
2081
|
+
all_filled = all(horizon_closes.get(n) is not None for n in horizons)
|
|
2082
|
+
if max_reached and all_filled:
|
|
2083
|
+
return "complete"
|
|
2084
|
+
return "partial"
|
|
2085
|
+
|
|
2086
|
+
|
|
2087
|
+
def fetch_anomaly_dates_within_lookback(
    db: Any, *, today: str, lookback_days: int
) -> list[tuple[str, str]]:
    """List ``(anomaly_date, ts_code)`` pairs from ``va_anomaly_history``.

    Rows are selected by ``trade_date >= cutoff``, where the cutoff is
    ``today`` shifted back by ``lookback_days`` via ``_shift_calendar_days``
    (presumably calendar days, per the helper's name). The query has no upper
    bound, so rows dated after ``today`` are not filtered out.

    Args:
        db: Database handle exposing ``fetchall(sql, params)``.
        today: Reference date the lookback window is anchored to.
        lookback_days: Window length; coerced with ``int()`` before use.

    Returns:
        Pairs ordered by ``trade_date`` then ``ts_code``, both stringified.
    """
    earliest = _shift_calendar_days(today, -int(lookback_days))
    query = (
        "SELECT trade_date, ts_code FROM va_anomaly_history "
        "WHERE trade_date >= ? ORDER BY trade_date, ts_code"
    )
    pairs: list[tuple[str, str]] = []
    for row in db.fetchall(query, (earliest,)):
        pairs.append((str(row[0]), str(row[1])))
    return pairs
|
|
2100
|
+
|
|
2101
|
+
|
|
2102
|
+
def upsert_realized_return(
|
|
2103
|
+
db: Any,
|
|
2104
|
+
*,
|
|
2105
|
+
anomaly_date: str,
|
|
2106
|
+
ts_code: str,
|
|
2107
|
+
t_close: float | None,
|
|
2108
|
+
horizon_closes: dict[int, float | None],
|
|
2109
|
+
metrics: dict[str, float | None],
|
|
2110
|
+
data_status: str,
|
|
2111
|
+
) -> None:
|
|
2112
|
+
"""UPSERT one row into ``va_realized_returns``."""
|
|
2113
|
+
db.execute(
|
|
2114
|
+
"DELETE FROM va_realized_returns WHERE anomaly_date=? AND ts_code=?",
|
|
2115
|
+
(anomaly_date, ts_code),
|
|
2116
|
+
)
|
|
2117
|
+
db.execute(
|
|
2118
|
+
"INSERT INTO va_realized_returns(anomaly_date, ts_code, t_close, "
|
|
2119
|
+
"t1_close, t3_close, t5_close, t10_close, "
|
|
2120
|
+
"ret_t1, ret_t3, ret_t5, ret_t10, "
|
|
2121
|
+
"max_close_5d, max_close_10d, max_ret_5d, max_ret_10d, max_dd_5d, "
|
|
2122
|
+
"data_status) "
|
|
2123
|
+
"VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
|
|
2124
|
+
(
|
|
2125
|
+
anomaly_date,
|
|
2126
|
+
ts_code,
|
|
2127
|
+
t_close,
|
|
2128
|
+
horizon_closes.get(1),
|
|
2129
|
+
horizon_closes.get(3),
|
|
2130
|
+
horizon_closes.get(5),
|
|
2131
|
+
horizon_closes.get(10),
|
|
2132
|
+
metrics.get("ret_t1"),
|
|
2133
|
+
metrics.get("ret_t3"),
|
|
2134
|
+
metrics.get("ret_t5"),
|
|
2135
|
+
metrics.get("ret_t10"),
|
|
2136
|
+
metrics.get("max_close_5d"),
|
|
2137
|
+
metrics.get("max_close_10d"),
|
|
2138
|
+
metrics.get("max_ret_5d"),
|
|
2139
|
+
metrics.get("max_ret_10d"),
|
|
2140
|
+
metrics.get("max_dd_5d"),
|
|
2141
|
+
data_status,
|
|
2142
|
+
),
|
|
2143
|
+
)
|
|
2144
|
+
|
|
2145
|
+
|
|
2146
|
+
def fetch_completed_realized_keys(db: Any) -> set[tuple[str, str]]:
    """Collect the keys of every ``va_realized_returns`` row marked complete.

    Subsequent evaluate runs use this set to skip rows that are already
    final, which keeps re-runs idempotent.

    Args:
        db: Database handle exposing ``fetchall(sql)``.

    Returns:
        Set of ``(anomaly_date, ts_code)`` pairs, both stringified, for rows
        whose ``data_status`` equals ``'complete'``.
    """
    query = (
        "SELECT anomaly_date, ts_code FROM va_realized_returns "
        "WHERE data_status = 'complete'"
    )
    finished: set[tuple[str, str]] = set()
    for row in db.fetchall(query):
        finished.add((str(row[0]), str(row[1])))
    return finished
|