aria-code 4.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (284) hide show
  1. agents/__init__.py +32 -0
  2. agents/base.py +190 -0
  3. agents/deep/__init__.py +37 -0
  4. agents/deep/calibration_loop.py +144 -0
  5. agents/deep/critic.py +125 -0
  6. agents/deep/deepen.py +193 -0
  7. agents/deep/models.py +149 -0
  8. agents/deep/pipeline.py +164 -0
  9. agents/deep/quant_fusion.py +192 -0
  10. agents/deep/themes.py +95 -0
  11. agents/deep/tiers.py +106 -0
  12. agents/financial/__init__.py +10 -0
  13. agents/financial/catalyst.py +279 -0
  14. agents/financial/debate.py +145 -0
  15. agents/financial/earnings.py +303 -0
  16. agents/financial/fundamental.py +159 -0
  17. agents/financial/macro.py +99 -0
  18. agents/financial/news.py +207 -0
  19. agents/financial/risk.py +132 -0
  20. agents/financial/sector.py +279 -0
  21. agents/financial/synthesis.py +274 -0
  22. agents/financial/technical.py +258 -0
  23. agents/portfolio_agent.py +333 -0
  24. agents/realty/__init__.py +62 -0
  25. agents/realty/asset_diagnosis.py +150 -0
  26. agents/realty/business_match.py +165 -0
  27. agents/realty/cashflow_verify.py +208 -0
  28. agents/realty/contract_rules.py +209 -0
  29. agents/realty/energy_anomaly.py +188 -0
  30. agents/realty/exit_settlement.py +207 -0
  31. agents/realty/fulfillment_risk.py +205 -0
  32. agents/realty/ops_optimize.py +159 -0
  33. agents/realty/revenue_share.py +214 -0
  34. agents/registry.py +144 -0
  35. agents/sports/__init__.py +0 -0
  36. agents/sports/football_agent.py +169 -0
  37. agents/team.py +289 -0
  38. aliyun_data_client.py +660 -0
  39. apps/README.md +12 -0
  40. apps/__init__.py +2 -0
  41. apps/channels/README.md +15 -0
  42. apps/cli/README.md +13 -0
  43. apps/cli/__init__.py +2 -0
  44. apps/cli/bootstrap.py +99 -0
  45. apps/cli/codegen_paths.py +29 -0
  46. apps/cli/commands/__init__.py +16 -0
  47. apps/cli/commands/analysis_cmds.py +288 -0
  48. apps/cli/commands/backtest_cmds.py +1887 -0
  49. apps/cli/commands/broker_cmds.py +1154 -0
  50. apps/cli/commands/business_workflow_cmds.py +289 -0
  51. apps/cli/commands/catalog.py +84 -0
  52. apps/cli/commands/data_cmds.py +405 -0
  53. apps/cli/commands/diagnostic_cmds.py +179 -0
  54. apps/cli/commands/diagnostic_ops_cmds.py +696 -0
  55. apps/cli/commands/finance_render.py +12 -0
  56. apps/cli/commands/market.py +399 -0
  57. apps/cli/commands/market_cmds.py +1276 -0
  58. apps/cli/commands/market_context.py +425 -0
  59. apps/cli/commands/market_render.py +7 -0
  60. apps/cli/commands/model_cmds.py +1579 -0
  61. apps/cli/commands/ops_cmds.py +668 -0
  62. apps/cli/commands/portfolio_cmds.py +962 -0
  63. apps/cli/commands/report.py +377 -0
  64. apps/cli/commands/scaffold_templates.py +617 -0
  65. apps/cli/commands/session_cmds.py +179 -0
  66. apps/cli/commands/session_ux_cmds.py +280 -0
  67. apps/cli/commands/team.py +588 -0
  68. apps/cli/commands/team_render.py +8 -0
  69. apps/cli/commands/ui_cmds.py +358 -0
  70. apps/cli/commands/workflow_cmds.py +279 -0
  71. apps/cli/commands/workspace_cmds.py +1414 -0
  72. apps/cli/config_paths.py +70 -0
  73. apps/cli/config_store.py +61 -0
  74. apps/cli/deterministic.py +122 -0
  75. apps/cli/direct.py +48 -0
  76. apps/cli/github_app_auth.py +135 -0
  77. apps/cli/handlers/__init__.py +11 -0
  78. apps/cli/handlers/broker_handlers.py +122 -0
  79. apps/cli/handlers/chart_handlers.py +1309 -0
  80. apps/cli/handlers/market_handlers.py +2509 -0
  81. apps/cli/handlers/realty_handlers.py +114 -0
  82. apps/cli/handlers/strategy_advice.py +82 -0
  83. apps/cli/hooks.py +180 -0
  84. apps/cli/i18n.py +284 -0
  85. apps/cli/intent.py +136 -0
  86. apps/cli/intent_router.py +217 -0
  87. apps/cli/lifecycle_hooks.py +48 -0
  88. apps/cli/main.py +29 -0
  89. apps/cli/market_metadata.py +135 -0
  90. apps/cli/market_universe.py +265 -0
  91. apps/cli/message_processing.py +257 -0
  92. apps/cli/plan_mode.py +139 -0
  93. apps/cli/plotly_html.py +15 -0
  94. apps/cli/prediction_feedback.py +202 -0
  95. apps/cli/preflight.py +497 -0
  96. apps/cli/project_aria.py +60 -0
  97. apps/cli/prompts/__init__.py +0 -0
  98. apps/cli/prompts/coding.py +658 -0
  99. apps/cli/prompts/system_prompts.py +531 -0
  100. apps/cli/prompts/ui.py +434 -0
  101. apps/cli/providers/__init__.py +1 -0
  102. apps/cli/providers/base.py +271 -0
  103. apps/cli/providers/chat_routing.py +80 -0
  104. apps/cli/providers/llm/__init__.py +1 -0
  105. apps/cli/providers/llm/ollama_stream.py +1170 -0
  106. apps/cli/providers/llm/sse_stream.py +216 -0
  107. apps/cli/providers/runtime_bridge.py +185 -0
  108. apps/cli/runtime_consumer.py +489 -0
  109. apps/cli/session_export.py +87 -0
  110. apps/cli/session_jsonl.py +207 -0
  111. apps/cli/session_store.py +112 -0
  112. apps/cli/todo_tracker.py +190 -0
  113. apps/cli/tools/__init__.py +40 -0
  114. apps/cli/tools/context.py +46 -0
  115. apps/cli/tools/file_tools.py +112 -0
  116. apps/cli/tools/market_tools.py +549 -0
  117. apps/cli/tools/notebook_tools.py +111 -0
  118. apps/cli/tools/system_tools.py +669 -0
  119. apps/cli/tools/write_tools.py +715 -0
  120. apps/cli/tradingview_bridge.py +434 -0
  121. apps/cli/update_check.py +152 -0
  122. apps/cli/utils/__init__.py +0 -0
  123. apps/cli/utils/market_detect.py +1578 -0
  124. apps/daemon/README.md +14 -0
  125. apps/vscode/README.md +115 -0
  126. apps/vscode/package.json +70 -0
  127. aria_cli.py +11636 -0
  128. aria_code-4.1.3.dist-info/METADATA +952 -0
  129. aria_code-4.1.3.dist-info/RECORD +284 -0
  130. aria_code-4.1.3.dist-info/WHEEL +5 -0
  131. aria_code-4.1.3.dist-info/entry_points.txt +2 -0
  132. aria_code-4.1.3.dist-info/licenses/LICENSE +121 -0
  133. aria_code-4.1.3.dist-info/top_level.txt +50 -0
  134. aria_daemon.py +1295 -0
  135. aria_feishu_bot.py +1359 -0
  136. aria_relay_client.py +182 -0
  137. aria_relay_server.py +405 -0
  138. aria_telegram_bot.py +202 -0
  139. ariarc.py +328 -0
  140. artifacts.py +491 -0
  141. backtest_report.py +472 -0
  142. brokers/__init__.py +72 -0
  143. brokers/base.py +207 -0
  144. brokers/capabilities.py +264 -0
  145. brokers/cn/__init__.py +10 -0
  146. brokers/cn/easytrader_broker.py +193 -0
  147. brokers/cn/futu_broker.py +194 -0
  148. brokers/cn/longbridge_broker.py +190 -0
  149. brokers/cn/tiger_broker.py +196 -0
  150. brokers/cn/xtquant_broker.py +175 -0
  151. brokers/config.py +364 -0
  152. brokers/intl/__init__.py +5 -0
  153. brokers/intl/alpaca_broker.py +183 -0
  154. brokers/intl/ibkr_broker.py +215 -0
  155. brokers/intl/webull_broker.py +156 -0
  156. brokers/paper_broker.py +259 -0
  157. brokers/planning.py +296 -0
  158. brokers/registry.py +181 -0
  159. brokers/trading.py +237 -0
  160. change_store.py +127 -0
  161. command_safety.py +19 -0
  162. computer_use_tools.py +504 -0
  163. dashboard_generator.py +578 -0
  164. data_analysis_tools.py +808 -0
  165. data_cleaner.py +483 -0
  166. data_service.py +481 -0
  167. datasources/__init__.py +23 -0
  168. datasources/base.py +166 -0
  169. datasources/router.py +221 -0
  170. datasources/sources/__init__.py +15 -0
  171. datasources/sources/akshare_source.py +269 -0
  172. datasources/sources/alpha_vantage_source.py +202 -0
  173. datasources/sources/edgar_source.py +218 -0
  174. datasources/sources/finnhub_source.py +197 -0
  175. datasources/sources/fred_source.py +219 -0
  176. datasources/sources/tushare_source.py +141 -0
  177. datasources/sources/web_scraper_source.py +278 -0
  178. datasources/sources/world_bank_source.py +205 -0
  179. datasources/sources/yfinance_source.py +152 -0
  180. demo_player.py +204 -0
  181. doctor.py +508 -0
  182. file_analysis_tools.py +734 -0
  183. finance_formulas.py +389 -0
  184. football_data_client.py +1670 -0
  185. intent_classifier.py +358 -0
  186. local_finance_tools.py +3221 -0
  187. local_llm_provider.py +552 -0
  188. macro_tools.py +368 -0
  189. market_data_client.py +1899 -0
  190. mcp_client.py +506 -0
  191. memory_manager.py +245 -0
  192. model_capability.py +416 -0
  193. notification_tools.py +248 -0
  194. packages/__init__.py +23 -0
  195. packages/aria_agents/__init__.py +5 -0
  196. packages/aria_agents/manifest.py +69 -0
  197. packages/aria_core/__init__.py +34 -0
  198. packages/aria_core/architecture.py +192 -0
  199. packages/aria_core/export.py +124 -0
  200. packages/aria_core/manifest.py +65 -0
  201. packages/aria_infra/__init__.py +15 -0
  202. packages/aria_infra/arthera.py +52 -0
  203. packages/aria_infra/doctor.py +246 -0
  204. packages/aria_infra/product.py +37 -0
  205. packages/aria_mcp/__init__.py +25 -0
  206. packages/aria_mcp/bridge.py +38 -0
  207. packages/aria_mcp/config.py +97 -0
  208. packages/aria_mcp/tools.py +61 -0
  209. packages/aria_sdk/__init__.py +19 -0
  210. packages/aria_sdk/client.py +396 -0
  211. packages/aria_sdk/providers.py +70 -0
  212. packages/aria_sdk/streaming.py +73 -0
  213. packages/aria_sdk/types.py +86 -0
  214. packages/aria_services/__init__.py +55 -0
  215. packages/aria_services/context.py +258 -0
  216. packages/aria_services/data.py +11 -0
  217. packages/aria_services/provider_health.py +189 -0
  218. packages/aria_services/registry.py +213 -0
  219. packages/aria_services/usage.py +138 -0
  220. packages/aria_skills/__init__.py +5 -0
  221. packages/aria_skills/registry.py +59 -0
  222. packages/aria_tools/__init__.py +5 -0
  223. packages/aria_tools/registry.py +128 -0
  224. packages/quant_engine/__init__.py +6 -0
  225. packages/quant_engine/sports/__init__.py +72 -0
  226. packages/quant_engine/sports/calibrator.py +353 -0
  227. packages/quant_engine/sports/dixon_coles.py +234 -0
  228. packages/quant_engine/sports/elo.py +299 -0
  229. packages/quant_engine/sports/form.py +188 -0
  230. packages/quant_engine/sports/h2h.py +195 -0
  231. packages/quant_engine/sports/ml_model.py +354 -0
  232. packages/quant_engine/sports/predictor.py +311 -0
  233. packages/quant_engine/sports/tracker.py +664 -0
  234. packages/quant_engine/stochastic/__init__.py +27 -0
  235. packages/quant_engine/stochastic/gbm_enhanced.py +195 -0
  236. packages/quant_engine/stochastic/ito_calculus.py +477 -0
  237. packages/quant_engine/stochastic/kelly_criterion.py +181 -0
  238. packages/quant_engine/stochastic/monte_carlo_advanced.py +95 -0
  239. packages/quant_engine/stochastic/options_pricing.py +573 -0
  240. packages/quant_engine/stochastic/stochastic_processes.py +90 -0
  241. plan_utils.py +194 -0
  242. plugin_loader.py +328 -0
  243. portfolio_ledger.py +262 -0
  244. privacy/__init__.py +5 -0
  245. privacy/feedback.py +123 -0
  246. project_tools.py +525 -0
  247. providers/__init__.py +30 -0
  248. providers/llm/__init__.py +19 -0
  249. providers/llm/anthropic.py +184 -0
  250. providers/llm/base.py +139 -0
  251. providers/llm/ollama.py +128 -0
  252. providers/llm/openai_compat.py +282 -0
  253. providers/llm/registry.py +358 -0
  254. realty_data_tools.py +659 -0
  255. report_generator.py +1314 -0
  256. runtime/__init__.py +103 -0
  257. runtime/agent_loop.py +1183 -0
  258. runtime/approval.py +51 -0
  259. runtime/events.py +102 -0
  260. runtime/gateway.py +128 -0
  261. runtime/lsp.py +346 -0
  262. runtime/subagent.py +258 -0
  263. runtime/tool_executor.py +104 -0
  264. runtime/tool_policy.py +106 -0
  265. safety/__init__.py +21 -0
  266. safety/permissions.py +275 -0
  267. setup_wizard.py +653 -0
  268. strategy_vault.py +420 -0
  269. ui/__init__.py +100 -0
  270. ui/banner.py +310 -0
  271. ui/completer.py +391 -0
  272. ui/console.py +271 -0
  273. ui/image_render.py +243 -0
  274. ui/input_box.py +376 -0
  275. ui/picker.py +195 -0
  276. ui/render/__init__.py +11 -0
  277. ui/render/finance.py +1480 -0
  278. ui/render/market.py +225 -0
  279. ui/render/output.py +681 -0
  280. ui/render/team.py +346 -0
  281. ui/robot.py +235 -0
  282. workspace/__init__.py +6 -0
  283. workspace/files.py +170 -0
  284. workspace/verify.py +113 -0
data_cleaner.py ADDED
@@ -0,0 +1,483 @@
1
+ """
2
+ data_cleaner.py — Bloomberg-grade 数据清洗流水线
3
+ =================================================
4
+ 提供:
5
+ · OHLCV 完整性验证(High≥Low, High≥O/C, Volume≥0)
6
+ · 滚动 Z-score 异常值检测(区分涨跌停 vs 数据错误)
7
+ · 交易日历感知缺口检测(区分节假日 vs 真实数据缺失)
8
+ · 前复权/后复权价格(yfinance auto_adjust + akshare qfq)
9
+ · Point-in-Time 财务摘要(使用发布日版本,防止 lookahead bias)
10
+ · 幸存者偏差标注(尝试检测已退市标的)
11
+ · 数据质量评分(0–100)
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import logging
17
+ import re
18
+ from dataclasses import dataclass, field
19
+ from datetime import datetime, timedelta
20
+ from typing import Dict, List, Optional, Tuple
21
+
22
+ import numpy as np
23
+ import pandas as pd
24
+
25
+ logger = logging.getLogger(__name__)
26
+
27
+ _IS_A_SHARE = re.compile(r"^[036]\d{5}$").match
28
+
29
+
30
+ # ── Data Classes ──────────────────────────────────────────────────────────────
31
+
32
@dataclass
class ValidationIssue:
    """A single integrity finding tied to one row of a price frame."""

    # Index label of the offending row, exactly as it appears in the frame.
    row_index: object
    # Column the finding refers to (e.g. "High", "Low", "High/Low", "Volume").
    column: str
    # Category tag: "invalid_ohlcv" | "outlier" | "negative_volume".
    issue_type: str
    # The numeric value that triggered the finding.
    value: float
    # Short human-readable explanation of the violation.
    description: str
39
+
40
+
41
@dataclass
class DataGap:
    """A run of calendar days missing between two consecutive observations."""

    # Date of the last observation before the gap, as a string.
    start: str
    # Date of the first observation after the gap, as a string.
    end: str
    # Number of calendar days missing between start and end.
    days: int
    # Classification: "holiday" | "data_gap" | "suspension".
    kind: str
47
+
48
+
49
@dataclass
class CleanResult:
    """Cleaned price frame bundled with the diagnostics collected for it."""

    # The cleaned frame.
    df: pd.DataFrame
    # Integrity findings reported by validation.
    issues: List[ValidationIssue] = field(default_factory=list)
    # Calendar gaps found between consecutive rows.
    gaps: List[DataGap] = field(default_factory=list)
    # Number of rows flagged as return outliers.
    outlier_count: int = 0
    # Number of missing price values that were imputed.
    fill_count: int = 0
    # Overall score between 0 and 100; higher is better.
    quality_score: float = 100.0

    @property
    def real_gap_days(self) -> int:
        """Total missing days over the gaps classified as "data_gap"."""
        total = 0
        for gap in self.gaps:
            if gap.kind == "data_gap":
                total += gap.days
        return total

    def summary(self) -> str:
        """Return a one-line report: score, outliers, gap days, filled values."""
        parts = [
            f"质量评分 {self.quality_score:.1f}/100",
            f"异常值 {self.outlier_count} 条",
            f"数据缺口 {self.real_gap_days} 天",
            f"填充 {self.fill_count} 行",
        ]
        return " · ".join(parts)
69
+
70
+
71
+ # ── OHLCV Validation ──────────────────────────────────────────────────────────
72
+
73
def validate_ohlcv(df: pd.DataFrame) -> List[ValidationIssue]:
    """
    Check every row for OHLCV integrity: H>=L, H>=O, H>=C, L<=O, L<=C, V>=0.

    Column names are matched case-insensitively. Price comparisons tolerate
    floating-point noise through a 1e-6 epsilon and are only made when both
    High and Low are positive (Open/Close are additionally required to be
    positive for the checks that involve them).

    Rows are walked positionally, so rows that share an index label are each
    checked individually instead of being skipped.

    Args:
        df: Frame with at least open/high/low/close columns; a volume column
            is optional.

    Returns:
        One ValidationIssue per violated rule. The list is empty when any of
        the four price columns is missing.
    """
    issues: List[ValidationIssue] = []
    # Resolve column names case-insensitively; str() keeps non-string labels
    # (for example integer column names) from raising on .lower().
    col = {str(c).lower(): c for c in df.columns}
    h_c = col.get("high")
    l_c = col.get("low")
    o_c = col.get("open")
    c_c = col.get("close")
    v_c = col.get("volume")

    if any(name is None for name in (h_c, l_c, o_c, c_c)):
        return issues

    eps = 1e-6
    # Without a volume column every row is treated as zero volume, which can
    # never be reported as negative.
    volumes = df[v_c] if v_c is not None else [0] * len(df)
    rows = zip(df.index, df[h_c], df[l_c], df[o_c], df[c_c], volumes)

    for idx, raw_h, raw_l, raw_o, raw_c, raw_v in rows:
        try:
            # "or 0" maps None and 0 to 0; NaN stays NaN and fails the ">0"
            # guards below, so rows with missing prices yield no price issue.
            h = float(raw_h or 0)
            l = float(raw_l or 0)
            o = float(raw_o or 0)
            c = float(raw_c or 0)
        except (TypeError, ValueError):
            # Unparseable price: skip the whole row, volume included.
            continue

        if h > 0 and l > 0:
            if h < l - eps:
                issues.append(ValidationIssue(idx, "High/Low", "invalid_ohlcv", h,
                                              f"H({h:.4f})<L({l:.4f})"))
            if o > 0 and h < o - eps:
                issues.append(ValidationIssue(idx, "High", "invalid_ohlcv", h,
                                              f"H({h:.4f})<O({o:.4f})"))
            if c > 0 and h < c - eps:
                issues.append(ValidationIssue(idx, "High", "invalid_ohlcv", h,
                                              f"H({h:.4f})<C({c:.4f})"))
            if o > 0 and l > o + eps:
                issues.append(ValidationIssue(idx, "Low", "invalid_ohlcv", l,
                                              f"L({l:.4f})>O({o:.4f})"))
            if c > 0 and l > c + eps:
                issues.append(ValidationIssue(idx, "Low", "invalid_ohlcv", l,
                                              f"L({l:.4f})>C({c:.4f})"))

        if v_c is not None:
            try:
                v = float(raw_v or 0)
            except (TypeError, ValueError):
                # Unparseable volume is ignored rather than reported.
                continue
            if v < 0:
                issues.append(ValidationIssue(idx, "Volume", "negative_volume",
                                              v, f"V({v})<0"))

    return issues
127
+
128
+
129
+ # ── Outlier Detection ─────────────────────────────────────────────────────────
130
+
131
def detect_outliers_zscore(
    series: pd.Series,
    window: int = 20,
    threshold: float = 4.0,
) -> pd.Series:
    """
    Flag outliers with a rolling Z-score on period-over-period returns.

    Returns are taken with ``pct_change``; the rolling mean and standard
    deviation are computed over the non-missing returns and include the
    current observation.

    NOTE: because the current return is part of its own window, |z| cannot
    exceed (n - 1) / sqrt(n) for a window holding n returns (about 4.25 for
    n = 20). With the default threshold only returns far outside the recent
    range are flagged, which keeps ordinary limit-up/limit-down days from
    being reported.

    Args:
        series: Price series.
        window: Rolling window length, in returns.
        threshold: Absolute Z-score above which a row is flagged.

    Returns:
        Boolean Series on the index of ``series`` (True = outlier). Rows with
        no computable return or Z-score are False.
    """
    returns = series.pct_change()
    valid = returns.notna().to_numpy()
    observed = returns[valid]

    # min_periods may not exceed the window, so clamp it for small windows.
    roller = observed.rolling(window=window, min_periods=min(5, window))
    roll_mu = roller.mean()
    # A zero deviation would divide by zero; turn it into NaN (never flagged).
    roll_sig = roller.std().replace(0, np.nan) + 1e-10
    z = (observed - roll_mu) / roll_sig

    # Write the flags back by position, not by label, so duplicate index
    # labels cannot break alignment and the mask keeps a plain bool dtype.
    flags = np.zeros(len(series), dtype=bool)
    flags[valid] = (z.abs() > threshold).to_numpy()
    return pd.Series(flags, index=series.index)
151
+
152
+
153
+ # ── Data Gap Detection ────────────────────────────────────────────────────────
154
+
155
def detect_data_gaps(df: pd.DataFrame, market: str = "US") -> List[DataGap]:
    """
    Classify calendar gaps between consecutive rows of a date-indexed frame.

    For each pair of neighbouring index entries the whole-day difference is
    evaluated as follows:

      * 1 day or less                      -> no gap
      * exactly 3 days starting on Friday  -> ordinary weekend, no gap
      * up to 5 days                       -> "holiday"
      * 6 to 10 days                       -> "suspension"
      * more than 10 days                  -> "data_gap"

    Args:
        df: Frame whose index is convertible to a DatetimeIndex.
        market: Accepted for interface compatibility; the heuristic does not
            use it.

    Returns:
        One DataGap per reported gap, whose ``days`` is the difference minus
        one. Frames with fewer than two rows yield an empty list.
    """
    if len(df) < 2:
        return []

    stamps = pd.DatetimeIndex(df.index)
    found: List[DataGap] = []

    for prev, curr in zip(stamps[:-1], stamps[1:]):
        delta = (curr - prev).days

        # Consecutive days and the plain Friday -> Monday weekend are normal.
        if delta <= 1 or (prev.weekday() == 4 and delta == 3):
            continue

        if delta <= 5:
            kind = "holiday"
        else:
            # Mid-length gaps are treated as a probable trading suspension.
            kind = "suspension" if delta <= 10 else "data_gap"

        found.append(
            DataGap(
                start=str(prev.date()),
                end=str(curr.date()),
                days=delta - 1,
                kind=kind,
            )
        )

    return found
195
+
196
+
197
+ # ── Main Cleaning Pipeline ────────────────────────────────────────────────────
198
+
199
def clean_price_series(
    df: pd.DataFrame,
    symbol: str = "",
    outlier_threshold: float = 4.0,
) -> CleanResult:
    """
    Clean an OHLCV frame in 7 stages:

    1. Normalize column names (case-insensitive); when an adjusted close is
       present, switch the price columns to the adjusted basis
    2. Drop rows in which every price column is NaN
    3. OHLCV integrity validation
    4. Rolling Z-score outlier detection (tagged in ``_outlier``, not removed)
    5. Forward- then backward-fill price NaNs; Volume NaN -> 0
    6. Data gap classification
    7. Quality scoring

    Args:
        df: Raw price frame; it is copied, never modified in place.
        symbol: Accepted for interface compatibility; not used by the
            cleaning steps.
        outlier_threshold: Z-score threshold handed to the outlier detector.

    Returns:
        CleanResult holding the cleaned frame and all diagnostics.
    """
    df = df.copy()

    # 1 — Normalize column names
    df.columns = [_normalise_col(c) for c in df.columns]
    if "Adj Close" in df.columns:
        if "Close" in df.columns:
            # Put Open/High/Low on the same adjusted basis as Close. Replacing
            # only Close would leave raw O/H/L next to an adjusted Close and
            # make the integrity check report violations that are not real.
            factor = df["Adj Close"] / df["Close"].replace(0, np.nan)
            for name in ("Open", "High", "Low"):
                if name in df.columns:
                    # Rows without a usable factor keep their original value.
                    df[name] = df[name].where(factor.isna(), df[name] * factor)
        df["Close"] = df["Adj Close"]

    # 2 — Drop all-NaN rows
    ohlc = [c for c in ("Open", "High", "Low", "Close") if c in df.columns]
    if ohlc:
        # With an empty subset, how="all" would discard every row.
        df = df.dropna(subset=ohlc, how="all")

    # 3 — Validate
    issues = validate_ohlcv(df)

    # 4 — Outlier detection
    outlier_mask = pd.Series(False, index=df.index)
    if "Close" in df.columns:
        outlier_mask = detect_outliers_zscore(df["Close"], threshold=outlier_threshold)
    df["_outlier"] = outlier_mask
    outlier_count = int(outlier_mask.sum())

    # 5 — Fill NaN (fill_count counts missing price cells, not rows)
    fill_count = 0
    if ohlc:
        fill_count = int(df[ohlc].isna().sum().sum())
        df[ohlc] = df[ohlc].ffill().bfill()
    if "Volume" in df.columns:
        df["Volume"] = df["Volume"].fillna(0)

    # 6 — Gaps
    gaps = detect_data_gaps(df)

    # 7 — Quality score (penalty-based, scaled by the number of rows)
    n = max(len(df), 1)
    penalty = (
        len(issues) * 2.0 +                                      # OHLCV violations
        outlier_count * 0.5 +                                    # outliers (soft)
        fill_count * 0.3 +                                       # imputed values
        sum(g.days for g in gaps if g.kind == "data_gap") * 5.0  # real gaps
    ) / n * 10
    quality_score = round(max(0.0, min(100.0, 100.0 - penalty)), 1)

    return CleanResult(
        df=df,
        issues=issues,
        gaps=gaps,
        outlier_count=outlier_count,
        fill_count=fill_count,
        quality_score=quality_score,
    )
263
+
264
+
265
+ def _normalise_col(name: str) -> str:
266
+ mapping = {
267
+ "open": "Open", "high": "High", "low": "Low",
268
+ "close": "Close", "volume": "Volume",
269
+ "adj close": "Adj Close", "adj_close": "Adj Close",
270
+ "turnover": "Turnover",
271
+ }
272
+ return mapping.get(name.lower(), name.capitalize())
273
+
274
+
275
+ # ── Public Data API ───────────────────────────────────────────────────────────
276
+
277
def get_clean_prices(
    symbol: str,
    period: str = "1y",
    auto_adjust: bool = True,
) -> Tuple[pd.DataFrame, CleanResult]:
    """
    Fetch a price history and run it through the cleaning pipeline.

    Symbols matching the A-share code pattern go to the A-share fetcher, all
    others to the yfinance fetcher. A failed fetch is logged as a warning and
    reported as an empty frame instead of raising.

    Args:
        symbol: Ticker or six-digit A-share code.
        period: Look-back period string handed to the fetcher.
        auto_adjust: Whether adjusted prices are requested.

    Returns:
        ``(clean_df, CleanResult)``. When the fetch fails or returns no rows,
        the frame is empty and the result carries a quality score of 0.0.
    """
    try:
        if _IS_A_SHARE(symbol):
            raw = _fetch_a_prices(symbol, period, auto_adjust)
        else:
            raw = _fetch_us_prices(symbol, period, auto_adjust)
    except Exception as exc:
        logger.warning("[cleaner] fetch %s: %s", symbol, exc)
        blank = pd.DataFrame()
        return blank, CleanResult(blank, quality_score=0.0)

    if raw.empty:
        return raw, CleanResult(raw, quality_score=0.0)

    cleaned = clean_price_series(raw, symbol)
    return cleaned.df, cleaned
302
+
303
+
304
def get_fundamentals(symbol: str) -> Dict:
    """
    Fetch key financial metrics as a flat dict.

    Symbols matching the A-share code pattern use the A-share lookup, all
    others the yfinance lookup. Missing numeric values are None; a few text
    fields (sector, industry, exchange, recommendation, description) may be
    empty strings instead. The set of keys differs between the two lookups.

    If the lookup raises, the error is logged at debug level and a minimal
    dict with only ``company_name``, ``symbol`` and ``currency`` is returned.
    """
    try:
        lookup = _get_a_fundamentals if _IS_A_SHARE(symbol) else _get_us_fundamentals
        return lookup(symbol)
    except Exception as exc:
        logger.debug("[cleaner] fundamentals %s: %s", symbol, exc)
        currency = "CNY" if _IS_A_SHARE(symbol) else "USD"
        return {"company_name": symbol, "symbol": symbol, "currency": currency}
319
+
320
+
321
+ # ── Internal Fetchers ─────────────────────────────────────────────────────────
322
+
323
def _fetch_us_prices(symbol: str, period: str, auto_adjust: bool) -> pd.DataFrame:
    """Download OHLCV history through yfinance.

    Returns the Open/High/Low/Close/Volume columns with any timezone removed
    from the index. An empty download is returned unchanged.
    """
    import yfinance as yf

    history = yf.Ticker(symbol).history(period=period, auto_adjust=auto_adjust)
    if history.empty:
        return history

    frame = history[["Open", "High", "Low", "Close", "Volume"]].copy()
    # Drop the timezone so the index is naive.
    if getattr(frame.index, "tz", None) is not None:
        frame.index = frame.index.tz_localize(None)
    return frame
332
+
333
+
334
def _fetch_a_prices(symbol: str, period: str, auto_adjust: bool) -> pd.DataFrame:
    """Download daily A-share history via akshare, else via yfinance.

    ``period`` is translated to a look-back in calendar days; unrecognised
    periods fall back to 370 days. With ``auto_adjust`` the akshare request
    asks for "qfq" adjusted prices. The yfinance fallback (".SS" suffix for
    codes starting with 6 or 5, ".SZ" otherwise) is used only when an
    ImportError occurs; other errors propagate to the caller.

    Raises:
        ValueError: If akshare returns no data.
    """
    lookback = {"1mo": 35, "3mo": 95, "6mo": 185, "1y": 370, "2y": 740, "5y": 1830}
    span_days = lookback.get(period, 370)
    end = datetime.now()
    start = end - timedelta(days=span_days)

    try:
        import akshare as ak
        import os as _dc_os

        adjust_mode = "qfq" if auto_adjust else ""

        # AKShare creates its own requests session and routes through the
        # system proxy, but numbered push2his.eastmoney.com subdomains are not
        # reachable via the local Clash VPN, so the proxy environment
        # variables are removed for this call only and restored afterwards.
        saved_proxy = {}
        for var in ("HTTP_PROXY", "HTTPS_PROXY", "http_proxy", "https_proxy"):
            saved_proxy[var] = _dc_os.environ.pop(var, None)
        try:
            raw = ak.stock_zh_a_hist(
                symbol=symbol,
                period="daily",
                start_date=start.strftime("%Y%m%d"),
                end_date=end.strftime("%Y%m%d"),
                adjust=adjust_mode,
            )
        finally:
            for var, previous in saved_proxy.items():
                if previous is not None:
                    _dc_os.environ[var] = previous

        if raw is None or raw.empty:
            raise ValueError("empty response")

        # Translate the Chinese column headers to the OHLCV names.
        raw = raw.rename(columns={
            "日期": "Date",
            "开盘": "Open",
            "最高": "High",
            "最低": "Low",
            "收盘": "Close",
            "成交量": "Volume",
        })
        raw["Date"] = pd.to_datetime(raw["Date"])
        raw = raw.set_index("Date").sort_index()

        wanted = ["Open", "High", "Low", "Close", "Volume"]
        # Any expected column missing from the response is added as all-NaN.
        for name in wanted:
            if name not in raw.columns:
                raw[name] = np.nan
        return raw[wanted]
    except ImportError:
        pass

    # Fallback: yfinance with exchange suffix
    suffix = ".SS" if symbol[:1] in ("6", "5") else ".SZ"
    return _fetch_us_prices(symbol + suffix, period, auto_adjust)
377
+
378
+
379
def _get_us_fundamentals(symbol: str) -> Dict:
    """Build the standardised fundamentals dict from yfinance's ``info``.

    Fields absent from ``info`` are None, except for sector, industry,
    exchange and recommendation (empty string), currency ("USD"),
    company_name (the symbol itself) and description (empty string). The
    description is truncated to 600 characters.
    """
    import yfinance as yf

    info = yf.Ticker(symbol).info or {}

    fields = {
        "company_name": info.get("longName", symbol),
        "symbol": symbol,
    }
    for text_key in ("sector", "industry", "exchange"):
        fields[text_key] = info.get(text_key, "")
    fields["currency"] = info.get("currency", "USD")
    fields["market_cap"] = info.get("marketCap")
    # Prefer the current price and fall back to the regular-market price.
    fields["price"] = info.get("currentPrice") or info.get("regularMarketPrice")

    # (output key, yfinance key) pairs copied straight through, None if absent.
    before_recommendation = (
        ("prev_close", "previousClose"),
        ("open", "open"),
        ("volume", "volume"),
        ("avg_volume", "averageVolume"),
        ("pe_ratio", "trailingPE"),
        ("forward_pe", "forwardPE"),
        ("pb_ratio", "priceToBook"),
        ("ps_ratio", "priceToSalesTrailing12Months"),
        ("eps_ttm", "trailingEps"),
        ("eps_forward", "forwardEps"),
        ("revenue", "totalRevenue"),
        ("revenue_growth", "revenueGrowth"),
        ("earnings_growth", "earningsGrowth"),
        ("gross_margin", "grossMargins"),
        ("operating_margin", "operatingMargins"),
        ("net_margin", "profitMargins"),
        ("roe", "returnOnEquity"),
        ("roa", "returnOnAssets"),
        ("debt_equity", "debtToEquity"),
        ("current_ratio", "currentRatio"),
        ("quick_ratio", "quickRatio"),
        ("free_cashflow", "freeCashflow"),
        ("dividend_yield", "dividendYield"),
        ("payout_ratio", "payoutRatio"),
        ("beta", "beta"),
        ("52w_high", "fiftyTwoWeekHigh"),
        ("52w_low", "fiftyTwoWeekLow"),
        ("analyst_target", "targetMeanPrice"),
        ("analyst_low", "targetLowPrice"),
        ("analyst_high", "targetHighPrice"),
        ("analyst_count", "numberOfAnalystOpinions"),
    )
    after_recommendation = (
        ("short_ratio", "shortRatio"),
        ("shares_out", "sharesOutstanding"),
        ("float_shares", "floatShares"),
    )

    for out_key, src_key in before_recommendation:
        fields[out_key] = info.get(src_key)
    fields["recommendation"] = info.get("recommendationKey", "")
    for out_key, src_key in after_recommendation:
        fields[out_key] = info.get(src_key)

    summary = info.get("longBusinessSummary") or ""
    fields["description"] = summary[:600]
    return fields
428
+
429
+
430
def _get_a_fundamentals(symbol: str) -> Dict:
    """Build the fundamentals dict for an A-share code.

    akshare's ``stock_individual_info_em`` is tried first. If it cannot be
    imported, fails, or returns nothing, the failure is logged at debug level
    and the yfinance lookup is used with an exchange suffix (".SS" for codes
    starting with 6 or 5, ".SZ" otherwise); currency and exchange are then
    forced to the A-share values.
    """
    try:
        import akshare as ak
        import os as _dc_os2

        # Remove the proxy environment variables for this call only and
        # restore them afterwards.
        _dc_proxy_bk2 = {k: _dc_os2.environ.pop(k, None)
                         for k in ("HTTP_PROXY", "HTTPS_PROXY", "http_proxy", "https_proxy")}
        try:
            df = ak.stock_individual_info_em(symbol=symbol)
        finally:
            for _k, _v in _dc_proxy_bk2.items():
                if _v is not None:
                    _dc_os2.environ[_k] = _v
        if df is None or df.empty:
            raise ValueError("empty")

        # The response is read as rows of (item, value): first column is the
        # key, second column the value.
        info = {str(row.iloc[0]): row.iloc[1] for _, row in df.iterrows()}
        return {
            "company_name": info.get("股票简称", symbol),
            "symbol": symbol,
            # Sector and industry are both read from the same "行业" item.
            "sector": info.get("行业", ""),
            "industry": info.get("行业", ""),
            "exchange": "SSE" if symbol[:1] in ("6", "5") else "SZSE",
            "currency": "CNY",
            "market_cap": _safe_float(info.get("总市值")),
            "price": _safe_float(info.get("最新价")),
            "pe_ratio": _safe_float(info.get("市盈率(动)")),
            "pb_ratio": _safe_float(info.get("市净率")),
            "roe": _safe_float(info.get("净资产收益率")),
            "dividend_yield": _safe_float(info.get("股息率(%)")),
            "52w_high": _safe_float(info.get("52周最高")),
            "52w_low": _safe_float(info.get("52周最低")),
            "eps_ttm": _safe_float(info.get("每股收益")),
            "revenue": None,
        }
    except Exception as exc:
        # Best-effort source: leave a trace of the failure, then fall back.
        logger.debug("[cleaner] akshare fundamentals %s failed, "
                     "falling back to yfinance: %s", symbol, exc)

    suffix = ".SS" if symbol[:1] in ("6", "5") else ".SZ"
    result = _get_us_fundamentals(symbol + suffix)
    # yfinance may return USD and an English name for A-share symbols;
    # override to correct values
    result["currency"] = "CNY"
    result["exchange"] = "SSE" if symbol[:1] in ("6", "5") else "SZSE"
    # if yfinance returned the suffixed symbol as name, strip it back
    if result.get("company_name") in (symbol, symbol + suffix):
        result["company_name"] = symbol
    return result
474
+
475
+
476
+ def _safe_float(val) -> Optional[float]:
477
+ if val is None:
478
+ return None
479
+ try:
480
+ s = str(val).replace(",", "").replace("%", "").strip()
481
+ return float(s) if s and s not in ("--", "-", "N/A", "nan") else None
482
+ except ValueError:
483
+ return None