cyberagent 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cyberagent/__init__.py ADDED
@@ -0,0 +1,40 @@
1
+ """cyberagent — TradingAgents for every market.
2
+
3
+ A 5-department LLM analyst chain unified across stocks (A-share / HK / US) and
4
+ crypto (token / contract address). The analytical soul is a physical-bottleneck
5
+ reverse-consensus 5-step chain. Bring your own LLM key.
6
+
7
+ from cyberagent import AnalystChain
8
+
9
+ chain = AnalystChain(llm="gemini", api_key="...", lang="zh")
10
+ report = await chain.analyze("NVDA")
11
+ print(report.final_decision, report.confidence)
12
+ print(report.departments["industry"].markdown)
13
+
14
+ Public API:
15
+ AnalystChain — main entry, await chain.analyze(symbol)
16
+ AssetClassifier — unified routing for A-share / HK / US / crypto / EVM
17
+ classify — functional form of the classifier
18
+ LLMAdapter — OpenAI / Gemini / Claude / DeepSeek + custom
19
+ MockLLM — offline adapter for tests/examples
20
+ AnalystReport / DeptReport / AssetInfo — Pydantic structured output
21
+ """
22
+
23
+ from .chain import AnalystChain
24
+ from .classifier import AssetClassifier, classify
25
+ from .llm_adapter import LLMAdapter, MockLLM
26
+ from .models import AnalystReport, AssetInfo, DeptReport
27
+
28
+ __version__ = "0.1.0"
29
+
30
+ __all__ = [
31
+ "__version__",
32
+ "AnalystChain",
33
+ "AssetClassifier",
34
+ "classify",
35
+ "LLMAdapter",
36
+ "MockLLM",
37
+ "AnalystReport",
38
+ "DeptReport",
39
+ "AssetInfo",
40
+ ]
@@ -0,0 +1,31 @@
1
+ """Data adapters — route an AssetInfo to the right source and return a markdown
2
+ data block the departments can read.
3
+
4
+ fetch(asset_info) -> (data_md, meta)
5
+
6
+ All adapters degrade gracefully: on missing libs / no network / errors they
7
+ return ("", {"company_name": code}) so the analyst chain still runs on LLM
8
+ reasoning alone.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from typing import Tuple
14
+
15
+ from ..models import AssetInfo
16
+ from . import crypto, stock
17
+
18
+ __all__ = ["fetch", "crypto", "stock"]
19
+
20
+
21
async def fetch(asset_info: AssetInfo, *, timeout: float = 10.0) -> Tuple[str, dict]:
    """Route an asset to its data adapter and return ``(data_md, meta)``.

    Crypto-like types ("token", "evm_contract", "solana_contract") go to
    ``crypto.fetch``; stock types ("stock_cn", "stock_us", "stock_hk") go to
    ``stock.fetch``. Any other type, or any exception raised by an adapter,
    yields the fallback ``("", {"company_name": asset_info.code})``.

    Args:
        asset_info: Classified asset; only ``type`` and ``code`` are read here.
        timeout: Seconds, forwarded unchanged to the selected adapter.

    Returns:
        A ``(markdown, meta)`` tuple; ``meta`` always has ``"company_name"``.
    """
    import logging

    kind = asset_info.type
    try:
        if kind in ("token", "evm_contract", "solana_contract"):
            return await crypto.fetch(asset_info, timeout=timeout)
        if kind in ("stock_cn", "stock_us", "stock_hk"):
            return await stock.fetch(asset_info, timeout=timeout)
    except Exception:
        # Still best-effort (the caller can continue without data), but leave
        # a trace so adapter bugs are not invisible.
        logging.getLogger(__name__).debug(
            "data adapter failed for %r (type=%s); continuing without data",
            asset_info.code, kind, exc_info=True,
        )
    return "", {"company_name": asset_info.code}
@@ -0,0 +1,96 @@
1
+ """Crypto data adapter — CoinGecko (no key, free tier).
2
+
3
+ Returns a markdown data block for a token. Network/errors degrade gracefully to
4
+ ("", {"company_name": code}) so the chain can still run on LLM reasoning alone.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from typing import Tuple
10
+
11
+ from ..models import AssetInfo
12
+
13
_CG = "https://api.coingecko.com/api/v3"


async def fetch(asset_info: AssetInfo, *, timeout: float = 10.0) -> Tuple[str, dict]:
    """Return ``(data_md, meta)`` for a crypto asset using CoinGecko.

    Contract addresses ("evm_contract" / "solana_contract") never hit the
    network: a short fixed block naming the address and chain is returned.

    For tokens, ``asset_info.coingecko_id`` is used when set; otherwise the
    CoinGecko search endpoint is queried with the symbol, preferring a result
    whose symbol equals the query and falling back to the first result.

    Degrades to ``("", {"company_name": code})`` when httpx is missing, the
    request fails, the server answers with an HTTP error status, or no coin
    id can be resolved.

    Args:
        asset_info: Classified asset; reads ``code``, ``coingecko_id``,
            ``type`` and (for contracts) ``chain``.
        timeout: Seconds, passed to ``httpx.AsyncClient``.

    Returns:
        ``(markdown, meta)``. ``meta["company_name"]`` is the coin name when
        known; ``meta["price_flags"]`` is set only when a spike flag fired.
    """
    code = asset_info.code
    cgid = asset_info.coingecko_id
    meta = {"company_name": code}

    if asset_info.type in ("evm_contract", "solana_contract"):
        md = (
            f"### On-chain asset\n- Address: {code}\n- Chain: {asset_info.chain}\n"
            "- No token summary available without an indexer; rely on reasoning + search.\n"
        )
        return md, meta

    try:
        import httpx
    except ImportError:
        return "", meta

    try:
        async with httpx.AsyncClient(timeout=timeout) as client:
            if not cgid:
                r = await client.get(f"{_CG}/search", params={"query": code})
                r.raise_for_status()
                coins = (r.json() or {}).get("coins") or []
                wanted = str(code).lower()
                # Prefer an exact symbol match over whatever ranks first.
                exact = [c for c in coins if str(c.get("symbol") or "").lower() == wanted]
                candidates = exact or coins
                if candidates:
                    cgid = candidates[0]["id"]
            if not cgid:
                return "", meta
            r = await client.get(
                f"{_CG}/coins/{cgid}",
                params={"localization": "false", "tickers": "false", "market_data": "true",
                        "community_data": "false", "developer_data": "false", "sparkline": "false"},
            )
            # Without this, a 429/404 error body would be rendered below as a
            # token block in which every figure is None.
            r.raise_for_status()
            d = r.json() or {}
    except Exception:
        return "", meta

    name = d.get("name") or code
    meta["company_name"] = name
    m = d.get("market_data") or {}

    def usd(block):
        return (m.get(block) or {}).get("usd")

    desc = ((d.get("description") or {}).get("en") or "").strip().replace("\n", " ")[:400]
    cats = ", ".join([c for c in (d.get("categories") or []) if c][:6])

    chg24 = m.get("price_change_percentage_24h")
    chg7 = m.get("price_change_percentage_7d")
    chg30 = m.get("price_change_percentage_30d")
    ath_chg = (m.get("ath_change_percentage") or {}).get("usd")

    # (label, observed change %, threshold %) for the run-up flags.
    spike_rules = (("24h", chg24, 20), ("7d", chg7, 40), ("30d", chg30, 80))
    flags = []
    try:
        for window, change, threshold in spike_rules:
            if change is not None and change >= threshold:
                flags.append(f"{window} {change:+.0f}%")
        if ath_chg is not None and ath_chg >= -8:
            flags.append("near all-time high")
    except (TypeError, ValueError):
        # Non-numeric payload values: keep whatever flags were collected.
        pass

    flag_md = ""
    if flags:
        flag_md = ("- ⚠ **PARABOLIC / NARRATIVE-MOVE FLAG**: " + "; ".join(flags) +
                   ". You MUST search WHY (catalyst / listing / KOL / unlock) and treat a "
                   "headline-driven spike as an AVOID/observe form, not a buy form.\n")
        meta["price_flags"] = flags

    md = (
        f"### Token: {name} ({(d.get('symbol') or '').upper()})\n"
        f"- Narrative / categories: {cats or 'N/A'}\n"
        f"- Price (USD): {usd('current_price')}\n"
        f"- Market cap (USD): {usd('market_cap')}\n"
        f"- Fully diluted valuation (USD): {usd('fully_diluted_valuation')}\n"
        f"- 24h volume (USD): {usd('total_volume')}\n"
        f"\n#### Price action (CHECK BEFORE THESIS)\n"
        f"- 24h / 7d / 30d change %: {chg24} / {chg7} / {chg30}\n"
        f"- ATH (USD): {usd('ath')} | from ATH %: {ath_chg}\n"
        f"{flag_md}"
        f"\n#### Supply\n"
        f"- Circulating: {m.get('circulating_supply')} | Total: {m.get('total_supply')} | Max: {m.get('max_supply')}\n"
        f"\n#### Description\n{desc or 'N/A'}\n"
        f"\n*(source: CoinGecko)*\n"
    )
    return md, meta
@@ -0,0 +1,130 @@
1
+ """Stock data adapter — yfinance (no key; needs network).
2
+
3
+ Covers US / HK / A-share via yfinance symbol mapping. Degrades gracefully to
4
+ ("", {"company_name": code}) when yfinance is missing or offline.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from typing import Tuple
10
+
11
+ from ..models import AssetInfo
12
+
13
+
14
+ def _yf_symbol(asset_info: AssetInfo) -> str:
15
+ code = asset_info.code
16
+ if asset_info.type == "stock_cn":
17
+ # 600519.SH -> 600519.SS (Shanghai); .SZ / .BJ kept as yfinance expects
18
+ return code.replace(".SH", ".SS")
19
+ return code # stock_us as-is; stock_hk already like 0700.HK
20
+
21
+
22
async def fetch(asset_info: AssetInfo, *, timeout: float = 10.0) -> Tuple[str, dict]:
    """Return ``(data_md, meta)`` for a stock, pulled through yfinance.

    The blocking yfinance calls run in a worker thread under
    ``asyncio.wait_for``. The result degrades to
    ``("", {"company_name": code})`` when yfinance is not installed, the pull
    fails or times out, or the returned info has neither a name nor a price.

    Args:
        asset_info: Classified asset; reads ``code`` and ``type``.
        timeout: Seconds allowed for the yfinance pull.

    Returns:
        ``(markdown, meta)``. On success ``meta`` holds "company_name",
        "fetched_at" and "price", plus "price_flags" when a spike flag fired.
    """
    import asyncio
    from datetime import datetime, timezone

    code = asset_info.code
    meta = {"company_name": code}

    try:
        import yfinance as yf
    except ImportError:
        return "", meta

    sym = _yf_symbol(asset_info)

    def _pull():
        # Runs in a worker thread; never raises.
        try:
            t = yf.Ticker(sym)
            info = t.info or {}
            try:
                hist = t.history(period="6mo", interval="1d")
            except Exception:
                hist = None
            return info, hist
        except Exception:
            return {}, None

    try:
        info, hist = await asyncio.wait_for(asyncio.to_thread(_pull), timeout=timeout)
    except Exception:
        return "", meta

    known_name = info.get("longName") or info.get("shortName")
    price = info.get("currentPrice") or info.get("regularMarketPrice")
    # NOTE(review): yfinance appears to return a small placeholder dict for
    # unknown symbols in some versions — confirm. Without a name or a price
    # the block below would be all None, so fall back to "no data".
    if not info or (not known_name and price is None):
        return "", meta

    name = known_name or code
    meta["company_name"] = name
    currency = info.get("currency") or ""

    fetched_at = datetime.now().astimezone().strftime("%Y-%m-%d %H:%M %Z")
    mkt_epoch = info.get("regularMarketTime")
    if isinstance(mkt_epoch, (int, float)):
        quote_as_of = datetime.fromtimestamp(mkt_epoch, tz=timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
    else:
        quote_as_of = fetched_at
    meta["fetched_at"] = fetched_at
    meta["price"] = price

    hi52 = info.get("fiftyTwoWeekHigh")
    lo52 = info.get("fiftyTwoWeekLow")
    pa_block, flags = _price_action(info, hist, price)
    if flags:
        meta["price_flags"] = flags

    # Computed outside the f-string so a non-numeric target cannot raise
    # while the markdown is being assembled.
    target_mean = info.get("targetMeanPrice")
    overshoot = ""
    try:
        if target_mean and price and target_mean < price:
            overshoot = " — ⚠ MEAN TARGET BELOW PRICE = overshoot signal"
    except TypeError:
        pass

    md = (
        f"### Company: {name} ({code})\n"
        f"- **Data fetched at: {fetched_at}** (cite this date; do not use a remembered/older price)\n"
        f"- Sector / Industry: {info.get('sector', 'N/A')} / {info.get('industry', 'N/A')}\n"
        f"- Country: {info.get('country', 'N/A')}\n"
        f"- **Price ({currency}, as of {quote_as_of}): {price}**\n"
        f"- Market cap: {info.get('marketCap')}\n"
        f"- 52W high / low: {hi52} / {lo52}\n"
        f"{pa_block}"
        f"\n#### Valuation (note: market usually prices FORWARD)\n"
        f"- P/E (trailing / **forward**): {info.get('trailingPE')} / {info.get('forwardPE')}\n"
        f"- P/B: {info.get('priceToBook')} | P/S (trailing): {info.get('priceToSalesTrailing12Months')}\n"
        f"- EV/EBITDA: {info.get('enterpriseToEbitda')} | PEG: {info.get('pegRatio')}\n"
        f"\n#### Analyst consensus & ownership (cross-check 'is it already priced')\n"
        f"- Mean target / high / low: {target_mean} / {info.get('targetHighPrice')} / {info.get('targetLowPrice')}"
        f" (vs price {price}{overshoot})\n"
        f"- # analysts: {info.get('numberOfAnalystOpinions')} | recommendation: {info.get('recommendationKey')} (mean {info.get('recommendationMean')})\n"
        f"- Insider held %: {info.get('heldPercentInsiders')} | Institution held %: {info.get('heldPercentInstitutions')}\n"
        f"\n#### Fundamentals\n"
        f"- Total revenue: {info.get('totalRevenue')}\n"
        f"- Revenue growth: {info.get('revenueGrowth')}\n"
        f"- Earnings growth: {info.get('earningsGrowth')}\n"
        f"- Gross margin: {info.get('grossMargins')} | Operating margin: {info.get('operatingMargins')} | Profit margin: {info.get('profitMargins')}\n"
        f"- ROE: {info.get('returnOnEquity')} | ROA: {info.get('returnOnAssets')}\n"
        f"- Free cash flow: {info.get('freeCashflow')} | Operating cash flow: {info.get('operatingCashflow')}\n"
        f"- Total debt: {info.get('totalDebt')} | Debt/Equity: {info.get('debtToEquity')} | Current ratio: {info.get('currentRatio')}\n"
        f"\n#### Business\n{(info.get('longBusinessSummary') or 'N/A')[:600]}\n"
        f"\n*(source: yfinance, symbol {sym})*\n"
    )
    return md, meta


def _price_action(info: dict, hist, price) -> Tuple[str, list]:
    """Build the "Price action" markdown block and the list of spike flags.

    Reads daily closes from ``hist`` (expected to expose a "Close" column
    with ``dropna`` / ``iloc``) for 5D / 1M / 3M returns, and the 52-week
    high/low from ``info``. Any error stops the scan and keeps whatever
    lines and flags were already collected.
    """
    hi52 = info.get("fiftyTwoWeekHigh")
    lo52 = info.get("fiftyTwoWeekLow")
    pa_lines, flags = [], []
    try:
        if hist is not None and len(hist) > 3:
            close = hist["Close"].dropna()
            last = float(close.iloc[-1])
            for label, n in (("5D", 5), ("1M", 22), ("3M", 66)):
                if len(close) > n:
                    chg = (last / float(close.iloc[-n - 1]) - 1) * 100
                    pa_lines.append(f" - {label} change: {chg:+.1f}%")
                    if label == "1M" and chg >= 40:
                        flags.append(f"1-month move {chg:+.0f}% (steep run-up)")
                    if label == "5D" and chg >= 25:
                        flags.append(f"5-day move {chg:+.0f}% (vertical spike)")
        if hi52 and price and float(price) / float(hi52) >= 0.95:
            flags.append("price within 5% of 52-week high")
        if hi52 and lo52 and float(lo52) > 0 and float(hi52) / float(lo52) >= 3:
            flags.append(f"52-week range is {float(hi52)/float(lo52):.1f}x (very wide)")
    except Exception:
        # Best-effort: price action is optional context.
        pass

    pa_block = ""
    if pa_lines or flags:
        pa_block = "\n#### Price action (CHECK BEFORE THESIS)\n" + "\n".join(pa_lines) + "\n"
    if flags:
        pa_block += ("- ⚠ **PARABOLIC / NARRATIVE-MOVE FLAG**: " + "; ".join(flags) +
                     ". You MUST search WHY the price moved (catalyst / who said what / news) "
                     "and treat a headline-driven spike as an AVOID/observe form, not a buy form.\n")
    return pa_block, flags
cyberagent/chain.py ADDED
@@ -0,0 +1,143 @@
1
+ """AnalystChain — the public entry point.
2
+
3
+ chain = AnalystChain(llm="gemini", api_key="...", lang="zh")
4
+ report = await chain.analyze("NVDA")
5
+
6
+ Outer shape: classify -> fetch data -> run the 5 departments in sequence (each
7
+ reads the upstream reports) -> parse the strategy dept's final decision ->
8
+ AnalystReport. The analytical soul is the physical-bottleneck 5-step chain
9
+ encoded in ``cyberagent.prompts``.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import re
15
+ import time
16
+ from typing import Optional, Sequence, Union
17
+
18
+ from . import adapters
19
+ from .classifier import classify
20
+ from .depts import run_department, run_positioning
21
+ from .llm_adapter import LLMAdapter, resolve_llm
22
+ from .models import AnalystReport, FinalDecision
23
+ from .prompts import DEPT_ORDER, get_department
24
+
25
+ _DECISIONS = ("ACCUMULATE", "HOLD", "REDUCE", "AVOID")
26
+
27
+
28
+ def _parse_verdict(markdown: str) -> tuple[Optional[FinalDecision], float, Optional[str]]:
29
+ """Best-effort extraction of final_decision / confidence / headline from the
30
+ closing (leaders) department's markdown."""
31
+ decision: Optional[FinalDecision] = None
32
+ up = markdown.upper()
33
+ # Prefer a decision near a "final decision" marker, else first occurrence.
34
+ marker = re.search(r"(FINAL DECISION|最终决策)(.{0,80})", up, re.DOTALL)
35
+ search_space = marker.group(2) if marker else up
36
+ for d in _DECISIONS:
37
+ if d in search_space:
38
+ decision = d # type: ignore[assignment]
39
+ break
40
+ if decision is None:
41
+ for d in _DECISIONS:
42
+ if d in up:
43
+ decision = d # type: ignore[assignment]
44
+ break
45
+
46
+ confidence = 0.0
47
+ # Skip an optional "(0-100)" scale label after the keyword, then grab the real number.
48
+ cm = re.search(
49
+ r"(?:置信度|confidence)\s*(?:[((][^))]*[))])?\s*[::]?\s*\n?\s*(\d{1,3})",
50
+ markdown, re.IGNORECASE,
51
+ )
52
+ if cm:
53
+ try:
54
+ confidence = min(100, int(cm.group(1))) / 100.0
55
+ except ValueError:
56
+ pass
57
+
58
+ headline = None
59
+ hm = re.search(r"(?:headline|反共识)[^\n]*\n+([^\n]{4,160})", markdown, re.IGNORECASE)
60
+ if hm:
61
+ headline = hm.group(1).strip().lstrip("#-* ").strip()
62
+
63
+ return decision, confidence, headline
64
+
65
+
66
class AnalystChain:
    """Classify a symbol, fetch its data, then run the positioning step and
    the selected departments in order to build an ``AnalystReport``."""

    def __init__(
        self,
        llm: Union[str, LLMAdapter] = "gemini",
        *,
        api_key: Optional[str] = None,
        model: Optional[str] = None,
        lang: str = "en",
        departments: Optional[Sequence[str]] = None,
        timeout: float = 10.0,
    ):
        """Resolve the LLM adapter and store the default run settings.

        ``departments`` falls back to every key in ``DEPT_ORDER`` when empty
        or omitted; ``timeout`` is forwarded to the data adapters.
        """
        self.llm = resolve_llm(llm, api_key=api_key, model=model)
        self.lang = lang
        if departments:
            self.departments = list(departments)
        else:
            self.departments = list(DEPT_ORDER)
        self.timeout = timeout

    async def analyze(
        self,
        symbol: str,
        *,
        lang: Optional[str] = None,
        departments: Optional[Sequence[str]] = None,
        on_event=None,
    ) -> AnalystReport:
        """Analyze one symbol and return the assembled report.

        ``lang`` and ``departments`` override the instance defaults for this
        call only. ``on_event(stage_key, label, status)`` is an optional
        progress callback invoked with status "start" and "done" around each
        stage; exceptions it raises are ignored.
        """
        lang = lang or self.lang
        selected = departments or self.departments
        # Always run in DEPT_ORDER, whatever order the caller listed them in.
        dept_keys = [key for key in DEPT_ORDER if key in selected]
        started = time.perf_counter()

        def _emit(stage, label, status):
            if not on_event:
                return
            try:
                on_event(stage, label, status)
            except Exception:
                pass

        asset = classify(symbol)
        report = AnalystReport(asset=asset, company_name=asset.code, market=asset.market)

        if asset.type == "unknown":
            report.success = False
            report.error = f"Could not classify {symbol!r}. Try cn:/us:/hk:/crypto:/evm: prefix."
            report.elapsed_seconds = round(time.perf_counter() - started, 3)
            return report

        data_md, meta = await adapters.fetch(asset, timeout=self.timeout)
        report.company_name = meta.get("company_name") or asset.code

        # Phase 0: positioning runs first; its output is prepended to the raw
        # data handed to every department.
        pos_label = "资产定位 / Positioning"
        _emit("positioning", pos_label, "start")
        report.positioning = await run_positioning(
            llm=self.llm,
            company_name=report.company_name,
            code=asset.code,
            market=asset.market,
            data_md=data_md,
            lang=lang,
        )
        _emit("positioning", pos_label, "done")
        grounded_md = f"## {pos_label}\n{report.positioning}\n\n## 原始数据 / Raw data\n{data_md}".strip()

        # Each department receives the markdown of those that ran before it.
        prior: dict[str, str] = {}
        for key in dept_keys:
            spec = get_department(key)
            if lang == "zh":
                label = spec["display_zh"]
            else:
                label = spec["display_en"]
            _emit(key, label, "start")
            dept_report = await run_department(
                key,
                llm=self.llm,
                company_name=report.company_name,
                code=asset.code,
                market=asset.market,
                data_md=grounded_md,
                prior_reports=prior,
                lang=lang,
            )
            report.departments[key] = dept_report
            prior[key] = dept_report.markdown
            _emit(key, label, "done")

        # The verdict is parsed only from a successful "leaders" report.
        closer = report.departments.get("leaders")
        if closer and closer.success:
            verdict = _parse_verdict(closer.markdown)
            report.final_decision, report.confidence, report.headline = verdict

        report.elapsed_seconds = round(time.perf_counter() - started, 3)
        return report
@@ -0,0 +1,202 @@
1
+ """AssetClassifier — identify whether an input is a stock / token / contract
2
+ and route it to the right data adapter.
3
+
4
+ classify("NVDA") -> stock_us code=NVDA
5
+ classify("600519") -> stock_cn code=600519.SH
6
+ classify("0700") -> stock_hk code=0700.HK
7
+ classify("BTC") -> token code=btc coingecko_id=bitcoin
8
+ classify("0x6B17...") -> evm_contract chain=ethereum
9
+ classify("crypto:JUP") -> token code=jup coingecko_id=jupiter-exchange-solana
10
+
11
+ Explicit prefixes force routing: cn: / us: / hk: / crypto: / evm: / sol:
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import re
17
+
18
+ from .models import AssetInfo
19
+
20
+ # ──────────────────────────────────────────────────────────────────────────
21
+ # Token whitelist (symbol -> coingecko_id). Unknown symbols fall back to the
22
+ # CoinGecko search API at fetch time.
23
+ # ──────────────────────────────────────────────────────────────────────────
24
+ TOKEN_WHITELIST: dict[str, str] = {
25
+ # Top market cap
26
+ "BTC": "bitcoin", "ETH": "ethereum", "USDT": "tether", "BNB": "binancecoin",
27
+ "SOL": "solana", "USDC": "usd-coin", "XRP": "ripple", "DOGE": "dogecoin",
28
+ "ADA": "cardano", "TRX": "tron", "AVAX": "avalanche-2", "TON": "the-open-network",
29
+ "DOT": "polkadot", "MATIC": "matic-network", "LINK": "chainlink",
30
+ "WBTC": "wrapped-bitcoin", "LTC": "litecoin", "BCH": "bitcoin-cash",
31
+ "NEAR": "near", "UNI": "uniswap", "XLM": "stellar", "ETC": "ethereum-classic",
32
+ "ATOM": "cosmos", "XMR": "monero", "FIL": "filecoin", "APT": "aptos",
33
+ "ARB": "arbitrum", "VET": "vechain", "OP": "optimism", "HBAR": "hedera-hashgraph",
34
+ "IMX": "immutable-x", "INJ": "injective-protocol", "SUI": "sui",
35
+ "RNDR": "render-token", "RENDER": "render-token", "GRT": "the-graph", "STX": "blockstack",
36
+ # DeFi blue chips
37
+ "AAVE": "aave", "MKR": "maker", "CRV": "curve-dao-token", "LDO": "lido-dao",
38
+ "COMP": "compound-governance-token", "SNX": "havven", "FXS": "frax-share",
39
+ "PENDLE": "pendle", "BAL": "balancer", "1INCH": "1inch", "RPL": "rocket-pool",
40
+ # L2 / Modular / Restaking
41
+ "TIA": "celestia", "EIGEN": "eigenlayer", "STRK": "starknet", "METIS": "metis-token",
42
+ "MNT": "mantle", "ZK": "zksync", "MANTA": "manta-network", "BLAST": "blast",
43
+ # Solana ecosystem
44
+ "JUP": "jupiter-exchange-solana", "JTO": "jito-governance-token", "PYTH": "pyth-network",
45
+ "WIF": "dogwifcoin", "BONK": "bonk", "POPCAT": "popcat", "W": "wormhole",
46
+ # AI / DePIN narratives
47
+ "TAO": "bittensor", "FET": "fetch-ai", "AGIX": "singularitynet", "OCEAN": "ocean-protocol",
48
+ "AKT": "akash-network", "NMR": "numeraire", "WLD": "worldcoin-wld",
49
+ # Meme
50
+ "PEPE": "pepe", "SHIB": "shiba-inu", "FLOKI": "floki", "BOME": "book-of-meme",
51
+ # RWA / stablecoin
52
+ "ONDO": "ondo-finance", "FRAX": "frax", "DAI": "dai", "TUSD": "true-usd",
53
+ # Others
54
+ "HYPE": "hyperliquid", "KAS": "kaspa", "GMX": "gmx", "DYDX": "dydx-chain",
55
+ "CFX": "conflux-token",
56
+ }
57
+
58
+ _SYMBOL_LOOKUP = {sym.lower(): cgid for sym, cgid in TOKEN_WHITELIST.items()}
59
+
60
+ RE_EVM_CONTRACT = re.compile(r"^0x[a-fA-F0-9]{40}$")
61
+ RE_CN_STOCK = re.compile(r"^(\d{6})(?:\.(SH|SZ|BJ))?$", re.IGNORECASE)
62
+ RE_HK_STOCK = re.compile(r"^(\d{1,5})(?:\.HK)?$", re.IGNORECASE)
63
+ RE_US_STOCK = re.compile(r"^[A-Z]{1,5}(\.[A-Z])?$")
64
+ RE_SOL_ADDR = re.compile(r"^[1-9A-HJ-NP-Za-km-z]{32,44}$")
65
+
66
+
67
def classify(raw: str, default_evm_chain: str = "ethereum") -> AssetInfo:
    """Work out what kind of asset ``raw`` names and return an ``AssetInfo``.

    An explicit ``cn:`` / ``us:`` / ``hk:`` / ``crypto:`` / ``token:`` /
    ``evm:`` / ``sol:`` / ``solana:`` prefix forces the route. Otherwise the
    input is tried, in order, as: EVM address, A-share code, whitelisted
    token symbol, US ticker, HK code, Solana address. Inputs that fit none
    of these come back with ``type="unknown"``.
    """
    if not raw:
        return AssetInfo(type="unknown", code="", raw_input="", confidence=0.0)

    text = raw.strip()

    # 0. Explicit prefix. An unrecognised prefix falls through to the
    #    pattern checks below.
    if ":" in text:
        head, _, tail = text.partition(":")
        head = head.strip().lower()
        tail = tail.strip()
        if head == "evm":
            return _evm_contract(tail, raw, default_evm_chain, forced=True)
        forced_routes = {
            "cn": _cn_stock,
            "us": _us_stock,
            "hk": _hk_stock,
            "crypto": _token,
            "token": _token,
            "sol": _solana,
            "solana": _solana,
        }
        route = forced_routes.get(head)
        if route is not None:
            return route(tail, raw, forced=True)

    # 1. EVM contract address (strictest pattern)
    if RE_EVM_CONTRACT.match(text):
        return _evm_contract(text, raw, default_evm_chain, forced=False)

    # 2. A-share: six digits, optional exchange suffix
    if RE_CN_STOCK.match(text):
        return _cn_stock(text, raw, forced=False)

    # 3. Whitelisted token, checked before US tickers so BTC / ETH / SOL
    #    resolve as tokens
    if text.lower() in _SYMBOL_LOOKUP:
        return _token(text, raw, forced=False)

    # 4. US ticker: 1-5 letters, case-insensitive
    ticker = text.upper()
    if RE_US_STOCK.match(ticker):
        return _us_stock(ticker, raw, forced=False)

    # 5. HK code: 1-5 digits
    if RE_HK_STOCK.match(text):
        return _hk_stock(text, raw, forced=False)

    # 6. Solana address (base58)
    if RE_SOL_ADDR.match(text) and len(text) >= 32:
        return _solana(text, raw, forced=False)

    return AssetInfo(
        type="unknown",
        code=text,
        raw_input=raw,
        confidence=0.0,
        hints={"reason": "no matching pattern; try cn:/us:/hk:/crypto:/evm: prefix"},
    )
121
+
122
+
123
def _cn_stock(s: str, raw: str, forced: bool) -> AssetInfo:
    """Build the ``AssetInfo`` for an A-share code.

    Accepts six digits with an optional .SH / .SZ / .BJ suffix. If the input
    does not match but contains exactly six digits, those digits are used.
    With no suffix the exchange is inferred from the first digit: 6 or 9 ->
    SH, 8 or 4 -> BJ, anything else -> SZ. Other input yields ``unknown``.
    """
    text = s.strip().upper()
    matched = RE_CN_STOCK.match(text)
    if matched is None:
        only_digits = "".join(filter(str.isdigit, text))
        if len(only_digits) == 6:
            text = only_digits
            matched = RE_CN_STOCK.match(text)

    if matched is None:
        return AssetInfo(
            type="unknown", code=text, raw_input=raw, confidence=0.0,
            hints={"reason": "forced cn: but not 6-digit A-share format"},
        )

    number, exchange = matched.group(1), matched.group(2)
    if not exchange:
        lead = number[0]
        if lead in ("6", "9"):
            exchange = "SH"
        elif lead in ("8", "4"):
            exchange = "BJ"
        else:
            exchange = "SZ"
    exchange = exchange.upper()

    return AssetInfo(
        type="stock_cn",
        code=f"{number}.{exchange}",
        raw_input=raw,
        market="CN",
        confidence=0.95 if forced else 1.0,
        hints={"exchange": exchange},
    )
143
+
144
+
145
+ def _hk_stock(s: str, raw: str, forced: bool) -> AssetInfo:
146
+ digits = "".join(c for c in s.upper().replace(".HK", "") if c.isdigit())
147
+ if not digits or len(digits) > 5:
148
+ return AssetInfo(type="unknown", code=s, raw_input=raw, confidence=0.0,
149
+ hints={"reason": "not a 1-5 digit HK code"})
150
+ code = f"{int(digits):04d}.HK" # zero-pad to 4, e.g. 700 -> 0700.HK
151
+ return AssetInfo(type="stock_hk", code=code, raw_input=raw, market="HK",
152
+ confidence=0.9 if forced else 0.7,
153
+ hints={"note": "HK stock (numeric code)"})
154
+
155
+
156
+ def _us_stock(s: str, raw: str, forced: bool) -> AssetInfo:
157
+ return AssetInfo(type="stock_us", code=s.strip().upper(), raw_input=raw, market="US",
158
+ confidence=0.85 if not forced else 0.95,
159
+ hints={"note": "US stock or unknown small-cap ticker"})
160
+
161
+
162
def _token(s: str, raw: str, forced: bool) -> AssetInfo:
    """Build the ``AssetInfo`` for a token symbol.

    The trimmed, lower-cased symbol becomes the code. Whitelisted symbols
    carry their CoinGecko id at confidence 1.0; any other symbol is still
    returned as a token, with no id and confidence 0.5 (forced) or 0.3.
    """
    symbol = s.strip().lower()
    coin_id = _SYMBOL_LOOKUP.get(symbol)
    if not coin_id:
        return AssetInfo(
            type="token",
            code=symbol,
            raw_input=raw,
            market="CRYPTO",
            coingecko_id=None,
            confidence=0.5 if forced else 0.3,
            hints={
                "source": "unknown_symbol",
                "note": "not in whitelist; will search CoinGecko at fetch time",
            },
        )
    return AssetInfo(
        type="token",
        code=symbol,
        raw_input=raw,
        market="CRYPTO",
        coingecko_id=coin_id,
        confidence=1.0,
        hints={"source": "whitelist"},
    )
172
+
173
+
174
def _evm_contract(s: str, raw: str, default_chain: str, forced: bool) -> AssetInfo:
    """Build the ``AssetInfo`` for an EVM contract address.

    The address is trimmed and lower-cased, and "0x" is prepended when it is
    missing. Anything that is not then "0x" plus 40 hex characters yields
    ``type="unknown"``.

    Args:
        s: Address text, with or without the "0x" prefix.
        raw: The user's original input, stored on the result.
        default_chain: Chain name recorded on the result.
        forced: Accepted for signature parity with the other builders; it
            does not affect the result.
    """
    # Lower-case before the prefix check so an upper-case "0X..." input does
    # not get a second "0x" prepended and then fail validation.
    addr = s.strip().lower()
    if not addr.startswith("0x"):
        addr = "0x" + addr
    if not RE_EVM_CONTRACT.match(addr):
        return AssetInfo(type="unknown", code=addr, raw_input=raw, confidence=0.0,
                         hints={"reason": "not a valid EVM address"})
    return AssetInfo(type="evm_contract", code=addr, raw_input=raw, market="CRYPTO",
                     chain=default_chain, confidence=1.0,
                     hints={"note": "EVM address; chain defaulted"})
185
+
186
+
187
def _solana(s: str, raw: str, forced: bool) -> AssetInfo:
    """Build the ``AssetInfo`` for a Solana address: the trimmed input must
    match ``RE_SOL_ADDR``, otherwise ``type="unknown"`` is returned.
    Confidence is 0.95 when forced by prefix, 0.85 otherwise."""
    address = s.strip()
    if RE_SOL_ADDR.match(address):
        return AssetInfo(
            type="solana_contract",
            code=address,
            raw_input=raw,
            market="CRYPTO",
            chain="solana",
            confidence=0.95 if forced else 0.85,
            hints={"note": "Solana base58 address"},
        )
    return AssetInfo(
        type="unknown",
        code=address,
        raw_input=raw,
        confidence=0.0,
        hints={"reason": "not a valid Solana base58 address"},
    )
195
+
196
+
197
class AssetClassifier:
    """Class-style access to the module-level ``classify`` function, so that
    ``from cyberagent import AssetClassifier`` works."""

    @staticmethod
    def classify(raw: str, default_evm_chain: str = "ethereum") -> AssetInfo:
        """Delegate to the module-level ``classify`` with the same arguments."""
        result = classify(raw, default_evm_chain=default_evm_chain)
        return result