probar 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
probar/__init__.py ADDED
@@ -0,0 +1,54 @@
1
+ """probar —— 稳定、可观测的 A 股数据接入层。
2
+
3
+ 按数据源拆分独立命名空间,每个命名空间只暴露该源**真实支持**的接口:
4
+
5
+ pb.dc 东方财富(HTTP/JSON,数据最全:实时 / 复权K / 资金流 / 龙虎榜 / 财报)
6
+ pb.tdx 通达信(自写二进制协议,行情底座:批量实时五档 / 历史分钟 / 历史逐笔)
7
+ pb.ths 同花顺(题材增强:问财 / 概念,best-effort 反爬,实验性)
8
+
9
+ pb.capabilities() 返回三源能力矩阵(DataFrame)
10
+
11
+ 各源数据**各自独立、口径可能不一致**:用户按需选源,probar 不做"主源"也不做跨源替换。
12
+ 设计原则见 README。本库为非官方/逆向接口封装,使用前请阅读免责声明。
13
+ """
14
+
15
+ from ._version import __version__
16
+ from .core.capabilities import capabilities
17
+ from .core.errors import (
18
+ NetworkError,
19
+ NoData,
20
+ NotSupported,
21
+ ProbarError,
22
+ RateLimited,
23
+ SchemaChanged,
24
+ )
25
+ from .providers.eastmoney import EastMoney
26
+ from .providers.tdx import Tdx
27
+ from .providers.ths import Ths
28
+
29
+ # Default instances, one bound to each namespace. Advanced users can build their own instance with custom settings, e.g.
30
+ # from probar import EastMoney
31
+ # dc = EastMoney(timeout=5, proxy="http://127.0.0.1:7890")
32
+ dc = EastMoney()
33
+ tdx = Tdx()
34
+ ths = Ths()
35
+
36
+ __all__ = [
37
+ "__version__",
38
+ "capabilities",
39
+ # Namespaces (default instances)
40
+ "dc",
41
+ "tdx",
42
+ "ths",
43
+ # Provider classes (use these for custom configuration)
44
+ "EastMoney",
45
+ "Tdx",
46
+ "Ths",
47
+ # Exceptions
48
+ "ProbarError",
49
+ "NetworkError",
50
+ "RateLimited",
51
+ "NotSupported",
52
+ "NoData",
53
+ "SchemaChanged",
54
+ ]
probar/_version.py ADDED
@@ -0,0 +1 @@
1
+ # Package version string; imported by probar/__init__.py and listed in its __all__.
+ __version__ = "0.1.0"
@@ -0,0 +1,4 @@
1
+ """probar 共享基础设施层(L1):错误模型、数据契约、传输、限流、缓存、日历、代码归一化、能力矩阵。
2
+
3
+ 各数据源命名空间(dc/tdx/ths)都建立在这一层之上。
4
+ """
probar/core/cache.py ADDED
@@ -0,0 +1,36 @@
1
+ """极简 TTL 内存缓存。
2
+
3
+ 盘中快照走短 TTL,避免重复请求打爆数据源;历史数据的落盘缓存留待后续版本
4
+ (diskcache / sqlite)。
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import threading
10
+ import time
11
+ from typing import Any
12
+
13
+
14
class TTLCache:
    """Minimal thread-safe in-memory cache whose entries expire after a TTL.

    Entries are stored as ``(deadline, value)`` pairs, where ``deadline`` is a
    ``time.monotonic()`` timestamp. Expired entries are dropped lazily by
    :meth:`get` and swept by :meth:`set` when the cache is full.

    Args:
        ttl: Default lifetime of an entry, in seconds.
        maxsize: Entry count at which :meth:`set` starts evicting.
    """

    def __init__(self, ttl: float = 2.0, maxsize: int = 4096) -> None:
        self.ttl = ttl
        self.maxsize = maxsize
        self._d: dict[Any, tuple[float, Any]] = {}
        self._lock = threading.Lock()

    def get(self, key: Any) -> Any | None:
        """Return the cached value for ``key``, or ``None`` if absent or expired."""
        with self._lock:
            item = self._d.get(key)
            if item is None:
                return None
            expire, value = item
            if expire < time.monotonic():
                # Lazy expiry: drop the stale entry on access.
                self._d.pop(key, None)
                return None
            return value

    def set(self, key: Any, value: Any, ttl: float | None = None) -> None:
        """Store ``value`` under ``key`` for ``ttl`` seconds (default: ``self.ttl``).

        Overwriting an existing key never evicts anything, because the cache
        does not grow. When a new key arrives and the cache is full, expired
        entries are purged first; only if it is still full is the whole cache
        cleared (the original simple eviction policy).
        """
        with self._lock:
            if key not in self._d and len(self._d) >= self.maxsize:
                now = time.monotonic()
                # Same expiry test as get(): an entry is stale once deadline < now.
                stale = [k for k, (deadline, _) in self._d.items() if deadline < now]
                for k in stale:
                    del self._d[k]
                if len(self._d) >= self.maxsize:
                    self._d.clear()
            lifetime = self.ttl if ttl is None else ttl
            self._d[key] = (time.monotonic() + lifetime, value)
@@ -0,0 +1,17 @@
1
+ """交易日历(占位实现)。
2
+
3
+ v0.1 仅按工作日粗判;接入交易所节假日表(以及午休/集合竞价时段判断)留待后续版本。
4
+ TODO(v0.2): 内置或拉取 SSE/SZSE 节假日数据,补 ``is_open_now`` / ``previous_trading_day``。
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from datetime import date, datetime
10
+
11
+
12
def is_trading_day(d: date | datetime | None = None) -> bool:
    """Return whether ``d`` falls on a weekday (Monday to Friday).

    Only weekends are excluded; public holidays are **not** taken into
    account. When ``d`` is omitted, ``date.today()`` is used; a ``datetime``
    is reduced to its date part before the check.
    """
    day = d if d else date.today()
    if isinstance(day, datetime):
        day = day.date()
    # weekday(): Monday == 0 ... Sunday == 6
    return day.weekday() not in (5, 6)
@@ -0,0 +1,63 @@
1
+ """三源能力矩阵 —— 定稿。
2
+
3
+ 这是对三个**数据源本身**能力的参考记录(某源能不能提供某类数据),不是方法清单:
4
+ 各命名空间(pb.dc/tdx/ths)按路线图暴露其中**已实现或计划实现**的方法子集;
5
+ 真实可调用的方法以 ``dir(pb.dc)`` / IDE 自动补全为准。
6
+
7
+
8
+ 档位:
9
+ FULL ✅ 强,可做主实现
10
+ PART 🔸 部分/弱/需二次计算
11
+ SOFT ⚠️ 反爬脆,best-effort(同花顺多数能力)
12
+ NONE ❌ 无,命名空间里不提供该接口
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ from typing import TYPE_CHECKING
18
+
19
+ if TYPE_CHECKING:
20
+ import pandas as pd
21
+
22
# Support tiers used as cell values in the capability matrix.
FULL = "✅"
PART = "🔸"
SOFT = "⚠️"
NONE = "❌"

# capability -> support tier per source, always keyed in the order dc, tdx, ths
CAPABILITIES: dict[str, dict[str, str]] = {
    "实时快照 quote": dict(dc=FULL, tdx=FULL, ths=SOFT),
    "五档盘口(仅L1)": dict(dc=FULL, tdx=FULL, ths=SOFT),
    "当日分时 intraday": dict(dc=FULL, tdx=FULL, ths=SOFT),
    "历史分时 intraday_hist": dict(dc=PART, tdx=FULL, ths=SOFT),
    "当日逐笔 ticks": dict(dc=PART, tdx=FULL, ths=SOFT),
    "历史逐笔 ticks_hist": dict(dc=NONE, tdx=PART, ths=NONE),
    "K线 日/周/月": dict(dc=FULL, tdx=FULL, ths=SOFT),
    "K线 分钟": dict(dc=PART, tdx=FULL, ths=SOFT),
    "前/后复权 adjust": dict(dc=FULL, tdx=PART, ths=SOFT),
    "资金流 fund_flow": dict(dc=FULL, tdx=NONE, ths=SOFT),
    "龙虎榜 lhb": dict(dc=FULL, tdx=NONE, ths=SOFT),
    "北向/沪深港通 hsgt": dict(dc=PART, tdx=NONE, ths=SOFT),
    "财务报表/业绩 financials": dict(dc=FULL, tdx=PART, ths=SOFT),
    "股东/解禁/分红": dict(dc=FULL, tdx=PART, ths=SOFT),
    "板块/概念成分": dict(dc=FULL, tdx=PART, ths=SOFT),
    "细粒度概念题材": dict(dc=PART, tdx=PART, ths=SOFT),
    "自然语言选股 wencai": dict(dc=NONE, tdx=NONE, ths=SOFT),
    "证券代码表 securities": dict(dc=FULL, tdx=FULL, ths=PART),
    "除权除息 xdxr": dict(dc=FULL, tdx=FULL, ths=SOFT),
    "多市场(港美/期货/基金/转债)": dict(dc=FULL, tdx=PART, ths=SOFT),
}

# THS is anti-scraping best-effort as a whole: its content (themes / Wencai) is
# the strongest of the three sources, but its scraping reliability is the lowest.
NOTES = {
    "ths": "全程反爬(hexin-v),best-effort;问财与细粒度概念题材为其独有价值。",
    "tdx": "无资金流/龙虎榜/北向(协议无此数据域);复权需用 xdxr 自算;逐笔为分笔明细非 L2。",
    "dc": "数据最全(实时/复权/资金流/龙虎榜/财报);北向实时盘中已停披露,仅 EOD/额度。",
}
54
+
55
+
56
def capabilities() -> pd.DataFrame:
    """Return the capability matrix as a DataFrame.

    Rows are capabilities (the index is named ``capability``); columns are the
    three sources in the fixed order ``dc``, ``tdx``, ``ths``.
    """
    # Imported lazily: at module level pandas is only imported under TYPE_CHECKING.
    import pandas as pd

    matrix = pd.DataFrame(CAPABILITIES).transpose()
    matrix = matrix.loc[:, ["dc", "tdx", "ths"]]
    matrix.index.name = "capability"
    return matrix
probar/core/errors.py ADDED
@@ -0,0 +1,32 @@
1
+ """结构化异常模型。
2
+
3
+ 调用方可按类型精确处理:网络抖动重试、限频退避、源不支持降级、无数据跳过。
4
+ 其中 ``SchemaChanged`` 通常意味着上游接口字段变更 —— 正是每日 canary 巡检要替用户
5
+ 最先发现的那一类。
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+
11
class ProbarError(Exception):
    """Base class for every exception raised by probar."""


class NetworkError(ProbarError):
    """Network, timeout, or connection failure (retries already exhausted)."""


class RateLimited(ProbarError):
    """The data source is throttling requests (for example HTTP 429)."""


class NotSupported(ProbarError):
    """The data source does not support this interface (marked "none" in the capability matrix)."""


class NoData(ProbarError):
    """The request was valid but produced no data (suspended / not yet listed / no trades in range)."""


class SchemaChanged(ProbarError):
    """The upstream response does not match the expected contract; the interface may have changed."""
probar/core/http.py ADDED
@@ -0,0 +1,69 @@
1
+ """基于 httpx 的同步 HTTP 传输层:统一超时、限流、退避重试、默认请求头。
2
+
3
+ 只负责"把请求安全地发出去、把 JSON 拿回来",不关心字段语义(解析交给各 provider)。
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import time
9
+ from typing import Any
10
+
11
+ import httpx
12
+
13
+ from .errors import NetworkError, RateLimited
14
+ from .rate_limit import TokenBucket
15
+
16
# Headers applied to every request unless overridden by the caller.
DEFAULT_HEADERS = {
    "User-Agent": " ".join(
        (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
            "AppleWebKit/537.36 (KHTML, like Gecko)",
            "Chrome/124.0",
            "Safari/537.36",
        )
    ),
    "Accept": "*/*",
}
23
+
24
+
25
class HttpClient:
    """Synchronous HTTP client on top of ``httpx`` with rate limiting and retries.

    Args:
        timeout: Timeout value handed to ``httpx.Client``.
        rate: Refill rate of the internal ``TokenBucket``; one token is taken
            before every attempt.
        proxy: Optional proxy URL; only forwarded to ``httpx.Client`` when set.
        headers: Extra default headers, merged over ``DEFAULT_HEADERS``.
        retries: Total number of attempts per request (values below 1 count as 1).

    The client can be used as a context manager; leaving the ``with`` block
    closes the underlying ``httpx.Client``.
    """

    def __init__(
        self,
        *,
        timeout: float = 8.0,
        rate: float = 10.0,
        proxy: str | None = None,
        headers: dict[str, str] | None = None,
        retries: int = 3,
    ) -> None:
        self._bucket = TokenBucket(rate)
        self._retries = max(1, retries)
        client_kwargs: dict[str, Any] = {
            "timeout": timeout,
            "headers": {**DEFAULT_HEADERS, **(headers or {})},
            "follow_redirects": True,
        }
        # Only pass the argument when explicitly requested, so httpx versions that
        # disagree on the proxy/proxies keyword are not affected by default.
        if proxy:
            client_kwargs["proxy"] = proxy
        self._client = httpx.Client(**client_kwargs)

    def __enter__(self) -> HttpClient:
        return self

    def __exit__(self, *exc_info: object) -> None:
        self.close()

    def get_json(
        self, url: str, params: dict[str, Any] | None = None, *, referer: str | None = None
    ) -> Any:
        """Issue a rate-limited GET with retries and return the decoded JSON body.

        Args:
            url: Target URL.
            params: Optional query parameters.
            referer: Optional ``Referer`` header for this request only.

        Raises:
            RateLimited: On HTTP 429; raised immediately, without further retries.
            NetworkError: When every attempt failed (transport error, HTTP error
                status, or a non-JSON body). The last underlying error is chained
                as ``__cause__``.
        """
        headers = {"Referer": referer} if referer else None
        last_err: Exception | None = None
        for attempt in range(1, self._retries + 1):
            self._bucket.acquire()
            try:
                resp = self._client.get(url, params=params, headers=headers)
                if resp.status_code == 429:
                    raise RateLimited(f"429 Too Many Requests: {url}")
                resp.raise_for_status()
                # A non-JSON body (e.g. an HTML page from a WAF) makes .json() raise
                # ValueError, which is retried and classified like other failures.
                return resp.json()
            except RateLimited:
                raise
            except (httpx.TransportError, httpx.HTTPStatusError, ValueError) as err:
                last_err = err
            # Linear backoff between attempts; no pointless sleep after the last one.
            if attempt < self._retries:
                time.sleep(0.3 * attempt)
        raise NetworkError(
            f"GET {url} 失败(已重试 {self._retries} 次): {last_err!r}"
        ) from last_err

    def close(self) -> None:
        """Close the underlying ``httpx.Client``."""
        self._client.close()
probar/core/models.py ADDED
@@ -0,0 +1,79 @@
1
+ """数据契约(统一核心 schema)。
2
+
3
+ 设计取舍:大表行情**不做逐行 pydantic 校验**,只用轻量的
4
+ "列存在 + dtype" 断言;严格校验留给 canary / ``validate=True``。各源同名接口返回
5
+ 同一套**核心列**,源特有的额外字段放进 ``df.attrs['extras']`` 或 ``raw``。
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from collections.abc import Iterable
11
+ from typing import TYPE_CHECKING
12
+
13
+ from .errors import SchemaChanged
14
+
15
+ if TYPE_CHECKING:
16
+ import pandas as pd
17
+
18
# Core column contracts for same-named interfaces (identical across sources).
KLINE_COLUMNS = "symbol date open high low close volume amount pct_chg turnover".split()

QUOTE_COLUMNS = "symbol name price open high low prev_close volume amount pct_chg".split()

# Whole-market security list (securities).
SECURITIES_COLUMNS = "symbol code name market asset_type".split()

# TDX real-time five-level snapshot (quote): core columns + five L1 order-book
# levels + inner/outer volume, current volume and server time.
# ``name`` is always None (the TDX quote protocol does not return names);
# ``inner_vol`` / ``outer_vol`` are the inner-market / outer-market volumes.
TDX_QUOTE_COLUMNS = [
    *QUOTE_COLUMNS,
    # bid1, bid_vol1, ask1, ask_vol1, ..., bid5, bid_vol5, ask5, ask_vol5
    *(
        f"{field}{level}"
        for level in range(1, 6)
        for field in ("bid", "bid_vol", "ask", "ask_vol")
    ),
    "cur_vol",
    "inner_vol",
    "outer_vol",
    "servertime",
]
59
+
60
+
61
def ensure_columns(
    df: pd.DataFrame, required: Iterable[str], *, source: str, interface: str
) -> pd.DataFrame:
    """Check that ``df`` has at least the ``required`` columns and return it unchanged.

    Raises:
        SchemaChanged: If any required column is missing. The message names the
            source/interface, the missing columns, and the columns actually present.
    """
    missing = []
    for column in required:
        if column not in df.columns:
            missing.append(column)
    if not missing:
        return df
    raise SchemaChanged(
        f"[{source}.{interface}] 响应缺少字段 {missing};上游接口可能已变更。"
        f" 实得列: {list(df.columns)}"
    )
72
+
73
+
74
def stamp(df: pd.DataFrame, *, source: str, **meta: object) -> pd.DataFrame:
    """Write provenance (``source`` plus any extra ``meta`` keys) into ``df.attrs``.

    The frame is modified in place and the same object is returned.
    """
    provenance: dict[str, object] = {"source": source}
    provenance.update(meta)
    df.attrs.update(provenance)
    return df
@@ -0,0 +1,35 @@
1
+ """线程安全的令牌桶限流器,用于对每个数据源做"友好爬取",降低被限频/封 IP 的风险。"""
2
+
3
+ from __future__ import annotations
4
+
5
+ import threading
6
+ import time
7
+
8
+
9
class TokenBucket:
    """Classic token bucket guarded by a lock.

    ``rate`` is the number of tokens added per second; ``capacity`` is the
    bucket size (defaults to ``rate``, but never less than 1).
    """

    def __init__(self, rate: float, capacity: float | None = None) -> None:
        if rate <= 0:
            raise ValueError(f"rate 必须 > 0,得到 {rate!r}")
        self.rate = float(rate)
        if capacity is None:
            # At least 1, otherwise a slow bucket (rate < 1) could never hand out
            # the single token acquire(1) asks for.
            capacity = max(rate, 1.0)
        self.capacity = float(capacity)
        self._tokens = self.capacity
        self._last = time.monotonic()
        self._lock = threading.Lock()

    def acquire(self, n: float = 1.0) -> None:
        """Block until ``n`` tokens have been taken from the bucket.

        Raises:
            ValueError: If ``n`` exceeds the bucket capacity (it could never be served).
        """
        if n > self.capacity:
            raise ValueError(f"单次请求 {n} 个令牌超过桶容量 {self.capacity}")
        while True:
            with self._lock:
                now = time.monotonic()
                refilled = self._tokens + (now - self._last) * self.rate
                self._tokens = min(self.capacity, refilled)
                self._last = now
                if self._tokens >= n:
                    self._tokens -= n
                    return
                # Time needed for the missing tokens to accumulate.
                delay = (n - self._tokens) / self.rate
            # Sleep outside the lock so other threads are not blocked meanwhile.
            time.sleep(delay)
probar/core/symbols.py ADDED
@@ -0,0 +1,89 @@
1
+ """证券代码归一化。
2
+
3
+ 统一内部表示为 ``Symbol(code, market)``,market ∈ {SH, SZ, BJ}。
4
+ 对外规范文本形如 ``600519.SH`` / ``000001.SZ``;并提供到各数据源的格式转换:
5
+
6
+ to_eastmoney_secid("600519.SH") -> "1.600519"
7
+ to_tdx("000001.SZ") -> (0, "000001")
8
+
9
+ 接受的输入:``600519`` / ``600519.SH`` / ``SH600519`` / ``sh.600519`` 等。
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from dataclasses import dataclass
15
+
16
# Canonical market codes used throughout probar.
SH = "SH"
SZ = "SZ"
BJ = "BJ"
_MARKETS = {SH, SZ, BJ}
18
+
19
+
20
@dataclass(frozen=True)
class Symbol:
    """Immutable (code, market) pair identifying one security."""

    code: str
    market: str  # SH / SZ / BJ

    @property
    def ts_code(self) -> str:
        """Canonical tushare-style code, e.g. ``600519.SH``."""
        return "{}.{}".format(self.code, self.market)

    def __str__(self) -> str:
        # The text form of a Symbol is its canonical code.
        return self.ts_code
32
+
33
+
34
def _infer_market(code: str) -> str:
    """Guess the exchange from the code prefix.

    Covers the common ranges (main board, ChiNext, STAR market, Beijing
    exchange, ETFs, convertible bonds, ...). Tables are checked in the order
    SH, SZ, BJ; anything unmatched falls back on the first digit.
    """
    prefix_table = (
        (SH, ("50", "51", "52", "56", "58", "60", "68", "90", "11", "70")),
        (SZ, ("00", "30", "12", "15", "16", "18", "20", "39", "13")),
        (BJ, ("43", "82", "83", "87", "88", "92")),
    )
    for market, prefixes in prefix_table:
        if code.startswith(prefixes):
            return market
    # Fallback: codes starting with 6 go to Shanghai, everything else to Shenzhen.
    if code[:1] == "6":
        return SH
    return SZ
44
+
45
+
46
def normalize(symbol: str) -> Symbol:
    """Normalize any common notation of a security code into a :class:`Symbol`.

    Accepted forms include ``600519``, ``600519.SH``, ``SH600519`` and
    ``sh.600519``. Full-width characters (as produced by CJK input methods,
    e.g. ``600519``) are folded to ASCII first, so they resolve to the same
    symbol as their ASCII spelling.

    Raises:
        ValueError: If, after removing an optional market prefix/suffix, the
            remainder is not a non-empty run of ASCII digits.
    """
    import unicodedata  # local import so this function stays self-contained

    # NFKC maps full-width digits, letters, dots and spaces to their ASCII
    # equivalents; plain ASCII input passes through unchanged.
    folded = unicodedata.normalize("NFKC", str(symbol))
    s = folded.strip().upper().replace(" ", "").replace(".", "")
    # SH600519 / 600519SH: strip the market prefix or suffix; what is left must
    # be a purely numeric code.
    if s[:2] in _MARKETS:
        code, market = s[2:], s[:2]
    elif s[-2:] in _MARKETS:
        code, market = s[:-2], s[-2:]
    else:
        code, market = s, None
    # str.isdigit() alone also accepts non-ASCII digits (superscripts, other
    # scripts), which would yield codes no data source understands.
    if not (code.isascii() and code.isdigit()):
        raise ValueError(f"无法解析证券代码: {symbol!r}")
    if market is None:
        market = _infer_market(code)
    return Symbol(code, market)
62
+
63
+
64
# Canonical market -> Eastmoney secid prefix.
_EM_MARKET = dict(zip((SH, SZ, BJ), ("1", "0", "0")))
# Canonical market <-> TDX numeric market id (0 = SZ, 1 = SH, 2 = BJ).
_TDX_MARKET = {market: number for number, market in enumerate((SZ, SH, BJ))}
_TDX_MARKET_REV = {number: market for market, number in _TDX_MARKET.items()}
67
+
68
+
69
def to_eastmoney_secid(symbol: str) -> str:
    """Convert any accepted notation to an Eastmoney secid, e.g. ``1.600519`` / ``0.000001``."""
    parsed = normalize(symbol)
    prefix = _EM_MARKET[parsed.market]
    return f"{prefix}.{parsed.code}"
73
+
74
+
75
def to_tdx(symbol: str) -> tuple[int, str]:
    """Convert any accepted notation to a TDX ``(market, code)`` pair.

    ``market`` is 0 for Shenzhen, 1 for Shanghai and 2 for Beijing.
    """
    parsed = normalize(symbol)
    market_id = _TDX_MARKET[parsed.market]
    return market_id, parsed.code
79
+
80
+
81
def from_tdx(market: int, code: str) -> Symbol:
    """Convert a TDX ``(market, code)`` pair into a :class:`Symbol`.

    ``market`` is 0 for Shenzhen, 1 for Shanghai and 2 for Beijing. Mapping the
    number back to a canonical market keeps TDX's numeric encoding out of the
    public API.

    Raises:
        ValueError: If ``market`` is not a recognised TDX market number.
    """
    try:
        code_text = str(code)
        market_name = _TDX_MARKET_REV[int(market)]
        return Symbol(code_text, market_name)
    except (KeyError, TypeError, ValueError):
        raise ValueError(f"无法识别的通达信 market={market!r} code={code!r}") from None
@@ -0,0 +1 @@
1
+ """probar 接口可视化测试台(可选,需 `pip install "probar[playground]"`)。"""
@@ -0,0 +1,24 @@
1
+ """`python -m probar.playground` 启动本地测试台。"""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+
7
+
8
def main() -> None:
    """Parse ``--host`` / ``--port`` and serve the playground app with uvicorn.

    Exits with an installation hint when ``uvicorn`` cannot be imported.
    """
    cli = argparse.ArgumentParser(description="probar 接口可视化测试台")
    cli.add_argument("--host", default="127.0.0.1")
    cli.add_argument("--port", type=int, default=8787)
    options = cli.parse_args()

    # uvicorn is imported only here, after argument parsing, so a missing
    # install produces the hint below instead of an import traceback.
    try:
        import uvicorn
    except ImportError as e:  # noqa: B904
        raise SystemExit("缺少依赖,请先安装:pip install \"probar[playground]\"") from e

    host, port = options.host, options.port
    print(f"probar 测试台: http://{host}:{port}")
    uvicorn.run("probar.playground.app:app", host=host, port=port, reload=False)


if __name__ == "__main__":
    main()