disprove 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- disprove-0.1.0/.github/workflows/verify.yml +20 -0
- disprove-0.1.0/.gitignore +8 -0
- disprove-0.1.0/LICENSE +21 -0
- disprove-0.1.0/PKG-INFO +115 -0
- disprove-0.1.0/README.md +95 -0
- disprove-0.1.0/docs/case-study.md +86 -0
- disprove-0.1.0/examples/run_synthetic_audit.py +86 -0
- disprove-0.1.0/pyproject.toml +43 -0
- disprove-0.1.0/src/disprove/__init__.py +45 -0
- disprove-0.1.0/src/disprove/audit.py +121 -0
- disprove-0.1.0/src/disprove/chunks.py +320 -0
- disprove-0.1.0/src/disprove/cli.py +195 -0
- disprove-0.1.0/src/disprove/costs.py +25 -0
- disprove-0.1.0/src/disprove/engine.py +115 -0
- disprove-0.1.0/src/disprove/features.py +16 -0
- disprove-0.1.0/src/disprove/metrics.py +205 -0
- disprove-0.1.0/src/disprove/signals.py +44 -0
- disprove-0.1.0/src/disprove/walkforward.py +24 -0
- disprove-0.1.0/tests/conftest.py +11 -0
- disprove-0.1.0/tests/test_backtest_costs.py +13 -0
- disprove-0.1.0/tests/test_backtest_engine.py +72 -0
- disprove-0.1.0/tests/test_backtest_engine_v2.py +207 -0
- disprove-0.1.0/tests/test_features_funding.py +41 -0
- disprove-0.1.0/tests/test_fixed_horizon_chunks.py +442 -0
- disprove-0.1.0/tests/test_funding_carry_chunk_audit.py +99 -0
- disprove-0.1.0/tests/test_stats_metrics.py +196 -0
- disprove-0.1.0/tests/test_stats_walkforward.py +17 -0
- disprove-0.1.0/tests/test_strategies_funding_carry.py +39 -0
- disprove-0.1.0/uv.lock +509 -0
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
name: verify
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: ["**"]
|
|
6
|
+
pull_request:
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
verify:
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
timeout-minutes: 10
|
|
12
|
+
steps:
|
|
13
|
+
- uses: actions/checkout@v4
|
|
14
|
+
- uses: astral-sh/setup-uv@v5
|
|
15
|
+
with:
|
|
16
|
+
enable-cache: true
|
|
17
|
+
- run: uv python install 3.12
|
|
18
|
+
- run: uv sync
|
|
19
|
+
- run: uv run ruff check .
|
|
20
|
+
- run: uv run pytest -q
|
disprove-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Vadims
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
disprove-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: disprove
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: The strategy evidence engine: honest PASS/WARN/BLOCK verdicts on trading strategies. Its credential: it blocked its own flagship strategy.
|
|
5
|
+
Project-URL: Homepage, https://github.com/866y4tb8hc-coder/disprove
|
|
6
|
+
Author: Vadims
|
|
7
|
+
License: MIT
|
|
8
|
+
License-File: LICENSE
|
|
9
|
+
Keywords: audit,backtesting,evidence,quant,trading,validation
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: Intended Audience :: Financial and Insurance Industry
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Topic :: Office/Business :: Financial :: Investment
|
|
15
|
+
Requires-Python: >=3.10
|
|
16
|
+
Requires-Dist: numpy>=1.26
|
|
17
|
+
Requires-Dist: polars>=1.0
|
|
18
|
+
Requires-Dist: scipy>=1.11
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
|
|
21
|
+
# Disprove
|
|
22
|
+
|
|
23
|
+
**The strategy evidence engine.** Disprove decides whether a trading strategy
|
|
24
|
+
has earned the right to risk money — and refuses when it hasn't.
|
|
25
|
+
|
|
26
|
+
```
|
|
27
|
+
pip install disprove
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Its credential: it blocked its own flagship strategy
|
|
31
|
+
|
|
32
|
+
Disprove was extracted from a private trading-research system whose best
|
|
33
|
+
strategy — delta-neutral funding-rate carry, 6.7 years of real Binance data,
|
|
34
|
+
**+11.8%/yr average** — looked fundable by every conventional measure. The
|
|
35
|
+
fixed-horizon audit returned **BLOCK**: only ~1 month in 3 was actually clean;
|
|
36
|
+
the average was carried by two hot regimes. Thresholds were anchored to
|
|
37
|
+
observables and committed in writing *before* the run. The verdict was
|
|
38
|
+
accepted and the strategy was never funded.
|
|
39
|
+
|
|
40
|
+
A validation tool that has never rejected its owner's favorite idea is a
|
|
41
|
+
rubber stamp. Read the full story: [docs/case-study.md](docs/case-study.md).
|
|
42
|
+
|
|
43
|
+
## 60-second demo (no keys, no data, no network)
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
python examples/run_synthetic_audit.py
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
```text
|
|
50
|
+
consistent -> PASS (4P/0W/0B of 4, pass_ratio=1.00, reasons=clean)
|
|
51
|
+
inconsistent -> WARN (1P/3W/0B of 4, pass_ratio=0.25, reasons=['pass_ratio_below_threshold'])
|
|
52
|
+
gapped -> BLOCK (3P/0W/2B of 5, pass_ratio=0.60, reasons=['blocked_chunks_present'])
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
The third line is the point: a **profitable-looking history with missing data
|
|
56
|
+
is blocked on integrity** — before performance is even considered.
|
|
57
|
+
|
|
58
|
+
## What it does
|
|
59
|
+
|
|
60
|
+
1. **Fixed-horizon chunks.** Your strategy's history is split into fixed
|
|
61
|
+
calendar windows (default 30 days). Every window between first and last
|
|
62
|
+
observation is emitted — missing history cannot disappear from the count.
|
|
63
|
+
2. **Integrity blocks.** Partial windows, data gaps, and thin chunks BLOCK.
|
|
64
|
+
Untrustworthy evidence is not neutral; it is disqualifying.
|
|
65
|
+
3. **Per-chunk verdicts.** PASS needs positive fee-adjusted PnL and bounded
|
|
66
|
+
drawdown *inside each window* — a pretty average cannot hide bad months.
|
|
67
|
+
4. **An overall verdict you committed to in advance.** Anchor your thresholds
|
|
68
|
+
to observables (your max drawdown, your monthly expectancy), write them
|
|
69
|
+
down, then run. `evaluate_overall_verdict` does the rest.
|
|
70
|
+
|
|
71
|
+
```python
|
|
72
|
+
from disprove import (
|
|
73
|
+
CostModel, CarryAuditPolicy, FixedHorizonChunkPolicy,
|
|
74
|
+
audit_funding_carry_chunks,
|
|
75
|
+
)
|
|
76
|
+
|
|
77
|
+
audit = audit_funding_carry_chunks(
|
|
78
|
+
funding_frame, # polars: funding_time (ms), funding_rate
|
|
79
|
+
cost_model=CostModel(spot_fee_bps=10, perp_fee_bps=5, half_spread_bps=1, slippage_bps=2),
|
|
80
|
+
policy=CarryAuditPolicy(window=6, notional_usdt=10_000),
|
|
81
|
+
chunk_policy=FixedHorizonChunkPolicy(
|
|
82
|
+
catastrophic_net_loss_usdt=-150.0, # anchor: your full-history maxDD
|
|
83
|
+
max_pass_drawdown_usdt=40.0, # anchor: one month's expected profit
|
|
84
|
+
),
|
|
85
|
+
horizon_ms=30 * 24 * 3600 * 1000,
|
|
86
|
+
)
|
|
87
|
+
print(audit.overall.verdict) # PASS | WARN | BLOCK — with receipts
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
Also included: cost-charged vectorized carry backtests (`run_backtest`,
|
|
91
|
+
basis-aware `run_backtest_v2`), deflated Sharpe and walk-forward splits
|
|
92
|
+
(`disprove.metrics`, `disprove.walkforward`), and a CLI (`disprove --help`)
|
|
93
|
+
that writes auditable Markdown + CSV verdict artifacts.
|
|
94
|
+
|
|
95
|
+
## What this is not
|
|
96
|
+
|
|
97
|
+
- Not a signal service. Not a profit bot. **No profitability claims** — the
|
|
98
|
+
flagship case study documents a rejection; that is the product.
|
|
99
|
+
- Not a full backtesting framework: it is the *judgment layer* you put on top
|
|
100
|
+
of whichever one you use.
|
|
101
|
+
|
|
102
|
+
## Provenance
|
|
103
|
+
|
|
104
|
+
Extracted from a private research system with 1,000+ tests; this public core
|
|
105
|
+
ships with its own test suite and CI. The private system also rejected, with
|
|
106
|
+
receipts: orderbook scalping for non-colocated operators (fees + adverse
|
|
107
|
+
selection) and cross-sectional funding rotation (turnover costs).
|
|
108
|
+
|
|
109
|
+
## Paid: independent audits
|
|
110
|
+
|
|
111
|
+
The author runs this gate against client strategies under pre-committed
|
|
112
|
+
thresholds — your rules, your data window, an honest verdict with receipts.
|
|
113
|
+
Contact via GitHub issues or the email in the commit log.
|
|
114
|
+
|
|
115
|
+
MIT licensed.
|
disprove-0.1.0/README.md
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
# Disprove
|
|
2
|
+
|
|
3
|
+
**The strategy evidence engine.** Disprove decides whether a trading strategy
|
|
4
|
+
has earned the right to risk money — and refuses when it hasn't.
|
|
5
|
+
|
|
6
|
+
```
|
|
7
|
+
pip install disprove
|
|
8
|
+
```
|
|
9
|
+
|
|
10
|
+
## Its credential: it blocked its own flagship strategy
|
|
11
|
+
|
|
12
|
+
Disprove was extracted from a private trading-research system whose best
|
|
13
|
+
strategy — delta-neutral funding-rate carry, 6.7 years of real Binance data,
|
|
14
|
+
**+11.8%/yr average** — looked fundable by every conventional measure. The
|
|
15
|
+
fixed-horizon audit returned **BLOCK**: only ~1 month in 3 was actually clean;
|
|
16
|
+
the average was carried by two hot regimes. Thresholds were anchored to
|
|
17
|
+
observables and committed in writing *before* the run. The verdict was
|
|
18
|
+
accepted and the strategy was never funded.
|
|
19
|
+
|
|
20
|
+
A validation tool that has never rejected its owner's favorite idea is a
|
|
21
|
+
rubber stamp. Read the full story: [docs/case-study.md](docs/case-study.md).
|
|
22
|
+
|
|
23
|
+
## 60-second demo (no keys, no data, no network)
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
python examples/run_synthetic_audit.py
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
```text
|
|
30
|
+
consistent -> PASS (4P/0W/0B of 4, pass_ratio=1.00, reasons=clean)
|
|
31
|
+
inconsistent -> WARN (1P/3W/0B of 4, pass_ratio=0.25, reasons=['pass_ratio_below_threshold'])
|
|
32
|
+
gapped -> BLOCK (3P/0W/2B of 5, pass_ratio=0.60, reasons=['blocked_chunks_present'])
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
The third line is the point: a **profitable-looking history with missing data
|
|
36
|
+
is blocked on integrity** — before performance is even considered.
|
|
37
|
+
|
|
38
|
+
## What it does
|
|
39
|
+
|
|
40
|
+
1. **Fixed-horizon chunks.** Your strategy's history is split into fixed
|
|
41
|
+
calendar windows (default 30 days). Every window between first and last
|
|
42
|
+
observation is emitted — missing history cannot disappear from the count.
|
|
43
|
+
2. **Integrity blocks.** Partial windows, data gaps, and thin chunks BLOCK.
|
|
44
|
+
Untrustworthy evidence is not neutral; it is disqualifying.
|
|
45
|
+
3. **Per-chunk verdicts.** PASS needs positive fee-adjusted PnL and bounded
|
|
46
|
+
drawdown *inside each window* — a pretty average cannot hide bad months.
|
|
47
|
+
4. **An overall verdict you committed to in advance.** Anchor your thresholds
|
|
48
|
+
to observables (your max drawdown, your monthly expectancy), write them
|
|
49
|
+
down, then run. `evaluate_overall_verdict` does the rest.
|
|
50
|
+
|
|
51
|
+
```python
|
|
52
|
+
from disprove import (
|
|
53
|
+
CostModel, CarryAuditPolicy, FixedHorizonChunkPolicy,
|
|
54
|
+
audit_funding_carry_chunks,
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
audit = audit_funding_carry_chunks(
|
|
58
|
+
funding_frame, # polars: funding_time (ms), funding_rate
|
|
59
|
+
cost_model=CostModel(spot_fee_bps=10, perp_fee_bps=5, half_spread_bps=1, slippage_bps=2),
|
|
60
|
+
policy=CarryAuditPolicy(window=6, notional_usdt=10_000),
|
|
61
|
+
chunk_policy=FixedHorizonChunkPolicy(
|
|
62
|
+
catastrophic_net_loss_usdt=-150.0, # anchor: your full-history maxDD
|
|
63
|
+
max_pass_drawdown_usdt=40.0, # anchor: one month's expected profit
|
|
64
|
+
),
|
|
65
|
+
horizon_ms=30 * 24 * 3600 * 1000,
|
|
66
|
+
)
|
|
67
|
+
print(audit.overall.verdict) # PASS | WARN | BLOCK — with receipts
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
Also included: cost-charged vectorized carry backtests (`run_backtest`,
|
|
71
|
+
basis-aware `run_backtest_v2`), deflated Sharpe and walk-forward splits
|
|
72
|
+
(`disprove.metrics`, `disprove.walkforward`), and a CLI (`disprove --help`)
|
|
73
|
+
that writes auditable Markdown + CSV verdict artifacts.
|
|
74
|
+
|
|
75
|
+
## What this is not
|
|
76
|
+
|
|
77
|
+
- Not a signal service. Not a profit bot. **No profitability claims** — the
|
|
78
|
+
flagship case study documents a rejection; that is the product.
|
|
79
|
+
- Not a full backtesting framework: it is the *judgment layer* you put on top
|
|
80
|
+
of whichever one you use.
|
|
81
|
+
|
|
82
|
+
## Provenance
|
|
83
|
+
|
|
84
|
+
Extracted from a private research system with 1,000+ tests; this public core
|
|
85
|
+
ships with its own test suite and CI. The private system also rejected, with
|
|
86
|
+
receipts: orderbook scalping for non-colocated operators (fees + adverse
|
|
87
|
+
selection) and cross-sectional funding rotation (turnover costs).
|
|
88
|
+
|
|
89
|
+
## Paid: independent audits
|
|
90
|
+
|
|
91
|
+
The author runs this gate against client strategies under pre-committed
|
|
92
|
+
thresholds — your rules, your data window, an honest verdict with receipts.
|
|
93
|
+
Contact via GitHub issues or the email in the commit log.
|
|
94
|
+
|
|
95
|
+
MIT licensed.
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
# Case Study: The Gate That Blocked Its Own Strategy
|
|
2
|
+
|
|
3
|
+
*Buyer-facing artifact. Every number below comes from committed, reproducible
|
|
4
|
+
runs; nothing is adjusted for presentation. Reproduction commands included.*
|
|
5
|
+
|
|
6
|
+
## The setup
|
|
7
|
+
|
|
8
|
+
The engine's flagship strategy candidate was delta-neutral funding-rate carry on
|
|
9
|
+
BTCUSDT — a documented structural premium, tested against 6.72 years of real
|
|
10
|
+
Binance funding history (7,363 settlement intervals, 2019-2026).
|
|
11
|
+
|
|
12
|
+
The aggregate evidence looked good:
|
|
13
|
+
|
|
14
|
+
- Always-in harvest, full history: **+11.8%/yr**, max drawdown **-1.5%**
|
|
15
|
+
- Walk-forward out-of-sample (config chosen on train folds only): **+5.9%/yr**
|
|
16
|
+
- Survived a basis-drift audit (v2 engine: basis mark-to-market ≈ neutral)
|
|
17
|
+
- Survived a look-ahead audit (strict one-period signal lagging moved OOS
|
|
18
|
+
returns by only -0.16pp/yr — measured, not argued)
|
|
19
|
+
|
|
20
|
+
Most validation systems would fund this. Disprove blocked it.
|
|
21
|
+
|
|
22
|
+
## The gate
|
|
23
|
+
|
|
24
|
+
The fixed-horizon chunk audit splits the entire history into 30-day windows
|
|
25
|
+
and demands that the strategy's core claim — "the edge is in continuously
|
|
26
|
+
being in the position" — hold *month by month*, not just on average:
|
|
27
|
+
|
|
28
|
+
- any chunk with missing data, a partial window, or too few observations
|
|
29
|
+
**blocks** (untrustworthy evidence is not neutral; it is disqualifying);
|
|
30
|
+
- a chunk losing more than the strategy's entire historical max drawdown
|
|
31
|
+
**blocks** (catastrophic);
|
|
32
|
+
- a clean PASS requires positive net PnL with drawdown bounded by one month's
|
|
33
|
+
expected profit;
|
|
34
|
+
- at least 70% of chunks must PASS.
|
|
35
|
+
|
|
36
|
+
Thresholds were anchored to observables outside the gate (full-history maxDD,
|
|
37
|
+
lean-regime monthly expectancy) and **committed in writing before the run**
|
|
38
|
+
(recorded in the calibration record) — they cannot be tuned after seeing
|
|
39
|
+
the verdict.
|
|
40
|
+
|
|
41
|
+
## The verdict
|
|
42
|
+
|
|
43
|
+
| Setup | Verdict | PASS/WARN/BLOCK | Pass ratio (need ≥ 0.70) |
|
|
44
|
+
|---|---|---|---|
|
|
45
|
+
| BTCUSDT, 6-period signal | **BLOCK** | 22 / 50 / 10 of 82 | 0.268 |
|
|
46
|
+
| BTCUSDT, 12-period signal | **BLOCK** | 29 / 52 / 1 of 82 | 0.354 |
|
|
47
|
+
| ETHUSDT, 6-period signal | **BLOCK** | 23 / 45 / 12 of 80 | 0.287 |
|
|
48
|
+
|
|
49
|
+
The diagnosis is precise: catastrophic months are rare (1 of 82 in the best
|
|
50
|
+
configuration), but **roughly two out of three months earn nothing**. The
|
|
51
|
+
attractive average is carried by a few hot regimes (2020-2021). A strategy
|
|
52
|
+
whose monthly consistency fails does not get promoted — it stays in research,
|
|
53
|
+
unfunded.
|
|
54
|
+
|
|
55
|
+
## Why this matters to you
|
|
56
|
+
|
|
57
|
+
1. **The gate is adversarial to its owner.** It was pointed at the project's
|
|
58
|
+
own best work, with pre-committed thresholds, and the BLOCK was accepted.
|
|
59
|
+
That is the behavior you need from a validation layer — and the behavior
|
|
60
|
+
retail backtest tooling structurally cannot offer you.
|
|
61
|
+
2. **Integrity outranks PnL.** A profitable-looking history with data gaps
|
|
62
|
+
blocks on integrity before performance is even considered. (60-second
|
|
63
|
+
demo: `uv run python examples/run_synthetic_audit.py`.)
|
|
64
|
+
3. **Everything is reproducible.** Each verdict ships as a Markdown + CSV
|
|
65
|
+
artifact recording the data range, method, policy, and per-chunk numbers
|
|
66
|
+
(kept in the private evidence archive; the verdict tables are reproduced above); the audit reruns deterministically from one
|
|
67
|
+
command.
|
|
68
|
+
|
|
69
|
+
## The same gate, on your strategy
|
|
70
|
+
|
|
71
|
+
This pipeline is what the **Strategy Evidence Audit** runs against client
|
|
72
|
+
strategies (see the paid audit offer): your rules, your data window, an
|
|
73
|
+
anchored policy agreed *before* the run, and an honest PASS/WARN/BLOCK with
|
|
74
|
+
the receipts.
|
|
75
|
+
|
|
76
|
+
## Reproduce it
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
# with your own funding-rate parquet (funding_time ms, funding_rate):
|
|
80
|
+
disprove --funding funding_BTCUSDT.parquet --symbol BTCUSDT \
|
|
81
|
+
--setup-id carry:BTCUSDT:w12 --window 12 \
|
|
82
|
+
--catastrophic-net-loss-usdt -150 --max-pass-drawdown-usdt 40
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
*Disclosure: Disprove makes no profitability claims. This document describes a
|
|
86
|
+
rejection. That is the product.*
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"""Quickstart: run the fixed-horizon evidence gate on synthetic data.
|
|
2
|
+
|
|
3
|
+
No exchange account, no API keys, no real market data, no network — this is
|
|
4
|
+
the fastest way to see what Disprove delivers: an auditable PASS/WARN/BLOCK verdict
|
|
5
|
+
over strategy evidence.
|
|
6
|
+
|
|
7
|
+
Run:
|
|
8
|
+
python examples/run_synthetic_audit.py
|
|
9
|
+
|
|
10
|
+
It builds three synthetic funding-rate histories (a consistent earner, an
|
|
11
|
+
inconsistent one, and one with a data gap), runs the same audit pipeline used
|
|
12
|
+
on real evidence, and prints the three verdicts. Expected output ends with:
|
|
13
|
+
consistent -> PASS
|
|
14
|
+
inconsistent-> WARN or BLOCK
|
|
15
|
+
gapped -> BLOCK (data integrity, not performance)
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
import polars as pl
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
from disprove.costs import CostModel
|
|
26
|
+
from disprove.chunks import FixedHorizonChunkPolicy
|
|
27
|
+
from disprove.audit import audit_funding_carry_chunks
|
|
28
|
+
from disprove.audit import CarryAuditPolicy
|
|
29
|
+
|
|
30
|
+
EIGHT_H_MS = 8 * 3600 * 1000  # eight hours in milliseconds; spacing of the synthetic funding grid
|
|
31
|
+
ZERO_COST = CostModel(spot_fee_bps=0, perp_fee_bps=0, half_spread_bps=0, slippage_bps=0)  # every fee/spread/slippage input is 0 bps for the demo
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _frame(rates: list[float], times: list[int] | None = None) -> pl.DataFrame:
    """Build a funding frame with ``funding_time`` and ``funding_rate`` columns.

    When ``times`` is omitted, one timestamp per rate is generated on a
    gap-free grid that starts at 0 and advances by ``EIGHT_H_MS``.
    """
    if times is None:
        times = [slot * EIGHT_H_MS for slot, _ in enumerate(rates)]
    return pl.DataFrame({"funding_time": times, "funding_rate": rates})
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _audit(frame: pl.DataFrame):
    """Run the demo audit on ``frame`` and return the full audit result.

    Uses the zero-cost model, a 2-period signal window and a 70% pass-ratio
    requirement so the three synthetic histories are judged by the same rules.
    """
    carry_policy = CarryAuditPolicy(
        window=2, expected_holding_periods=30, notional_usdt=10_000
    )
    gate_policy = FixedHorizonChunkPolicy(
        min_observations=30,
        catastrophic_net_loss_usdt=-150.0,
        max_pass_drawdown_usdt=40.0,
        min_pass_ratio=0.70,
    )
    # 30 funding intervals of 8h each: 10-day chunks for a small demo.
    demo_horizon_ms = 30 * EIGHT_H_MS
    return audit_funding_carry_chunks(
        frame,
        cost_model=ZERO_COST,
        policy=carry_policy,
        chunk_policy=gate_policy,
        horizon_ms=demo_horizon_ms,
    )
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def main() -> None:
    """Audit three synthetic funding histories and print one verdict line each."""
    # Timestamps for the gapped case: grid slots 50-54 are absent, so the
    # 120 rates land on slots 0-49 and 55-124.
    gapped_times = [slot * EIGHT_H_MS for slot in (*range(50), *range(55, 125))]

    scenarios = {
        # 1. A consistent earner: small positive funding every period.
        "consistent": _frame([0.0005] * 120),
        # 2. An inconsistent earner: one hot stretch, then nothing — the
        #    average looks fine, the chunk gate sees the truth.
        "inconsistent": _frame([0.002] * 30 + [-0.0001] * 90),
        # 3. A gapped history: looks profitable, but evidence is missing —
        #    the gate refuses to bless it regardless of PnL.
        "gapped": _frame([0.0005] * 120, gapped_times),
    }

    for label, frame in scenarios.items():
        o = _audit(frame).overall
        print(
            f"{label:<12} -> {o.verdict:<5} "
            f"({o.chunks_pass}P/{o.chunks_warn}W/{o.chunks_block}B of {o.chunks_total}, "
            f"pass_ratio={o.pass_ratio:.2f}, reasons={list(o.reasons) or 'clean'})"
        )
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
if __name__ == "__main__":
|
|
86
|
+
main()
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "disprove"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "The strategy evidence engine: honest PASS/WARN/BLOCK verdicts on trading strategies. Its credential: it blocked its own flagship strategy."
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.10"
|
|
7
|
+
license = { text = "MIT" }
|
|
8
|
+
authors = [{ name = "Vadims" }]
|
|
9
|
+
keywords = ["trading", "backtesting", "validation", "evidence", "quant", "audit"]
|
|
10
|
+
classifiers = [
|
|
11
|
+
"Development Status :: 4 - Beta",
|
|
12
|
+
"Intended Audience :: Financial and Insurance Industry",
|
|
13
|
+
"License :: OSI Approved :: MIT License",
|
|
14
|
+
"Programming Language :: Python :: 3",
|
|
15
|
+
"Topic :: Office/Business :: Financial :: Investment",
|
|
16
|
+
]
|
|
17
|
+
dependencies = [
|
|
18
|
+
"polars>=1.0",
|
|
19
|
+
"numpy>=1.26",
|
|
20
|
+
"scipy>=1.11",
|
|
21
|
+
]
|
|
22
|
+
|
|
23
|
+
[project.scripts]
|
|
24
|
+
disprove = "disprove.cli:main"
|
|
25
|
+
|
|
26
|
+
[project.urls]
|
|
27
|
+
Homepage = "https://github.com/866y4tb8hc-coder/disprove"
|
|
28
|
+
|
|
29
|
+
[build-system]
|
|
30
|
+
requires = ["hatchling"]
|
|
31
|
+
build-backend = "hatchling.build"
|
|
32
|
+
|
|
33
|
+
[tool.hatch.build.targets.wheel]
|
|
34
|
+
packages = ["src/disprove"]
|
|
35
|
+
|
|
36
|
+
[tool.pytest.ini_options]
|
|
37
|
+
testpaths = ["tests"]
|
|
38
|
+
|
|
39
|
+
[tool.ruff]
|
|
40
|
+
line-length = 100
|
|
41
|
+
|
|
42
|
+
[dependency-groups]
|
|
43
|
+
dev = ["pytest>=8.0", "ruff>=0.4"]
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
"""Disprove — the strategy evidence engine.
|
|
2
|
+
|
|
3
|
+
Honest PASS/WARN/BLOCK verdicts on trading strategies: cost-charged
|
|
4
|
+
accounting, fixed-horizon consistency chunks, data-integrity blocks, and
|
|
5
|
+
thresholds you commit to before seeing the result.
|
|
6
|
+
|
|
7
|
+
Its credential: it blocked its own flagship strategy.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from disprove.audit import CarryAuditPolicy, FundingCarryChunkAudit, audit_funding_carry_chunks
|
|
11
|
+
from disprove.chunks import (
|
|
12
|
+
FixedHorizonChunk,
|
|
13
|
+
FixedHorizonChunkMetrics,
|
|
14
|
+
FixedHorizonChunkPolicy,
|
|
15
|
+
FixedHorizonChunkVerdict,
|
|
16
|
+
FixedHorizonOverallVerdict,
|
|
17
|
+
compute_chunk_metrics,
|
|
18
|
+
evaluate_chunk_verdict,
|
|
19
|
+
evaluate_overall_verdict,
|
|
20
|
+
split_fixed_horizon_chunks,
|
|
21
|
+
)
|
|
22
|
+
from disprove.costs import CostModel
|
|
23
|
+
from disprove.engine import BacktestResult, run_backtest, run_backtest_v2
|
|
24
|
+
|
|
25
|
+
__version__ = "0.1.0"  # duplicates `version` in pyproject.toml; bump both together
|
|
26
|
+
|
|
27
|
+
__all__ = [
|
|
28
|
+
"CarryAuditPolicy",
|
|
29
|
+
"FundingCarryChunkAudit",
|
|
30
|
+
"audit_funding_carry_chunks",
|
|
31
|
+
"FixedHorizonChunk",
|
|
32
|
+
"FixedHorizonChunkMetrics",
|
|
33
|
+
"FixedHorizonChunkPolicy",
|
|
34
|
+
"FixedHorizonChunkVerdict",
|
|
35
|
+
"FixedHorizonOverallVerdict",
|
|
36
|
+
"compute_chunk_metrics",
|
|
37
|
+
"evaluate_chunk_verdict",
|
|
38
|
+
"evaluate_overall_verdict",
|
|
39
|
+
"split_fixed_horizon_chunks",
|
|
40
|
+
"CostModel",
|
|
41
|
+
"BacktestResult",
|
|
42
|
+
"run_backtest",
|
|
43
|
+
"run_backtest_v2",
|
|
44
|
+
"__version__",
|
|
45
|
+
]
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
"""Fixed-horizon chunk audit for the funding-carry strategy.
|
|
2
|
+
|
|
3
|
+
Wires the (previously orphaned) fixed-horizon chunk verdict engine to real
|
|
4
|
+
funding-carry accounting: per-funding-period net PnL computed with the SAME
|
|
5
|
+
signal derivation as the paper-forward journal builder and the SAME engine
|
|
6
|
+
arithmetic as the backtester. The output is the promotion-grade
|
|
7
|
+
PASS/WARN/BLOCK gate artifact source.
|
|
8
|
+
|
|
9
|
+
Method (deterministic, no I/O):
|
|
10
|
+
1. positions = regime_funding_carry_signal(features, cost-derived hurdle)
|
|
11
|
+
2. pnl_series = run_backtest(funding_rate, positions, cost_model, notional)
|
|
12
|
+
3. observations = (funding_time, per-period net PnL) on the funding grid —
|
|
13
|
+
flat periods contribute 0.0 (breakeven, never a win), so calendar gaps in
|
|
14
|
+
the DATA (not in activity) are what the chunk integrity checks flag.
|
|
15
|
+
4. split -> per-chunk metrics -> per-chunk verdicts -> overall verdict.
|
|
16
|
+
"""
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
from dataclasses import dataclass
|
|
20
|
+
|
|
21
|
+
import polars as pl
|
|
22
|
+
|
|
23
|
+
from disprove.costs import CostModel
|
|
24
|
+
from disprove.engine import run_backtest
|
|
25
|
+
from disprove.features import compute_funding_features
|
|
26
|
+
from disprove.chunks import (
|
|
27
|
+
FixedHorizonChunk,
|
|
28
|
+
FixedHorizonChunkMetrics,
|
|
29
|
+
FixedHorizonChunkPolicy,
|
|
30
|
+
FixedHorizonChunkVerdict,
|
|
31
|
+
FixedHorizonOverallVerdict,
|
|
32
|
+
compute_chunk_metrics,
|
|
33
|
+
evaluate_chunk_verdict,
|
|
34
|
+
evaluate_overall_verdict,
|
|
35
|
+
split_fixed_horizon_chunks,
|
|
36
|
+
)
|
|
37
|
+
from disprove.signals import regime_funding_carry_signal
|
|
38
|
+
|
|
39
|
+
@dataclass(frozen=True)
class CarryAuditPolicy:
    """Signal/accounting parameters for the carry audit.

    window: trailing funding-rate mean window (periods).
    expected_holding_periods: amortization horizon for the cost-derived hurdle.
    notional_usdt: audited notional.

    Raises:
        ValueError: at construction time if any field is not strictly
            positive. NaN is rejected as well, since it is not positive and
            would otherwise propagate silently into the hurdle and PnL.
    """

    window: int = 6
    expected_holding_periods: float = 30.0
    notional_usdt: float = 10_000.0

    def __post_init__(self) -> None:
        # Written as ``not (x > 0)`` rather than ``x <= 0``: every ordering
        # comparison with NaN is False, so the ``<=`` form lets NaN through.
        if not (self.window > 0):
            raise ValueError("window must be positive")
        if not (self.expected_holding_periods > 0):
            raise ValueError("expected_holding_periods must be positive")
        if not (self.notional_usdt > 0):
            raise ValueError("notional_usdt must be positive")
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
DEFAULT_FUNDING_INTERVAL_MS = 8 * 3600 * 1000  # 8 hours in ms; default for expected_interval_ms
|
|
62
|
+
|
|
63
|
+
# Binance funding timestamps jitter by tens of milliseconds around the exact
|
|
64
|
+
# 8h grid (observed: ±47ms over 7,362 BTCUSDT intervals). The gap detector
|
|
65
|
+
# compares strictly against expected_interval_ms, so without tolerance ~30% of
|
|
66
|
+
# perfectly normal intervals read as data gaps. One minute is far above any
|
|
67
|
+
# observed jitter and far below a genuinely missed funding interval.
|
|
68
|
+
DEFAULT_JITTER_TOLERANCE_MS = 60 * 1000  # one minute in ms; added to expected_interval_ms before chunk splitting
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@dataclass(frozen=True)
class FundingCarryChunkAudit:
    """Immutable bundle of every audit layer, kept for the report writers.

    ``chunks``, ``metrics`` and ``verdicts`` are built chunk-by-chunk by
    ``audit_funding_carry_chunks``; ``overall`` is the verdict derived from
    the per-chunk verdicts.
    """

    # The fixed-horizon chunks produced by the splitter.
    chunks: tuple[FixedHorizonChunk, ...]
    # Metrics computed for each chunk.
    metrics: tuple[FixedHorizonChunkMetrics, ...]
    # Verdict evaluated for each (chunk, metrics) pair.
    verdicts: tuple[FixedHorizonChunkVerdict, ...]
    # Aggregate verdict over all per-chunk verdicts.
    overall: FixedHorizonOverallVerdict
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def audit_funding_carry_chunks(
    funding: pl.DataFrame,
    *,
    cost_model: CostModel,
    policy: CarryAuditPolicy,
    chunk_policy: FixedHorizonChunkPolicy | None = None,
    horizon_ms: int,
    expected_interval_ms: int = DEFAULT_FUNDING_INTERVAL_MS,
) -> FundingCarryChunkAudit:
    """Run the fixed-horizon chunk audit over a funding-rate history.

    ``funding`` must provide ``funding_time`` (ms) and ``funding_rate``
    columns. The per-period hurdle is the cost model's position-change cost
    divided by ``policy.expected_holding_periods``; the regime signal built
    from it drives ``run_backtest``, whose ``pnl_series`` is paired with the
    funding timestamps and split into ``horizon_ms`` chunks.

    ``DEFAULT_JITTER_TOLERANCE_MS`` is added to ``expected_interval_ms``
    before splitting, so timestamp jitter is not read as a data gap.

    Returns the chunks, per-chunk metrics, per-chunk verdicts and the overall
    verdict. ``chunk_policy`` is forwarded unchanged (including ``None``) to
    the verdict functions.

    The original contract states that unsorted/duplicate timestamps raise
    (data-integrity failure). That check is not in this body — presumably it
    lives in ``split_fixed_horizon_chunks``; confirm there.
    """
    timestamps = funding["funding_time"].to_list()

    # Signal: trailing-window features gated by a cost-derived hurdle.
    funding_features = compute_funding_features(funding, window=policy.window)
    per_period_hurdle = cost_model.position_change_bps() / policy.expected_holding_periods
    positions = regime_funding_carry_signal(
        funding_features,
        window=policy.window,
        hurdle_bps_per_period=per_period_hurdle,
    )

    # Accounting: the backtest engine produces the per-period PnL series.
    backtest = run_backtest(
        funding["funding_rate"], positions, cost_model, policy.notional_usdt
    )
    observations = [
        (stamp, float(pnl)) for stamp, pnl in zip(timestamps, backtest.pnl_series)
    ]

    tolerant_interval_ms = expected_interval_ms + DEFAULT_JITTER_TOLERANCE_MS
    chunks = split_fixed_horizon_chunks(
        observations,
        horizon_ms=horizon_ms,
        expected_interval_ms=tolerant_interval_ms,
    )

    metrics = tuple(map(compute_chunk_metrics, chunks))
    chunk_verdicts = []
    for chunk, chunk_metrics in zip(chunks, metrics):
        chunk_verdicts.append(
            evaluate_chunk_verdict(chunk, chunk_metrics, policy=chunk_policy)
        )
    verdicts = tuple(chunk_verdicts)

    return FundingCarryChunkAudit(
        chunks=chunks,
        metrics=metrics,
        verdicts=verdicts,
        overall=evaluate_overall_verdict(verdicts, policy=chunk_policy),
    )
|