panelbeater 0.0.17__tar.gz → 0.2.12__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {panelbeater-0.0.17/panelbeater.egg-info → panelbeater-0.2.12}/PKG-INFO +16 -3
- {panelbeater-0.0.17 → panelbeater-0.2.12}/README.md +7 -1
- panelbeater-0.2.12/panelbeater/__init__.py +18 -0
- panelbeater-0.2.12/panelbeater/__main__.py +100 -0
- panelbeater-0.2.12/panelbeater/copula.py +65 -0
- {panelbeater-0.0.17 → panelbeater-0.2.12}/panelbeater/download.py +39 -5
- {panelbeater-0.0.17 → panelbeater-0.2.12}/panelbeater/features.py +3 -5
- panelbeater-0.2.12/panelbeater/fit.py +35 -0
- panelbeater-0.2.12/panelbeater/normalizer.py +92 -0
- panelbeater-0.2.12/panelbeater/options.py +352 -0
- panelbeater-0.2.12/panelbeater/simulate.py +87 -0
- panelbeater-0.2.12/panelbeater/trades.py +40 -0
- panelbeater-0.2.12/panelbeater/wt.py +18 -0
- {panelbeater-0.0.17 → panelbeater-0.2.12/panelbeater.egg-info}/PKG-INFO +16 -3
- {panelbeater-0.0.17 → panelbeater-0.2.12}/panelbeater.egg-info/SOURCES.txt +6 -0
- panelbeater-0.2.12/panelbeater.egg-info/requires.txt +16 -0
- panelbeater-0.0.17/panelbeater.egg-info/requires.txt → panelbeater-0.2.12/requirements.txt +8 -1
- {panelbeater-0.0.17 → panelbeater-0.2.12}/setup.py +1 -1
- panelbeater-0.0.17/panelbeater/__init__.py +0 -3
- panelbeater-0.0.17/panelbeater/__main__.py +0 -95
- panelbeater-0.0.17/panelbeater/normalizer.py +0 -64
- panelbeater-0.0.17/requirements.txt +0 -9
- {panelbeater-0.0.17 → panelbeater-0.2.12}/LICENSE +0 -0
- {panelbeater-0.0.17 → panelbeater-0.2.12}/MANIFEST.in +0 -0
- {panelbeater-0.0.17 → panelbeater-0.2.12}/panelbeater.egg-info/dependency_links.txt +0 -0
- {panelbeater-0.0.17 → panelbeater-0.2.12}/panelbeater.egg-info/entry_points.txt +0 -0
- {panelbeater-0.0.17 → panelbeater-0.2.12}/panelbeater.egg-info/not-zip-safe +0 -0
- {panelbeater-0.0.17 → panelbeater-0.2.12}/panelbeater.egg-info/top_level.txt +0 -0
- {panelbeater-0.0.17 → panelbeater-0.2.12}/setup.cfg +0 -0
{panelbeater-0.0.17/panelbeater.egg-info → panelbeater-0.2.12}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: panelbeater
-Version: 0.0.17
+Version: 0.2.12
 Summary: A CLI for finding mispriced options.
 Home-page: https://github.com/8W9aG/panelbeater
 Author: Will Sackfield
@@ -18,8 +18,15 @@ Requires-Dist: numpy>=2.2.6
 Requires-Dist: feature-engine>=1.9.3
 Requires-Dist: requests-cache>=1.2.1
 Requires-Dist: scikit-learn>=1.6.1
-Requires-Dist: wavetrainer>=0.
+Requires-Dist: wavetrainer>=0.3.4
 Requires-Dist: tqdm>=4.67.1
+Requires-Dist: pyvinecopulib>=0.7.5
+Requires-Dist: fredapi>=0.5.2
+Requires-Dist: python-dotenv>=1.1.0
+Requires-Dist: kaleido>=1.2.0
+Requires-Dist: plotly>=6.3.1
+Requires-Dist: scipy>=1.16.3
+Requires-Dist: joblib>=1.5.2
 
 # panelbeater
 
@@ -42,6 +49,12 @@ Python 3.11.6:
 - [scikit-learn](https://scikit-learn.org/stable/)
 - [wavetrainer](https://github.com/8W9aG/wavetrainer/)
 - [tqdm](https://tqdm.github.io/)
+- [pyvinecopulib](https://github.com/vinecopulib/pyvinecopulib)
+- [python-dotenv](https://saurabh-kumar.com/python-dotenv/)
+- [kaleido](https://github.com/plotly/kaleido)
+- [plotly](https://plotly.com/)
+- [scipy](https://scipy.org/)
+- [joblib](https://joblib.readthedocs.io/en/stable/)
 
 ## Raison D'être :thought_balloon:
 
@@ -52,7 +65,7 @@ Python 3.11.6:
 `panelbeater` goes through the following steps:
 1. Downloads the historical data.
 2. Performs feature engineering on the data.
-3. Trains the required models to operate on the data panel.
+3. Trains the required models and copulas to operate on the data panel.
 4. Downloads the current data.
 5. Runs inference on t+X for the latest options to find the probability distribution on the asset prices to their expiry dates.
 6. Finds any mispriced options and size the position accordingly.

{panelbeater-0.0.17 → panelbeater-0.2.12}/README.md
@@ -19,6 +19,12 @@ Python 3.11.6:
 - [scikit-learn](https://scikit-learn.org/stable/)
 - [wavetrainer](https://github.com/8W9aG/wavetrainer/)
 - [tqdm](https://tqdm.github.io/)
+- [pyvinecopulib](https://github.com/vinecopulib/pyvinecopulib)
+- [python-dotenv](https://saurabh-kumar.com/python-dotenv/)
+- [kaleido](https://github.com/plotly/kaleido)
+- [plotly](https://plotly.com/)
+- [scipy](https://scipy.org/)
+- [joblib](https://joblib.readthedocs.io/en/stable/)
 
 ## Raison D'être :thought_balloon:
 
@@ -29,7 +35,7 @@ Python 3.11.6:
 `panelbeater` goes through the following steps:
 1. Downloads the historical data.
 2. Performs feature engineering on the data.
-3. Trains the required models to operate on the data panel.
+3. Trains the required models and copulas to operate on the data panel.
 4. Downloads the current data.
 5. Runs inference on t+X for the latest options to find the probability distribution on the asset prices to their expiry dates.
 6. Finds any mispriced options and size the position accordingly.

panelbeater-0.2.12/panelbeater/__init__.py
@@ -0,0 +1,18 @@
+"""panelbeater initialisation."""
+
+from .download import download
+from .fit import fit
+from .simulate import SIMULATION_FILENAME, run_single_simulation, simulate
+from .trades import trades
+from .wt import create_wt
+
+__VERSION__ = "0.2.12"
+__all__ = [
+    "download",
+    "fit",
+    "create_wt",
+    "simulate",
+    "run_single_simulation",
+    "trades",
+    "SIMULATION_FILENAME",
+]

panelbeater-0.2.12/panelbeater/__main__.py
@@ -0,0 +1,100 @@
+"""The CLI for finding mispriced options."""
+
+# pylint: disable=too-many-locals,use-dict-literal,invalid-name
+import argparse
+
+import requests_cache
+from dotenv import load_dotenv
+
+from .download import download
+from .fit import fit
+from .simulate import simulate
+from .trades import trades
+
+_TICKERS = [
+    # Equities
+    "SPY",
+    "QQQ",
+    "EEM",
+    # Commodities
+    "GC=F",
+    "CL=F",
+    "SI=F",
+    # FX
+    # "EURUSD=X",
+    # "USDJPY=X",
+    # Crypto
+    # "BTC-USD",
+    # "ETH-USD",
+]
+_MACROS = [
+    "GDP",
+    "UNRATE",
+    "CPIAUCSL",
+    "FEDFUNDS",
+    "DGS10",
+    "T10Y2Y",
+    # "M2SL",
+    # "VIXCLS",
+    # "DTWEXBGS",
+    # "INDPRO",
+]
+_WINDOWS = [
+    5,
+    10,
+    20,
+    60,
+    120,
+    200,
+]
+_LAGS = [1, 3, 5, 10, 20, 30]
+_DAYS_OUT = 30
+_SIMS = 1000
+
+
+def main() -> None:
+    """The main CLI function."""
+    load_dotenv()
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--inference",
+        help="Whether to do inference.",
+        required=False,
+        default=True,
+        action=argparse.BooleanOptionalAction,
+    )
+    parser.add_argument(
+        "--train",
+        help="Whether to do training.",
+        required=False,
+        default=True,
+        action=argparse.BooleanOptionalAction,
+    )
+    parser.add_argument(
+        "--trades",
+        help="Whether to generate trades.",
+        required=False,
+        default=True,
+        action=argparse.BooleanOptionalAction,
+    )
+    args = parser.parse_args()
+
+    # Setup main objects
+    session = requests_cache.CachedSession("panelbeater-cache")
+
+    # Fit the models
+    df_y = download(tickers=_TICKERS, macros=_MACROS, session=session)
+    if args.train:
+        fit(df_y=df_y, windows=_WINDOWS, lags=_LAGS)
+
+    if args.inference:
+        simulate(
+            sims=_SIMS, df_y=df_y, days_out=_DAYS_OUT, windows=_WINDOWS, lags=_LAGS
+        )
+
+    if args.trades:
+        trades(df_y=df_y, days_out=_DAYS_OUT, tickers=_TICKERS)
+
+
+if __name__ == "__main__":
+    main()

panelbeater-0.2.12/panelbeater/copula.py
@@ -0,0 +1,65 @@
+"""Handle joint distributions."""
+
+# pylint: disable=too-many-locals,pointless-string-statement
+import os
+import pickle
+import time
+from typing import Any, cast
+
+import numpy as np
+import pandas as pd
+import pyvinecopulib as pv
+
+
+def _vine_filename(df_returns: pd.DataFrame) -> str:
+    struct_str = "-".join(sorted(df_returns.columns.values.tolist()))
+    return f"market_structure_{struct_str}.pkl"
+
+
+def load_vine_copula(df_returns: pd.DataFrame) -> pv.Vinecop:
+    """Loads a vine copula model."""
+    df_returns = df_returns.reindex(sorted(df_returns.columns), axis=1)
+    with open(_vine_filename(df_returns=df_returns), "rb") as f:
+        return pickle.load(f)
+
+
+def fit_vine_copula(df_returns: pd.DataFrame, ttl_days: int = 30) -> pv.Vinecop:
+    """
+    Returns a fitted vine copula.
+    Loads from disk if a valid (non-expired) model exists; otherwise fits and saves.
+    """
+    df_returns = df_returns.reindex(sorted(df_returns.columns), axis=1)
+    vine_file = _vine_filename(df_returns=df_returns)
+
+    # 1. Check for valid cached model
+    if os.path.exists(vine_file):
+        file_age_seconds = time.time() - os.path.getmtime(vine_file)
+        if file_age_seconds < (ttl_days * 24 * 60 * 60):
+            print(f"Loading cached vine copula from {vine_file}")
+            return load_vine_copula(df_returns=df_returns)
+
+    # 2. If expired or missing, fit a new one
+    print("Vine copula is missing or expired. Fitting new model...")
+    n = len(df_returns)
+    # Manual PIT transform to Uniform [0, 1]
+    u = df_returns.rank(method="average").values / (n + 1)
+
+    controls = pv.FitControlsVinecop(
+        family_set=[pv.BicopFamily.gaussian, pv.BicopFamily.student],  # type: ignore
+        tree_criterion="tau",
+    )
+
+    cop = pv.Vinecop.from_data(u, controls=controls)
+
+    # 3. Save via Pickle
+    with open(vine_file, "wb") as f:
+        # HIGHEST_PROTOCOL is faster and produces smaller files (currently Protocol 5)
+        pickle.dump(cop, f, protocol=pickle.HIGHEST_PROTOCOL)
+
+    return cop
+
+
+def sample_joint_step(cop: pv.Vinecop) -> np.ndarray[Any, np.dtype[np.float64]]:
+    """Returns one joint sample vector for the panel."""
+    simulated = np.array(cop.simulate(1))
+    return cast(np.ndarray[Any, np.dtype[np.float64]], simulated[0])

{panelbeater-0.0.17 → panelbeater-0.2.12}/panelbeater/download.py
@@ -1,11 +1,23 @@
 """Download historical data."""
 
+# pylint: disable=invalid-name,global-statement,unused-argument
+import os
+
 import numpy as np
 import pandas as pd
 import requests_cache
 import tqdm
 import yfinance as yf
-from
+from fredapi import Fred  # type: ignore
+
+_FRED_CLIENT = None
+
+
+def _get_fred_client() -> Fred:
+    global _FRED_CLIENT
+    if _FRED_CLIENT is None:
+        _FRED_CLIENT = Fred(api_key=os.environ["FRED_API_KEY"])
+    return _FRED_CLIENT
 
 
 def _load_yahoo_prices(tickers: list[str]) -> pd.DataFrame:
@@ -35,11 +47,32 @@ def _load_fred_series(
     codes: list[str], session: requests_cache.CachedSession
 ) -> pd.DataFrame:
     """Load FRED series, forward-fill to daily to align with markets."""
-
+    client = _get_fred_client()
+    dfs: list[pd.Series] = []
     for code in tqdm.tqdm(codes, desc="Downloading macros"):
-
-
-
+        try:
+            df = client.get_series_all_releases(code)
+            df["date"] = pd.to_datetime(df["date"])
+            df["realtime_start"] = pd.to_datetime(df["realtime_start"])
+
+            def select_latest(group: pd.DataFrame) -> pd.DataFrame:
+                latest_df = group[
+                    group["realtime_start"] == group["realtime_start"].max()
+                ]
+                if not isinstance(latest_df, pd.DataFrame):
+                    raise ValueError("latest_df is not a DataFrame")
+                return latest_df
+
+            df = df.groupby("date").apply(select_latest)
+            df = df.set_index("date")
+            df.index = df.index.date  # type: ignore
+            df = df.sort_index()
+            dfs.append(df["value"].rename(code))  # type: ignore
+        except ValueError:
+            df = client.get_series(code)
+            df.index = df.index.date  # type: ignore
+            df = df.sort_index()
+            dfs.append(df.rename(code))
     macro = pd.concat(dfs, axis=1).sort_index()
     # daily frequency with forward-fill (macro is slower cadence)
     macro = macro.asfreq("D").ffill()
@@ -63,4 +96,5 @@ def download(
     levels = pd.concat(
         [prices.add_prefix("PX_"), macro.add_prefix("MACRO_")], axis=1
     ).ffill()
+    print(levels)
     return levels.replace([np.inf, -np.inf], np.nan)

{panelbeater-0.0.17 → panelbeater-0.2.12}/panelbeater/features.py
@@ -4,13 +4,12 @@ import warnings
 
 import numpy as np
 import pandas as pd
-import tqdm
 from feature_engine.datetime import DatetimeFeatures
 
 
 def _ticker_features(df: pd.DataFrame, windows: list[int]) -> pd.DataFrame:
     cols = df.columns.values.tolist()
-    for col in
+    for col in cols:
         s = df[col]
         for w in windows:
             with warnings.catch_warnings():
@@ -31,16 +30,15 @@ def _meta_ticker_feature(
     df: pd.DataFrame, lags: list[int], windows: list[int]
 ) -> pd.DataFrame:
     dfs = [df]
-    for lag in
+    for lag in lags:
         dfs.append(df.shift(lag).add_suffix(f"_lag{lag}"))
-    for window in
+    for window in windows:
         dfs.append(df.rolling(window).mean().add_suffix(f"_rmean{window}"))  # type: ignore
         dfs.append(df.rolling(window).std().add_suffix(f"_rstd{window}"))  # type: ignore
     return pd.concat(dfs, axis=1).replace([np.inf, -np.inf], np.nan)
 
 
 def _dt_features(df: pd.DataFrame) -> pd.DataFrame:
-    print("Generating datetime features")
     dtf = DatetimeFeatures(features_to_extract="all", variables="index")
     return dtf.fit_transform(df)
 

panelbeater-0.2.12/panelbeater/fit.py
@@ -0,0 +1,35 @@
+"""Handles fitting models."""
+
+import warnings
+from typing import Any, Callable
+
+import pandas as pd
+
+from .copula import fit_vine_copula
+from .features import features
+from .normalizer import normalize
+from .wt import create_wt
+
+
+def fit(
+    df_y: pd.DataFrame,
+    windows: list[int],
+    lags: list[int],
+    fit_func: Callable[[pd.DataFrame, pd.DataFrame, Any], None] | None = None,
+) -> None:
+    """Fit the models."""
+    wavetrainer = create_wt()
+    # Fit Vine Copula on historical returns
+    # We use pct_change to capture the dependency of returns
+    returns = df_y.pct_change().dropna()
+    if isinstance(returns, pd.Series):
+        returns = returns.to_frame()
+    fit_vine_copula(returns)
+    df_x = features(df=df_y.copy(), windows=windows, lags=lags)
+    df_y_norm = normalize(df=df_y.copy())
+    if fit_func is None:
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore", category=RuntimeWarning)
+            wavetrainer.fit(df_x, y=df_y_norm)
+    else:
+        fit_func(df_x, df_y_norm, wavetrainer)

panelbeater-0.2.12/panelbeater/normalizer.py
@@ -0,0 +1,92 @@
+"""Normalize the Y targets to standard deviations."""
+
+# pylint: disable=too-many-locals
+import math
+
+import numpy as np
+import pandas as pd
+from wavetrainer.model.model import PROBABILITY_COLUMN_PREFIX
+
+
+def _is_float(s: str) -> bool:
+    try:
+        float(s)
+        return True
+    except ValueError:
+        return False
+
+
+def normalize(df: pd.DataFrame) -> pd.DataFrame:
+    """Normalize the dataframe per column by z-score bucketing."""
+    df = df.pct_change(fill_method=None).replace([np.inf, -np.inf], np.nan)
+    mu = df.rolling(365).mean()
+    sigma = df.rolling(365).std()
+    df = ((((df - mu) / sigma) * 2.0).round() / 2.0).clip(-3, 3)
+    dfs = []
+    for col in df.columns:
+        for unique_val in df[col].unique():
+            if math.isnan(unique_val):
+                continue
+            s = (df[col] == unique_val).rename(f"{col}_{unique_val}")
+            dfs.append(s)
+    return pd.concat(dfs, axis=1)
+
+
+def denormalize(
+    df: pd.DataFrame, y: pd.DataFrame, u_sample: np.ndarray | None = None
+) -> pd.DataFrame:
+    """Denormalize the dataframe back to a total value."""
+    df = df.reindex(y.index)
+    for col in y.columns:
+        df[col] = y[col]
+    date_to_add = df.index[-1] + pd.Timedelta(days=1)
+
+    cols = set(df.columns.values.tolist())
+    target_cols = {"_".join(x.split("_")[:2]) for x in cols}
+    asset_idx = 0
+    for col in target_cols:
+        # 1. Gather all predicted probabilities for this asset's buckets
+        z_cols = {x for x in cols if x.startswith(col) and x != col}
+        if not z_cols:
+            continue
+        historical_series = y[col].pct_change().dropna()
+
+        # Sort buckets (stds) and their associated probabilities
+        stds = sorted(
+            [
+                float(x.replace(col, "").split("_")[1])
+                for x in z_cols
+                if _is_float(x.replace(col, "").split("_")[1])
+            ]
+        )
+        probs = []
+        for std in stds:
+            std_suffix = f"{col}_{std}_{PROBABILITY_COLUMN_PREFIX}"
+            prob_col = sorted([x for x in cols if x.startswith(std_suffix)])[-1]
+            prob = df[prob_col].dropna().iloc[-1]
+            probs.append(prob)
+
+        # Normalize probabilities (ensure they sum to 1.0)
+        probs = np.array(probs) / np.sum(probs)
+
+        # 2. Select the bucket using Inverse Transform Sampling
+        highest_std = 0.0
+        if u_sample is not None and asset_idx < len(u_sample):
+            cumulative_probs = np.cumsum(probs)
+            idx = np.searchsorted(cumulative_probs, u_sample[asset_idx])
+            highest_std = stds[min(idx, len(stds) - 1)]
+            asset_idx += 1
+        else:
+            highest_std = np.random.choice(stds, p=probs)
+
+        # 3. Use Pandas rolling on the historical y dataframe to avoid ndarray errors
+        mu = float(historical_series.rolling(365).mean().fillna(0.0).iloc[-1])  # pyright: ignore
+        sigma = float(historical_series.rolling(365).std().fillna(0.0).iloc[-1])
+
+        lower_bound = highest_std - 0.25
+        upper_bound = highest_std + 0.25
+        jittered_std = np.random.uniform(lower_bound, upper_bound)
+        value = (jittered_std * sigma) + mu
+        df.loc[date_to_add, col] = y[col].iloc[-1] * (1.0 + value)
+
+    return df[sorted(target_cols)]  # pyright: ignore

panelbeater-0.2.12/panelbeater/options.py
@@ -0,0 +1,352 @@
+"""Process the options for the assets."""
+
+# pylint: disable=too-many-locals,consider-using-f-string,use-dict-literal,invalid-name,too-many-arguments,too-many-positional-arguments,too-many-statements,line-too-long
+from datetime import datetime
+
+import numpy as np
+import pandas as pd
+import plotly.express as px
+import yfinance as yf
+from scipy import stats
+
+
+def get_price_probabilities(sim_df, target_date, bin_width=1.0):
+    """
+    Calculates the probability distribution of prices for a specific date.
+
+    Args:
+        sim_df: The simulation DataFrame (rows=dates, cols=paths)
+        target_date: The specific date (index) or integer location to analyze
+        bin_width: The size of the price buckets (e.g., $1.00)
+    """
+    # 1. Slice the simulation at the specific point in time
+    # This handles both a date-string index or a simple integer row index
+    if isinstance(target_date, int):
+        prices_at_t = sim_df.iloc[target_date]
+    else:
+        prices_at_t = sim_df.loc[target_date]
+
+    # 2. Define bins based on the range of prices on that specific day
+    min_p = np.floor(prices_at_t.min() / bin_width) * bin_width
+    max_p = np.ceil(prices_at_t.max() / bin_width) * bin_width
+    bins = np.arange(min_p, max_p + bin_width, bin_width)
+
+    # 3. Calculate probabilities
+    counts, bin_edges = np.histogram(prices_at_t, bins=bins)
+    probabilities = counts / len(prices_at_t)
+
+    # 4. Format into a DataFrame
+    price_points = bin_edges[:-1] + (bin_width / 2)
+    dist_df = pd.DataFrame({"price_point": price_points, "probability": probabilities})
+
+    return dist_df[dist_df["probability"] > 0].reset_index(drop=True)
+
+
+def calculate_full_kelly(row, sim_df):
+    """Calculate the kelly criterion for a probability mispricing."""
+    target_date = row["date"]
+    strike = row["strike"]
+    price = row["market_ask"]
+
+    if price <= 0:
+        return 0, 0
+
+    # Extract the simulated prices for this specific date
+    prices_at_t = sim_df.loc[target_date].values
+
+    # Calculate the Payoff for every path
+    if row["type"] == "call":
+        payoffs = np.maximum(prices_at_t - strike, 0)
+    else:
+        payoffs = np.maximum(strike - prices_at_t, 0)
+
+    expected_payoff = np.mean(payoffs)
+
+    # 1. Probability of winning (p)
+    p = row["model_prob"]
+    if p <= 0:
+        return 0, 0
+
+    # 2. Net Odds (b)
+    # This is (Expected Profit if we win) / (Amount Lost if we lose)
+    # Average payoff of the winning paths
+    avg_win_payoff = expected_payoff / p
+    net_profit_if_win = avg_win_payoff - price
+    b = net_profit_if_win / price
+
+    if b <= 0:
+        return 0, 0
+
+    # 3. Full Kelly Formula: f* = (p(b+1) - 1) / b
+    f_star = (p * (b + 1) - 1) / b
+
+    return max(0, f_star), expected_payoff - price
+
+
+def black_scholes_price(S, K, T, r, sigma, option_type="put"):
+    """Calculate the black scholes price for an option."""
+    # S = Trigger Asset Price, K = Strike, T = Time remaining, r = Risk-free rate, sigma = IV
+    d1 = (np.log(S / K) + (r + 0.5 * sigma**2) * T) / (sigma * np.sqrt(T))
+    d2 = d1 - sigma * np.sqrt(T)
+    if option_type == "call":
+        return S * stats.norm.cdf(d1) - K * np.exp(-r * T) * stats.norm.cdf(d2)
+    return K * np.exp(-r * T) * stats.norm.cdf(-d2) - S * stats.norm.cdf(-d1)
+
+
+def find_mispriced_options(ticker_symbol: str, sim_df: pd.DataFrame) -> None:
+    """Find any mispriced options for an asset."""
+
+    # 1. Initialize the Ticker
+    ticker = yf.Ticker(ticker_symbol)
+
+    # 1. Get dates from your simulation
+    sim_dates = pd.to_datetime(sim_df.index).date.tolist()  # pyright: ignore
+
+    # 2. Get available expiries from the market
+    available_expiries = [
+        datetime.strptime(d, "%Y-%m-%d").date() for d in ticker.options
+    ]
+
+    # 3. Find the common dates
+    # We want to find which days in our simulation actually have a tradeable option chain
+    common_dates = sorted(list(set(sim_dates).intersection(set(available_expiries))))
+
+    print(f"Simulation covers {len(sim_dates)} days.")
+    print(f"Market has {len(available_expiries)} expiries available.")
+    print(f"Matches found for: {common_dates}")
+
+    # Storage for our comparison results
+    date_results = []
+
+    for target_date in common_dates:
+        print(f"\n--- Processing Date: {target_date} ---")
+
+        # 1. Get YOUR model's probability for this specific day
+        # We use the function we built earlier
+        date_str = target_date.strftime("%Y-%m-%d")
+
+        # 2. Download the MARKET's chain for this specific day
+        chain = ticker.option_chain(date_str)
+        spot = ticker.history(period="1d")["Close"].iloc[-1]
+        calls = chain.calls[["strike", "bid", "ask", "impliedVolatility"]].copy()
+        calls = calls[calls["strike"] > spot * 1.02]
+        calls["option_type"] = "call"
+        puts = chain.puts[["strike", "bid", "ask", "impliedVolatility"]].copy()
+        puts = puts[puts["strike"] < spot * 0.98]
+        puts["option_type"] = "put"
+
+        # 3. Combine into one market view
+        full_chain = pd.concat([calls, puts])
+
+        # 4. Get your Model's Price Distribution for this specific day
+        # We grab the prices from sim_df for this row/date
+        model_prices_at_t = sim_df.loc[date_str].values
+
+        # 5. Compare every strike in the market to your model's probability
+        for _, row in full_chain.iterrows():
+            k = row["strike"]
+
+            if row["option_type"] == "call":
+                # Prob of finishing ABOVE the strike
+                model_prob = np.mean(model_prices_at_t > k)
+            else:
+                # Prob of finishing BELOW the strike
+                model_prob = np.mean(model_prices_at_t < k)
+
+            date_results.append(
+                {
+                    "date": date_str,
+                    "strike": k,
+                    "type": row["option_type"],
+                    "market_iv": row["impliedVolatility"],
+                    "market_ask": row["ask"],
+                    "model_prob": model_prob,
+                }
+            )
+
+    comparison_df = pd.DataFrame(date_results)
+    # Apply the calculation
+    results = comparison_df.apply(lambda row: calculate_full_kelly(row, sim_df), axis=1)
+    if results.empty:
+        return
+
+    comparison_df[["kelly_fraction", "expected_profit"]] = pd.DataFrame(
+        results.tolist(), index=comparison_df.index
+    )
+
+    # Filter for liquid options and positive edge
+    top_5 = (
+        comparison_df[comparison_df["market_ask"] > 0.10]  # pyright: ignore
+        .sort_values(by="kelly_fraction", ascending=False)
+        .head(4)
+    )
+
+    # Formatting for the final report
+    summary_report = top_5[
+        ["date", "strike", "type", "model_prob", "kelly_fraction", "expected_profit"]
+    ].copy()
+    summary_report["model_prob"] = summary_report["model_prob"].map("{:.1%}".format)  # pyright: ignore
+    summary_report["kelly_fraction"] = summary_report["kelly_fraction"].map(  # pyright: ignore
+        "{:.2%}".format
+    )
+    summary_report["expected_profit"] = summary_report["expected_profit"].map(  # pyright: ignore
+        "${:,.2f}".format
+    )
+
+    print(summary_report)
+
+    fig = px.scatter(
+        comparison_df[comparison_df["kelly_fraction"] > 0],
+        x="strike",
+        y="kelly_fraction",
+        color="type",
+        size="model_prob",
+        hover_data=["date"],
+        title="Full Kelly Allocation: Conviction by Strike and Option Type",
+        labels={"kelly_fraction": "Kelly Bet Size (%)", "strike": "Strike Price ($)"},
+        template="plotly_dark",
+    )
+
+    # Highlight the top 5 with annotations or larger markers
+    fig.update_traces(marker=dict(line=dict(width=1, color="White")))
+    fig.write_image(
+        f"kelly_conviction_report_{ticker_symbol}.png", width=1200, height=800
+    )
+
+    exit_strategies = []
+
+    for _, trade in top_5.iterrows():
+        # Select appropriate simulation slices
+        sim_slice = sim_df.loc[trade["date"]]
+
+        # Calculate distribution stats for this specific expiry
+        mu = sim_slice.mean()
+        sigma = sim_slice.std()
+
+        # --- REASONABLE LOGIC START ---
+        # Instead of 95/5, use 0.5 to 1.0 standard deviation for targets
+        # This targets the 'meat' of the move your model predicts
+        if trade["type"] == "call":
+            # TP: The mean predicted price (where the bulk of the probability lies)
+            # SL: Half a standard deviation below the current spot or the mean
+            tp_price = mu + (0.2 * sigma)
+            sl_price = mu - (0.5 * sigma)
+        else:
+            # Put: Profit on the downside mean
+            tp_price = mu - (0.2 * sigma)
+            sl_price = mu + (0.5 * sigma)
+        # --- REASONABLE LOGIC END ---
+
+        # 1. Get today's date and calculate time to expiry
+        today = datetime.now()
+        expiry_date = datetime.strptime(trade["date"], "%Y-%m-%d")  # type: ignore
+        days_remaining = (expiry_date - today).days
+
+        # IMPORTANT: Exit triggers should be modeled for 'Today' or 'Soon',
+        # not the moment of expiry, otherwise extrinsic value is 0.
+        # We assume we hold for 25% of the remaining duration or at least 1 day.
+        holding_period_days = max(days_remaining * 0.25, 1)
+        time_to_trigger = max(days_remaining - holding_period_days, 0.5) / 365.0
+
+        tp_option_price = black_scholes_price(
+            tp_price,
+            trade["strike"],
+            time_to_trigger,
+            0.04,
+            trade["market_iv"],
+            str(trade["type"]),
+        )
+        sl_option_price = black_scholes_price(
+            sl_price,
+            trade["strike"],
+            time_to_trigger,
+            0.04,
+            trade["market_iv"],
+            str(trade["type"]),
+        )
+
+        exit_strategies.append(
+            {
+                "Strike": trade["strike"],
+                "Type": trade["type"],
+                "Kelly %": trade["kelly_fraction"],
+                "TP Asset Trigger": tp_price,
+                "SL Asset Trigger": sl_price,
+                "TP Option Price": tp_option_price,
+                "SL Option Price": sl_option_price,
+            }
+        )
+
+    exit_df = pd.DataFrame(exit_strategies)
+    print(exit_df)
+
+
+def determine_spot_position(ticker_symbol: str, sim_df: pd.DataFrame) -> None:
+    """
+    Determines optimal spot position (Long/Short), Kelly sizing,
+    and path-based exit levels for assets without options.
+    """
+    # 1. Get Current Market Data
+    ticker = yf.Ticker(ticker_symbol)
+    spot_history = ticker.history(period="1d")
+
+    if spot_history.empty:
+        print(f"No market data for {ticker_symbol}")
+        return
+
+    spot_price = spot_history["Close"].iloc[-1]
+
+    # 2. Extract the Terminal Distribution
+    # Find the latest date in the index
+    last_date = sim_df.index.max()
+
+    # Filter the DF for that date.
+    # This results in N rows (where N = number of simulations)
+    terminal_distribution = sim_df.loc[[last_date]]
+
+    # Extract the specific ticker column
+    # terminal_prices is now a Series of predicted prices across all paths
+    terminal_prices = terminal_distribution[f"PX_{ticker_symbol}"]
+
+    # 3. Determine Bias and Winning Path Ratio (p)
+    median_terminal = terminal_prices.median()  # This will now work!
+    is_long = median_terminal > spot_price
+
+    if is_long:
+        # Probability of finishing higher than spot
+        p = np.mean(terminal_prices > spot_price)
+        tp_price = terminal_prices.quantile(0.95)
+        sl_price = terminal_prices.quantile(0.05)
+    else:
+        # Probability of finishing lower than spot
+        p = np.mean(terminal_prices < spot_price)
+        tp_price = terminal_prices.quantile(0.05)
+        sl_price = terminal_prices.quantile(0.95)
+
+    # 3. Calculate Odds (b) for Kelly
+    # b = (Expected Profit) / (Expected Loss if Stopped)
+    expected_profit = abs(tp_price - spot_price)
+    expected_loss = abs(spot_price - sl_price)
+    b = expected_profit / expected_loss
+
+    # 4. Full Kelly Formula: f* = (p(b+1) - 1) / b
+    if b > 0 and p > 0:
+        f_star = (p * (b + 1) - 1) / b
+        kelly_size = max(0, f_star)
+    else:
+        kelly_size = 0
+
+    # 5. Apply a 'Trader's Cap' (e.g., 10% of portfolio for spot)
+    final_size = min(kelly_size, 0.10)
+
+    # Output Results
+    print(f"\n--- SPOT ANALYSIS FOR {ticker_symbol} ---")
+    print(f"Current Price: ${spot_price:.2f}")
+    print(f"Position: {'LONG' if is_long else 'SHORT'}")
+    print(f"Win Probability (p): {p:.1%}")
+    print(f"Risk/Reward Ratio (b): {b:.2f}")
+    print(f"Kelly Fraction: {kelly_size:.2%}")
+    print(f"Recommended Size (Capped): {final_size:.2%}")
+    print("-" * 30)
+    print(f"Take Profit Target: ${tp_price:.2f}")
+    print(f"Stop Loss (Invalidation): ${sl_price:.2f}")

panelbeater-0.2.12/panelbeater/simulate.py
@@ -0,0 +1,87 @@
+"""Handle simulations from the models."""
+# pylint: disable=too-many-arguments,too-many-positional-arguments
+
+from typing import Callable
+
+import pandas as pd
+import pyvinecopulib as pv
+import tqdm
+from joblib import Parallel, delayed
+
+from .copula import load_vine_copula, sample_joint_step
+from .features import features
+from .normalizer import denormalize
+from .wt import create_wt
+
+SIMULATION_COLUMN = "simulation"
+SIMULATION_FILENAME = "sims.parquet"
+
+
+def run_single_simulation(
+    sim_idx: int,
+    df_y,
+    days_out: int,
+    windows: list[int],
+    lags: list[int],
+    vine_cop: pv.Vinecop,
+):
+    """
+    Encapsulates a single Monte Carlo path generation.
+    """
+    # Local copies for thread-safety (though joblib uses processes)
+    df_y = df_y.copy()
+    wavetrainer = create_wt()
+
+    for _ in tqdm.tqdm(range(days_out), desc="Simulation Days"):
+        # 1. Feature Engineering
+        df_x = features(df=df_y.copy(), windows=windows, lags=lags)
+
+        # 2. Get Model Prediction (u_step sample from Copula)
+        u_step = sample_joint_step(vine_cop)
+
+        # 3. Transform and Denormalize to get next day prices
+        df_next = wavetrainer.transform(df_x.iloc[[-1]], ignore_no_dates=True).drop(
+            columns=df_x.columns.values.tolist()
+        )
+        df_y = denormalize(df_next, y=df_y.copy(), u_sample=u_step)
+
+    # Mark the simulation index and return only the relevant tail (for memory efficiency)
+    df_result = df_y.tail(days_out + 1).copy()
+    df_result[SIMULATION_COLUMN] = sim_idx
+    return df_result
+
+
+def simulate(
+    sims: int,
+    df_y: pd.DataFrame,
+    days_out: int,
+    windows: list[int],
+    lags: list[int],
+    sim_func: Callable[
+        [int, pd.DataFrame, int, list[int], list[int], pv.Vinecop], list[pd.DataFrame]
+    ]
+    | None = None,
+) -> pd.DataFrame:
+    """Simulate from trained models."""
+    print(f"Starting {sims} simulations in parallel...")
+    vine_cop = load_vine_copula(df_returns=df_y)
+    print("Loaded vine copula")
+    if sim_func is None:
+        # n_jobs=-1 uses all available CPU cores
+        all_sims = Parallel(n_jobs=-1)(
+            delayed(run_single_simulation)(
+                i, df_y.copy(), days_out, windows, lags, vine_cop
+            )
+            for i in tqdm.tqdm(range(sims), desc="Simulating")
+        )
+    else:
+        all_sims = sim_func(sims, df_y.copy(), days_out, windows, lags, vine_cop)
+    # Combine all simulations into one large DataFrame
+    df_mc = pd.concat(all_sims)  # type: ignore
+    df_mc.to_parquet(SIMULATION_FILENAME)
+    return df_mc
+
+
+def load_simulations() -> pd.DataFrame:
+    """Load the rendered simulations."""
+    return pd.read_parquet(SIMULATION_FILENAME)

panelbeater-0.2.12/panelbeater/trades.py
@@ -0,0 +1,40 @@
+"""Handle generating trades."""
+
+# pylint: disable=use-dict-literal
+import pandas as pd
+import tqdm
+
+from .options import determine_spot_position, find_mispriced_options
+from .simulate import SIMULATION_COLUMN, load_simulations
+
+
+def trades(df_y: pd.DataFrame, days_out: int, tickers: list[str]) -> None:
+    """Calculate new trades."""
+    df_mc = load_simulations()
+    pd.options.plotting.backend = "plotly"
+    for col in tqdm.tqdm(df_y.columns.values.tolist(), desc="Plotting assets"):
+        if col == SIMULATION_COLUMN:
+            continue
+        plot_df = df_mc.pivot(columns=SIMULATION_COLUMN, values=col).tail(days_out + 1)
+        # Plotting
+        fig = plot_df.plot(
+            title=f"Monte Carlo Simulation: {col}",
+            labels={"value": "Price", "index": "Date", "simulation": "Path ID"},
+            template="plotly_dark",
+        )
+        # Add any additional styling
+        fig.add_scatter(
+            x=plot_df.index,
+            y=plot_df.median(axis=1),
+            name="Median",
+            line=dict(color="white", width=10),
+        )
+        fig.write_image(
+            f"monte_carlo_results_{col}.png", width=1200, height=800, scale=2
+        )
+
+    # Find the current options prices
+    for ticker in tickers:
+        print(f"Finding pricing options for {ticker}")
+        find_mispriced_options(ticker, df_mc[f"PX_{ticker}"].copy())  # pyright: ignore
+        determine_spot_position(ticker, df_mc[f"PX_{ticker}"].copy())  # pyright: ignore

panelbeater-0.2.12/panelbeater/wt.py
@@ -0,0 +1,18 @@
+"""Handles wavetrainer interaction."""
+
+import datetime
+
+import wavetrainer
+
+
+def create_wt() -> wavetrainer.trainer.Trainer:  # pyright: ignore
+    """Creates a wavetrainer instance."""
+    return wavetrainer.create(
+        "panelbeater-train",
+        walkforward_timedelta=datetime.timedelta(days=30),
+        validation_size=datetime.timedelta(days=365),
+        test_size=datetime.timedelta(days=365),
+        allowed_models={"catboost"},
+        max_false_positive_reduction_steps=0,
+        use_power_transformer=True,
+    )

{panelbeater-0.0.17 → panelbeater-0.2.12/panelbeater.egg-info}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: panelbeater
-Version: 0.0.17
+Version: 0.2.12
 Summary: A CLI for finding mispriced options.
 Home-page: https://github.com/8W9aG/panelbeater
 Author: Will Sackfield
@@ -18,8 +18,15 @@ Requires-Dist: numpy>=2.2.6
 Requires-Dist: feature-engine>=1.9.3
 Requires-Dist: requests-cache>=1.2.1
 Requires-Dist: scikit-learn>=1.6.1
-Requires-Dist: wavetrainer>=0.
+Requires-Dist: wavetrainer>=0.3.4
 Requires-Dist: tqdm>=4.67.1
+Requires-Dist: pyvinecopulib>=0.7.5
+Requires-Dist: fredapi>=0.5.2
+Requires-Dist: python-dotenv>=1.1.0
+Requires-Dist: kaleido>=1.2.0
+Requires-Dist: plotly>=6.3.1
+Requires-Dist: scipy>=1.16.3
+Requires-Dist: joblib>=1.5.2
 
 # panelbeater
 
@@ -42,6 +49,12 @@ Python 3.11.6:
 - [scikit-learn](https://scikit-learn.org/stable/)
 - [wavetrainer](https://github.com/8W9aG/wavetrainer/)
 - [tqdm](https://tqdm.github.io/)
+- [pyvinecopulib](https://github.com/vinecopulib/pyvinecopulib)
+- [python-dotenv](https://saurabh-kumar.com/python-dotenv/)
+- [kaleido](https://github.com/plotly/kaleido)
+- [plotly](https://plotly.com/)
+- [scipy](https://scipy.org/)
+- [joblib](https://joblib.readthedocs.io/en/stable/)
 
 ## Raison D'être :thought_balloon:
 
@@ -52,7 +65,7 @@ Python 3.11.6:
 `panelbeater` goes through the following steps:
 1. Downloads the historical data.
 2. Performs feature engineering on the data.
-3. Trains the required models to operate on the data panel.
+3. Trains the required models and copulas to operate on the data panel.
 4. Downloads the current data.
 5. Runs inference on t+X for the latest options to find the probability distribution on the asset prices to their expiry dates.
 6. Finds any mispriced options and size the position accordingly.

{panelbeater-0.0.17 → panelbeater-0.2.12}/panelbeater.egg-info/SOURCES.txt
@@ -5,9 +5,15 @@ requirements.txt
 setup.py
 panelbeater/__init__.py
 panelbeater/__main__.py
+panelbeater/copula.py
 panelbeater/download.py
 panelbeater/features.py
+panelbeater/fit.py
 panelbeater/normalizer.py
+panelbeater/options.py
+panelbeater/simulate.py
+panelbeater/trades.py
+panelbeater/wt.py
 panelbeater.egg-info/PKG-INFO
 panelbeater.egg-info/SOURCES.txt
 panelbeater.egg-info/dependency_links.txt

panelbeater-0.2.12/panelbeater.egg-info/requires.txt
@@ -0,0 +1,16 @@
+yfinance==0.2.66
+pandas>=2.3.3
+pandas-datareader>=0.10.0
+numpy>=2.2.6
+feature-engine>=1.9.3
+requests-cache>=1.2.1
+scikit-learn>=1.6.1
+wavetrainer>=0.3.4
+tqdm>=4.67.1
+pyvinecopulib>=0.7.5
+fredapi>=0.5.2
+python-dotenv>=1.1.0
+kaleido>=1.2.0
+plotly>=6.3.1
+scipy>=1.16.3
+joblib>=1.5.2

panelbeater-0.0.17/panelbeater.egg-info/requires.txt → panelbeater-0.2.12/requirements.txt
@@ -5,5 +5,12 @@ numpy>=2.2.6
 feature-engine>=1.9.3
 requests-cache>=1.2.1
 scikit-learn>=1.6.1
-wavetrainer>=0.
+wavetrainer>=0.3.4
 tqdm>=4.67.1
+pyvinecopulib>=0.7.5
+fredapi>=0.5.2
+python-dotenv>=1.1.0
+kaleido>=1.2.0
+plotly>=6.3.1
+scipy>=1.16.3
+joblib>=1.5.2

{panelbeater-0.0.17 → panelbeater-0.2.12}/setup.py
@@ -23,7 +23,7 @@ def install_requires() -> typing.List[str]:
 
 setup(
     name='panelbeater',
-    version='0.0.17',
+    version='0.2.12',
     description='A CLI for finding mispriced options.',
     long_description=long_description,
     long_description_content_type='text/markdown',

panelbeater-0.0.17/panelbeater/__main__.py (deleted)
@@ -1,95 +0,0 @@
-"""The CLI for finding mispriced options."""
-
-import argparse
-import datetime
-
-import requests_cache
-import tqdm
-import wavetrainer as wt
-from wavetrainer.model_type import QUANTILE_KEY
-
-from .download import download
-from .features import features
-from .normalizer import denormalize, normalize
-
-_TICKERS = [
-    # Equities
-    "SPY",
-    "QQQ",
-    "EEM",
-    # Commodities
-    "GC=F",
-    "CL=F",
-    "SI=F",
-    # FX
-    "EURUSD=X",
-    "USDJPY=X",
-    # Crypto
-    "BTC-USD",
-    "ETH-USD",
-]
-_MACROS = [
-    "GDP",
-    "UNRATE",
-    "CPIAUCSL",
-    "FEDFUNDS",
-    "DGS10",
-    "T10Y2Y",
-    "M2SL",
-    "VIXCLS",
-    "DTWEXBGS",
-    "INDPRO",
-]
-_WINDOWS = [
-    5,
-    10,
-    20,
-    60,
-    120,
-    200,
-]
-_LAGS = [1, 3, 5, 10, 20, 30]
-_DAYS_OUT = 30
-
-
-def main() -> None:
-    """The main CLI function."""
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "--inference",
-        help="Whether to skip training and just do inference.",
-        required=False,
-        default=False,
-        action="store_true",
-    )
-    args = parser.parse_args()
-
-    # Setup main objects
-    session = requests_cache.CachedSession("panelbeater-cache")
-    wavetrainer = wt.create(
-        "panelbeater-train",
-        walkforward_timedelta=datetime.timedelta(days=30),
-        validation_size=datetime.timedelta(days=365),
-        test_size=datetime.timedelta(days=365),
-        allowed_models={"catboost"},
-        max_false_positive_reduction_steps=0,
-    )
-
-    # Fit the models
-    df_y = download(tickers=_TICKERS, macros=_MACROS, session=session)
-    df_x = features(df=df_y.copy(), windows=_WINDOWS, lags=_LAGS)
-    df_y_norm = normalize(df=df_y.copy())
-    df_y_norm.attrs = {QUANTILE_KEY: True}
-    if not args.inference:
-        wavetrainer.fit(df_x, y=df_y_norm)
-    for _ in tqdm.tqdm(range(_DAYS_OUT), desc="Running t+X simulation"):
-        df_next = wavetrainer.transform(df_x, ignore_no_dates=True).drop(columns=df_x)
-        df_y = denormalize(df_next, y=df_y)
-        df_x = features(df=df_y.copy(), windows=_WINDOWS, lags=_LAGS)
-        df_y_norm = normalize(df=df_y.copy())
-
-    # Find the current options prices
-
-
-if __name__ == "__main__":
-    main()

panelbeater-0.0.17/panelbeater/normalizer.py (deleted)
@@ -1,64 +0,0 @@
-"""Normalize the Y targets to standard deviations."""
-
-# pylint: disable=too-many-locals
-
-import numpy as np
-import pandas as pd
-from wavetrainer.model.model import PROBABILITY_COLUMN_PREFIX
-
-
-def _is_float(s: str) -> bool:
-    try:
-        float(s)
-        return True
-    except ValueError:
-        return False
-
-
-def normalize(df: pd.DataFrame) -> pd.DataFrame:
-    """Normalize the dataframe per column by z-score bucketing."""
-    df = df.pct_change(fill_method=None).replace([np.inf, -np.inf], np.nan)
-    mu = df.rolling(365).mean()
-    sigma = df.rolling(365).std()
-    return ((df - mu) / sigma).fillna(0.0)
-
-
-def denormalize(df: pd.DataFrame, y: pd.DataFrame) -> pd.DataFrame:
-    """Denormalize the dataframe back to a total value."""
-    for col in y.columns:
-        df[col] = y[col]
-    date_to_add = df.index[-1] + pd.Timedelta(days=1)
-
-    cols = set(df.columns.values.tolist())
-    target_cols = {"_".join(x.split("_")[:2]) for x in cols}
-    for col in target_cols:
-        # Find the standard deviations
-        z_cols = {x for x in cols if x.startswith(col) and x != col}
-        if not z_cols:
-            continue
-        stds = sorted(
-            [
-                float(x.replace(col, "").split("_")[1])
-                for x in z_cols
-                if _is_float(x.replace(col, "").split("_")[1])
-            ]
-        )
-
-        # Find the highest probability standard deviation
-        highest_std_value = 0.0
-        highest_std = None
-        for std in stds:
-            std_suffix = f"{col}_{std}_{PROBABILITY_COLUMN_PREFIX}"
-            std_true_col = sorted([x for x in cols if x.startswith(std_suffix)])[-1]
-            std_value = df[std_true_col].iloc[-1]
-            if std_value > highest_std_value:
-                highest_std_value = std_value
-                highest_std = std
-
-        # Convert the standard deviation back to a value
-        mu = df[col].rolling(365).mean()
-        sigma = df[col].rolling(365).std()
-        value = (highest_std * sigma) + mu
-        df.loc[date_to_add, col] = df[col].iloc[-1] * (1.0 + value)
-
-    return df.drop(columns=list(cols))

The remaining files are unchanged between 0.0.17 and 0.2.12: LICENSE, MANIFEST.in, panelbeater.egg-info/dependency_links.txt, panelbeater.egg-info/entry_points.txt, panelbeater.egg-info/not-zip-safe, panelbeater.egg-info/top_level.txt, and setup.cfg.