panelbeater 0.0.17__tar.gz → 0.2.12__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. {panelbeater-0.0.17/panelbeater.egg-info → panelbeater-0.2.12}/PKG-INFO +16 -3
  2. {panelbeater-0.0.17 → panelbeater-0.2.12}/README.md +7 -1
  3. panelbeater-0.2.12/panelbeater/__init__.py +18 -0
  4. panelbeater-0.2.12/panelbeater/__main__.py +100 -0
  5. panelbeater-0.2.12/panelbeater/copula.py +65 -0
  6. {panelbeater-0.0.17 → panelbeater-0.2.12}/panelbeater/download.py +39 -5
  7. {panelbeater-0.0.17 → panelbeater-0.2.12}/panelbeater/features.py +3 -5
  8. panelbeater-0.2.12/panelbeater/fit.py +35 -0
  9. panelbeater-0.2.12/panelbeater/normalizer.py +92 -0
  10. panelbeater-0.2.12/panelbeater/options.py +352 -0
  11. panelbeater-0.2.12/panelbeater/simulate.py +87 -0
  12. panelbeater-0.2.12/panelbeater/trades.py +40 -0
  13. panelbeater-0.2.12/panelbeater/wt.py +18 -0
  14. {panelbeater-0.0.17 → panelbeater-0.2.12/panelbeater.egg-info}/PKG-INFO +16 -3
  15. {panelbeater-0.0.17 → panelbeater-0.2.12}/panelbeater.egg-info/SOURCES.txt +6 -0
  16. panelbeater-0.2.12/panelbeater.egg-info/requires.txt +16 -0
  17. panelbeater-0.0.17/panelbeater.egg-info/requires.txt → panelbeater-0.2.12/requirements.txt +8 -1
  18. {panelbeater-0.0.17 → panelbeater-0.2.12}/setup.py +1 -1
  19. panelbeater-0.0.17/panelbeater/__init__.py +0 -3
  20. panelbeater-0.0.17/panelbeater/__main__.py +0 -95
  21. panelbeater-0.0.17/panelbeater/normalizer.py +0 -64
  22. panelbeater-0.0.17/requirements.txt +0 -9
  23. {panelbeater-0.0.17 → panelbeater-0.2.12}/LICENSE +0 -0
  24. {panelbeater-0.0.17 → panelbeater-0.2.12}/MANIFEST.in +0 -0
  25. {panelbeater-0.0.17 → panelbeater-0.2.12}/panelbeater.egg-info/dependency_links.txt +0 -0
  26. {panelbeater-0.0.17 → panelbeater-0.2.12}/panelbeater.egg-info/entry_points.txt +0 -0
  27. {panelbeater-0.0.17 → panelbeater-0.2.12}/panelbeater.egg-info/not-zip-safe +0 -0
  28. {panelbeater-0.0.17 → panelbeater-0.2.12}/panelbeater.egg-info/top_level.txt +0 -0
  29. {panelbeater-0.0.17 → panelbeater-0.2.12}/setup.cfg +0 -0
{panelbeater-0.0.17/panelbeater.egg-info → panelbeater-0.2.12}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: panelbeater
-Version: 0.0.17
+Version: 0.2.12
 Summary: A CLI for finding mispriced options.
 Home-page: https://github.com/8W9aG/panelbeater
 Author: Will Sackfield
@@ -18,8 +18,15 @@ Requires-Dist: numpy>=2.2.6
 Requires-Dist: feature-engine>=1.9.3
 Requires-Dist: requests-cache>=1.2.1
 Requires-Dist: scikit-learn>=1.6.1
-Requires-Dist: wavetrainer>=0.2.43
+Requires-Dist: wavetrainer>=0.3.4
 Requires-Dist: tqdm>=4.67.1
+Requires-Dist: pyvinecopulib>=0.7.5
+Requires-Dist: fredapi>=0.5.2
+Requires-Dist: python-dotenv>=1.1.0
+Requires-Dist: kaleido>=1.2.0
+Requires-Dist: plotly>=6.3.1
+Requires-Dist: scipy>=1.16.3
+Requires-Dist: joblib>=1.5.2
 
 # panelbeater
 
@@ -42,6 +49,12 @@ Python 3.11.6:
 - [scikit-learn](https://scikit-learn.org/stable/)
 - [wavetrainer](https://github.com/8W9aG/wavetrainer/)
 - [tqdm](https://tqdm.github.io/)
+- [pyvinecopulib](https://github.com/vinecopulib/pyvinecopulib)
+- [python-dotenv](https://saurabh-kumar.com/python-dotenv/)
+- [kaleido](https://github.com/plotly/kaleido)
+- [plotly](https://plotly.com/)
+- [scipy](https://scipy.org/)
+- [joblib](https://joblib.readthedocs.io/en/stable/)
 
 ## Raison D'être :thought_balloon:
 
@@ -52,7 +65,7 @@ Python 3.11.6:
 `panelbeater` goes through the following steps:
 1. Downloads the historical data.
 2. Performs feature engineering on the data.
-3. Trains the required models to operate on the data panel.
+3. Trains the required models and copulas to operate on the data panel.
 4. Downloads the current data.
 5. Runs inference on t+X for the latest options to find the probability distribution on the asset prices to their expiry dates.
 6. Finds any mispriced options and sizes the position accordingly.
{panelbeater-0.0.17 → panelbeater-0.2.12}/README.md
@@ -19,6 +19,12 @@ Python 3.11.6:
 - [scikit-learn](https://scikit-learn.org/stable/)
 - [wavetrainer](https://github.com/8W9aG/wavetrainer/)
 - [tqdm](https://tqdm.github.io/)
+- [pyvinecopulib](https://github.com/vinecopulib/pyvinecopulib)
+- [python-dotenv](https://saurabh-kumar.com/python-dotenv/)
+- [kaleido](https://github.com/plotly/kaleido)
+- [plotly](https://plotly.com/)
+- [scipy](https://scipy.org/)
+- [joblib](https://joblib.readthedocs.io/en/stable/)
 
 ## Raison D'être :thought_balloon:
 
@@ -29,7 +35,7 @@ Python 3.11.6:
 `panelbeater` goes through the following steps:
 1. Downloads the historical data.
 2. Performs feature engineering on the data.
-3. Trains the required models to operate on the data panel.
+3. Trains the required models and copulas to operate on the data panel.
 4. Downloads the current data.
 5. Runs inference on t+X for the latest options to find the probability distribution on the asset prices to their expiry dates.
 6. Finds any mispriced options and sizes the position accordingly.
panelbeater-0.2.12/panelbeater/__init__.py
@@ -0,0 +1,18 @@
+"""panelbeater initialisation."""
+
+from .download import download
+from .fit import fit
+from .simulate import SIMULATION_FILENAME, run_single_simulation, simulate
+from .trades import trades
+from .wt import create_wt
+
+__VERSION__ = "0.2.12"
+__all__ = [
+    "download",
+    "fit",
+    "create_wt",
+    "simulate",
+    "run_single_simulation",
+    "trades",
+    "SIMULATION_FILENAME",
+]
panelbeater-0.2.12/panelbeater/__main__.py
@@ -0,0 +1,100 @@
+"""The CLI for finding mispriced options."""
+
+# pylint: disable=too-many-locals,use-dict-literal,invalid-name
+import argparse
+
+import requests_cache
+from dotenv import load_dotenv
+
+from .download import download
+from .fit import fit
+from .simulate import simulate
+from .trades import trades
+
+_TICKERS = [
+    # Equities
+    "SPY",
+    "QQQ",
+    "EEM",
+    # Commodities
+    "GC=F",
+    "CL=F",
+    "SI=F",
+    # FX
+    # "EURUSD=X",
+    # "USDJPY=X",
+    # Crypto
+    # "BTC-USD",
+    # "ETH-USD",
+]
+_MACROS = [
+    "GDP",
+    "UNRATE",
+    "CPIAUCSL",
+    "FEDFUNDS",
+    "DGS10",
+    "T10Y2Y",
+    # "M2SL",
+    # "VIXCLS",
+    # "DTWEXBGS",
+    # "INDPRO",
+]
+_WINDOWS = [
+    5,
+    10,
+    20,
+    60,
+    120,
+    200,
+]
+_LAGS = [1, 3, 5, 10, 20, 30]
+_DAYS_OUT = 30
+_SIMS = 1000
+
+
+def main() -> None:
+    """The main CLI function."""
+    load_dotenv()
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--inference",
+        help="Whether to do inference.",
+        required=False,
+        default=True,
+        action=argparse.BooleanOptionalAction,
+    )
+    parser.add_argument(
+        "--train",
+        help="Whether to do training.",
+        required=False,
+        default=True,
+        action=argparse.BooleanOptionalAction,
+    )
+    parser.add_argument(
+        "--trades",
+        help="Whether to generate trades.",
+        required=False,
+        default=True,
+        action=argparse.BooleanOptionalAction,
+    )
+    args = parser.parse_args()
+
+    # Setup main objects
+    session = requests_cache.CachedSession("panelbeater-cache")
+
+    # Fit the models
+    df_y = download(tickers=_TICKERS, macros=_MACROS, session=session)
+    if args.train:
+        fit(df_y=df_y, windows=_WINDOWS, lags=_LAGS)
+
+    if args.inference:
+        simulate(
+            sims=_SIMS, df_y=df_y, days_out=_DAYS_OUT, windows=_WINDOWS, lags=_LAGS
+        )
+
+    if args.trades:
+        trades(df_y=df_y, days_out=_DAYS_OUT, tickers=_TICKERS)
+
+
+if __name__ == "__main__":
+    main()
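Because all three flags use argparse.BooleanOptionalAction with default=True, each also accepts a --no-* negation (for example --no-train to skip fitting and reuse saved models). The same pipeline can be driven programmatically; a minimal sketch, assuming the package is installed and a FRED_API_KEY is available in the environment or a .env file (the tickers, windows, lags and simulation counts below are illustrative, not the package defaults):

    import requests_cache
    from dotenv import load_dotenv
    from panelbeater import download, fit, simulate, trades

    load_dotenv()  # pulls FRED_API_KEY into the environment for download()
    session = requests_cache.CachedSession("panelbeater-cache")
    df_y = download(tickers=["SPY", "QQQ"], macros=["FEDFUNDS"], session=session)
    fit(df_y=df_y, windows=[5, 20], lags=[1, 5])   # models plus the vine copula
    simulate(sims=100, df_y=df_y, days_out=30, windows=[5, 20], lags=[1, 5])
    trades(df_y=df_y, days_out=30, tickers=["SPY", "QQQ"])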
panelbeater-0.2.12/panelbeater/copula.py
@@ -0,0 +1,65 @@
+"""Handle joint distributions."""
+
+# pylint: disable=too-many-locals,pointless-string-statement
+import os
+import pickle
+import time
+from typing import Any, cast
+
+import numpy as np
+import pandas as pd
+import pyvinecopulib as pv
+
+
+def _vine_filename(df_returns: pd.DataFrame) -> str:
+    struct_str = "-".join(sorted(df_returns.columns.values.tolist()))
+    return f"market_structure_{struct_str}.pkl"
+
+
+def load_vine_copula(df_returns: pd.DataFrame) -> pv.Vinecop:
+    """Loads a vine copula model."""
+    df_returns = df_returns.reindex(sorted(df_returns.columns), axis=1)
+    with open(_vine_filename(df_returns=df_returns), "rb") as f:
+        return pickle.load(f)
+
+
+def fit_vine_copula(df_returns: pd.DataFrame, ttl_days: int = 30) -> pv.Vinecop:
+    """
+    Returns a fitted vine copula.
+    Loads from disk if a valid (non-expired) model exists; otherwise fits and saves.
+    """
+    df_returns = df_returns.reindex(sorted(df_returns.columns), axis=1)
+    vine_file = _vine_filename(df_returns=df_returns)
+
+    # 1. Check for valid cached model
+    if os.path.exists(vine_file):
+        file_age_seconds = time.time() - os.path.getmtime(vine_file)
+        if file_age_seconds < (ttl_days * 24 * 60 * 60):
+            print(f"Loading cached vine copula from {vine_file}")
+            return load_vine_copula(df_returns=df_returns)
+
+    # 2. If expired or missing, fit a new one
+    print("Vine copula is missing or expired. Fitting new model...")
+    n = len(df_returns)
+    # Manual PIT transform to Uniform [0, 1]
+    u = df_returns.rank(method="average").values / (n + 1)
+
+    controls = pv.FitControlsVinecop(
+        family_set=[pv.BicopFamily.gaussian, pv.BicopFamily.student],  # type: ignore
+        tree_criterion="tau",
+    )
+
+    cop = pv.Vinecop.from_data(u, controls=controls)
+
+    # 3. Save via Pickle
+    with open(vine_file, "wb") as f:
+        # HIGHEST_PROTOCOL is faster and produces smaller files (currently Protocol 5)
+        pickle.dump(cop, f, protocol=pickle.HIGHEST_PROTOCOL)
+
+    return cop
+
+
+def sample_joint_step(cop: pv.Vinecop) -> np.ndarray[Any, np.dtype[np.float64]]:
+    """Returns one joint sample vector for the panel."""
+    simulated = np.array(cop.simulate(1))
+    return cast(np.ndarray[Any, np.dtype[np.float64]], simulated[0])
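The rank-based PIT in fit_vine_copula maps each return series onto pseudo-uniform (0, 1) margins before fitting, since pyvinecopulib models dependence on the unit hypercube. A standalone illustration of that transform (example values only, not package code):

    import pandas as pd

    returns = pd.Series([0.02, -0.01, 0.03, 0.00])
    u = returns.rank(method="average") / (len(returns) + 1)
    # -> [0.6, 0.2, 0.8, 0.4]; dividing by n + 1 keeps every value strictly inside (0, 1)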
{panelbeater-0.0.17 → panelbeater-0.2.12}/panelbeater/download.py
@@ -1,11 +1,23 @@
 """Download historical data."""
 
+# pylint: disable=invalid-name,global-statement,unused-argument
+import os
+
 import numpy as np
 import pandas as pd
 import requests_cache
 import tqdm
 import yfinance as yf
-from pandas_datareader import data as fred
+from fredapi import Fred  # type: ignore
+
+_FRED_CLIENT = None
+
+
+def _get_fred_client() -> Fred:
+    global _FRED_CLIENT
+    if _FRED_CLIENT is None:
+        _FRED_CLIENT = Fred(api_key=os.environ["FRED_API_KEY"])
+    return _FRED_CLIENT
 
 
 def _load_yahoo_prices(tickers: list[str]) -> pd.DataFrame:
@@ -35,11 +47,32 @@ def _load_fred_series(
     codes: list[str], session: requests_cache.CachedSession
 ) -> pd.DataFrame:
     """Load FRED series, forward-fill to daily to align with markets."""
-    dfs = []
+    client = _get_fred_client()
+    dfs: list[pd.Series] = []
     for code in tqdm.tqdm(codes, desc="Downloading macros"):
-        s = fred.DataReader(code, "fred", start="2000-01-01", session=session)
-        s.columns = [code]
-        dfs.append(s)
+        try:
+            df = client.get_series_all_releases(code)
+            df["date"] = pd.to_datetime(df["date"])
+            df["realtime_start"] = pd.to_datetime(df["realtime_start"])
+
+            def select_latest(group: pd.DataFrame) -> pd.DataFrame:
+                latest_df = group[
+                    group["realtime_start"] == group["realtime_start"].max()
+                ]
+                if not isinstance(latest_df, pd.DataFrame):
+                    raise ValueError("latest_df is not a DataFrame")
+                return latest_df
+
+            df = df.groupby("date").apply(select_latest)
+            df = df.set_index("date")
+            df.index = df.index.date  # type: ignore
+            df = df.sort_index()
+            dfs.append(df["value"].rename(code))  # type: ignore
+        except ValueError:
+            df = client.get_series(code)
+            df.index = df.index.date  # type: ignore
+            df = df.sort_index()
+            dfs.append(df.rename(code))
     macro = pd.concat(dfs, axis=1).sort_index()
     # daily frequency with forward-fill (macro is slower cadence)
     macro = macro.asfreq("D").ffill()
@@ -63,4 +96,5 @@ def download(
     levels = pd.concat(
         [prices.add_prefix("PX_"), macro.add_prefix("MACRO_")], axis=1
     ).ffill()
+    print(levels)
     return levels.replace([np.inf, -np.inf], np.nan)
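The move from pandas_datareader to fredapi's get_series_all_releases pulls every vintage of a macro series, and select_latest keeps, for each observation date, the value from the most recent realtime_start, i.e. the latest revision. An equivalent selection on hypothetical data, sketched here with idxmax rather than the groupby-apply used above:

    import pandas as pd

    releases = pd.DataFrame({
        "date": pd.to_datetime(["2024-01-01", "2024-01-01", "2024-02-01"]),
        "realtime_start": pd.to_datetime(["2024-01-15", "2024-02-15", "2024-02-15"]),
        "value": [1.0, 1.1, 2.0],  # the 2024-01-01 reading was later revised to 1.1
    })
    latest = releases.loc[releases.groupby("date")["realtime_start"].idxmax()]
    # -> one row per observation date, carrying the most recently revised value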
{panelbeater-0.0.17 → panelbeater-0.2.12}/panelbeater/features.py
@@ -4,13 +4,12 @@ import warnings
 
 import numpy as np
 import pandas as pd
-import tqdm
 from feature_engine.datetime import DatetimeFeatures
 
 
 def _ticker_features(df: pd.DataFrame, windows: list[int]) -> pd.DataFrame:
     cols = df.columns.values.tolist()
-    for col in tqdm.tqdm(cols, desc="Generating ticker features"):
+    for col in cols:
         s = df[col]
         for w in windows:
             with warnings.catch_warnings():
@@ -31,16 +30,15 @@ def _meta_ticker_feature(
     df: pd.DataFrame, lags: list[int], windows: list[int]
 ) -> pd.DataFrame:
     dfs = [df]
-    for lag in tqdm.tqdm(lags, desc="Generating lags"):
+    for lag in lags:
         dfs.append(df.shift(lag).add_suffix(f"_lag{lag}"))
-    for window in tqdm.tqdm(windows, desc="Generating window features"):
+    for window in windows:
         dfs.append(df.rolling(window).mean().add_suffix(f"_rmean{window}"))  # type: ignore
         dfs.append(df.rolling(window).std().add_suffix(f"_rstd{window}"))  # type: ignore
     return pd.concat(dfs, axis=1).replace([np.inf, -np.inf], np.nan)
 
 
 def _dt_features(df: pd.DataFrame) -> pd.DataFrame:
-    print("Generating datetime features")
     dtf = DatetimeFeatures(features_to_extract="all", variables="index")
     return dtf.fit_transform(df)
 
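The column-naming scheme of _meta_ticker_feature is unchanged by this diff: each input column gains _lag{n}, _rmean{w} and _rstd{w} variants. A minimal sketch of the resulting layout (illustrative frame, not package code):

    import pandas as pd

    df = pd.DataFrame({"PX_SPY": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]})
    out = pd.concat(
        [df, df.shift(1).add_suffix("_lag1"), df.rolling(5).mean().add_suffix("_rmean5")],
        axis=1,
    )
    # columns: PX_SPY, PX_SPY_lag1, PX_SPY_rmean5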
panelbeater-0.2.12/panelbeater/fit.py
@@ -0,0 +1,35 @@
+"""Handles fitting models."""
+
+import warnings
+from typing import Any, Callable
+
+import pandas as pd
+
+from .copula import fit_vine_copula
+from .features import features
+from .normalizer import normalize
+from .wt import create_wt
+
+
+def fit(
+    df_y: pd.DataFrame,
+    windows: list[int],
+    lags: list[int],
+    fit_func: Callable[[pd.DataFrame, pd.DataFrame, Any], None] | None = None,
+) -> None:
+    """Fit the models."""
+    wavetrainer = create_wt()
+    # Fit Vine Copula on historical returns
+    # We use pct_change to capture the dependency of returns
+    returns = df_y.pct_change().dropna()
+    if isinstance(returns, pd.Series):
+        returns = returns.to_frame()
+    fit_vine_copula(returns)
+    df_x = features(df=df_y.copy(), windows=windows, lags=lags)
+    df_y_norm = normalize(df=df_y.copy())
+    if fit_func is None:
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore", category=RuntimeWarning)
+            wavetrainer.fit(df_x, y=df_y_norm)
+    else:
+        fit_func(df_x, df_y_norm, wavetrainer)
panelbeater-0.2.12/panelbeater/normalizer.py
@@ -0,0 +1,92 @@
+"""Normalize the Y targets to standard deviations."""
+
+# pylint: disable=too-many-locals
+import math
+
+import numpy as np
+import pandas as pd
+from wavetrainer.model.model import PROBABILITY_COLUMN_PREFIX
+
+
+def _is_float(s: str) -> bool:
+    try:
+        float(s)
+        return True
+    except ValueError:
+        return False
+
+
+def normalize(df: pd.DataFrame) -> pd.DataFrame:
+    """Normalize the dataframe per column by z-score bucketing."""
+    df = df.pct_change(fill_method=None).replace([np.inf, -np.inf], np.nan)
+    mu = df.rolling(365).mean()
+    sigma = df.rolling(365).std()
+    df = ((((df - mu) / sigma) * 2.0).round() / 2.0).clip(-3, 3)
+    dfs = []
+    for col in df.columns:
+        for unique_val in df[col].unique():
+            if math.isnan(unique_val):
+                continue
+            s = (df[col] == unique_val).rename(f"{col}_{unique_val}")
+            dfs.append(s)
+    return pd.concat(dfs, axis=1)
+
+
+def denormalize(
+    df: pd.DataFrame, y: pd.DataFrame, u_sample: np.ndarray | None = None
+) -> pd.DataFrame:
+    """Denormalize the dataframe back to a total value."""
+    df = df.reindex(y.index)
+    for col in y.columns:
+        df[col] = y[col]
+    date_to_add = df.index[-1] + pd.Timedelta(days=1)
+
+    cols = set(df.columns.values.tolist())
+    target_cols = {"_".join(x.split("_")[:2]) for x in cols}
+    asset_idx = 0
+    for col in target_cols:
+        # 1. Gather all predicted probabilities for this asset's buckets
+        z_cols = {x for x in cols if x.startswith(col) and x != col}
+        if not z_cols:
+            continue
+        historical_series = y[col].pct_change().dropna()
+
+        # Sort buckets (stds) and their associated probabilities
+        stds = sorted(
+            [
+                float(x.replace(col, "").split("_")[1])
+                for x in z_cols
+                if _is_float(x.replace(col, "").split("_")[1])
+            ]
+        )
+        probs = []
+        for std in stds:
+            std_suffix = f"{col}_{std}_{PROBABILITY_COLUMN_PREFIX}"
+            prob_col = sorted([x for x in cols if x.startswith(std_suffix)])[-1]
+            prob = df[prob_col].dropna().iloc[-1]
+            probs.append(prob)
+
+        # Normalize probabilities (ensure they sum to 1.0)
+        probs = np.array(probs) / np.sum(probs)
+
+        # 2. Select the bucket using Inverse Transform Sampling
+        highest_std = 0.0
+        if u_sample is not None and asset_idx < len(u_sample):
+            cumulative_probs = np.cumsum(probs)
+            idx = np.searchsorted(cumulative_probs, u_sample[asset_idx])
+            highest_std = stds[min(idx, len(stds) - 1)]
+            asset_idx += 1
+        else:
+            highest_std = np.random.choice(stds, p=probs)
+
+        # 3. Use Pandas rolling on the historical y dataframe to avoid ndarray errors
+        mu = float(historical_series.rolling(365).mean().fillna(0.0).iloc[-1])  # pyright: ignore
+        sigma = float(historical_series.rolling(365).std().fillna(0.0).iloc[-1])
+
+        lower_bound = highest_std - 0.25
+        upper_bound = highest_std + 0.25
+        jittered_std = np.random.uniform(lower_bound, upper_bound)
+        value = (jittered_std * sigma) + mu
+        df.loc[date_to_add, col] = y[col].iloc[-1] * (1.0 + value)
+
+    return df[sorted(target_cols)]  # pyright: ignore
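normalize() now buckets each day's return into half-standard-deviation steps clipped to ±3 and one-hot encodes the buckets into boolean target columns named f"{col}_{bucket}". The bucketing arithmetic on an illustrative value:

    z = 0.87                              # return in rolling z-score units
    bucket = round(z * 2.0) / 2.0         # 1.74 rounds to 2 -> bucket 1.0
    bucket = max(-3.0, min(3.0, bucket))  # extreme moves land in the +/-3 sigma buckets
    # normalize() would then set True in a column such as "PX_SPY_1.0" for this day

denormalize() inverts the scheme: it renormalizes the predicted bucket probabilities, picks a bucket by inverse-transform sampling against the copula's uniform draw (falling back to np.random.choice), jitters uniformly within ±0.25 sigma of the bucket centre, and rescales with the trailing 365-day mean and standard deviation of returns.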
panelbeater-0.2.12/panelbeater/options.py
@@ -0,0 +1,352 @@
+"""Process the options for the assets."""
+
+# pylint: disable=too-many-locals,consider-using-f-string,use-dict-literal,invalid-name,too-many-arguments,too-many-positional-arguments,too-many-statements,line-too-long
+from datetime import datetime
+
+import numpy as np
+import pandas as pd
+import plotly.express as px
+import yfinance as yf
+from scipy import stats
+
+
+def get_price_probabilities(sim_df, target_date, bin_width=1.0):
+    """
+    Calculates the probability distribution of prices for a specific date.
+
+    Args:
+        sim_df: The simulation DataFrame (rows=dates, cols=paths)
+        target_date: The specific date (index) or integer location to analyze
+        bin_width: The size of the price buckets (e.g., $1.00)
+    """
+    # 1. Slice the simulation at the specific point in time
+    # This handles both a date-string index or a simple integer row index
+    if isinstance(target_date, int):
+        prices_at_t = sim_df.iloc[target_date]
+    else:
+        prices_at_t = sim_df.loc[target_date]
+
+    # 2. Define bins based on the range of prices on that specific day
+    min_p = np.floor(prices_at_t.min() / bin_width) * bin_width
+    max_p = np.ceil(prices_at_t.max() / bin_width) * bin_width
+    bins = np.arange(min_p, max_p + bin_width, bin_width)
+
+    # 3. Calculate probabilities
+    counts, bin_edges = np.histogram(prices_at_t, bins=bins)
+    probabilities = counts / len(prices_at_t)
+
+    # 4. Format into a DataFrame
+    price_points = bin_edges[:-1] + (bin_width / 2)
+    dist_df = pd.DataFrame({"price_point": price_points, "probability": probabilities})
+
+    return dist_df[dist_df["probability"] > 0].reset_index(drop=True)
+
+
+def calculate_full_kelly(row, sim_df):
+    """Calculate the kelly criterion for a probability mispricing."""
+    target_date = row["date"]
+    strike = row["strike"]
+    price = row["market_ask"]
+
+    if price <= 0:
+        return 0, 0
+
+    # Extract the simulated prices for this specific date
+    prices_at_t = sim_df.loc[target_date].values
+
+    # Calculate the Payoff for every path
+    if row["type"] == "call":
+        payoffs = np.maximum(prices_at_t - strike, 0)
+    else:
+        payoffs = np.maximum(strike - prices_at_t, 0)
+
+    expected_payoff = np.mean(payoffs)
+
+    # 1. Probability of winning (p)
+    p = row["model_prob"]
+    if p <= 0:
+        return 0, 0
+
+    # 2. Net Odds (b)
+    # This is (Expected Profit if we win) / (Amount Lost if we lose)
+    # Average payoff of the winning paths
+    avg_win_payoff = expected_payoff / p
+    net_profit_if_win = avg_win_payoff - price
+    b = net_profit_if_win / price
+
+    if b <= 0:
+        return 0, 0
+
+    # 3. Full Kelly Formula: f* = (p(b+1) - 1) / b
+    f_star = (p * (b + 1) - 1) / b
+
+    return max(0, f_star), expected_payoff - price
+
+
+def black_scholes_price(S, K, T, r, sigma, option_type="put"):
+    """Calculate the black scholes price for an option."""
+    # S = Trigger Asset Price, K = Strike, T = Time remaining, r = Risk-free rate, sigma = IV
+    d1 = (np.log(S / K) + (r + 0.5 * sigma**2) * T) / (sigma * np.sqrt(T))
+    d2 = d1 - sigma * np.sqrt(T)
+    if option_type == "call":
+        return S * stats.norm.cdf(d1) - K * np.exp(-r * T) * stats.norm.cdf(d2)
+    return K * np.exp(-r * T) * stats.norm.cdf(-d2) - S * stats.norm.cdf(-d1)
+
+
+def find_mispriced_options(ticker_symbol: str, sim_df: pd.DataFrame) -> None:
+    """Find any mispriced options for an asset."""
+
+    # 1. Initialize the Ticker
+    ticker = yf.Ticker(ticker_symbol)
+
+    # 1. Get dates from your simulation
+    sim_dates = pd.to_datetime(sim_df.index).date.tolist()  # pyright: ignore
+
+    # 2. Get available expiries from the market
+    available_expiries = [
+        datetime.strptime(d, "%Y-%m-%d").date() for d in ticker.options
+    ]
+
+    # 3. Find the common dates
+    # We want to find which days in our simulation actually have a tradeable option chain
+    common_dates = sorted(list(set(sim_dates).intersection(set(available_expiries))))
+
+    print(f"Simulation covers {len(sim_dates)} days.")
+    print(f"Market has {len(available_expiries)} expiries available.")
+    print(f"Matches found for: {common_dates}")
+
+    # Storage for our comparison results
+    date_results = []
+
+    for target_date in common_dates:
+        print(f"\n--- Processing Date: {target_date} ---")
+
+        # 1. Get YOUR model's probability for this specific day
+        # We use the function we built earlier
+        date_str = target_date.strftime("%Y-%m-%d")
+
+        # 2. Download the MARKET's chain for this specific day
+        chain = ticker.option_chain(date_str)
+        spot = ticker.history(period="1d")["Close"].iloc[-1]
+        calls = chain.calls[["strike", "bid", "ask", "impliedVolatility"]].copy()
+        calls = calls[calls["strike"] > spot * 1.02]
+        calls["option_type"] = "call"
+        puts = chain.puts[["strike", "bid", "ask", "impliedVolatility"]].copy()
+        puts = puts[puts["strike"] < spot * 0.98]
+        puts["option_type"] = "put"
+
+        # 3. Combine into one market view
+        full_chain = pd.concat([calls, puts])
+
+        # 4. Get your Model's Price Distribution for this specific day
+        # We grab the prices from sim_df for this row/date
+        model_prices_at_t = sim_df.loc[date_str].values
+
+        # 5. Compare every strike in the market to your model's probability
+        for _, row in full_chain.iterrows():
+            k = row["strike"]
+
+            if row["option_type"] == "call":
+                # Prob of finishing ABOVE the strike
+                model_prob = np.mean(model_prices_at_t > k)
+            else:
+                # Prob of finishing BELOW the strike
+                model_prob = np.mean(model_prices_at_t < k)
+
+            date_results.append(
+                {
+                    "date": date_str,
+                    "strike": k,
+                    "type": row["option_type"],
+                    "market_iv": row["impliedVolatility"],
+                    "market_ask": row["ask"],
+                    "model_prob": model_prob,
+                }
+            )
+
+    comparison_df = pd.DataFrame(date_results)
+    # Apply the calculation
+    results = comparison_df.apply(lambda row: calculate_full_kelly(row, sim_df), axis=1)
+    if results.empty:
+        return
+
+    comparison_df[["kelly_fraction", "expected_profit"]] = pd.DataFrame(
+        results.tolist(), index=comparison_df.index
+    )
+
+    # Filter for liquid options and positive edge
+    top_5 = (
+        comparison_df[comparison_df["market_ask"] > 0.10]  # pyright: ignore
+        .sort_values(by="kelly_fraction", ascending=False)
+        .head(4)
+    )
+
+    # Formatting for the final report
+    summary_report = top_5[
+        ["date", "strike", "type", "model_prob", "kelly_fraction", "expected_profit"]
+    ].copy()
+    summary_report["model_prob"] = summary_report["model_prob"].map("{:.1%}".format)  # pyright: ignore
+    summary_report["kelly_fraction"] = summary_report["kelly_fraction"].map(  # pyright: ignore
+        "{:.2%}".format
+    )
+    summary_report["expected_profit"] = summary_report["expected_profit"].map(  # pyright: ignore
+        "${:,.2f}".format
+    )
+
+    print(summary_report)
+
+    fig = px.scatter(
+        comparison_df[comparison_df["kelly_fraction"] > 0],
+        x="strike",
+        y="kelly_fraction",
+        color="type",
+        size="model_prob",
+        hover_data=["date"],
+        title="Full Kelly Allocation: Conviction by Strike and Option Type",
+        labels={"kelly_fraction": "Kelly Bet Size (%)", "strike": "Strike Price ($)"},
+        template="plotly_dark",
+    )
+
+    # Highlight the top 5 with annotations or larger markers
+    fig.update_traces(marker=dict(line=dict(width=1, color="White")))
+    fig.write_image(
+        f"kelly_conviction_report_{ticker_symbol}.png", width=1200, height=800
+    )
+
+    exit_strategies = []
+
+    for _, trade in top_5.iterrows():
+        # Select appropriate simulation slices
+        sim_slice = sim_df.loc[trade["date"]]
+
+        # Calculate distribution stats for this specific expiry
+        mu = sim_slice.mean()
+        sigma = sim_slice.std()
+
+        # --- REASONABLE LOGIC START ---
+        # Instead of 95/5, use 0.5 to 1.0 standard deviation for targets
+        # This targets the 'meat' of the move your model predicts
+        if trade["type"] == "call":
+            # TP: The mean predicted price (where the bulk of the probability lies)
+            # SL: Half a standard deviation below the current spot or the mean
+            tp_price = mu + (0.2 * sigma)
+            sl_price = mu - (0.5 * sigma)
+        else:
+            # Put: Profit on the downside mean
+            tp_price = mu - (0.2 * sigma)
+            sl_price = mu + (0.5 * sigma)
+        # --- REASONABLE LOGIC END ---
+
+        # 1. Get today's date and calculate time to expiry
+        today = datetime.now()
+        expiry_date = datetime.strptime(trade["date"], "%Y-%m-%d")  # type: ignore
+        days_remaining = (expiry_date - today).days
+
+        # IMPORTANT: Exit triggers should be modeled for 'Today' or 'Soon',
+        # not the moment of expiry, otherwise extrinsic value is 0.
+        # We assume we hold for 25% of the remaining duration or at least 1 day.
+        holding_period_days = max(days_remaining * 0.25, 1)
+        time_to_trigger = max(days_remaining - holding_period_days, 0.5) / 365.0
+
+        tp_option_price = black_scholes_price(
+            tp_price,
+            trade["strike"],
+            time_to_trigger,
+            0.04,
+            trade["market_iv"],
+            str(trade["type"]),
+        )
+        sl_option_price = black_scholes_price(
+            sl_price,
+            trade["strike"],
+            time_to_trigger,
+            0.04,
+            trade["market_iv"],
+            str(trade["type"]),
+        )
+
+        exit_strategies.append(
+            {
+                "Strike": trade["strike"],
+                "Type": trade["type"],
+                "Kelly %": trade["kelly_fraction"],
+                "TP Asset Trigger": tp_price,
+                "SL Asset Trigger": sl_price,
+                "TP Option Price": tp_option_price,
+                "SL Option Price": sl_option_price,
+            }
+        )
+
+    exit_df = pd.DataFrame(exit_strategies)
+    print(exit_df)
+
+
+def determine_spot_position(ticker_symbol: str, sim_df: pd.DataFrame) -> None:
+    """
+    Determines optimal spot position (Long/Short), Kelly sizing,
+    and path-based exit levels for assets without options.
+    """
+    # 1. Get Current Market Data
+    ticker = yf.Ticker(ticker_symbol)
+    spot_history = ticker.history(period="1d")
+
+    if spot_history.empty:
+        print(f"No market data for {ticker_symbol}")
+        return
+
+    spot_price = spot_history["Close"].iloc[-1]
+
+    # 2. Extract the Terminal Distribution
+    # Find the latest date in the index
+    last_date = sim_df.index.max()
+
+    # Filter the DF for that date.
+    # This results in N rows (where N = number of simulations)
+    terminal_distribution = sim_df.loc[[last_date]]
+
+    # Extract the specific ticker column
+    # terminal_prices is now a Series of predicted prices across all paths
+    terminal_prices = terminal_distribution[f"PX_{ticker_symbol}"]
+
+    # 3. Determine Bias and Winning Path Ratio (p)
+    median_terminal = terminal_prices.median()  # This will now work!
+    is_long = median_terminal > spot_price
+
+    if is_long:
+        # Probability of finishing higher than spot
+        p = np.mean(terminal_prices > spot_price)
+        tp_price = terminal_prices.quantile(0.95)
+        sl_price = terminal_prices.quantile(0.05)
+    else:
+        # Probability of finishing lower than spot
+        p = np.mean(terminal_prices < spot_price)
+        tp_price = terminal_prices.quantile(0.05)
+        sl_price = terminal_prices.quantile(0.95)
+
+    # 3. Calculate Odds (b) for Kelly
+    # b = (Expected Profit) / (Expected Loss if Stopped)
+    expected_profit = abs(tp_price - spot_price)
+    expected_loss = abs(spot_price - sl_price)
+    b = expected_profit / expected_loss
+
+    # 4. Full Kelly Formula: f* = (p(b+1) - 1) / b
+    if b > 0 and p > 0:
+        f_star = (p * (b + 1) - 1) / b
+        kelly_size = max(0, f_star)
+    else:
+        kelly_size = 0
+
+    # 5. Apply a 'Trader's Cap' (e.g., 10% of portfolio for spot)
+    final_size = min(kelly_size, 0.10)
+
+    # Output Results
+    print(f"\n--- SPOT ANALYSIS FOR {ticker_symbol} ---")
+    print(f"Current Price: ${spot_price:.2f}")
+    print(f"Position: {'LONG' if is_long else 'SHORT'}")
+    print(f"Win Probability (p): {p:.1%}")
+    print(f"Risk/Reward Ratio (b): {b:.2f}")
+    print(f"Kelly Fraction: {kelly_size:.2%}")
+    print(f"Recommended Size (Capped): {final_size:.2%}")
+    print("-" * 30)
+    print(f"Take Profit Target: ${tp_price:.2f}")
+    print(f"Stop Loss (Invalidation): ${sl_price:.2f}")
1
+ """Handle simulations from the models."""
2
+ # pylint: disable=too-many-arguments,too-many-positional-arguments
3
+
4
+ from typing import Callable
5
+
6
+ import pandas as pd
7
+ import pyvinecopulib as pv
8
+ import tqdm
9
+ from joblib import Parallel, delayed
10
+
11
+ from .copula import load_vine_copula, sample_joint_step
12
+ from .features import features
13
+ from .normalizer import denormalize
14
+ from .wt import create_wt
15
+
16
+ SIMULATION_COLUMN = "simulation"
17
+ SIMULATION_FILENAME = "sims.parquet"
18
+
19
+
20
+ def run_single_simulation(
21
+ sim_idx: int,
22
+ df_y,
23
+ days_out: int,
24
+ windows: list[int],
25
+ lags: list[int],
26
+ vine_cop: pv.Vinecop,
27
+ ):
28
+ """
29
+ Encapsulates a single Monte Carlo path generation.
30
+ """
31
+ # Local copies for thread-safety (though joblib uses processes)
32
+ df_y = df_y.copy()
33
+ wavetrainer = create_wt()
34
+
35
+ for _ in tqdm.tqdm(range(days_out), desc="Simulation Days"):
36
+ # 1. Feature Engineering
37
+ df_x = features(df=df_y.copy(), windows=windows, lags=lags)
38
+
39
+ # 2. Get Model Prediction (u_step sample from Copula)
40
+ u_step = sample_joint_step(vine_cop)
41
+
42
+ # 3. Transform and Denormalize to get next day prices
43
+ df_next = wavetrainer.transform(df_x.iloc[[-1]], ignore_no_dates=True).drop(
44
+ columns=df_x.columns.values.tolist()
45
+ )
46
+ df_y = denormalize(df_next, y=df_y.copy(), u_sample=u_step)
47
+
48
+ # Mark the simulation index and return only the relevant tail (for memory efficiency)
49
+ df_result = df_y.tail(days_out + 1).copy()
50
+ df_result[SIMULATION_COLUMN] = sim_idx
51
+ return df_result
52
+
53
+
54
+ def simulate(
55
+ sims: int,
56
+ df_y: pd.DataFrame,
57
+ days_out: int,
58
+ windows: list[int],
59
+ lags: list[int],
60
+ sim_func: Callable[
61
+ [int, pd.DataFrame, int, list[int], list[int], pv.Vinecop], list[pd.DataFrame]
62
+ ]
63
+ | None = None,
64
+ ) -> pd.DataFrame:
65
+ """Simulate from trained models."""
66
+ print(f"Starting {sims} simulations in parallel...")
67
+ vine_cop = load_vine_copula(df_returns=df_y)
68
+ print("Loaded vine copula")
69
+ if sim_func is None:
70
+ # n_jobs=-1 uses all available CPU cores
71
+ all_sims = Parallel(n_jobs=-1)(
72
+ delayed(run_single_simulation)(
73
+ i, df_y.copy(), days_out, windows, lags, vine_cop
74
+ )
75
+ for i in tqdm.tqdm(range(sims), desc="Simulating")
76
+ )
77
+ else:
78
+ all_sims = sim_func(sims, df_y.copy(), days_out, windows, lags, vine_cop)
79
+ # Combine all simulations into one large DataFrame
80
+ df_mc = pd.concat(all_sims) # type: ignore
81
+ df_mc.to_parquet(SIMULATION_FILENAME)
82
+ return df_mc
83
+
84
+
85
+ def load_simulations() -> pd.DataFrame:
86
+ """Load the rendered simulations."""
87
+ return pd.read_parquet(SIMULATION_FILENAME)
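Each worker rebuilds its own wavetrainer via create_wt() and walks one path a day at a time, so the concatenated output is long-format: a date index, the asset columns, plus a simulation column identifying the path. A small sketch of consuming the saved file (assuming SPY was among the configured tickers):

    from panelbeater.simulate import SIMULATION_COLUMN, load_simulations

    df_mc = load_simulations()  # reads sims.parquet
    spy_paths = df_mc.pivot(columns=SIMULATION_COLUMN, values="PX_SPY")
    # rows = dates, one column per Monte Carlo path, as trades() does before plotting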
panelbeater-0.2.12/panelbeater/trades.py
@@ -0,0 +1,40 @@
+"""Handle generating trades."""
+
+# pylint: disable=use-dict-literal
+import pandas as pd
+import tqdm
+
+from .options import determine_spot_position, find_mispriced_options
+from .simulate import SIMULATION_COLUMN, load_simulations
+
+
+def trades(df_y: pd.DataFrame, days_out: int, tickers: list[str]) -> None:
+    """Calculate new trades."""
+    df_mc = load_simulations()
+    pd.options.plotting.backend = "plotly"
+    for col in tqdm.tqdm(df_y.columns.values.tolist(), desc="Plotting assets"):
+        if col == SIMULATION_COLUMN:
+            continue
+        plot_df = df_mc.pivot(columns=SIMULATION_COLUMN, values=col).tail(days_out + 1)
+        # Plotting
+        fig = plot_df.plot(
+            title=f"Monte Carlo Simulation: {col}",
+            labels={"value": "Price", "index": "Date", "simulation": "Path ID"},
+            template="plotly_dark",
+        )
+        # Add any additional styling
+        fig.add_scatter(
+            x=plot_df.index,
+            y=plot_df.median(axis=1),
+            name="Median",
+            line=dict(color="white", width=10),
+        )
+        fig.write_image(
+            f"monte_carlo_results_{col}.png", width=1200, height=800, scale=2
+        )
+
+    # Find the current options prices
+    for ticker in tickers:
+        print(f"Finding pricing options for {ticker}")
+        find_mispriced_options(ticker, df_mc[f"PX_{ticker}"].copy())  # pyright: ignore
+        determine_spot_position(ticker, df_mc[f"PX_{ticker}"].copy())  # pyright: ignore
panelbeater-0.2.12/panelbeater/wt.py
@@ -0,0 +1,18 @@
+"""Handles wavetrainer interaction."""
+
+import datetime
+
+import wavetrainer
+
+
+def create_wt() -> wavetrainer.trainer.Trainer:  # pyright: ignore
+    """Creates a wavetrainer instance."""
+    return wavetrainer.create(
+        "panelbeater-train",
+        walkforward_timedelta=datetime.timedelta(days=30),
+        validation_size=datetime.timedelta(days=365),
+        test_size=datetime.timedelta(days=365),
+        allowed_models={"catboost"},
+        max_false_positive_reduction_steps=0,
+        use_power_transformer=True,
+    )
{panelbeater-0.0.17 → panelbeater-0.2.12/panelbeater.egg-info}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: panelbeater
-Version: 0.0.17
+Version: 0.2.12
 Summary: A CLI for finding mispriced options.
 Home-page: https://github.com/8W9aG/panelbeater
 Author: Will Sackfield
@@ -18,8 +18,15 @@ Requires-Dist: numpy>=2.2.6
 Requires-Dist: feature-engine>=1.9.3
 Requires-Dist: requests-cache>=1.2.1
 Requires-Dist: scikit-learn>=1.6.1
-Requires-Dist: wavetrainer>=0.2.43
+Requires-Dist: wavetrainer>=0.3.4
 Requires-Dist: tqdm>=4.67.1
+Requires-Dist: pyvinecopulib>=0.7.5
+Requires-Dist: fredapi>=0.5.2
+Requires-Dist: python-dotenv>=1.1.0
+Requires-Dist: kaleido>=1.2.0
+Requires-Dist: plotly>=6.3.1
+Requires-Dist: scipy>=1.16.3
+Requires-Dist: joblib>=1.5.2
 
 # panelbeater
 
@@ -42,6 +49,12 @@ Python 3.11.6:
 - [scikit-learn](https://scikit-learn.org/stable/)
 - [wavetrainer](https://github.com/8W9aG/wavetrainer/)
 - [tqdm](https://tqdm.github.io/)
+- [pyvinecopulib](https://github.com/vinecopulib/pyvinecopulib)
+- [python-dotenv](https://saurabh-kumar.com/python-dotenv/)
+- [kaleido](https://github.com/plotly/kaleido)
+- [plotly](https://plotly.com/)
+- [scipy](https://scipy.org/)
+- [joblib](https://joblib.readthedocs.io/en/stable/)
 
 ## Raison D'être :thought_balloon:
 
@@ -52,7 +65,7 @@ Python 3.11.6:
 `panelbeater` goes through the following steps:
 1. Downloads the historical data.
 2. Performs feature engineering on the data.
-3. Trains the required models to operate on the data panel.
+3. Trains the required models and copulas to operate on the data panel.
 4. Downloads the current data.
 5. Runs inference on t+X for the latest options to find the probability distribution on the asset prices to their expiry dates.
 6. Finds any mispriced options and sizes the position accordingly.
{panelbeater-0.0.17 → panelbeater-0.2.12}/panelbeater.egg-info/SOURCES.txt
@@ -5,9 +5,15 @@ requirements.txt
 setup.py
 panelbeater/__init__.py
 panelbeater/__main__.py
+panelbeater/copula.py
 panelbeater/download.py
 panelbeater/features.py
+panelbeater/fit.py
 panelbeater/normalizer.py
+panelbeater/options.py
+panelbeater/simulate.py
+panelbeater/trades.py
+panelbeater/wt.py
 panelbeater.egg-info/PKG-INFO
 panelbeater.egg-info/SOURCES.txt
 panelbeater.egg-info/dependency_links.txt
panelbeater-0.2.12/panelbeater.egg-info/requires.txt
@@ -0,0 +1,16 @@
+yfinance==0.2.66
+pandas>=2.3.3
+pandas-datareader>=0.10.0
+numpy>=2.2.6
+feature-engine>=1.9.3
+requests-cache>=1.2.1
+scikit-learn>=1.6.1
+wavetrainer>=0.3.4
+tqdm>=4.67.1
+pyvinecopulib>=0.7.5
+fredapi>=0.5.2
+python-dotenv>=1.1.0
+kaleido>=1.2.0
+plotly>=6.3.1
+scipy>=1.16.3
+joblib>=1.5.2
panelbeater-0.0.17/panelbeater.egg-info/requires.txt → panelbeater-0.2.12/requirements.txt
@@ -5,5 +5,12 @@ numpy>=2.2.6
 feature-engine>=1.9.3
 requests-cache>=1.2.1
 scikit-learn>=1.6.1
-wavetrainer>=0.2.43
+wavetrainer>=0.3.4
 tqdm>=4.67.1
+pyvinecopulib>=0.7.5
+fredapi>=0.5.2
+python-dotenv>=1.1.0
+kaleido>=1.2.0
+plotly>=6.3.1
+scipy>=1.16.3
+joblib>=1.5.2
{panelbeater-0.0.17 → panelbeater-0.2.12}/setup.py
@@ -23,7 +23,7 @@ def install_requires() -> typing.List[str]:
 
 setup(
     name='panelbeater',
-    version='0.0.17',
+    version='0.2.12',
    description='A CLI for finding mispriced options.',
     long_description=long_description,
     long_description_content_type='text/markdown',
panelbeater-0.0.17/panelbeater/__init__.py
@@ -1,3 +0,0 @@
-"""panelbeater initialisation."""
-
-__VERSION__ = "0.0.17"
panelbeater-0.0.17/panelbeater/__main__.py
@@ -1,95 +0,0 @@
-"""The CLI for finding mispriced options."""
-
-import argparse
-import datetime
-
-import requests_cache
-import tqdm
-import wavetrainer as wt
-from wavetrainer.model_type import QUANTILE_KEY
-
-from .download import download
-from .features import features
-from .normalizer import denormalize, normalize
-
-_TICKERS = [
-    # Equities
-    "SPY",
-    "QQQ",
-    "EEM",
-    # Commodities
-    "GC=F",
-    "CL=F",
-    "SI=F",
-    # FX
-    "EURUSD=X",
-    "USDJPY=X",
-    # Crypto
-    "BTC-USD",
-    "ETH-USD",
-]
-_MACROS = [
-    "GDP",
-    "UNRATE",
-    "CPIAUCSL",
-    "FEDFUNDS",
-    "DGS10",
-    "T10Y2Y",
-    "M2SL",
-    "VIXCLS",
-    "DTWEXBGS",
-    "INDPRO",
-]
-_WINDOWS = [
-    5,
-    10,
-    20,
-    60,
-    120,
-    200,
-]
-_LAGS = [1, 3, 5, 10, 20, 30]
-_DAYS_OUT = 30
-
-
-def main() -> None:
-    """The main CLI function."""
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "--inference",
-        help="Whether to skip training and just do inference.",
-        required=False,
-        default=False,
-        action="store_true",
-    )
-    args = parser.parse_args()
-
-    # Setup main objects
-    session = requests_cache.CachedSession("panelbeater-cache")
-    wavetrainer = wt.create(
-        "panelbeater-train",
-        walkforward_timedelta=datetime.timedelta(days=30),
-        validation_size=datetime.timedelta(days=365),
-        test_size=datetime.timedelta(days=365),
-        allowed_models={"catboost"},
-        max_false_positive_reduction_steps=0,
-    )
-
-    # Fit the models
-    df_y = download(tickers=_TICKERS, macros=_MACROS, session=session)
-    df_x = features(df=df_y.copy(), windows=_WINDOWS, lags=_LAGS)
-    df_y_norm = normalize(df=df_y.copy())
-    df_y_norm.attrs = {QUANTILE_KEY: True}
-    if not args.inference:
-        wavetrainer.fit(df_x, y=df_y_norm)
-    for _ in tqdm.tqdm(range(_DAYS_OUT), desc="Running t+X simulation"):
-        df_next = wavetrainer.transform(df_x, ignore_no_dates=True).drop(columns=df_x)
-        df_y = denormalize(df_next, y=df_y)
-        df_x = features(df=df_y.copy(), windows=_WINDOWS, lags=_LAGS)
-        df_y_norm = normalize(df=df_y.copy())
-
-    # Find the current options prices
-
-
-if __name__ == "__main__":
-    main()
panelbeater-0.0.17/panelbeater/normalizer.py
@@ -1,64 +0,0 @@
-"""Normalize the Y targets to standard deviations."""
-
-# pylint: disable=too-many-locals
-
-import numpy as np
-import pandas as pd
-from wavetrainer.model.model import PROBABILITY_COLUMN_PREFIX
-
-
-def _is_float(s: str) -> bool:
-    try:
-        float(s)
-        return True
-    except ValueError:
-        return False
-
-
-def normalize(df: pd.DataFrame) -> pd.DataFrame:
-    """Normalize the dataframe per column by z-score bucketing."""
-    df = df.pct_change(fill_method=None).replace([np.inf, -np.inf], np.nan)
-    mu = df.rolling(365).mean()
-    sigma = df.rolling(365).std()
-    return ((df - mu) / sigma).fillna(0.0)
-
-
-def denormalize(df: pd.DataFrame, y: pd.DataFrame) -> pd.DataFrame:
-    """Denormalize the dataframe back to a total value."""
-    for col in y.columns:
-        df[col] = y[col]
-    date_to_add = df.index[-1] + pd.Timedelta(days=1)
-
-    cols = set(df.columns.values.tolist())
-    target_cols = {"_".join(x.split("_")[:2]) for x in cols}
-    for col in target_cols:
-        # Find the standard deviations
-        z_cols = {x for x in cols if x.startswith(col) and x != col}
-        if not z_cols:
-            continue
-        stds = sorted(
-            [
-                float(x.replace(col, "").split("_")[1])
-                for x in z_cols
-                if _is_float(x.replace(col, "").split("_")[1])
-            ]
-        )
-
-        # Find the highest probability standard deviation
-        highest_std_value = 0.0
-        highest_std = None
-        for std in stds:
-            std_suffix = f"{col}_{std}_{PROBABILITY_COLUMN_PREFIX}"
-            std_true_col = sorted([x for x in cols if x.startswith(std_suffix)])[-1]
-            std_value = df[std_true_col].iloc[-1]
-            if std_value > highest_std_value:
-                highest_std_value = std_value
-                highest_std = std
-
-        # Convert the standard deviation back to a value
-        mu = df[col].rolling(365).mean()
-        sigma = df[col].rolling(365).std()
-        value = (highest_std * sigma) + mu
-        df.loc[date_to_add, col] = df[col].iloc[-1] * (1.0 + value)
-
-    return df.drop(columns=list(cols))
panelbeater-0.0.17/requirements.txt
@@ -1,9 +0,0 @@
-yfinance==0.2.66
-pandas>=2.3.3
-pandas-datareader>=0.10.0
-numpy>=2.2.6
-feature-engine>=1.9.3
-requests-cache>=1.2.1
-scikit-learn>=1.6.1
-wavetrainer>=0.2.43
-tqdm>=4.67.1