sdevpy 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sdevpy/__init__.py +0 -0
- sdevpy/analytics/bachelier.py +66 -0
- sdevpy/analytics/black.py +81 -0
- sdevpy/analytics/fbsabr.py +183 -0
- sdevpy/analytics/mcheston.py +203 -0
- sdevpy/analytics/mcsabr.py +221 -0
- sdevpy/analytics/mczabr.py +220 -0
- sdevpy/analytics/sabr.py +72 -0
- sdevpy/example.py +2 -0
- sdevpy/machinelearning/callbacks.py +112 -0
- sdevpy/machinelearning/datasets.py +32 -0
- sdevpy/machinelearning/learningmodel.py +151 -0
- sdevpy/machinelearning/learningschedules.py +23 -0
- sdevpy/machinelearning/topology.py +65 -0
- sdevpy/maths/interpolations.py +28 -0
- sdevpy/maths/metrics.py +14 -0
- sdevpy/maths/optimization.py +1 -0
- sdevpy/maths/rand.py +99 -0
- sdevpy/projects/datafiles.py +28 -0
- sdevpy/projects/pinns/ernst_pinns.py +324 -0
- sdevpy/projects/pinns/pinns.py +345 -0
- sdevpy/projects/pinns/pinns_worst_of.py +635 -0
- sdevpy/projects/stovol/stovolgen.py +65 -0
- sdevpy/projects/stovol/stovolplot.py +110 -0
- sdevpy/projects/stovol/stovoltrain.py +247 -0
- sdevpy/projects/stovol/xsabrfit.py +255 -0
- sdevpy/settings.py +14 -0
- sdevpy/test.py +199 -0
- sdevpy/tools/clipboard.py +40 -0
- sdevpy/tools/constants.py +3 -0
- sdevpy/tools/filemanager.py +59 -0
- sdevpy/tools/jsonmanager.py +48 -0
- sdevpy/tools/timegrids.py +89 -0
- sdevpy/tools/timer.py +32 -0
- sdevpy/volsurfacegen/fbsabrgenerator.py +64 -0
- sdevpy/volsurfacegen/mchestongenerator.py +216 -0
- sdevpy/volsurfacegen/mcsabrgenerator.py +228 -0
- sdevpy/volsurfacegen/mczabrgenerator.py +227 -0
- sdevpy/volsurfacegen/sabrgenerator.py +282 -0
- sdevpy/volsurfacegen/smilegenerator.py +124 -0
- sdevpy/volsurfacegen/stovolfactory.py +44 -0
- sdevpy-0.0.1.dist-info/LICENSE +21 -0
- sdevpy-0.0.1.dist-info/METADATA +21 -0
- sdevpy-0.0.1.dist-info/RECORD +46 -0
- sdevpy-0.0.1.dist-info/WHEEL +5 -0
- sdevpy-0.0.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
""" Plot helpers for XSABR project """
|
|
2
|
+
import matplotlib.pyplot as plt
|
|
3
|
+
import matplotlib.ticker as mtick
|
|
4
|
+
from analytics import bachelier
|
|
5
|
+
from analytics import black
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def plot_transform_surface(expiries, strikes, are_calls, fwd, ref_prices, mod_prices, title_,
                           transform='ShiftedBlackScholes'):
    """ Calculate quantities to display for the surface and display them in charts.

    Transformed quantities available are: Price, ShiftedBlackScholes (3%) and
    Bachelier (normal vols). One chart is drawn per expiry on a num_rows x 2 grid,
    so the number of expiries should be even (and ideally no more than 6).
    """
    # Transform raw prices into the chosen display quantity (price or implied vol)
    ref_disp = transform_surface(expiries, strikes, are_calls, fwd, ref_prices, transform)
    mod_disp = transform_surface(expiries, strikes, are_calls, fwd, mod_prices, transform)

    # Chart grid layout: two columns, one subplot per expiry
    num_charts = expiries.shape[0]
    num_cols = 2
    num_rows = int(num_charts / num_cols)
    # Bug fix: compare strings with '==', not 'is'. Identity comparison against a
    # string literal only works by accident of CPython interning and raises a
    # SyntaxWarning on Python 3.8+.
    ylabel = 'Price' if transform == 'Price' else 'Vol'

    fig, axs = plt.subplots(num_rows, num_cols, layout="constrained")
    fig.suptitle(title_, size='x-large', weight='bold')
    fig.set_size_inches(12, 8)
    for i in range(num_rows):
        for j in range(num_cols):
            k = num_cols * i + j  # flat index of the expiry shown in this subplot
            axs[i, j].plot(strikes[k], ref_disp[k], color='blue', label='Reference')
            axs[i, j].plot(strikes[k], mod_disp[k], color='red', label='Model')
            # Strikes and vols are displayed in percent
            axs[i, j].xaxis.set_major_formatter(mtick.PercentFormatter(xmax=1, decimals=1))
            axs[i, j].yaxis.set_major_formatter(mtick.PercentFormatter(xmax=1, decimals=0))
            axs[i, j].set_xlabel('Strike')
            axs[i, j].set_ylabel(ylabel)
            axs[i, j].set_title(f"T={expiries[k, 0]}")
            axs[i, j].legend(loc='upper right')

    plt.show()
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def transform_surface(expiries, strikes, are_calls, fwd, prices, transform='ShiftedBlackScholes'):
    """ Transform prices into: Price, ShiftedBlackScholes (3%) and Bachelier (normal vols).

    'Price' returns the input prices unchanged; 'ShiftedBlackScholes' inverts each price
    into a shifted (3%) lognormal implied vol; 'Bachelier' inverts into a normal vol.
    Raises ValueError for any other transform name.
    """
    # Bug fix: all three transform-name comparisons below use '==' instead of 'is'.
    # 'is' against a string literal tests object identity and is not a reliable
    # equality check (SyntaxWarning on Python 3.8+).
    trans_prices = []
    if transform == 'Price':
        trans_prices = prices
    elif transform == 'ShiftedBlackScholes':
        shift = 0.03  # 3% displacement to keep shifted forward/strikes positive
        sfwd = fwd + shift
        for i, expiry in enumerate(expiries):
            strikes_ = strikes[i]
            are_calls_ = are_calls[i]
            trans_prices_ = []
            for j, strike in enumerate(strikes_):
                sstrike = strike + shift
                trans_prices_.append(black.implied_vol(expiry, sstrike, are_calls_[j], sfwd,
                                                       prices[i, j]))
            trans_prices.append(trans_prices_)
    elif transform == 'Bachelier':
        for i, expiry in enumerate(expiries):
            strikes_ = strikes[i]
            are_calls_ = are_calls[i]
            trans_prices_ = []
            for j, strike in enumerate(strikes_):
                trans_prices_.append(bachelier.implied_vol(expiry, strike, are_calls_[j], fwd,
                                                           prices[i, j]))
            trans_prices.append(trans_prices_)
    else:
        raise ValueError("Unknown transform type: " + transform)

    return trans_prices
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
# def strike_ladder(expiry, spread_ladder, fwd, test_params, generator, model,
|
|
73
|
+
# transform='ShiftedBlackScholes'):
|
|
74
|
+
# """ Plot volatilities along a ladder of strike spreads """
|
|
75
|
+
# is_call = generator.is_call
|
|
76
|
+
|
|
77
|
+
# # Calculate prices
|
|
78
|
+
# rf_prc, md_prc, strikes, sprds = generator.price_strike_ladder(model, expiry, spread_ladder,
|
|
79
|
+
# fwd, test_params)
|
|
80
|
+
|
|
81
|
+
# # Invert to normal vols
|
|
82
|
+
# rf_nvols = []
|
|
83
|
+
# md_nvols = []
|
|
84
|
+
# if transform is 'ShiftedBlackScholes':
|
|
85
|
+
# shift = 0.03
|
|
86
|
+
# for i, strike in enumerate(strikes):
|
|
87
|
+
# sstrike = strike + shift
|
|
88
|
+
# sfwd = fwd + shift
|
|
89
|
+
# rf_nvols.append(black.implied_vol(expiry, sstrike, is_call, sfwd, rf_prc[i]))
|
|
90
|
+
# md_nvols.append(black.implied_vol(expiry, sstrike, is_call, sfwd, md_prc[i]))
|
|
91
|
+
# elif transform is 'Bachelier':
|
|
92
|
+
# for i, strike in enumerate(strikes):
|
|
93
|
+
# rf_nvols.append(bachelier.implied_vol(expiry, strike, is_call, fwd, rf_prc[i]))
|
|
94
|
+
# md_nvols.append(bachelier.implied_vol(expiry, strike, is_call, fwd, md_prc[i]))
|
|
95
|
+
# else:
|
|
96
|
+
# raise ValueError("Unknown transform type: " + transform)
|
|
97
|
+
|
|
98
|
+
# lnvol = test_params['LnVol']
|
|
99
|
+
# beta = test_params['Beta']
|
|
100
|
+
# nu = test_params['Nu']
|
|
101
|
+
# rho = test_params['Rho']
|
|
102
|
+
# # Plot
|
|
103
|
+
# plt.title(f'T={expiry:.2f}, F={fwd * 100:.2f}, LnVol={lnvol * 100:.2f}, Beta={beta:.2f}' +
|
|
104
|
+
# f',\n Nu={nu*100:.2f}, Rho={rho * 100:.2f}')
|
|
105
|
+
|
|
106
|
+
# plt.xlabel('Spread')
|
|
107
|
+
# plt.ylabel('Volatility')
|
|
108
|
+
# plt.plot(sprds, rf_nvols, color='blue', label='Reference')
|
|
109
|
+
# plt.plot(sprds, md_nvols, color='red', label='Model')
|
|
110
|
+
# plt.legend(loc='upper right')
|
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
""" Train ANN on datasets for Stochastic Local Vol models. We implement the direct map here.
|
|
2
|
+
Datasets of parameters (inputs) vs prices/implied vols (outputs) have been generated
|
|
3
|
+
in a previous set and are now read from tsv. The network here is either loaded from a
|
|
4
|
+
pre-trained state or trained from scratch. Pre-trained models can be loaded and training
|
|
5
|
+
resumed. """
|
|
6
|
+
import os
|
|
7
|
+
from datetime import datetime
|
|
8
|
+
import numpy as np
|
|
9
|
+
import tensorflow as tf
|
|
10
|
+
import matplotlib.pyplot as plt
|
|
11
|
+
import settings
|
|
12
|
+
from machinelearning.topology import compose_model
|
|
13
|
+
from machinelearning.learningmodel import LearningModel, load_learning_model
|
|
14
|
+
from machinelearning.learningschedules import FlooredExponentialDecay
|
|
15
|
+
from machinelearning.callbacks import RefCallback
|
|
16
|
+
from machinelearning.datasets import prepare_sets
|
|
17
|
+
from tools.filemanager import check_directory
|
|
18
|
+
from tools.timer import Stopwatch
|
|
19
|
+
from tools import clipboard
|
|
20
|
+
from maths.metrics import rmse, tf_rmse
|
|
21
|
+
from volsurfacegen.stovolfactory import set_generator
|
|
22
|
+
from projects.stovol import stovolplot as xplt
|
|
23
|
+
|
|
24
|
+
# Create generator that samples/prices from a trained model
|
|
25
|
+
# Fine-train models
|
|
26
|
+
# Lazy instantiation from remote/name code
|
|
27
|
+
# Store data in Kaggle
|
|
28
|
+
|
|
29
|
+
# ################ Runtime configuration ##########################################################
|
|
30
|
+
# MODEL_TYPE = "SABR"
|
|
31
|
+
MODEL_TYPE = "ShiftedSABR"
|
|
32
|
+
# MODEL_TYPE = "McShiftedSABR"
|
|
33
|
+
# MODEL_TYPE = "FbSABR"
|
|
34
|
+
# MODEL_TYPE = "McShiftedZABR"
|
|
35
|
+
# MODEL_TYPE = "McShiftedHeston"
|
|
36
|
+
USE_TRAINED = True
|
|
37
|
+
TRAIN = False
|
|
38
|
+
if USE_TRAINED is False and TRAIN is False:
|
|
39
|
+
raise RuntimeError("When not using pre-trained models, a new model must be trained")
|
|
40
|
+
|
|
41
|
+
TRAIN_PERCENT = 0.90 # Proportion of dataset used for training (rest used for test)
|
|
42
|
+
EPOCHS = 400
|
|
43
|
+
BATCH_SIZE = 1000
|
|
44
|
+
SHOW_VOL_CHARTS = True # Show smile section charts
|
|
45
|
+
# For comparison to reference values (accuracy of reference)
|
|
46
|
+
NUM_MC = 50 * 1000 # 100 * 1000
|
|
47
|
+
POINTS_PER_YEAR = 20 # 25
|
|
48
|
+
|
|
49
|
+
print(">> Set up runtime configuration")
|
|
50
|
+
project_folder = os.path.join(settings.WORKFOLDER, "stovol")
|
|
51
|
+
print("> Project folder: " + project_folder)
|
|
52
|
+
data_folder = os.path.join(project_folder, "samples")
|
|
53
|
+
print("> Data folder: " + data_folder)
|
|
54
|
+
check_directory(data_folder)
|
|
55
|
+
print("> Chosen model: " + MODEL_TYPE)
|
|
56
|
+
data_file = os.path.join(data_folder, MODEL_TYPE + "_samples.tsv")
|
|
57
|
+
model_folder = os.path.join(project_folder, "models")
|
|
58
|
+
print("> Model folder: " + model_folder)
|
|
59
|
+
|
|
60
|
+
# ################ Helper functions ###############################################################
|
|
61
|
+
def bps_rmse(y_true, y_ref):
    """ Root-mean-square error between the two sets, expressed in basis points """
    bps_per_unit = 10000.0  # 1.0 of vol/price = 10,000 bps
    return bps_per_unit * rmse(y_true, y_ref)
|
|
64
|
+
|
|
65
|
+
def tf_bps_rmse(y_true, y_ref):
    """ Root-mean-square error in basis points, computed with tensorflow ops
        so it can serve as a keras training loss """
    bps_per_unit = 10000.0  # 1.0 of vol/price = 10,000 bps
    return bps_per_unit * tf_rmse(y_true, y_ref)
|
|
68
|
+
|
|
69
|
+
# ################ Select generator ###############################################################
|
|
70
|
+
# Select generator. The number of expiries and surface size are irrelevant as here we do not
|
|
71
|
+
# generate sample data but read it from files. Number of MC and points per year are required
|
|
72
|
+
# to calculate the reference values against which we can validate the model.
|
|
73
|
+
generator = set_generator(MODEL_TYPE, num_mc=NUM_MC, points_per_year=POINTS_PER_YEAR)
|
|
74
|
+
|
|
75
|
+
# ################ Prepare datasets ###############################################################
|
|
76
|
+
# Datasets are always read, as even if we don't train, we're still going to evaluate the
|
|
77
|
+
# performance of the pre-trained model
|
|
78
|
+
print(">> Preparing datasets")
|
|
79
|
+
# Retrieve dataset
|
|
80
|
+
print("> Reading dataset from file: " + data_file)
|
|
81
|
+
x_set, y_set, data_df = generator.retrieve_datasets(data_file, shuffle=True)
|
|
82
|
+
input_dim = x_set.shape[1]
|
|
83
|
+
output_dim = y_set.shape[1]
|
|
84
|
+
print("> Input dimension: " + str(input_dim))
|
|
85
|
+
print("> Output dimension: " + str(output_dim))
|
|
86
|
+
print("> Dataset extract")
|
|
87
|
+
print(data_df.head())
|
|
88
|
+
# Split into training and test sets
|
|
89
|
+
TRS = TRAIN_PERCENT * 100
|
|
90
|
+
print(f"> Splitting between training set ({TRS:.2f}%) and test set ({100 - TRS:.2f}%)")
|
|
91
|
+
x_train, y_train, x_test, y_test = prepare_sets(x_set, y_set, TRAIN_PERCENT)
|
|
92
|
+
|
|
93
|
+
# ################ Compose/Load the model #########################################################
|
|
94
|
+
# Compose new model or load pre-trained one
|
|
95
|
+
if USE_TRAINED:
|
|
96
|
+
print(">> Loading pre-trained model")
|
|
97
|
+
model_folder_name = os.path.join(model_folder, MODEL_TYPE)
|
|
98
|
+
print("> Loading pre-trained model from: " + model_folder_name)
|
|
99
|
+
model = load_learning_model(model_folder_name)
|
|
100
|
+
keras_model = model.model
|
|
101
|
+
HIDDEN_LAYERS = NUM_NEURONS = DROP_OUT = None
|
|
102
|
+
topology = model.topology_
|
|
103
|
+
if topology is not None:
|
|
104
|
+
HIDDEN_LAYERS = topology['layers']
|
|
105
|
+
NUM_NEURONS = topology['neurons']
|
|
106
|
+
DROP_OUT = topology['dropout']
|
|
107
|
+
else:
|
|
108
|
+
print(">> Composing new model")
|
|
109
|
+
# Initialize the model
|
|
110
|
+
HIDDEN_LAYERS = ['softplus', 'softplus', 'softplus']
|
|
111
|
+
NUM_NEURONS = 64
|
|
112
|
+
DROP_OUT = 0.0
|
|
113
|
+
keras_model = compose_model(input_dim, output_dim, HIDDEN_LAYERS, NUM_NEURONS, DROP_OUT)
|
|
114
|
+
topology = { 'layers': HIDDEN_LAYERS, 'neurons': NUM_NEURONS, 'dropout': DROP_OUT}
|
|
115
|
+
|
|
116
|
+
model = LearningModel(keras_model)
|
|
117
|
+
model.topology_ = topology
|
|
118
|
+
|
|
119
|
+
# Display topology
|
|
120
|
+
print(f"> Hidden layer structure: {HIDDEN_LAYERS}")
|
|
121
|
+
print(f"> Number of neurons per layer: {NUM_NEURONS}")
|
|
122
|
+
print(f"> Drop-out rate: {DROP_OUT:.2f}")
|
|
123
|
+
|
|
124
|
+
# ################ Train the model ################################################################
|
|
125
|
+
if TRAIN:
|
|
126
|
+
# Learning rate scheduler
|
|
127
|
+
INIT_LR = 1.0e-2
|
|
128
|
+
FINAL_LR = 1.0e-4
|
|
129
|
+
DECAY = 0.97
|
|
130
|
+
STEPS = 250
|
|
131
|
+
lr_schedule = FlooredExponentialDecay(INIT_LR, FINAL_LR, DECAY, STEPS)
|
|
132
|
+
|
|
133
|
+
# Optimizer
|
|
134
|
+
optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
|
|
135
|
+
model.optimizer_ = optimizer.get_config()
|
|
136
|
+
print("> Optimizer settings")
|
|
137
|
+
optim_fields = model.optimizer_
|
|
138
|
+
for field, value in optim_fields.items():
|
|
139
|
+
print("> ", field, ":", value)
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
# Compile
|
|
143
|
+
print("> Compile model")
|
|
144
|
+
keras_model.compile(loss=tf_bps_rmse, optimizer=optimizer)
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
# Callbacks
|
|
148
|
+
EPOCH_SAMPLING = 5
|
|
149
|
+
callback = RefCallback(x_test, y_test, bps_rmse, optimizer=optimizer,
|
|
150
|
+
epoch_sampling=EPOCH_SAMPLING)
|
|
151
|
+
# callback = None
|
|
152
|
+
# callback = SDevPyCallback(optimizer=optimizer, epoch_sampling=EPOCH_SAMPLING)
|
|
153
|
+
|
|
154
|
+
# Train the network
|
|
155
|
+
print(">> Training ANN model")
|
|
156
|
+
trn_timer = Stopwatch("Training")
|
|
157
|
+
trn_timer.trigger()
|
|
158
|
+
# shuffled_indices = np.random.permutation(x_train.shape[0])
|
|
159
|
+
# x_train = x_train[shuffled_indices]
|
|
160
|
+
# y_train = y_train[shuffled_indices]
|
|
161
|
+
model.train(x_train, y_train, EPOCHS, BATCH_SIZE, callback)
|
|
162
|
+
trn_timer.stop()
|
|
163
|
+
trn_timer.print()
|
|
164
|
+
|
|
165
|
+
# Save trained model to file
|
|
166
|
+
now = datetime.now()
|
|
167
|
+
dt_string = now.strftime("%Y%m%d-%H_%M_%S")
|
|
168
|
+
model_folder_name = os.path.join(model_folder, MODEL_TYPE + "_" + dt_string)
|
|
169
|
+
print("Saving model to: " + model_folder_name)
|
|
170
|
+
model.save(model_folder_name)
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
# ################ Performance analysis ###########################################################
|
|
174
|
+
# Analyse results
|
|
175
|
+
print(">> Analyse results")
|
|
176
|
+
|
|
177
|
+
# Check performance
|
|
178
|
+
train_pred = model.predict(x_train)
|
|
179
|
+
train_rmse = bps_rmse(train_pred, y_train)
|
|
180
|
+
print(f"RMSE(nvol) on training set: {train_rmse:,.2f}")
|
|
181
|
+
|
|
182
|
+
test_pred = model.predict(x_test)
|
|
183
|
+
test_rmse = bps_rmse(test_pred, y_test)
|
|
184
|
+
print(f"RMSE(nvol) on test set: {test_rmse:,.2f}")
|
|
185
|
+
|
|
186
|
+
# Generate strike spread axis
|
|
187
|
+
if SHOW_VOL_CHARTS:
|
|
188
|
+
NUM_STRIKES = 100
|
|
189
|
+
PARAMS = { 'LnVol': 0.20, 'Beta': 0.5, 'Nu': 0.55, 'Rho': 0.25, 'Gamma': 0.7, 'Kappa': 1.0,
|
|
190
|
+
'Theta': 0.05, 'Xi': 0.50 }
|
|
191
|
+
FWD = 0.028
|
|
192
|
+
|
|
193
|
+
# Any number of expiries can be calculated, but for optimum display choose no more than 6
|
|
194
|
+
EXPIRIES = np.asarray([0.25, 0.50, 1.0, 5.00, 10.0, 30.0]).reshape(-1, 1)
|
|
195
|
+
NUM_EXPIRIES = EXPIRIES.shape[0]
|
|
196
|
+
METHOD = 'Percentiles'
|
|
197
|
+
PERCENTS = np.linspace(0.01, 0.99, num=NUM_STRIKES)
|
|
198
|
+
PERCENTS = np.asarray([PERCENTS] * NUM_EXPIRIES)
|
|
199
|
+
|
|
200
|
+
strikes = generator.convert_strikes(EXPIRIES, PERCENTS, FWD, PARAMS, METHOD)
|
|
201
|
+
ARE_CALLS = [[False] * NUM_STRIKES] * NUM_EXPIRIES # All puts
|
|
202
|
+
# ARE_CALLS = [[False if s < FWD else True for s in expks] for expks in strikes] # Puts/calls
|
|
203
|
+
# print(ARE_CALLS)
|
|
204
|
+
|
|
205
|
+
print("Calculating chart surface with reference model")
|
|
206
|
+
ref_prices = generator.price_surface_ref(EXPIRIES, strikes, ARE_CALLS, FWD, PARAMS)
|
|
207
|
+
# print(ref_prices.shape)
|
|
208
|
+
# clipboard.export2d(ref_prices)
|
|
209
|
+
print("Calculating chart surface with trained model")
|
|
210
|
+
mod_prices = generator.price_surface_mod(model, EXPIRIES, strikes, ARE_CALLS, FWD, PARAMS)
|
|
211
|
+
# print(mod_prices.shape)
|
|
212
|
+
# clipboard.export2d(mod_prices)
|
|
213
|
+
print(f"Ref-Mod RMSE(price): {bps_rmse(ref_prices, mod_prices):.2f}")
|
|
214
|
+
|
|
215
|
+
# Available tranforms: Price, ShiftedBlackScholes, Bachelier
|
|
216
|
+
TITLE = f"{MODEL_TYPE} smile sections, forward={FWD*100:.2f}"#,%\n parameters={PARAMS}"
|
|
217
|
+
TRANSFORM = "Bachelier"
|
|
218
|
+
# TRANSFORM = "Price"
|
|
219
|
+
#TRANSFORM = "ShiftedBlackScholes"
|
|
220
|
+
xplt.plot_transform_surface(EXPIRIES, strikes, ARE_CALLS, FWD, ref_prices, mod_prices,
|
|
221
|
+
TITLE, transform=TRANSFORM)
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
# Show training history
|
|
225
|
+
if TRAIN:
|
|
226
|
+
hist_epochs = callback.epochs
|
|
227
|
+
hist_losses = callback.losses
|
|
228
|
+
hist_lr = callback.learning_rates
|
|
229
|
+
sampled_epochs = callback.sampled_epochs
|
|
230
|
+
test_losses = callback.test_losses
|
|
231
|
+
|
|
232
|
+
plt.figure(figsize=(14, 7))
|
|
233
|
+
plt.subplots_adjust(hspace=0.40)
|
|
234
|
+
|
|
235
|
+
plt.subplot(1, 2, 1)
|
|
236
|
+
plt.xlabel('Epoch')
|
|
237
|
+
plt.ylabel('Loss')
|
|
238
|
+
plt.yscale("log")
|
|
239
|
+
plt.plot(hist_epochs, hist_losses, label='Loss on training set')
|
|
240
|
+
plt.plot(sampled_epochs, test_losses, color='red', label='Loss on test set')
|
|
241
|
+
plt.legend(loc='upper right')
|
|
242
|
+
plt.subplot(1, 2, 2)
|
|
243
|
+
plt.xlabel('Epoch')
|
|
244
|
+
plt.ylabel('Learning rate')
|
|
245
|
+
plt.plot(hist_epochs, hist_lr)
|
|
246
|
+
|
|
247
|
+
plt.show()
|
|
@@ -0,0 +1,255 @@
|
|
|
1
|
+
""" Fit ANN to Stochastic Local Vol models. We implement the direct map here. Datasets of parameters
|
|
2
|
+
(inputs) vs prices/implied vols (outputs) are generated (or read from tsv) to train a network that
|
|
3
|
+
learns the so-called 'direct' calculation, i.e. prices from parameter. """
|
|
4
|
+
import os
|
|
5
|
+
from datetime import datetime
|
|
6
|
+
import numpy as np
|
|
7
|
+
import tensorflow as tf
|
|
8
|
+
import matplotlib.pyplot as plt
|
|
9
|
+
from volsurfacegen.sabrgenerator import SabrGenerator, ShiftedSabrGenerator
|
|
10
|
+
from volsurfacegen.mcsabrgenerator import McShiftedSabrGenerator
|
|
11
|
+
from volsurfacegen.fbsabrgenerator import FbSabrGenerator
|
|
12
|
+
from volsurfacegen.mczabrgenerator import McShiftedZabrGenerator
|
|
13
|
+
from volsurfacegen.mchestongenerator import McShiftedHestonGenerator
|
|
14
|
+
import settings
|
|
15
|
+
from machinelearning.topology import compose_model
|
|
16
|
+
from machinelearning.learningmodel import LearningModel, load_learning_model
|
|
17
|
+
from machinelearning.learningschedules import FlooredExponentialDecay
|
|
18
|
+
from machinelearning.callbacks import RefCallback
|
|
19
|
+
from machinelearning.datasets import prepare_sets
|
|
20
|
+
from tools.filemanager import check_directory
|
|
21
|
+
from tools.timer import Stopwatch
|
|
22
|
+
from maths.metrics import rmse, tf_rmse
|
|
23
|
+
from projects.xsabr import xsabrplot as xplt
|
|
24
|
+
|
|
25
|
+
# Re-training from saved model
|
|
26
|
+
# Implement new class over LearningModel that gives prices directly, having stored
|
|
27
|
+
# the model. Implement inversions to shifted BS and Bachelier as well.
|
|
28
|
+
# Possibility to test only without training
|
|
29
|
+
# Utility to merge sample data files into 1
|
|
30
|
+
# Finalize and fine-train models on extended parameter range
|
|
31
|
+
# Store data in Kaggle
|
|
32
|
+
|
|
33
|
+
# ################ Runtime configuration ##########################################################
|
|
34
|
+
MODEL_TYPE = "SABR"
|
|
35
|
+
# MODEL_TYPE = "ShiftedSABR"
|
|
36
|
+
# MODEL_TYPE = "McShiftedSABR"
|
|
37
|
+
# MODEL_TYPE = "FbSABR"
|
|
38
|
+
# MODEL_TYPE = "McShiftedZABR"
|
|
39
|
+
# MODEL_TYPE = "McShiftedHeston"
|
|
40
|
+
GENERATE_SAMPLES = False # If false, read dataset from file
|
|
41
|
+
NUM_SAMPLES = 100 * 1000 # Relevant if GENERATE_SAMPLES is True
|
|
42
|
+
TRAIN_PERCENT = 0.90 # Proportion of dataset used for training (rest used for test)
|
|
43
|
+
TRAIN = True # Train the model (if False, read from file)
|
|
44
|
+
EPOCHS = 100 # Relevant if TRAIN is True
|
|
45
|
+
BATCH_SIZE = 1000 # Relevant if TRAIN is True
|
|
46
|
+
SHOW_VOL_CHARTS = True # Show strike ladder charts
|
|
47
|
+
SAVE_MODEL = True # Save model to files
|
|
48
|
+
|
|
49
|
+
print(">> Set up runtime configuration")
|
|
50
|
+
project_folder = os.path.join(settings.WORKFOLDER, "xsabr")
|
|
51
|
+
print("> Project folder: " + project_folder)
|
|
52
|
+
data_folder = os.path.join(project_folder, "samples")
|
|
53
|
+
print("> Data folder: " + data_folder)
|
|
54
|
+
check_directory(data_folder)
|
|
55
|
+
print("> Chosen model: " + MODEL_TYPE)
|
|
56
|
+
data_file = os.path.join(data_folder, MODEL_TYPE + "_samples.tsv")
|
|
57
|
+
model_folder = os.path.join(project_folder, "models")
|
|
58
|
+
print("> Model folder: " + model_folder)
|
|
59
|
+
|
|
60
|
+
# ################ Helper functions ###############################################################
|
|
61
|
+
def bps_rmse(y_true, y_ref):
    """ Root-mean-square error between the two sets, expressed in basis points """
    bps_per_unit = 10000.0  # 1.0 of vol/price = 10,000 bps
    return bps_per_unit * rmse(y_true, y_ref)
|
|
64
|
+
|
|
65
|
+
def tf_bps_rmse(y_true, y_ref):
    """ Root-mean-square error in basis points, computed with tensorflow ops
        so it can serve as a keras training loss """
    bps_per_unit = 10000.0  # 1.0 of vol/price = 10,000 bps
    return bps_per_unit * tf_rmse(y_true, y_ref)
|
|
68
|
+
|
|
69
|
+
# Generator factory
|
|
70
|
+
# Generator factory: instantiate the smile generator matching MODEL_TYPE.
# The analytic SABR generators need no sampling configuration; the four
# Monte-Carlo based generators all share the same one.
if MODEL_TYPE == "SABR":
    generator = SabrGenerator()
elif MODEL_TYPE == "ShiftedSABR":
    generator = ShiftedSabrGenerator()
else:
    MC_GENERATORS = {
        "McShiftedSABR": McShiftedSabrGenerator,
        "FbSABR": FbSabrGenerator,
        "McShiftedZABR": McShiftedZabrGenerator,
        "McShiftedHeston": McShiftedHestonGenerator,
    }
    if MODEL_TYPE not in MC_GENERATORS:
        raise ValueError("Unknown model: " + MODEL_TYPE)
    # Shared Monte-Carlo sampling configuration
    NUM_EXPIRIES = 10
    SURFACE_SIZE = 50
    NUM_STRIKES = int(SURFACE_SIZE / NUM_EXPIRIES)
    NUM_MC = 50 * 1000  # 100 * 1000
    POINTS_PER_YEAR = 20  # 25
    generator = MC_GENERATORS[MODEL_TYPE](NUM_EXPIRIES, NUM_STRIKES, NUM_MC, POINTS_PER_YEAR)
|
|
104
|
+
|
|
105
|
+
# ################ Training algorithm #############################################################
|
|
106
|
+
# Generate dataset by prices and convert to normal vols
|
|
107
|
+
if GENERATE_SAMPLES:
|
|
108
|
+
print(f">> Generating {NUM_SAMPLES:,} samples")
|
|
109
|
+
data_df = generator.generate_samples(NUM_SAMPLES)
|
|
110
|
+
print("> Convert to normal vol and cleanse data")
|
|
111
|
+
data_df = generator.to_nvol(data_df, cleanse=True)
|
|
112
|
+
print("> Output to file: " + data_file)
|
|
113
|
+
generator.to_file(data_df, data_file)
|
|
114
|
+
|
|
115
|
+
# Retrieve dataset
|
|
116
|
+
print(">> Reading dataset from file: " + data_file)
|
|
117
|
+
x_set, y_set, data_df = generator.retrieve_datasets(data_file, shuffle=True)
|
|
118
|
+
input_dim = x_set.shape[1]
|
|
119
|
+
output_dim = y_set.shape[1]
|
|
120
|
+
print("> Input dimension: " + str(input_dim))
|
|
121
|
+
print("> Output dimension: " + str(output_dim))
|
|
122
|
+
print("> Dataset extract")
|
|
123
|
+
print(data_df.head())
|
|
124
|
+
TRS = TRAIN_PERCENT * 100
|
|
125
|
+
print(f"> Splitting between training set ({TRS:.2f}%) and test set ({100 - TRS:.2f}%)")
|
|
126
|
+
x_train, y_train, x_test, y_test = prepare_sets(x_set, y_set, TRAIN_PERCENT)
|
|
127
|
+
|
|
128
|
+
# Retrieve dataset, compose and train the model on the normal vols
|
|
129
|
+
if TRAIN:
|
|
130
|
+
# Initialize the model
|
|
131
|
+
print(">> Compose ANN model")
|
|
132
|
+
hidden_layers = ['softplus', 'softplus', 'softplus']
|
|
133
|
+
NUM_NEURONS = 16
|
|
134
|
+
DROP_OUT = 0.00
|
|
135
|
+
keras_model = compose_model(input_dim, output_dim, hidden_layers, NUM_NEURONS, DROP_OUT)
|
|
136
|
+
print(f"> Hidden layer structure: {hidden_layers}")
|
|
137
|
+
print(f"> Number of neurons per layer: {NUM_NEURONS}")
|
|
138
|
+
print(f"> Drop-out rate: {DROP_OUT:.2f}")
|
|
139
|
+
|
|
140
|
+
# Learning rate scheduler
|
|
141
|
+
INIT_LR = 1e-1
|
|
142
|
+
FINAL_LR = 1e-4
|
|
143
|
+
DECAY = 0.97
|
|
144
|
+
STEPS = 100
|
|
145
|
+
lr_schedule = FlooredExponentialDecay(INIT_LR, FINAL_LR, DECAY, STEPS)
|
|
146
|
+
|
|
147
|
+
# Optimizer
|
|
148
|
+
optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
|
|
149
|
+
print("> Optimizer settings")
|
|
150
|
+
optim_fields = optimizer.get_config()
|
|
151
|
+
for field, value in optim_fields.items():
|
|
152
|
+
print(field, ":", value)
|
|
153
|
+
|
|
154
|
+
# Compile
|
|
155
|
+
print("> Compile model")
|
|
156
|
+
keras_model.compile(loss=tf_bps_rmse, optimizer=optimizer)
|
|
157
|
+
# keras_model.compile(loss='mse', optimizer=optimizer)
|
|
158
|
+
model = LearningModel(keras_model)
|
|
159
|
+
|
|
160
|
+
# Callbacks
|
|
161
|
+
EPOCH_SAMPLING = 5
|
|
162
|
+
callback = RefCallback(x_test, y_test, bps_rmse, optimizer=optimizer,
|
|
163
|
+
epoch_sampling=EPOCH_SAMPLING)
|
|
164
|
+
# callback = SDevPyCallback(optimizer=optimizer, epoch_sampling=EPOCH_SAMPLING)
|
|
165
|
+
|
|
166
|
+
# Train the network
|
|
167
|
+
print(">> Training ANN model")
|
|
168
|
+
trn_timer = Stopwatch("Training")
|
|
169
|
+
trn_timer.trigger()
|
|
170
|
+
# shuffled_indices = np.random.permutation(x_train.shape[0])
|
|
171
|
+
# x_train = x_train[shuffled_indices]
|
|
172
|
+
# y_train = y_train[shuffled_indices]
|
|
173
|
+
model.train(x_train, y_train, EPOCHS, BATCH_SIZE, callback)
|
|
174
|
+
trn_timer.stop()
|
|
175
|
+
trn_timer.print()
|
|
176
|
+
|
|
177
|
+
# Save trained model to file
|
|
178
|
+
if SAVE_MODEL:
|
|
179
|
+
now = datetime.now()
|
|
180
|
+
dt_string = now.strftime("%Y%m%d-%H_%M_%S")
|
|
181
|
+
model_folder_name = os.path.join(model_folder, MODEL_TYPE + "_" + dt_string)
|
|
182
|
+
print("Saving model to: " + model_folder_name)
|
|
183
|
+
model.save(model_folder_name)
|
|
184
|
+
|
|
185
|
+
else: # Not training, so loading the model from file
|
|
186
|
+
model_folder_name = os.path.join(model_folder, MODEL_TYPE)
|
|
187
|
+
print("Loading pre-trained model from: " + model_folder_name)
|
|
188
|
+
model = load_learning_model(model_folder_name)
|
|
189
|
+
|
|
190
|
+
# ################ Performance analysis ###########################################################
|
|
191
|
+
# Analyse results
|
|
192
|
+
print(">> Analyse results")
|
|
193
|
+
|
|
194
|
+
# Check performance
|
|
195
|
+
train_pred = model.predict(x_train)
|
|
196
|
+
train_rmse = bps_rmse(train_pred, y_train)
|
|
197
|
+
print(f"RMSE on training set: {train_rmse:,.2f}")
|
|
198
|
+
|
|
199
|
+
test_pred = model.predict(x_test)
|
|
200
|
+
test_rmse = bps_rmse(test_pred, y_test)
|
|
201
|
+
print(f"RMSE on test set: {test_rmse:,.2f}")
|
|
202
|
+
|
|
203
|
+
# Generate strike spread axis
|
|
204
|
+
if SHOW_VOL_CHARTS:
|
|
205
|
+
NUM_STRIKES = 100
|
|
206
|
+
PARAMS = { 'LnVol': 0.20, 'Beta': 0.5, 'Nu': 0.55, 'Rho': -0.25, 'Gamma': 0.7, 'Kappa': 1.0,
|
|
207
|
+
'Theta': 0.05, 'Xi': 0.50 }
|
|
208
|
+
FWD = 0.028
|
|
209
|
+
|
|
210
|
+
# Any number of expiries can be calculated, but for optimum display choose no more than 6
|
|
211
|
+
EXPIRIES = np.asarray([0.25, 0.50, 0.75, 1.00, 2.00, 5.00]).reshape(-1, 1)
|
|
212
|
+
NUM_EXPIRIES = EXPIRIES.shape[0]
|
|
213
|
+
METHOD = 'Percentiles'
|
|
214
|
+
PERCENTS = np.linspace(0.01, 0.99, num=NUM_STRIKES)
|
|
215
|
+
PERCENTS = np.asarray([PERCENTS] * NUM_EXPIRIES)
|
|
216
|
+
|
|
217
|
+
strikes = generator.convert_strikes(EXPIRIES, PERCENTS, FWD, PARAMS, METHOD)
|
|
218
|
+
IS_CALL = False
|
|
219
|
+
ARE_CALLS = [[IS_CALL] * NUM_STRIKES] * NUM_EXPIRIES
|
|
220
|
+
print("Calculating chart surface with reference model")
|
|
221
|
+
ref_prices = generator.price_surface_ref(EXPIRIES, strikes, ARE_CALLS, FWD, PARAMS)
|
|
222
|
+
print("Calculating chart surface with trained model")
|
|
223
|
+
mod_prices = generator.price_surface_mod(model, EXPIRIES, strikes, IS_CALL, FWD, PARAMS)
|
|
224
|
+
print(f"Ref-Mod RMSE: {bps_rmse(ref_prices, mod_prices):.2f}")
|
|
225
|
+
|
|
226
|
+
# Available tranforms: Price, ShiftedBlackScholes, Bachelier
|
|
227
|
+
TITLE = "Smile"
|
|
228
|
+
xplt.plot_transform_surface(EXPIRIES, strikes, generator.is_call, FWD, ref_prices, mod_prices,
|
|
229
|
+
TITLE, transform="ShiftedBlackScholes")
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
# Show training history
|
|
233
|
+
if TRAIN:
|
|
234
|
+
hist_epochs = callback.epochs
|
|
235
|
+
hist_losses = callback.losses
|
|
236
|
+
hist_lr = callback.learning_rates
|
|
237
|
+
sampled_epochs = callback.sampled_epochs
|
|
238
|
+
test_losses = callback.test_losses
|
|
239
|
+
|
|
240
|
+
plt.figure(figsize=(14, 7))
|
|
241
|
+
plt.subplots_adjust(hspace=0.40)
|
|
242
|
+
|
|
243
|
+
plt.subplot(1, 2, 1)
|
|
244
|
+
plt.xlabel('Epoch')
|
|
245
|
+
plt.ylabel('Loss')
|
|
246
|
+
plt.yscale("log")
|
|
247
|
+
plt.plot(hist_epochs, hist_losses, label='Loss on training set')
|
|
248
|
+
plt.plot(sampled_epochs, test_losses, color='red', label='Loss on test set')
|
|
249
|
+
plt.legend(loc='upper right')
|
|
250
|
+
plt.subplot(1, 2, 2)
|
|
251
|
+
plt.xlabel('Epoch')
|
|
252
|
+
plt.ylabel('Learning rate')
|
|
253
|
+
plt.plot(hist_epochs, hist_lr)
|
|
254
|
+
|
|
255
|
+
plt.show()
|
sdevpy/settings.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
""" Global runtime settings such as workfolder path, warning configurations, etc. """
|
|
2
|
+
import os
|
|
3
|
+
|
|
4
|
+
# Global variables
|
|
5
|
+
WORKFOLDER = r"C:\temp\sdevpy"
|
|
6
|
+
|
|
7
|
+
# Disable debug warnings in tensorflow
|
|
8
|
+
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# Doing nothing for now, just to avoid warning of dummy import
|
|
12
|
+
def apply_settings():
|
|
13
|
+
""" Dummy method to apply settings when necessary """
|
|
14
|
+
return 0
|