bbstrader 0.1.7__py3-none-any.whl → 0.1.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of bbstrader might be problematic. Click here for more details.
- bbstrader/btengine/__init__.py +1 -0
- bbstrader/btengine/backtest.py +11 -9
- bbstrader/btengine/performance.py +9 -30
- bbstrader/btengine/portfolio.py +18 -10
- bbstrader/metatrader/account.py +20 -6
- bbstrader/metatrader/rates.py +6 -9
- bbstrader/metatrader/risk.py +2 -3
- bbstrader/metatrader/trade.py +306 -219
- bbstrader/metatrader/utils.py +37 -29
- bbstrader/trading/execution.py +139 -92
- bbstrader/trading/strategies.py +13 -9
- bbstrader/tseries.py +500 -494
- {bbstrader-0.1.7.dist-info → bbstrader-0.1.9.dist-info}/METADATA +2 -1
- bbstrader-0.1.9.dist-info/RECORD +26 -0
- {bbstrader-0.1.7.dist-info → bbstrader-0.1.9.dist-info}/WHEEL +1 -1
- bbstrader-0.1.7.dist-info/RECORD +0 -26
- {bbstrader-0.1.7.dist-info → bbstrader-0.1.9.dist-info}/LICENSE +0 -0
- {bbstrader-0.1.7.dist-info → bbstrader-0.1.9.dist-info}/top_level.txt +0 -0
bbstrader/tseries.py
CHANGED
|
@@ -8,6 +8,8 @@ market analysis, and financial data exploration.
|
|
|
8
8
|
"""
|
|
9
9
|
import numpy as np
|
|
10
10
|
import pandas as pd
|
|
11
|
+
import pprint
|
|
12
|
+
import warnings
|
|
11
13
|
import yfinance as yf
|
|
12
14
|
from arch import arch_model
|
|
13
15
|
from statsmodels.tsa.arima.model import ARIMA
|
|
@@ -25,13 +27,12 @@ from statsmodels.graphics.tsaplots import plot_acf
|
|
|
25
27
|
from itertools import combinations
|
|
26
28
|
from typing import Union, List, Tuple
|
|
27
29
|
from statsmodels.stats.diagnostic import acorr_ljungbox
|
|
28
|
-
import
|
|
29
|
-
import
|
|
30
|
+
from arch.utility.exceptions import ConvergenceWarning as ArchWarning
|
|
31
|
+
from statsmodels.tools.sm_exceptions import ConvergenceWarning as StatsWarning
|
|
30
32
|
warnings.filterwarnings("ignore")
|
|
33
|
+
warnings.filterwarnings("ignore", category=StatsWarning, module='statsmodels')
|
|
34
|
+
warnings.filterwarnings("ignore", category=ArchWarning, module='arch')
|
|
31
35
|
|
|
32
|
-
# *******************************************
|
|
33
|
-
# ARIMA AND GARCH MODELS *
|
|
34
|
-
# *******************************************
|
|
35
36
|
|
|
36
37
|
__all__ = [
|
|
37
38
|
"load_and_prepare_data",
|
|
@@ -49,6 +50,10 @@ __all__ = [
|
|
|
49
50
|
"OrnsteinUhlenbeckModel"
|
|
50
51
|
]
|
|
51
52
|
|
|
53
|
+
# *******************************************
|
|
54
|
+
# ARIMA AND GARCH MODELS *
|
|
55
|
+
# *******************************************
|
|
56
|
+
|
|
52
57
|
|
|
53
58
|
def load_and_prepare_data(df: pd.DataFrame):
|
|
54
59
|
"""
|
|
@@ -99,7 +104,7 @@ def fit_best_arima(window_data: Union[pd.Series, np.ndarray]):
|
|
|
99
104
|
"""
|
|
100
105
|
if isinstance(window_data, pd.Series):
|
|
101
106
|
window_data = window_data.values
|
|
102
|
-
|
|
107
|
+
|
|
103
108
|
window_data = window_data[~(np.isnan(window_data) | np.isinf(window_data))]
|
|
104
109
|
# Fit ARIMA model with best parameters
|
|
105
110
|
model = pm.auto_arima(
|
|
@@ -112,10 +117,6 @@ def fit_best_arima(window_data: Union[pd.Series, np.ndarray]):
|
|
|
112
117
|
stepwise=True
|
|
113
118
|
)
|
|
114
119
|
final_order = model.order
|
|
115
|
-
from arch.utility.exceptions import ConvergenceWarning as ArchConvergenceWarning
|
|
116
|
-
from statsmodels.tools.sm_exceptions import ConvergenceWarning as StatsConvergenceWarning
|
|
117
|
-
warnings.filterwarnings("ignore", category=StatsConvergenceWarning)
|
|
118
|
-
warnings.filterwarnings("ignore", category=ArchConvergenceWarning)
|
|
119
120
|
try:
|
|
120
121
|
best_arima_model = ARIMA(
|
|
121
122
|
window_data + 1e-5, order=final_order, missing='drop').fit()
|
|
@@ -183,9 +184,10 @@ def predict_next_return(arima_result, garch_result):
|
|
|
183
184
|
if not isinstance(arima_pred, np.ndarray):
|
|
184
185
|
pred = arima_pred.values[0]
|
|
185
186
|
else:
|
|
186
|
-
pred =
|
|
187
|
+
pred = arima_pred[0]
|
|
187
188
|
return pred + next_volatility
|
|
188
189
|
|
|
190
|
+
|
|
189
191
|
def get_prediction(window_data: Union[pd.Series, np.ndarray]):
|
|
190
192
|
"""
|
|
191
193
|
Orchestrator function to get the next period's return prediction.
|
|
@@ -206,156 +208,420 @@ def get_prediction(window_data: Union[pd.Series, np.ndarray]):
|
|
|
206
208
|
return prediction
|
|
207
209
|
|
|
208
210
|
|
|
209
|
-
|
|
210
|
-
# STATS TEST (Cointegration , Mean Reverting)*
|
|
211
|
-
# *********************************************
|
|
212
|
-
def get_corr(tickers: Union[List[str], Tuple[str, ...]], start: str, end: str) -> None:
|
|
211
|
+
class ArimaGarchModel():
|
|
213
212
|
"""
|
|
214
|
-
|
|
215
|
-
|
|
213
|
+
This class implements a time serie model
|
|
214
|
+
that combines `ARIMA (AutoRegressive Integrated Moving Average)`
|
|
215
|
+
and `GARCH (Generalized Autoregressive Conditional Heteroskedasticity)` models
|
|
216
|
+
to predict future returns based on historical price data.
|
|
216
217
|
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
218
|
+
The model is implemented in the following steps:
|
|
219
|
+
1. Data Preparation: Load and prepare the historical price data.
|
|
220
|
+
2. Modeling: Fit the ARIMA model to the data and then fit the GARCH model to the residuals.
|
|
221
|
+
3. Prediction: Predict the next return using the ARIMA model and the next volatility using the GARCH model.
|
|
222
|
+
4. Trading Strategy: Execute the trading strategy based on the predictions.
|
|
223
|
+
5. Vectorized Backtesting: Backtest the trading strategy using the historical data.
|
|
222
224
|
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
# Download historical data
|
|
228
|
-
data = yf.download(tickers, start=start, end=end)['Adj Close']
|
|
225
|
+
Exemple:
|
|
226
|
+
>>> import yfinance as yf
|
|
227
|
+
>>> from bbstrader.strategies import ArimaGarchModel
|
|
228
|
+
>>> from bbstrader.tseries import load_and_prepare_data
|
|
229
229
|
|
|
230
|
-
|
|
231
|
-
|
|
230
|
+
>>> if __name__ == '__main__':
|
|
231
|
+
>>> # ARCH SPY Vectorize Backtest
|
|
232
|
+
>>> k = 252
|
|
233
|
+
>>> data = yf.download("SPY", start="2004-01-02", end="2015-12-31")
|
|
234
|
+
>>> arch = ArimaGarchModel("SPY", data, k=k)
|
|
235
|
+
>>> df = load_and_prepare_data(data)
|
|
236
|
+
>>> arch.show_arima_garch_results(df['diff_log_return'].values[-k:])
|
|
237
|
+
>>> arch.backtest_strategy()
|
|
238
|
+
"""
|
|
232
239
|
|
|
233
|
-
|
|
234
|
-
|
|
240
|
+
def __init__(self, symbol, data, k: int = 252):
|
|
241
|
+
"""
|
|
242
|
+
Initializes the ArimaGarchStrategy class.
|
|
235
243
|
|
|
244
|
+
Args:
|
|
245
|
+
symbol (str): The ticker symbol for the financial instrument.
|
|
246
|
+
data (pd.DataFrame): `The raw dataset containing at least the 'Close' prices`.
|
|
247
|
+
k (int): The window size for rolling prediction in backtesting.
|
|
248
|
+
"""
|
|
249
|
+
self.symbol = symbol
|
|
250
|
+
self.data = self.load_and_prepare_data(data)
|
|
251
|
+
self.k = k
|
|
236
252
|
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
253
|
+
# Step 1: Data Preparation
|
|
254
|
+
def load_and_prepare_data(self, df):
|
|
255
|
+
"""
|
|
256
|
+
Prepares the dataset by calculating logarithmic returns
|
|
257
|
+
and differencing if necessary.
|
|
241
258
|
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
The DataFrame containing prices for each series
|
|
245
|
-
ts1 (str): The first time series column name
|
|
246
|
-
ts2 (str): The second time series column name
|
|
247
|
-
"""
|
|
248
|
-
fig, ax = plt.subplots()
|
|
249
|
-
ax.plot(df.index, df[ts1], label=ts1)
|
|
250
|
-
ax.plot(df.index, df[ts2], label=ts2)
|
|
259
|
+
Args:
|
|
260
|
+
df (pd.DataFrame): `The raw dataset containing at least the 'Close' prices`.
|
|
251
261
|
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
plt.show()
|
|
262
|
+
Returns:
|
|
263
|
+
pd.DataFrame: The dataset with additional columns
|
|
264
|
+
for log returns and differenced log returns.
|
|
265
|
+
"""
|
|
266
|
+
return load_and_prepare_data(df)
|
|
258
267
|
|
|
268
|
+
# Step 2: Modeling (ARIMA + GARCH)
|
|
269
|
+
def fit_best_arima(self, window_data):
|
|
270
|
+
"""
|
|
271
|
+
Fits the ARIMA model to the provided window of data,
|
|
272
|
+
selecting the best model based on AIC.
|
|
259
273
|
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
Plot a scatter plot of both time series for
|
|
263
|
-
via the provided DataFrame.
|
|
274
|
+
Args:
|
|
275
|
+
window_data (np.array): The dataset for a specific window period.
|
|
264
276
|
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
ts2 (str): The second time series column name
|
|
270
|
-
"""
|
|
271
|
-
plt.xlabel(f'{ts1} Price ($)')
|
|
272
|
-
plt.ylabel(f'{ts2} Price ($)')
|
|
273
|
-
plt.title(f'{ts1} and {ts2} Price Scatterplot')
|
|
274
|
-
plt.scatter(df[ts1], df[ts2])
|
|
277
|
+
Returns:
|
|
278
|
+
ARIMA model: The best fitted ARIMA model based on AIC.
|
|
279
|
+
"""
|
|
280
|
+
return fit_best_arima(window_data)
|
|
275
281
|
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
label='Regression Line'
|
|
280
|
-
)
|
|
281
|
-
plt.legend()
|
|
282
|
-
plt.show()
|
|
282
|
+
def fit_garch(self, window_data):
|
|
283
|
+
"""
|
|
284
|
+
Fits the GARCH model to the residuals of the best ARIMA model.
|
|
283
285
|
|
|
286
|
+
Args:
|
|
287
|
+
window_data (np.array): The dataset for a specific window period.
|
|
284
288
|
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
+
Returns:
|
|
290
|
+
tuple: Contains the ARIMA result and GARCH result.
|
|
291
|
+
"""
|
|
292
|
+
return fit_garch(window_data)
|
|
289
293
|
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
fig, ax = plt.subplots()
|
|
295
|
-
ax.plot(df.index, df["res"], label="Residuals")
|
|
294
|
+
def show_arima_garch_results(self, window_data, acf=True, test_resid=True):
|
|
295
|
+
"""
|
|
296
|
+
Displays the ARIMA and GARCH model results, including plotting
|
|
297
|
+
ACF of residuals and conducting , Box-Pierce and Ljung-Box tests.
|
|
296
298
|
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
plt.title('Residual Plot')
|
|
301
|
-
plt.legend()
|
|
302
|
-
plt.show()
|
|
299
|
+
Args:
|
|
300
|
+
window_data (np.array): The dataset for a specific window period.
|
|
301
|
+
acf (bool, optional): If True, plot the ACF of residuals. Defaults to True.
|
|
303
302
|
|
|
303
|
+
test_resid (bool, optional):
|
|
304
|
+
If True, conduct Box-Pierce and Ljung-Box tests on residuals. Defaults to True.
|
|
305
|
+
"""
|
|
306
|
+
arima_result = self.fit_best_arima(window_data)
|
|
307
|
+
resid = np.asarray(arima_result.resid)
|
|
308
|
+
resid = resid[~(np.isnan(resid) | np.isinf(resid))]
|
|
309
|
+
garch_model = arch_model(resid, p=1, q=1, rescale=False)
|
|
310
|
+
garch_result = garch_model.fit(disp='off')
|
|
311
|
+
residuals = garch_result.resid
|
|
304
312
|
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
313
|
+
# TODO : Plot the ACF of the residuals
|
|
314
|
+
if acf:
|
|
315
|
+
fig = plt.figure(figsize=(12, 8))
|
|
316
|
+
# Plot the ACF of ARIMA residuals
|
|
317
|
+
ax1 = fig.add_subplot(211, ylabel='ACF')
|
|
318
|
+
plot_acf(resid, alpha=0.05, ax=ax1, title='ACF of ARIMA Residuals')
|
|
319
|
+
ax1.set_xlabel('Lags')
|
|
320
|
+
ax1.grid(True)
|
|
309
321
|
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
322
|
+
# Plot the ACF of GARCH residuals on the same axes
|
|
323
|
+
ax2 = fig.add_subplot(212, ylabel='ACF')
|
|
324
|
+
plot_acf(residuals, alpha=0.05, ax=ax2,
|
|
325
|
+
title='ACF of GARCH Residuals')
|
|
326
|
+
ax2.set_xlabel('Lags')
|
|
327
|
+
ax2.grid(True)
|
|
313
328
|
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
start (str): The start date for the historical data in 'YYYY-MM-DD' format.
|
|
318
|
-
end (str): The end date for the historical data in 'YYYY-MM-DD' format.
|
|
329
|
+
# Plot the figure
|
|
330
|
+
plt.tight_layout()
|
|
331
|
+
plt.show()
|
|
319
332
|
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
333
|
+
# TODO : Conduct Box-Pierce and Ljung-Box Tests of the residuals
|
|
334
|
+
if test_resid:
|
|
335
|
+
print(arima_result.summary())
|
|
336
|
+
print(garch_result.summary())
|
|
337
|
+
bp_test = acorr_ljungbox(resid, return_df=True)
|
|
338
|
+
print("Box-Pierce and Ljung-Box Tests Results for ARIMA:\n", bp_test)
|
|
326
339
|
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
340
|
+
# Step 3: Prediction
|
|
341
|
+
def predict_next_return(self, arima_result, garch_result):
|
|
342
|
+
"""
|
|
343
|
+
Predicts the next return using the ARIMA model
|
|
344
|
+
and the next volatility using the GARCH model.
|
|
330
345
|
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
>>> Dep. Variable: MSFT R-squared: 0.900
|
|
335
|
-
>>> Model: OLS Adj. R-squared: 0.900
|
|
336
|
-
>>> Method: Least Squares F-statistic: 2244.
|
|
337
|
-
>>> Date: Sat, 20 Jul 2024 Prob (F-statistic): 2.95e-126
|
|
338
|
-
>>> Time: 13:36:58 Log-Likelihood: -996.45
|
|
339
|
-
>>> No. Observations: 250 AIC: 1997.
|
|
340
|
-
>>> Df Residuals: 248 BIC: 2004.
|
|
341
|
-
>>> Df Model: 1
|
|
342
|
-
>>> Covariance Type: nonrobust
|
|
343
|
-
>>> ==============================================================================
|
|
344
|
-
>>> coef std err t P>|t| [0.025 0.975]
|
|
345
|
-
>>> ------------------------------------------------------------------------------
|
|
346
|
-
>>> const -74.4180 8.191 -9.085 0.000 -90.551 -58.286
|
|
347
|
-
>>> AAPL 2.2486 0.047 47.369 0.000 2.155 2.342
|
|
348
|
-
>>> ==============================================================================
|
|
349
|
-
>>> Omnibus: 4.923 Durbin-Watson: 0.121
|
|
350
|
-
>>> Prob(Omnibus): 0.085 Jarque-Bera (JB): 4.862
|
|
351
|
-
>>> Skew: 0.342 Prob(JB): 0.0879
|
|
352
|
-
>>> Kurtosis: 2.993 Cond. No. 1.71e+03
|
|
353
|
-
>>> ==============================================================================
|
|
346
|
+
Args:
|
|
347
|
+
arima_result (ARIMA model): The ARIMA model result.
|
|
348
|
+
garch_result (GARCH model): The GARCH model result.
|
|
354
349
|
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
350
|
+
Returns:
|
|
351
|
+
float: The predicted next return.
|
|
352
|
+
"""
|
|
353
|
+
return predict_next_return(arima_result, garch_result)
|
|
354
|
+
|
|
355
|
+
def get_prediction(self, window_data):
|
|
356
|
+
"""
|
|
357
|
+
Generates a prediction for the next return based on a window of data.
|
|
358
|
+
|
|
359
|
+
Args:
|
|
360
|
+
window_data (np.array): The dataset for a specific window period.
|
|
361
|
+
|
|
362
|
+
Returns:
|
|
363
|
+
float: The predicted next return.
|
|
364
|
+
"""
|
|
365
|
+
return get_prediction(window_data)
|
|
366
|
+
|
|
367
|
+
def calculate_signals(self, window_data):
|
|
368
|
+
"""
|
|
369
|
+
Calculates the trading signal based on the prediction.
|
|
370
|
+
|
|
371
|
+
Args:
|
|
372
|
+
window_data (np.array): The dataset for a specific window period.
|
|
373
|
+
|
|
374
|
+
Returns:
|
|
375
|
+
str: The trading signal ('LONG', 'SHORT', or None).
|
|
376
|
+
"""
|
|
377
|
+
prediction = self.get_prediction(window_data)
|
|
378
|
+
if prediction > 0:
|
|
379
|
+
signal = "LONG"
|
|
380
|
+
elif prediction < 0:
|
|
381
|
+
signal = "SHORT"
|
|
382
|
+
else:
|
|
383
|
+
signal = None
|
|
384
|
+
return signal
|
|
385
|
+
|
|
386
|
+
# Step 4: Trading Strategy
|
|
387
|
+
|
|
388
|
+
def execute_trading_strategy(self, predictions):
|
|
389
|
+
"""
|
|
390
|
+
Executes the trading strategy based on a list
|
|
391
|
+
of predictions, determining positions to take.
|
|
392
|
+
|
|
393
|
+
Args:
|
|
394
|
+
predictions (list): A list of predicted returns.
|
|
395
|
+
|
|
396
|
+
Returns:
|
|
397
|
+
list: A list of positions (1 for 'LONG', -1 for 'SHORT', 0 for 'HOLD').
|
|
398
|
+
"""
|
|
399
|
+
positions = [] # Long if 1, Short if -1
|
|
400
|
+
previous_position = 0 # Initial position
|
|
401
|
+
for prediction in predictions:
|
|
402
|
+
if prediction > 0:
|
|
403
|
+
current_position = 1 # Long
|
|
404
|
+
elif prediction < 0:
|
|
405
|
+
current_position = -1 # Short
|
|
406
|
+
else:
|
|
407
|
+
current_position = previous_position # Hold previous position
|
|
408
|
+
positions.append(current_position)
|
|
409
|
+
previous_position = current_position
|
|
410
|
+
|
|
411
|
+
return positions
|
|
412
|
+
|
|
413
|
+
# Step 5: Vectorized Backtesting
|
|
414
|
+
def generate_predictions(self):
|
|
415
|
+
"""
|
|
416
|
+
Generator that yields predictions one by one.
|
|
417
|
+
"""
|
|
418
|
+
data = self.data
|
|
419
|
+
window_size = self.k
|
|
420
|
+
for i in range(window_size, len(data)):
|
|
421
|
+
print(
|
|
422
|
+
f"Processing window {i - window_size + 1}/{len(data) - window_size}...")
|
|
423
|
+
window_data = data['diff_log_return'].iloc[i-window_size:i]
|
|
424
|
+
next_return = self.get_prediction(window_data)
|
|
425
|
+
yield next_return
|
|
426
|
+
|
|
427
|
+
def backtest_strategy(self):
|
|
428
|
+
"""
|
|
429
|
+
Performs a backtest of the strategy over
|
|
430
|
+
the entire dataset, plotting cumulative returns.
|
|
431
|
+
"""
|
|
432
|
+
data = self.data
|
|
433
|
+
window_size = self.k
|
|
434
|
+
print(
|
|
435
|
+
f"Starting backtesting for {self.symbol}\n"
|
|
436
|
+
f"Window size {window_size}.\n"
|
|
437
|
+
f"Total iterations: {len(data) - window_size}.\n")
|
|
438
|
+
predictions_generator = self.generate_predictions()
|
|
439
|
+
|
|
440
|
+
positions = self.execute_trading_strategy(predictions_generator)
|
|
441
|
+
|
|
442
|
+
strategy_returns = np.array(
|
|
443
|
+
positions[:-1]) * data['log_return'].iloc[window_size+1:].values
|
|
444
|
+
buy_and_hold = data['log_return'].iloc[window_size+1:].values
|
|
445
|
+
buy_and_hold_returns = np.cumsum(buy_and_hold)
|
|
446
|
+
cumulative_returns = np.cumsum(strategy_returns)
|
|
447
|
+
dates = data.index[window_size+1:]
|
|
448
|
+
self.plot_cumulative_returns(
|
|
449
|
+
cumulative_returns, buy_and_hold_returns, dates)
|
|
450
|
+
|
|
451
|
+
print("\nBacktesting completed !!")
|
|
452
|
+
|
|
453
|
+
# Function to plot the cumulative returns
|
|
454
|
+
def plot_cumulative_returns(self, strategy_returns, buy_and_hold_returns, dates):
|
|
455
|
+
"""
|
|
456
|
+
Plots the cumulative returns of the ARIMA+GARCH strategy against
|
|
457
|
+
a buy-and-hold strategy.
|
|
458
|
+
|
|
459
|
+
Args:
|
|
460
|
+
strategy_returns (np.array): Cumulative returns from the strategy.
|
|
461
|
+
buy_and_hold_returns (np.array): Cumulative returns from a buy-and-hold strategy.
|
|
462
|
+
dates (pd.Index): The dates corresponding to the returns.
|
|
463
|
+
"""
|
|
464
|
+
plt.figure(figsize=(14, 7))
|
|
465
|
+
plt.plot(dates, strategy_returns, label='ARIMA+GARCH ', color='blue')
|
|
466
|
+
plt.plot(dates, buy_and_hold_returns, label='Buy & Hold', color='red')
|
|
467
|
+
plt.xlabel('Time')
|
|
468
|
+
plt.ylabel('Cumulative Returns')
|
|
469
|
+
plt.title(f'ARIMA+GARCH Strategy vs. Buy & Hold on ({self.symbol})')
|
|
470
|
+
plt.legend()
|
|
471
|
+
plt.grid(True)
|
|
472
|
+
plt.show()
|
|
473
|
+
|
|
474
|
+
|
|
475
|
+
# *********************************************
|
|
476
|
+
# STATS TEST (Cointegration , Mean Reverting)*
|
|
477
|
+
# *********************************************
|
|
478
|
+
def get_corr(tickers: Union[List[str], Tuple[str, ...]], start: str, end: str) -> None:
|
|
479
|
+
"""
|
|
480
|
+
Calculates and prints the correlation matrix of the adjusted closing prices
|
|
481
|
+
for a given list of stock tickers within a specified date range.
|
|
482
|
+
|
|
483
|
+
Args:
|
|
484
|
+
tickers (Union[List[str] , Tuple[str, ...]]):
|
|
485
|
+
A list or tuple of valid stock tickers (e.g., ['AAPL', 'MSFT', 'GOOG']).
|
|
486
|
+
start (str): The start date for the historical data in 'YYYY-MM-DD' format.
|
|
487
|
+
end (str): The end date for the historical data in 'YYYY-MM-DD' format.
|
|
488
|
+
|
|
489
|
+
Example:
|
|
490
|
+
>>> from bbstrader.tseries import get_corr
|
|
491
|
+
>>> get_corr(['AAPL', 'MSFT', 'GOOG'], '2023-01-01', '2023-12-31')
|
|
492
|
+
"""
|
|
493
|
+
# Download historical data
|
|
494
|
+
data = yf.download(tickers, start=start, end=end)['Adj Close']
|
|
495
|
+
|
|
496
|
+
# Calculate correlation matrix
|
|
497
|
+
correlation_matrix = data.corr()
|
|
498
|
+
|
|
499
|
+
# Display the matrix
|
|
500
|
+
print(correlation_matrix)
|
|
501
|
+
|
|
502
|
+
|
|
503
|
+
def plot_price_series(df: pd.DataFrame, ts1: str, ts2: str):
|
|
504
|
+
"""
|
|
505
|
+
Plot both time series on the same line graph for
|
|
506
|
+
the specified date range.
|
|
507
|
+
|
|
508
|
+
Args:
|
|
509
|
+
df (pd.DataFrame):
|
|
510
|
+
The DataFrame containing prices for each series
|
|
511
|
+
ts1 (str): The first time series column name
|
|
512
|
+
ts2 (str): The second time series column name
|
|
513
|
+
"""
|
|
514
|
+
fig, ax = plt.subplots()
|
|
515
|
+
ax.plot(df.index, df[ts1], label=ts1)
|
|
516
|
+
ax.plot(df.index, df[ts2], label=ts2)
|
|
517
|
+
|
|
518
|
+
fig.autofmt_xdate()
|
|
519
|
+
plt.xlabel('Month/Year')
|
|
520
|
+
plt.ylabel('Price ($)')
|
|
521
|
+
plt.title(f'{ts1} and {ts2} Daily Prices ')
|
|
522
|
+
plt.legend()
|
|
523
|
+
plt.show()
|
|
524
|
+
|
|
525
|
+
|
|
526
|
+
def plot_scatter_series(df: pd.DataFrame, ts1: str, ts2: str):
|
|
527
|
+
"""
|
|
528
|
+
Plot a scatter plot of both time series for
|
|
529
|
+
via the provided DataFrame.
|
|
530
|
+
|
|
531
|
+
Args:
|
|
532
|
+
df (pd.DataFrame):
|
|
533
|
+
The DataFrame containing prices for each series
|
|
534
|
+
ts1 (str): The first time series column name
|
|
535
|
+
ts2 (str): The second time series column name
|
|
536
|
+
"""
|
|
537
|
+
plt.xlabel(f'{ts1} Price ($)')
|
|
538
|
+
plt.ylabel(f'{ts2} Price ($)')
|
|
539
|
+
plt.title(f'{ts1} and {ts2} Price Scatterplot')
|
|
540
|
+
plt.scatter(df[ts1], df[ts2])
|
|
541
|
+
|
|
542
|
+
# Plot the regression line
|
|
543
|
+
plt.plot(df[ts1], results.fittedvalues,
|
|
544
|
+
linestyle='--', color='red', linewidth=2,
|
|
545
|
+
label='Regression Line'
|
|
546
|
+
)
|
|
547
|
+
plt.legend()
|
|
548
|
+
plt.show()
|
|
549
|
+
|
|
550
|
+
|
|
551
|
+
def plot_residuals(df: pd.DataFrame):
|
|
552
|
+
"""
|
|
553
|
+
Plot the residuals of OLS procedure for both
|
|
554
|
+
time series.
|
|
555
|
+
|
|
556
|
+
Args:
|
|
557
|
+
df (pd.DataFrame):
|
|
558
|
+
The DataFrame containing prices for each series
|
|
559
|
+
"""
|
|
560
|
+
fig, ax = plt.subplots()
|
|
561
|
+
ax.plot(df.index, df["res"], label="Residuals")
|
|
562
|
+
|
|
563
|
+
fig.autofmt_xdate()
|
|
564
|
+
plt.xlabel('Month/Year')
|
|
565
|
+
plt.ylabel('Price ($)')
|
|
566
|
+
plt.title('Residual Plot')
|
|
567
|
+
plt.legend()
|
|
568
|
+
plt.show()
|
|
569
|
+
|
|
570
|
+
|
|
571
|
+
def run_cadf_test(pair: Union[List[str], Tuple[str, ...]], start: str, end: str) -> None:
|
|
572
|
+
"""
|
|
573
|
+
Performs the Cointegration Augmented Dickey-Fuller (CADF) test on a pair of stock tickers
|
|
574
|
+
over a specified date range to check for cointegration.
|
|
575
|
+
|
|
576
|
+
The function downloads historical adjusted closing prices for the specified pair of stock tickers,
|
|
577
|
+
calculates the optimal hedge ratio (beta) using Ordinary Least Squares (OLS) regression, plots the
|
|
578
|
+
time series and their residuals, and finally performs the CADF test on the residuals.
|
|
579
|
+
|
|
580
|
+
Args:
|
|
581
|
+
pair (List[str] or Tuple[str, ...]):
|
|
582
|
+
A list or tuple containing two valid stock tickers (e.g., ['AAPL', 'MSFT']).
|
|
583
|
+
start (str): The start date for the historical data in 'YYYY-MM-DD' format.
|
|
584
|
+
end (str): The end date for the historical data in 'YYYY-MM-DD' format.
|
|
585
|
+
|
|
586
|
+
Example:
|
|
587
|
+
>>> from bbstrader.tseries import run_cadf_test
|
|
588
|
+
>>> run_cadf_test(['AAPL', 'MSFT'], '2023-01-01', '2023-12-31')
|
|
589
|
+
>>> Regression Metrics:
|
|
590
|
+
>>> Optimal Hedge Ratio (Beta): 2.2485845594120333
|
|
591
|
+
>>> Result Parmas:
|
|
592
|
+
|
|
593
|
+
>>> const -74.418034
|
|
594
|
+
>>> AAPL 2.248585
|
|
595
|
+
>>> dtype: float64
|
|
596
|
+
|
|
597
|
+
>>> Regression Summary:
|
|
598
|
+
>>> OLS Regression Results
|
|
599
|
+
>>> ==============================================================================
|
|
600
|
+
>>> Dep. Variable: MSFT R-squared: 0.900
|
|
601
|
+
>>> Model: OLS Adj. R-squared: 0.900
|
|
602
|
+
>>> Method: Least Squares F-statistic: 2244.
|
|
603
|
+
>>> Date: Sat, 20 Jul 2024 Prob (F-statistic): 2.95e-126
|
|
604
|
+
>>> Time: 13:36:58 Log-Likelihood: -996.45
|
|
605
|
+
>>> No. Observations: 250 AIC: 1997.
|
|
606
|
+
>>> Df Residuals: 248 BIC: 2004.
|
|
607
|
+
>>> Df Model: 1
|
|
608
|
+
>>> Covariance Type: nonrobust
|
|
609
|
+
>>> ==============================================================================
|
|
610
|
+
>>> coef std err t P>|t| [0.025 0.975]
|
|
611
|
+
>>> ------------------------------------------------------------------------------
|
|
612
|
+
>>> const -74.4180 8.191 -9.085 0.000 -90.551 -58.286
|
|
613
|
+
>>> AAPL 2.2486 0.047 47.369 0.000 2.155 2.342
|
|
614
|
+
>>> ==============================================================================
|
|
615
|
+
>>> Omnibus: 4.923 Durbin-Watson: 0.121
|
|
616
|
+
>>> Prob(Omnibus): 0.085 Jarque-Bera (JB): 4.862
|
|
617
|
+
>>> Skew: 0.342 Prob(JB): 0.0879
|
|
618
|
+
>>> Kurtosis: 2.993 Cond. No. 1.71e+03
|
|
619
|
+
>>> ==============================================================================
|
|
620
|
+
|
|
621
|
+
>>> Notes:
|
|
622
|
+
>>> [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
|
|
623
|
+
>>> [2] The condition number is large, 1.71e+03. This might indicate that there are
|
|
624
|
+
>>> strong multicollinearity or other numerical problems.
|
|
359
625
|
|
|
360
626
|
>>> Cointegration TEST Results:
|
|
361
627
|
>>> (np.float64(-3.204126144947765),
|
|
@@ -621,268 +887,138 @@ def run_kalman_filter(
|
|
|
621
887
|
draw_slope_intercept_changes(prices, state_means)
|
|
622
888
|
|
|
623
889
|
|
|
624
|
-
class
|
|
890
|
+
class KalmanFilterModel():
|
|
625
891
|
"""
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
The model is implemented in the following steps:
|
|
632
|
-
1. Data Preparation: Load and prepare the historical price data.
|
|
633
|
-
2. Modeling: Fit the ARIMA model to the data and then fit the GARCH model to the residuals.
|
|
634
|
-
3. Prediction: Predict the next return using the ARIMA model and the next volatility using the GARCH model.
|
|
635
|
-
4. Trading Strategy: Execute the trading strategy based on the predictions.
|
|
636
|
-
5. Vectorized Backtesting: Backtest the trading strategy using the historical data.
|
|
637
|
-
|
|
638
|
-
Exemple:
|
|
639
|
-
>>> import yfinance as yf
|
|
640
|
-
>>> from bbstrader.strategies import ArimaGarchModel
|
|
641
|
-
>>> from bbstrader.tseries import load_and_prepare_data
|
|
892
|
+
Implements a Kalman Filter model a recursive algorithm used for estimating
|
|
893
|
+
the state of a linear dynamic system from a series of noisy measurements.
|
|
894
|
+
It's designed to process market data, estimate dynamic parameters such as
|
|
895
|
+
the slope and intercept of price relationships,
|
|
896
|
+
forecast error and standard deviation of the predictions
|
|
642
897
|
|
|
643
|
-
|
|
644
|
-
>>> # ARCH SPY Vectorize Backtest
|
|
645
|
-
>>> k = 252
|
|
646
|
-
>>> data = yf.download("SPY", start="2004-01-02", end="2015-12-31")
|
|
647
|
-
>>> arch = ArimaGarchModel("SPY", data, k=k)
|
|
648
|
-
>>> df = load_and_prepare_data(data)
|
|
649
|
-
>>> arch.show_arima_garch_results(df['diff_log_return'].values[-k:])
|
|
650
|
-
>>> arch.backtest_strategy()
|
|
898
|
+
You can learn more here https://en.wikipedia.org/wiki/Kalman_filter
|
|
651
899
|
"""
|
|
652
900
|
|
|
653
|
-
def __init__(self,
|
|
654
|
-
"""
|
|
655
|
-
Initializes the ArimaGarchStrategy class.
|
|
656
|
-
|
|
657
|
-
Args:
|
|
658
|
-
symbol (str): The ticker symbol for the financial instrument.
|
|
659
|
-
data (pd.DataFrame): `The raw dataset containing at least the 'Close' prices`.
|
|
660
|
-
k (int): The window size for rolling prediction in backtesting.
|
|
661
|
-
"""
|
|
662
|
-
self.symbol = symbol
|
|
663
|
-
self.data = self.load_and_prepare_data(data)
|
|
664
|
-
self.k = k
|
|
665
|
-
|
|
666
|
-
# Step 1: Data Preparation
|
|
667
|
-
def load_and_prepare_data(self, df):
|
|
668
|
-
"""
|
|
669
|
-
Prepares the dataset by calculating logarithmic returns
|
|
670
|
-
and differencing if necessary.
|
|
671
|
-
|
|
672
|
-
Args:
|
|
673
|
-
df (pd.DataFrame): `The raw dataset containing at least the 'Close' prices`.
|
|
674
|
-
|
|
675
|
-
Returns:
|
|
676
|
-
pd.DataFrame: The dataset with additional columns
|
|
677
|
-
for log returns and differenced log returns.
|
|
678
|
-
"""
|
|
679
|
-
return load_and_prepare_data(df)
|
|
680
|
-
|
|
681
|
-
# Step 2: Modeling (ARIMA + GARCH)
|
|
682
|
-
def fit_best_arima(self, window_data):
|
|
683
|
-
"""
|
|
684
|
-
Fits the ARIMA model to the provided window of data,
|
|
685
|
-
selecting the best model based on AIC.
|
|
686
|
-
|
|
687
|
-
Args:
|
|
688
|
-
window_data (np.array): The dataset for a specific window period.
|
|
689
|
-
|
|
690
|
-
Returns:
|
|
691
|
-
ARIMA model: The best fitted ARIMA model based on AIC.
|
|
692
|
-
"""
|
|
693
|
-
return fit_best_arima(window_data)
|
|
694
|
-
|
|
695
|
-
def fit_garch(self, window_data):
|
|
696
|
-
"""
|
|
697
|
-
Fits the GARCH model to the residuals of the best ARIMA model.
|
|
698
|
-
|
|
699
|
-
Args:
|
|
700
|
-
window_data (np.array): The dataset for a specific window period.
|
|
701
|
-
|
|
702
|
-
Returns:
|
|
703
|
-
tuple: Contains the ARIMA result and GARCH result.
|
|
704
|
-
"""
|
|
705
|
-
return fit_garch(window_data)
|
|
706
|
-
|
|
707
|
-
def show_arima_garch_results(self, window_data, acf=True, test_resid=True):
|
|
901
|
+
def __init__(self, tickers: list | tuple, **kwargs):
|
|
708
902
|
"""
|
|
709
|
-
|
|
710
|
-
ACF of residuals and conducting , Box-Pierce and Ljung-Box tests.
|
|
903
|
+
Initializes the Kalman Filter strategy.
|
|
711
904
|
|
|
712
905
|
Args:
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
test_resid (bool, optional):
|
|
717
|
-
If True, conduct Box-Pierce and Ljung-Box tests on residuals. Defaults to True.
|
|
718
|
-
"""
|
|
719
|
-
arima_result = self.fit_best_arima(window_data)
|
|
720
|
-
resid = np.asarray(arima_result.resid)
|
|
721
|
-
resid = resid[~(np.isnan(resid) | np.isinf(resid))]
|
|
722
|
-
garch_model = arch_model(resid, p=1, q=1, rescale=False)
|
|
723
|
-
garch_result = garch_model.fit(disp='off')
|
|
724
|
-
residuals = garch_result.resid
|
|
725
|
-
|
|
726
|
-
# TODO : Plot the ACF of the residuals
|
|
727
|
-
if acf:
|
|
728
|
-
fig = plt.figure(figsize=(12, 8))
|
|
729
|
-
# Plot the ACF of ARIMA residuals
|
|
730
|
-
ax1 = fig.add_subplot(211, ylabel='ACF')
|
|
731
|
-
plot_acf(resid, alpha=0.05, ax=ax1, title='ACF of ARIMA Residuals')
|
|
732
|
-
ax1.set_xlabel('Lags')
|
|
733
|
-
ax1.grid(True)
|
|
734
|
-
|
|
735
|
-
# Plot the ACF of GARCH residuals on the same axes
|
|
736
|
-
ax2 = fig.add_subplot(212, ylabel='ACF')
|
|
737
|
-
plot_acf(residuals, alpha=0.05, ax=ax2,
|
|
738
|
-
title='ACF of GARCH Residuals')
|
|
739
|
-
ax2.set_xlabel('Lags')
|
|
740
|
-
ax2.grid(True)
|
|
741
|
-
|
|
742
|
-
# Plot the figure
|
|
743
|
-
plt.tight_layout()
|
|
744
|
-
plt.show()
|
|
745
|
-
|
|
746
|
-
# TODO : Conduct Box-Pierce and Ljung-Box Tests of the residuals
|
|
747
|
-
if test_resid:
|
|
748
|
-
print(arima_result.summary())
|
|
749
|
-
print(garch_result.summary())
|
|
750
|
-
bp_test = acorr_ljungbox(resid, return_df=True)
|
|
751
|
-
print("Box-Pierce and Ljung-Box Tests Results for ARIMA:\n", bp_test)
|
|
906
|
+
tickers :
|
|
907
|
+
A list or tuple of ticker symbols representing financial instruments.
|
|
752
908
|
|
|
753
|
-
|
|
754
|
-
|
|
909
|
+
kwargs : Keyword arguments for additional parameters,
|
|
910
|
+
specifically `delta` and `vt`
|
|
755
911
|
"""
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
912
|
+
self.tickers = tickers
|
|
913
|
+
assert self.tickers is not None
|
|
914
|
+
self.latest_prices = np.array([-1.0, -1.0])
|
|
915
|
+
self.delta = kwargs.get("delta", 1e-4)
|
|
916
|
+
self.wt = self.delta/(1-self.delta) * np.eye(2)
|
|
917
|
+
self.vt = kwargs.get("vt", 1e-3)
|
|
918
|
+
self.theta = np.zeros(2)
|
|
919
|
+
self.P = np.zeros((2, 2))
|
|
920
|
+
self.R = None
|
|
921
|
+
self.kf = self._init_kalman()
|
|
762
922
|
|
|
763
|
-
|
|
764
|
-
float: The predicted next return.
|
|
923
|
+
def _init_kalman(self):
    """
    Build the ``KalmanFilter`` object stored on the model as ``self.kf``.

    The filter is created with ``dim_x=2`` and ``dim_z=1`` and then
    seeded from the model's own attributes: ``self.P`` becomes the
    initial covariance and ``self.wt`` the process-noise covariance.
    The state starts at zero, the transition matrix is the identity,
    and the measurement noise is the scalar ``1.``.

    NOTE(review): ``KalmanFilter`` is imported outside this view;
    presumably filterpy's class -- confirm.

    Returns:
        The configured ``KalmanFilter`` instance.
    """
    tracker = KalmanFilter(dim_x=2, dim_z=1)
    # State vector starts at the origin, as a column.
    tracker.x = np.zeros((2, 1))
    # Covariance is taken from the model (set in __init__).
    tracker.P = self.P
    # Identity transition matrix.
    tracker.F = np.eye(2)
    # Process noise comes from the model's ``wt`` matrix.
    tracker.Q = self.wt
    # Measurement noise is a plain scalar.
    tracker.R = 1.
    return tracker
|
|
779
938
|
|
|
780
|
-
def
|
|
939
|
+
def calc_slope_intercep(self, prices: np.ndarray):
    """
    Run one predict/update cycle of ``self.kf`` and read back its state.

    The observation row is set to ``[prices[1], 1.0]`` and the filter
    is updated with ``prices[0]`` as the measurement. The two entries
    of the filter state are then returned as slope and intercept.

    Args:
        prices : Indexable with at least two entries; element 1 feeds
            the observation matrix and element 0 is the measurement.

    Returns:
        A ``(slope, intercept)`` tuple taken from the filter state.
    """
    tracker = self.kf
    # Observation row: second price plus a constant term for the intercept.
    tracker.H = np.array([[prices[1], 1.0]])
    tracker.predict()
    tracker.update(prices[0])
    # Snapshot the state once; the copy keeps the filter's array untouched.
    state = tracker.x.copy().flatten()
    return state[0], state[1]
|
|
800
960
|
|
|
801
|
-
def
|
|
961
|
+
def calculate_etqt(self, prices: np.ndarray):
    """
    Compute the forecast error and the standard deviation of the forecast.

    The two prices are stored in ``self.latest_prices``. Once both are
    above the ``-1.0`` placeholder, ``self.theta`` is overwritten with
    the slope and intercept from ``self.calc_slope_intercep`` and one
    step of the manual filter recursion is run, updating ``self.R``,
    ``self.theta`` and ``self.C``.

    On the first qualifying call ``self.R`` is a zero matrix, so the
    gain is zero and the forecast variance equals ``self.vt``.

    NOTE(review): ``calc_slope_intercep`` fits ``prices[0]`` against
    ``prices[1]``, whereas the forecast below predicts
    ``latest_prices[1]`` from ``latest_prices[0]``; confirm that this
    orientation is intended.

    Args:
        prices : Indexable with the latest prices of the two instruments.

    Returns:
        A tuple ``(et, sqrt_Qt)`` with the forecast error and the
        standard deviation of the prediction, or ``None`` while either
        stored price is not greater than ``-1.0``.
    """
    for idx in (0, 1):
        self.latest_prices[idx] = prices[idx]

    # -1.0 marks "no price yet"; nothing to compute until both are set.
    if not (self.latest_prices > -1.0).all():
        return None

    # theta is refreshed from the filter on every call, which replaces
    # the value left by the recursion at the end of the previous call.
    self.theta[0], self.theta[1] = self.calc_slope_intercep(
        self.latest_prices)

    # Observation row [first price, 1.0] and the observed second price.
    F = np.array([[self.latest_prices[0], 1.0]])
    y = self.latest_prices[1]

    # Prior covariance R_t: zero on the first pass, afterwards the
    # previous posterior C plus the process noise.
    self.R = np.zeros((2, 2)) if self.R is None else self.C + self.wt

    # Forecast of the observation and the error of that forecast.
    yhat = F @ self.theta
    et = y - yhat

    # Q_t is the forecast variance; its square root is the std deviation.
    Qt = F @ self.R @ F.T + self.vt
    sqrt_Qt = np.sqrt(Qt)

    # Posterior mean (theta) and covariance (C) through the gain A_t.
    At = (self.R @ F.T) / Qt
    self.theta = self.theta + At.flatten() * et
    self.C = self.R - At * (F @ self.R)
    return (et, sqrt_Qt)
|
|
1018
|
+
|
|
1019
|
+
# ******************************************
|
|
1020
|
+
# ORNSTEIN UHLENBECK PROCESS *
|
|
1021
|
+
# ******************************************
|
|
886
1022
|
|
|
887
1023
|
|
|
888
1024
|
class OrnsteinUhlenbeck():
|
|
@@ -1044,133 +1180,3 @@ class OrnsteinUhlenbeck():
|
|
|
1044
1180
|
self.sigma_hat * dW_matrix[:, t]
|
|
1045
1181
|
)
|
|
1046
1182
|
return simulations_matrix
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
class KalmanFilterModel():
|
|
1050
|
-
"""
|
|
1051
|
-
Implements a Kalman Filter model a recursive algorithm used for estimating
|
|
1052
|
-
the state of a linear dynamic system from a series of noisy measurements.
|
|
1053
|
-
It's designed to process market data, estimate dynamic parameters such as
|
|
1054
|
-
the slope and intercept of price relationships,
|
|
1055
|
-
forecast error and standard deviation of the predictions
|
|
1056
|
-
|
|
1057
|
-
You can learn more here https://en.wikipedia.org/wiki/Kalman_filter
|
|
1058
|
-
"""
|
|
1059
|
-
|
|
1060
|
-
def __init__(self, tickers: list | tuple, **kwargs):
|
|
1061
|
-
"""
|
|
1062
|
-
Initializes the Kalman Filter strategy.
|
|
1063
|
-
|
|
1064
|
-
Args:
|
|
1065
|
-
tickers :
|
|
1066
|
-
A list or tuple of ticker symbols representing financial instruments.
|
|
1067
|
-
|
|
1068
|
-
kwargs : Keyword arguments for additional parameters,
|
|
1069
|
-
specifically `delta` and `vt`
|
|
1070
|
-
"""
|
|
1071
|
-
self.tickers = tickers
|
|
1072
|
-
assert self.tickers is not None
|
|
1073
|
-
self.latest_prices = np.array([-1.0, -1.0])
|
|
1074
|
-
self.delta = kwargs.get("delta", 1e-4)
|
|
1075
|
-
self.wt = self.delta/(1-self.delta) * np.eye(2)
|
|
1076
|
-
self.vt = kwargs.get("vt", 1e-3)
|
|
1077
|
-
self.theta = np.zeros(2)
|
|
1078
|
-
self.P = np.zeros((2, 2))
|
|
1079
|
-
self.R = None
|
|
1080
|
-
self.kf = self._init_kalman()
|
|
1081
|
-
|
|
1082
|
-
def _init_kalman(self):
|
|
1083
|
-
"""
|
|
1084
|
-
Initializes and returns a Kalman Filter configured
|
|
1085
|
-
for the trading strategy. The filter is set up with initial
|
|
1086
|
-
state and covariance, state transition matrix, process noise
|
|
1087
|
-
and measurement noise covariances.
|
|
1088
|
-
"""
|
|
1089
|
-
kf = KalmanFilter(dim_x=2, dim_z=1)
|
|
1090
|
-
kf.x = np.zeros((2, 1)) # Initial state
|
|
1091
|
-
kf.P = self.P # Initial covariance
|
|
1092
|
-
kf.F = np.eye(2) # State transition matrix
|
|
1093
|
-
kf.Q = self.wt # Process noise covariance
|
|
1094
|
-
kf.R = 1. # Scalar measurement noise covariance
|
|
1095
|
-
|
|
1096
|
-
return kf
|
|
1097
|
-
|
|
1098
|
-
def calc_slope_intercep(self, prices: np.ndarray):
|
|
1099
|
-
"""
|
|
1100
|
-
Calculates and returns the slope and intercept
|
|
1101
|
-
of the relationship between the provided prices using the Kalman Filter.
|
|
1102
|
-
This method updates the filter with the latest price and returns
|
|
1103
|
-
the estimated slope and intercept.
|
|
1104
|
-
|
|
1105
|
-
Args:
|
|
1106
|
-
prices : A numpy array of prices for two financial instruments.
|
|
1107
|
-
|
|
1108
|
-
Returns:
|
|
1109
|
-
A tuple containing the slope and intercept of the relationship
|
|
1110
|
-
"""
|
|
1111
|
-
kf = self.kf
|
|
1112
|
-
kf.H = np.array([[prices[1], 1.0]])
|
|
1113
|
-
kf.predict()
|
|
1114
|
-
kf.update(prices[0])
|
|
1115
|
-
slope = kf.x.copy().flatten()[0]
|
|
1116
|
-
intercept = kf.x.copy().flatten()[1]
|
|
1117
|
-
|
|
1118
|
-
return slope, intercept
|
|
1119
|
-
|
|
1120
|
-
def calculate_etqt(self, prices: np.ndarray):
|
|
1121
|
-
"""
|
|
1122
|
-
Calculates the forecast error and standard deviation of the predictions
|
|
1123
|
-
using the Kalman Filter.
|
|
1124
|
-
|
|
1125
|
-
Args:
|
|
1126
|
-
prices : A numpy array of prices for two financial instruments.
|
|
1127
|
-
|
|
1128
|
-
Returns:
|
|
1129
|
-
A tuple containing the forecast error and standard deviation of the predictions.
|
|
1130
|
-
"""
|
|
1131
|
-
|
|
1132
|
-
self.latest_prices[0] = prices[0]
|
|
1133
|
-
self.latest_prices[1] = prices[1]
|
|
1134
|
-
|
|
1135
|
-
if all(self.latest_prices > -1.0):
|
|
1136
|
-
slope, intercept = self.calc_slope_intercep(self.latest_prices)
|
|
1137
|
-
|
|
1138
|
-
self.theta[0] = slope
|
|
1139
|
-
self.theta[1] = intercept
|
|
1140
|
-
|
|
1141
|
-
# Create the observation matrix of the latest prices
|
|
1142
|
-
# of Y and the intercept value (1.0) as well as the
|
|
1143
|
-
# scalar value of the latest price from X
|
|
1144
|
-
F = np.asarray([self.latest_prices[0], 1.0]).reshape((1, 2))
|
|
1145
|
-
y = self.latest_prices[1]
|
|
1146
|
-
|
|
1147
|
-
# The prior value of the states {\theta_t} is
|
|
1148
|
-
# distributed as a multivariate Gaussian with
|
|
1149
|
-
# mean a_t and variance-covariance {R_t}
|
|
1150
|
-
if self.R is not None:
|
|
1151
|
-
self.R = self.C + self.wt
|
|
1152
|
-
else:
|
|
1153
|
-
self.R = np.zeros((2, 2))
|
|
1154
|
-
|
|
1155
|
-
# Calculate the Kalman Filter update
|
|
1156
|
-
# ---------------------------------
|
|
1157
|
-
# Calculate prediction of new observation
|
|
1158
|
-
# as well as forecast error of that prediction
|
|
1159
|
-
yhat = F.dot(self.theta)
|
|
1160
|
-
et = y - yhat
|
|
1161
|
-
|
|
1162
|
-
# {Q_t} is the variance of the prediction of
|
|
1163
|
-
# observations and hence sqrt_Qt is the
|
|
1164
|
-
# standard deviation of the predictions
|
|
1165
|
-
Qt = F.dot(self.R).dot(F.T) + self.vt
|
|
1166
|
-
sqrt_Qt = np.sqrt(Qt)
|
|
1167
|
-
|
|
1168
|
-
# The posterior value of the states {\theta_t} is
|
|
1169
|
-
# distributed as a multivariate Gaussian with mean
|
|
1170
|
-
# {m_t} and variance-covariance {C_t}
|
|
1171
|
-
At = self.R.dot(F.T) / Qt
|
|
1172
|
-
self.theta = self.theta + At.flatten() * et
|
|
1173
|
-
self.C = self.R - At * F.dot(self.R)
|
|
1174
|
-
return (et, sqrt_Qt)
|
|
1175
|
-
else:
|
|
1176
|
-
return None
|