bbstrader 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of bbstrader might be problematic.
- bbstrader/__init__.py +1 -1
- bbstrader/__main__.py +19 -13
- bbstrader/btengine/backtest.py +7 -8
- bbstrader/btengine/execution.py +2 -2
- bbstrader/btengine/strategy.py +68 -17
- bbstrader/config.py +2 -2
- bbstrader/core/data.py +92 -29
- bbstrader/metatrader/account.py +81 -16
- bbstrader/metatrader/copier.py +594 -195
- bbstrader/metatrader/risk.py +1 -0
- bbstrader/metatrader/scripts.py +53 -13
- bbstrader/metatrader/trade.py +79 -67
- bbstrader/metatrader/utils.py +3 -0
- bbstrader/models/__init__.py +0 -1
- bbstrader/models/ml.py +55 -26
- bbstrader/models/nlp.py +182 -74
- bbstrader/models/optimization.py +1 -1
- bbstrader/models/risk.py +16 -386
- bbstrader/trading/execution.py +70 -41
- bbstrader/trading/strategies.py +9 -592
- bbstrader/tseries.py +39 -709
- {bbstrader-0.3.0.dist-info → bbstrader-0.3.2.dist-info}/METADATA +36 -44
- bbstrader-0.3.2.dist-info/RECORD +47 -0
- bbstrader-0.3.0.dist-info/RECORD +0 -47
- {bbstrader-0.3.0.dist-info → bbstrader-0.3.2.dist-info}/WHEEL +0 -0
- {bbstrader-0.3.0.dist-info → bbstrader-0.3.2.dist-info}/entry_points.txt +0 -0
- {bbstrader-0.3.0.dist-info → bbstrader-0.3.2.dist-info}/licenses/LICENSE +0 -0
- {bbstrader-0.3.0.dist-info → bbstrader-0.3.2.dist-info}/top_level.txt +0 -0
bbstrader/tseries.py
CHANGED
```diff
@@ -1,36 +1,24 @@
 """
 The `tseries` module is a designed for conducting
-
-It leverages statistical models and algorithms to perform
-tasks such as cointegration testing, volatility modeling,
-and filter-based estimation to assist in trading strategy development,
-market analysis, and financial data exploration.
+some simple time series analysis in financial markets.
 """
 
 import pprint
 import warnings
-from itertools import combinations
 from typing import List, Tuple, Union
 
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
-import pmdarima as pm
 import seaborn as sns
 import statsmodels.api as sm
 import statsmodels.tsa.stattools as ts
 import yfinance as yf
-from arch import arch_model
 from filterpy.kalman import KalmanFilter
-from hurst import compute_Hc
 from pykalman import KalmanFilter as PyKalmanFilter
-from scipy.optimize import minimize
 from sklearn.linear_model import LogisticRegressionCV
 from sklearn.model_selection import GridSearchCV
 from sklearn.tree import DecisionTreeClassifier
-from statsmodels.graphics.tsaplots import plot_acf
-from statsmodels.stats.diagnostic import acorr_ljungbox
-from statsmodels.tsa.arima.model import ARIMA
 from statsmodels.tsa.stattools import adfuller, coint
 from statsmodels.tsa.vector_ar.var_model import VAR
 from statsmodels.tsa.vector_ar.vecm import coint_johansen
```
```diff
@@ -40,19 +28,8 @@ warnings.filterwarnings("ignore")
 
 
 __all__ = [
-    "load_and_prepare_data",
-    "fit_best_arima",
-    "fit_garch",
-    "predict_next_return",
-    "get_prediction",
-    "get_corr",
-    "run_cadf_test",
-    "run_hurst_test",
-    "run_coint_test",
     "run_kalman_filter",
-    "ArimaGarchModel",
     "KalmanFilterModel",
-    "OrnsteinUhlenbeck",
     "remove_correlated_assets",
    "check_stationarity",
     "remove_stationary_assets",
```
```diff
@@ -70,112 +47,21 @@ __all__ = [
 # *******************************************
 
 
-def load_and_prepare_data(df
-    """
-    Prepares financial time series data for analysis.
-
-    This function takes a pandas DataFrame containing financial data,
-    calculates logarithmic returns, and the first difference
-    of these logarithmic returns. It handles missing values
-    by filling them with zeros.
-
-    Args:
-        df (pd.DataFrame): DataFrame containing at least
-            a `Close` column with closing prices of a financial asset.
-
-    Returns:
-        pd.DataFrame: DataFrame with additional
-            columns for logarithmic returns (`log_return`)
-            and the first difference of logarithmic returns (`diff_log_return`),
-            with `NaN` values filled with `0`.
-    """
-    # Load data
-    data = df.copy()
-    # Calculate logarithmic returns
-    data["log_return"] = np.log(data["Close"] / data["Close"].shift(1))
-    # Differencing if necessary
-    data["diff_log_return"] = data["log_return"].diff()
-    # Drop NaN values
-    data.fillna(0, inplace=True)
-    return data
-
-
-def fit_best_arima(window_data: Union[pd.Series, np.ndarray]):
-    """
-    Identifies and fits the best `ARIMA` model
-    based on the Akaike Information Criterion `(AIC)`.
-
-    Iterates through different combinations of `p` and `q`
-    parameters (within specified ranges) for the ARIMA model,
-    fits them to the provided data, and selects the combination
-    with the lowest `AIC` value.
+def load_and_prepare_data(df):
+    warnings.warn("`load_and_prepare_data` is removed.", DeprecationWarning)
 
-    Args:
-        window_data (pd.Series or np.ndarray):
-            Time series data to fit the `ARIMA` model on.
 
-
-
-
-
-        window_data = window_data.values
-
-    window_data = window_data[~(np.isnan(window_data) | np.isinf(window_data))]
-    # Fit ARIMA model with best parameters
-    model = pm.auto_arima(
-        window_data,
-        start_p=1,
-        start_q=1,
-        max_p=6,
-        max_q=6,
-        seasonal=False,
-        stepwise=True,
+def fit_best_arima(window_data):
+    warnings.warn(
+        "`fit_best_arima` is deprecated, use `pmdarima.auto_arima` instead.",
+        DeprecationWarning,
     )
-    final_order = model.order
-    from arch.utility.exceptions import ConvergenceWarning as ArchWarning
-    from statsmodels.tools.sm_exceptions import ConvergenceWarning as StatsWarning
-
-    with warnings.catch_warnings():
-        warnings.filterwarnings("ignore", category=StatsWarning, module="statsmodels")
-        warnings.filterwarnings("ignore", category=ArchWarning, module="arch")
-        try:
-            best_arima_model = ARIMA(
-                window_data + 1e-5, order=final_order, missing="drop"
-            ).fit()
-            return best_arima_model
-        except np.linalg.LinAlgError:
-            # Catch specific linear algebra errors
-            print("LinAlgError occurred, skipping this data point.")
-            return None
-        except Exception as e:
-            # Catch any other unexpected errors and log them
-            print(f"An error occurred: {e}")
-            return None
-
-
-def fit_garch(window_data: Union[pd.Series, np.ndarray]):
-    """
-    Fits an `ARIMA` model to the data to get residuals,
-    then fits a `GARCH(1,1)` model on these residuals.
-
-    Utilizes the residuals from the best `ARIMA` model fit to
-    then model volatility using a `GARCH(1,1)` model.
 
-    Args:
-        window_data (pd.Series or np.ndarray):
-            Time series data for which to fit the `ARIMA` and `GARCH` models.
 
-
-
-
-
-    if arima_result is None:
-        return None, None
-    resid = np.asarray(arima_result.resid)
-    resid = resid[~(np.isnan(resid) | np.isinf(resid))]
-    garch_model = arch_model(resid, p=1, q=1, rescale=False)
-    garch_result = garch_model.fit(disp="off")
-    return arima_result, garch_result
+def fit_garch(window_data):
+    warnings.warn(
+        "`fit_garch` is deprecated, use `arch.arch_model` instead.",
+        DeprecationWarning,
+    )
 
 
```
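The two stubs above forward users to the libraries the old helpers wrapped. A minimal migration sketch for 0.3.2 (assuming `pmdarima` and `arch` are installed; `returns` is placeholder data for illustration, not part of bbstrader's API):

```python
import numpy as np
import pmdarima as pm
from arch import arch_model

# Placeholder series standing in for a window of differenced log returns
returns = np.random.default_rng(0).normal(scale=0.01, size=500)

# What fit_best_arima did: stepwise AIC search over (p, q), no seasonality
arima = pm.auto_arima(
    returns, start_p=1, start_q=1, max_p=6, max_q=6,
    seasonal=False, stepwise=True,
)

# What fit_garch did: a GARCH(1,1) fitted on the ARIMA residuals
garch = arch_model(arima.resid(), p=1, q=1, rescale=False).fit(disp="off")
```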
```diff
@@ -182,51 +68,14 @@ __all__ = [
 def predict_next_return(arima_result, garch_result):
-
-
-
-
-    with the next period volatility forecast from the `GARCH` model
-    to predict the next return value.
-
-    Args:
-        arima_result (ARIMA result object): The fitted `ARIMA` model result.
-        garch_result (ARCH result object): The fitted `GARCH` model result.
-
-    Returns:
-        float: The predicted next return, adjusted for predicted volatility.
-    """
-    if arima_result is None or garch_result is None:
-        return 0
-    # Predict next value with ARIMA
-    arima_pred = arima_result.forecast(steps=1)
-    # Predict next volatility with GARCH
-    garch_pred = garch_result.forecast(horizon=1)
-    next_volatility = garch_pred.variance.iloc[-1, 0]
-
-    # Combine predictions (return + volatility)
-    if not isinstance(arima_pred, np.ndarray):
-        pred = arima_pred.values[0]
-    else:
-        pred = arima_pred[0]
-    return pred + next_volatility
-
-
-def get_prediction(window_data: Union[pd.Series, np.ndarray]):
-    """
-    Orchestrator function to get the next period's return prediction.
-
-    This function ties together the process of fitting
-    both `ARIMA` and `GARCH` models on the provided data
-    and then predicting the next period's return using these models.
+    warnings.warn(
+        "`predict_next_return` is deprecated.",
+        DeprecationWarning,
+    )
 
-    Args:
-        window_data (Union[pd.Series , np.ndarray]):
-            Time series data to fit the models and predict the next return.
 
-
-
-
-
-    return prediction
+def get_prediction(window_data):
+    warnings.warn(
+        "`get_prediction` is deprecated, ",
+        DeprecationWarning,
+    )
 
 
```
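`predict_next_return` added the GARCH one-step variance forecast to the ARIMA mean forecast. The same quantity can be assembled directly, continuing from the `arima` and `garch` fits sketched above:

```python
# One-step-ahead mean (ARIMA) plus one-step-ahead variance (GARCH),
# combined the way the removed predict_next_return/get_prediction helpers did
mean_forecast = np.asarray(arima.predict(n_periods=1))[0]
variance_forecast = garch.forecast(horizon=1).variance.iloc[-1, 0]
next_return = mean_forecast + variance_forecast  # >0 was read as LONG, <0 as SHORT
```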
```diff
@@ -233,265 +82,8 @@ __all__ = [
 class ArimaGarchModel:
-    """
-    This class implements a time serie model
-    that combines `ARIMA (AutoRegressive Integrated Moving Average)`
-    and `GARCH (Generalized Autoregressive Conditional Heteroskedasticity)` models
-    to predict future returns based on historical price data.
-
-    The model is implemented in the following steps:
-    1. Data Preparation: Load and prepare the historical price data.
-    2. Modeling: Fit the ARIMA model to the data and then fit the GARCH model to the residuals.
-    3. Prediction: Predict the next return using the ARIMA model and the next volatility using the GARCH model.
-    4. Trading Strategy: Execute the trading strategy based on the predictions.
-    5. Vectorized Backtesting: Backtest the trading strategy using the historical data.
-
-    Exemple:
-        >>> import yfinance as yf
-        >>> from bbstrader.tseries import ArimaGarchModel
-        >>> from bbstrader.tseries import load_and_prepare_data
-
-        >>> if __name__ == '__main__':
-        >>>     # ARCH SPY Vectorize Backtest
-        >>>     k = 252
-        >>>     data = yf.download("SPY", start="2010-01-02", end="2015-12-31")
-        >>>     arch = ArimaGarchModel("SPY", data, k=k)
-        >>>     df = load_and_prepare_data(data)
-        >>>     arch.show_arima_garch_results(df['diff_log_return'].values[-k:])
-        >>>     arch.backtest_strategy()
-    """
-
     def __init__(self, symbol, data, k: int = 252):
-
-
-
-        Args:
-            symbol (str): The ticker symbol for the financial instrument.
-            data (pd.DataFrame): `The raw dataset containing at least the 'Close' prices`.
-            k (int): The window size for rolling prediction in backtesting.
-        """
-        self.symbol = symbol
-        self.data = self.load_and_prepare_data(data)
-        self.k = k
-
-    # Step 1: Data Preparation
-    def load_and_prepare_data(self, df):
-        """
-        Prepares the dataset by calculating logarithmic returns
-        and differencing if necessary.
-
-        Args:
-            df (pd.DataFrame): `The raw dataset containing at least the 'Close' prices`.
-
-        Returns:
-            pd.DataFrame: The dataset with additional columns
-            for log returns and differenced log returns.
-        """
-        return load_and_prepare_data(df)
-
-    # Step 2: Modeling (ARIMA + GARCH)
-    def fit_best_arima(self, window_data):
-        """
-        Fits the ARIMA model to the provided window of data,
-        selecting the best model based on AIC.
-
-        Args:
-            window_data (np.array): The dataset for a specific window period.
-
-        Returns:
-            ARIMA model: The best fitted ARIMA model based on AIC.
-        """
-        return fit_best_arima(window_data)
-
-    def fit_garch(self, window_data):
-        """
-        Fits the GARCH model to the residuals of the best ARIMA model.
-
-        Args:
-            window_data (np.array): The dataset for a specific window period.
-
-        Returns:
-            tuple: Contains the ARIMA result and GARCH result.
-        """
-        return fit_garch(window_data)
-
-    def show_arima_garch_results(self, window_data, acf=True, test_resid=True):
-        """
-        Displays the ARIMA and GARCH model results, including plotting
-        ACF of residuals and conducting , Box-Pierce and Ljung-Box tests.
-
-        Args:
-            window_data (np.array): The dataset for a specific window period.
-            acf (bool, optional): If True, plot the ACF of residuals. Defaults to True.
-
-            test_resid (bool, optional):
-                If True, conduct Box-Pierce and Ljung-Box tests on residuals. Defaults to True.
-        """
-        arima_result = self.fit_best_arima(window_data)
-        resid = np.asarray(arima_result.resid)
-        resid = resid[~(np.isnan(resid) | np.isinf(resid))]
-        garch_model = arch_model(resid, p=1, q=1, rescale=False)
-        garch_result = garch_model.fit(disp="off")
-        residuals = garch_result.resid
-
-        # TODO : Plot the ACF of the residuals
-        if acf:
-            fig = plt.figure(figsize=(12, 8))
-            # Plot the ACF of ARIMA residuals
-            ax1 = fig.add_subplot(211, ylabel="ACF")
-            plot_acf(resid, alpha=0.05, ax=ax1, title="ACF of ARIMA Residuals")
-            ax1.set_xlabel("Lags")
-            ax1.grid(True)
-
-            # Plot the ACF of GARCH residuals on the same axes
-            ax2 = fig.add_subplot(212, ylabel="ACF")
-            plot_acf(residuals, alpha=0.05, ax=ax2, title="ACF of GARCH Residuals")
-            ax2.set_xlabel("Lags")
-            ax2.grid(True)
-
-            # Plot the figure
-            plt.tight_layout()
-            plt.show()
-
-        # TODO : Conduct Box-Pierce and Ljung-Box Tests of the residuals
-        if test_resid:
-            print(arima_result.summary())
-            print(garch_result.summary())
-            bp_test = acorr_ljungbox(resid, return_df=True)
-            print("Box-Pierce and Ljung-Box Tests Results for ARIMA:\n", bp_test)
-
-    # Step 3: Prediction
-    def predict_next_return(self, arima_result, garch_result):
-        """
-        Predicts the next return using the ARIMA model
-        and the next volatility using the GARCH model.
-
-        Args:
-            arima_result (ARIMA model): The ARIMA model result.
-            garch_result (GARCH model): The GARCH model result.
-
-        Returns:
-            float: The predicted next return.
-        """
-        return predict_next_return(arima_result, garch_result)
-
-    def get_prediction(self, window_data):
-        """
-        Generates a prediction for the next return based on a window of data.
-
-        Args:
-            window_data (np.array): The dataset for a specific window period.
-
-        Returns:
-            float: The predicted next return.
-        """
-        return get_prediction(window_data)
-
-    def calculate_signals(self, window_data):
-        """
-        Calculates the trading signal based on the prediction.
-
-        Args:
-            window_data (np.array): The dataset for a specific window period.
-
-        Returns:
-            str: The trading signal ('LONG', 'SHORT', or None).
-        """
-        prediction = self.get_prediction(window_data)
-        if prediction > 0:
-            signal = "LONG"
-        elif prediction < 0:
-            signal = "SHORT"
-        else:
-            signal = None
-        return signal
-
-    # Step 4: Trading Strategy
-
-    def execute_trading_strategy(self, predictions):
-        """
-        Executes the trading strategy based on a list
-        of predictions, determining positions to take.
-
-        Args:
-            predictions (list): A list of predicted returns.
-
-        Returns:
-            list: A list of positions (1 for 'LONG', -1 for 'SHORT', 0 for 'HOLD').
-        """
-        positions = []  # Long if 1, Short if -1
-        previous_position = 0  # Initial position
-        for prediction in predictions:
-            if prediction > 0:
-                current_position = 1  # Long
-            elif prediction < 0:
-                current_position = -1  # Short
-            else:
-                current_position = previous_position  # Hold previous position
-            positions.append(current_position)
-            previous_position = current_position
-
-        return positions
-
-    # Step 5: Vectorized Backtesting
-    def generate_predictions(self):
-        """
-        Generator that yields predictions one by one.
-        """
-        data = self.data
-        window_size = self.k
-        for i in range(window_size, len(data)):
-            print(
-                f"Processing window {i - window_size + 1}/{len(data) - window_size}..."
-            )
-            window_data = data["diff_log_return"].iloc[i - window_size : i]
-            next_return = self.get_prediction(window_data)
-            yield next_return
-
-    def backtest_strategy(self):
-        """
-        Performs a backtest of the strategy over
-        the entire dataset, plotting cumulative returns.
-        """
-        data = self.data
-        window_size = self.k
-        print(
-            f"Starting backtesting for {self.symbol}\n"
-            f"Window size {window_size}.\n"
-            f"Total iterations: {len(data) - window_size}.\n"
+        warnings.warn(
+            "`ArimaGarchModel` is deprecated, use `pmdarima.auto_arima` and `arch.arch_model` instead.",
+            DeprecationWarning,
         )
-        predictions_generator = self.generate_predictions()
-
-        positions = self.execute_trading_strategy(predictions_generator)
-
-        strategy_returns = (
-            np.array(positions[:-1]) * data["log_return"].iloc[window_size + 1 :].values
-        )
-        buy_and_hold = data["log_return"].iloc[window_size + 1 :].values
-        buy_and_hold_returns = np.cumsum(buy_and_hold)
-        cumulative_returns = np.cumsum(strategy_returns)
-        dates = data.index[window_size + 1 :]
-        self.plot_cumulative_returns(cumulative_returns, buy_and_hold_returns, dates)
-
-        print("\nBacktesting completed !!")
-
-    # Function to plot the cumulative returns
-    def plot_cumulative_returns(self, strategy_returns, buy_and_hold_returns, dates):
-        """
-        Plots the cumulative returns of the ARIMA+GARCH strategy against
-        a buy-and-hold strategy.
-
-        Args:
-            strategy_returns (np.array): Cumulative returns from the strategy.
-            buy_and_hold_returns (np.array): Cumulative returns from a buy-and-hold strategy.
-            dates (pd.Index): The dates corresponding to the returns.
-        """
-        plt.figure(figsize=(14, 7))
-        plt.plot(dates, strategy_returns, label="ARIMA+GARCH ", color="blue")
-        plt.plot(dates, buy_and_hold_returns, label="Buy & Hold", color="red")
-        plt.xlabel("Time")
-        plt.ylabel("Cumulative Returns")
-        plt.title(f"ARIMA+GARCH Strategy vs. Buy & Hold on ({self.symbol})")
-        plt.legend()
-        plt.grid(True)
-        plt.show()
 
 
```
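`ArimaGarchModel` bundled the rolling k-window refit, signal generation, and a vectorized backtest; none of it survives in 0.3.2. A compressed sketch of the same loop using the libraries the deprecation message names (`rolling_signals` is a hypothetical helper, not a bbstrader function):

```python
import numpy as np
import pandas as pd
import pmdarima as pm
from arch import arch_model

def rolling_signals(diff_log_returns: pd.Series, k: int = 252) -> list:
    """Refit ARIMA+GARCH on each trailing k-window; +1 = long, -1 = short."""
    positions = []
    for i in range(k, len(diff_log_returns)):
        window = diff_log_returns.iloc[i - k : i].to_numpy()
        model = pm.auto_arima(window, seasonal=False, stepwise=True)
        vol = arch_model(model.resid(), p=1, q=1, rescale=False).fit(disp="off")
        pred = np.asarray(model.predict(n_periods=1))[0]
        pred += vol.forecast(horizon=1).variance.iloc[-1, 0]
        # (the removed class held the previous position when pred == 0)
        positions.append(1 if pred > 0 else -1)
    return positions
```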
```diff
@@ -498,30 +90,11 @@ __all__ = [
 # *********************************************
 # STATS TEST (Cointegration , Mean Reverting)*
 # *********************************************
 def get_corr(tickers: Union[List[str], Tuple[str, ...]], start: str, end: str) -> None:
-
-
-
-
-    Args:
-        tickers (Union[List[str] , Tuple[str, ...]]):
-            A list or tuple of valid stock tickers (e.g., ['AAPL', 'MSFT', 'GOOG']).
-        start (str): The start date for the historical data in 'YYYY-MM-DD' format.
-        end (str): The end date for the historical data in 'YYYY-MM-DD' format.
-
-    Example:
-        >>> from bbstrader.tseries import get_corr
-        >>> get_corr(['AAPL', 'MSFT', 'GOOG'], '2023-01-01', '2023-12-31')
-    """
-    # Download historical data
-    data = yf.download(tickers, start=start, end=end, multi_level_index=False, auto_adjust=True)
-    data = data["Adj Close"] if "Adj Close" in data.columns else data["Close"]
-
-    # Calculate correlation matrix
-    correlation_matrix = data.corr()
-
-    # Display the matrix
-    print(correlation_matrix)
+    warnings.warn(
+        "`get_corr` is deprecated, use pandas DataFrame's `corr` method instead.",
+        DeprecationWarning,
+    )
 
 
 def plot_price_series(df: pd.DataFrame, ts1: str, ts2: str):
```
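What `get_corr` did survives as two lines of pandas, which is exactly where the new deprecation message points:

```python
import yfinance as yf

prices = yf.download(["AAPL", "MSFT", "GOOG"],
                     start="2023-01-01", end="2023-12-31")["Close"]
print(prices.corr())  # the pairwise correlation matrix get_corr used to print
```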
```diff
@@ -722,67 +295,10 @@ def run_cadf_test(
     pprint.pprint(cadf)
 
 
-def _hurst(ts):
-    """
-    Returns the Hurst Exponent of the time series vector ts,
-    """
-    # Create the range of lag values
-    lags = range(2, 100)
-
-    # Calculate the array of the variances of the lagged differences
-    tau = [np.sqrt(np.std(np.subtract(ts[lag:], ts[:-lag]))) for lag in lags]
-
-    # Use a linear fit to estimate the Hurst Exponent
-    poly = np.polyfit(np.log(lags), np.log(tau), 1)
-
-    # Return the Hurst exponent from the polyfit output
-    return poly[0] * 2.0
-
-
-# Function to calculate Hurst Exponent
-
-
-def hurst(time_series):
-    H, c, data_range = compute_Hc(time_series, kind="price", simplified=True)
-    return H
-
-
 def run_hurst_test(symbol: str, start: str, end: str):
-
-
-
-    Mean-Reverting, and Trending).
-
-    The Hurst Exponent is used to determine the long-term memory of a time series.
-
-    Args:
-        symbol (str): A valid stock ticker symbol (e.g., 'AAPL').
-        start (str): The start date for the historical data in 'YYYY-MM-DD' format.
-        end (str): The end date for the historical data in 'YYYY-MM-DD' format.
-
-    Example:
-        >>> from bbstrader.tseries import run_hurst_test
-
-        >>> run_hurst_test('AAPL', '2023-01-01', '2023-12-31')
-    """
-    data = yf.download(
-        symbol,
-        start=start,
-        end=end,
-        progress=False,
-        multi_level_index=False,
-        auto_adjust=True,
+    warnings.warn(
+        "`run_hurst_test` is deprecated, use `hurst.compute_Hc` instead.",
+        DeprecationWarning,
     )
 
-    # Create a Geometric Brownian Motion, Mean-Reverting, and Trending Series
-    gbm = np.log(np.cumsum(np.random.randn(100000)) + 1000)
-    mr = np.log(np.random.randn(100000) + 1000)
-    tr = np.log(np.cumsum(np.random.randn(100000) + 1) + 1000)
-
-    # Output the Hurst Exponent for each of the series
-    print(f"\nHurst(GBM): {_hurst(gbm)}")
-    print(f"Hurst(MR): {_hurst(mr)}")
-    print(f"Hurst(TR): {_hurst(tr)}")
-    print(f"Hurst({symbol}): {hurst(data['Close'])}\n")
-
 
```
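`run_hurst_test` and its helpers are gone; the `hurst` package they wrapped is the suggested replacement. A self-contained sketch on a synthetic price path:

```python
import numpy as np
from hurst import compute_Hc

series = np.cumsum(np.random.randn(10_000)) + 1_000.0  # synthetic price path
H, c, _ = compute_Hc(series, kind="price", simplified=True)
print(f"Hurst exponent: {H:.3f}")  # <0.5 mean-reverting, ~0.5 random walk, >0.5 trending
```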
```diff
@@ -789,29 +305,7 @@ def run_cadf_test(
 def test_cointegration(ticker1, ticker2, start, end):
-
-
-
-
-        end=end,
-        progress=False,
-        multi_level_index=False,
-        auto_adjust=True,
-    )["Close"].dropna()
-
-    # Perform Johansen cointegration test
-    result = coint_johansen(stock_data_pair, det_order=0, k_ar_diff=1)
-
-    # Get the cointegration rank
-    traces_stats = result.lr1
-    print(f"\nTraces Stats: \n{traces_stats}")
-
-    # Get the critical values for 95% confidence level
-    critical_values = result.cvt
-    print(f"\nCritical Values: \n{critical_values}")
-
-    # Compare the cointegration rank with critical values
-    if traces_stats[0] > critical_values[:, 1].all():
-        print(f"\n{ticker1} and {ticker2} are cointegrated.\n")
-    else:
-        print(f"\nNo cointegration found for {ticker1} and {ticker2}.\n")
+    warnings.warn(
+        "`test_cointegration` is deprecated, see statsmodels.tsa.stattools.coint instead.",
+        DeprecationWarning,
+    )
 
 
```
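For the pairwise case the new message recommends `statsmodels.tsa.stattools.coint` (Engle-Granger) in place of the removed Johansen wrapper; `coint_johansen` remains importable from statsmodels for the multivariate case. A minimal sketch on data that is cointegrated by construction:

```python
import numpy as np
from statsmodels.tsa.stattools import coint

rng = np.random.default_rng(42)
x = np.cumsum(rng.normal(size=1_000))   # random walk
y = 0.8 * x + rng.normal(size=1_000)    # cointegrated with x by construction
t_stat, p_value, crit_values = coint(x, y)
print(f"p-value: {p_value:.4f}")        # small p-value rejects "no cointegration"
```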
```diff
@@ -818,23 +312,5 @@ def run_cadf_test(
 def run_coint_test(tickers: List[str], start: str, end: str) -> None:
-
-    Performs pairwise cointegration tests on a list of stock tickers over a specified date range.
-
-    For each unique pair of tickers, the function downloads historical adjusted closing prices and
-    tests for cointegration.
-
-    Args:
-        tickers (List[str]): A list of valid stock ticker symbols (e.g., ['AAPL', 'MSFT', 'GOOG']).
-        start (str): The start date for the historical data in 'YYYY-MM-DD' format.
-        end (str): The end date for the historical data in 'YYYY-MM-DD' format.
-
-    Example:
-        >>> from bbstrader.tseries import run_coint_test
-
-        >>> run_coint_test(['AAPL', 'MSFT', 'GOOG'], '2023-01-01', '2023-12-31')
-    """
-    # Loop through ticker combinations
-    for ticker1, ticker2 in combinations(tickers, 2):
-        test_cointegration(ticker1, ticker2, start, end)
+    test_cointegration()
 
 
 # *********************************
```
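Note that the new `run_coint_test` body calls `test_cointegration()` with no arguments while the retained signature still takes four, so any caller of `run_coint_test` in 0.3.2 raises a `TypeError` before the deprecation warning inside `test_cointegration` is ever reached.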
```diff
@@ -1086,158 +562,12 @@ class KalmanFilterModel:
         return None
 
 
-# ******************************************
-# ORNSTEIN UHLENBECK PROCESS *
-# ******************************************
-
-
 class OrnsteinUhlenbeck:
-    """
-    The Ornstein-Uhlenbeck process is a mathematical model
-    used to describe the behavior of a mean-reverting stochastic process.
-    We use it to model the price dynamics of an asset that tends
-    to revert to a long-term mean.
-
-    We Estimate the drift (θ), volatility (σ), and long-term mean (μ)
-    based on historical price data; then we Simulate the OU process
-    using the estimated parameters.
-
-    https://en.wikipedia.org/wiki/Ornstein%E2%80%93Uhlenbeck_process
-    """
-
     def __init__(self, prices: np.ndarray, returns: bool = True, timeframe: str = "D1"):
-
-
-
-        Args:
-            prices (np.ndarray) : Historical close prices.
-
-            retrurns (bool) : Use it to indicate weither
-                you want to simulate the returns or your raw data
-
-            timeframe (str) : The time frame for the Historical prices
-                (1m, 5m, 15m, 30m, 1h, 4h, D1)
-        """
-        self.prices = prices
-        if returns:
-            series = pd.Series(self.prices)
-            self.returns = series.pct_change().dropna().values
-        else:
-            self.returns = self.prices
-
-        time_frame_mapping = {
-            "1m": 1 / (24 * 60),  # 1 minute intervals
-            "5m": 5 / (24 * 60),  # 5 minute intervals
-            "15m": 15 / (24 * 60),  # 15 minute intervals
-            "30m": 30 / (24 * 60),  # 30 minute intervals
-            "1h": 1 / 24,  # 1 hour intervals
-            "4h": 4 / 24,  # 4 hour intervals
-            "D1": 1,  # Daily intervals
-        }
-        if timeframe not in time_frame_mapping:
-            raise ValueError("Unsupported time frame")
-        self.tf = time_frame_mapping[timeframe]
-
-        params = self.estimate_parameters()
-        self.mu_hat = params[0]  # Mean (μ)
-        self.theta_hat = params[1]  # Drift (θ)
-        self.sigma_hat = params[2]  # Volatility (σ)
-        print(f"Estimated μ: {self.mu_hat}")
-        print(f"Estimated θ: {self.theta_hat}")
-        print(f"Estimated σ: {self.sigma_hat}")
-
-    def ornstein_uhlenbeck(self, mu, theta, sigma, dt, X0, n):
-        """
-        Simulates the Ornstein-Uhlenbeck process.
-
-        Args:
-            mu (float): Estimated long-term mean.
-            theta (float): Estimated drift.
-            sigma (float): Estimated volatility.
-            dt (float): Time step.
-            X0 (float): Initial value.
-            n (int): Number of time steps.
-
-        Returns:
-            np.ndarray : Simulated process.
-        """
-        x = np.zeros(n)
-        x[0] = X0
-        for t in range(1, n):
-            dW = np.random.normal(loc=0, scale=np.sqrt(dt))
-            # O-U process differential equation
-            x[t] = x[t - 1] + (theta * (mu - x[t - 1]) * dt) + (sigma * dW)
-            # dW is a Wiener process
-            # (theta * (mu - x[t-1]) * dt) represents the mean-reverting tendency
-            # (sigma * dW) represents the random volatility
-        return x
-
-    def estimate_parameters(self):
-        """
-        Estimates the mean-reverting parameters (μ, θ, σ)
-        using the negative log-likelihood.
-
-        Returns:
-            Tuple: Estimated μ, θ, and σ.
-        """
-        initial_guess = [0, 0.1, np.std(self.returns)]
-        result = minimize(self._neg_log_likelihood, initial_guess, args=(self.returns,))
-        mu, theta, sigma = result.x
-        return mu, theta, sigma
-
-    def _neg_log_likelihood(self, params, returns):
-        """
-        Calculates the negative
-        log-likelihood for parameter estimation.
-
-        Args:
-            params (list): List of parameters [mu, theta, sigma].
-            returns (np.ndarray): Historical returns.
-
-        Returns:
-            float: Negative log-likelihood.
-        """
-        mu, theta, sigma = params
-        dt = self.tf
-        n = len(returns)
-        ou_simulated = self.ornstein_uhlenbeck(mu, theta, sigma, dt, 0, n + 1)
-        residuals = ou_simulated[1 : n + 1] - returns
-        neg_ll = 0.5 * np.sum(residuals**2) / sigma**2 + 0.5 * n * np.log(
-            2 * np.pi * sigma**2
+        warnings.warn(
+            "`OrnsteinUhlenbeck` is deprecated, use `statsmodels.tsa` instead.",
+            DeprecationWarning,
         )
-        return neg_ll
-
-    def simulate_process(self, returns=None, n=100, p=None):
-        """
-        Simulates the OU process multiple times .
-
-        Args:
-            returns (np.ndarray): Historical returns.
-            n (int): Number of simulations to perform.
-            p (int): Number of time steps.
-
-        Returns:
-            np.ndarray: 2D array representing simulated processes.
-        """
-        if returns is None:
-            returns = self.returns
-        if p is not None:
-            T = p
-        else:
-            T = len(returns)
-        dt = self.tf
-
-        dW_matrix = np.random.normal(loc=0, scale=np.sqrt(dt), size=(n, T))
-        simulations_matrix = np.zeros((n, T))
-        simulations_matrix[:, 0] = returns[-1]
-
-        for t in range(1, T):
-            simulations_matrix[:, t] = (
-                simulations_matrix[:, t - 1]
-                + self.theta_hat * (self.mu_hat - simulations_matrix[:, t - 1]) * dt
-                + self.sigma_hat * dW_matrix[:, t]
-            )
-        return simulations_matrix
 
 
 def remove_correlated_assets(df: pd.DataFrame, cutoff=0.99):
```