bbstrader 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of bbstrader might be problematic.
- bbstrader/__init__.py +1 -1
- bbstrader/__main__.py +7 -5
- bbstrader/btengine/backtest.py +7 -8
- bbstrader/btengine/data.py +3 -3
- bbstrader/btengine/execution.py +2 -2
- bbstrader/btengine/strategy.py +70 -17
- bbstrader/config.py +2 -2
- bbstrader/core/data.py +3 -1
- bbstrader/core/scripts.py +62 -19
- bbstrader/metatrader/account.py +108 -23
- bbstrader/metatrader/copier.py +753 -280
- bbstrader/metatrader/rates.py +2 -2
- bbstrader/metatrader/risk.py +1 -0
- bbstrader/metatrader/scripts.py +35 -9
- bbstrader/metatrader/trade.py +60 -43
- bbstrader/metatrader/utils.py +3 -5
- bbstrader/models/__init__.py +0 -1
- bbstrader/models/ml.py +55 -26
- bbstrader/models/nlp.py +159 -89
- bbstrader/models/optimization.py +1 -1
- bbstrader/models/risk.py +16 -386
- bbstrader/trading/execution.py +109 -50
- bbstrader/trading/strategies.py +9 -592
- bbstrader/tseries.py +39 -711
- {bbstrader-0.3.1.dist-info → bbstrader-0.3.3.dist-info}/METADATA +36 -41
- bbstrader-0.3.3.dist-info/RECORD +47 -0
- bbstrader-0.3.1.dist-info/RECORD +0 -47
- {bbstrader-0.3.1.dist-info → bbstrader-0.3.3.dist-info}/WHEEL +0 -0
- {bbstrader-0.3.1.dist-info → bbstrader-0.3.3.dist-info}/entry_points.txt +0 -0
- {bbstrader-0.3.1.dist-info → bbstrader-0.3.3.dist-info}/licenses/LICENSE +0 -0
- {bbstrader-0.3.1.dist-info → bbstrader-0.3.3.dist-info}/top_level.txt +0 -0
bbstrader/tseries.py
CHANGED
```diff
@@ -1,58 +1,33 @@
 """
 The `tseries` module is a designed for conducting
-
-It leverages statistical models and algorithms to perform
-tasks such as cointegration testing, volatility modeling,
-and filter-based estimation to assist in trading strategy development,
-market analysis, and financial data exploration.
+some simple time series analysis in financial markets.
 """
 
 import pprint
 import warnings
-from itertools import combinations
 from typing import List, Tuple, Union
 
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
-import pmdarima as pm
 import seaborn as sns
 import statsmodels.api as sm
 import statsmodels.tsa.stattools as ts
 import yfinance as yf
-from arch import arch_model
 from filterpy.kalman import KalmanFilter
-from hurst import compute_Hc
 from pykalman import KalmanFilter as PyKalmanFilter
-from scipy.optimize import minimize
 from sklearn.linear_model import LogisticRegressionCV
 from sklearn.model_selection import GridSearchCV
 from sklearn.tree import DecisionTreeClassifier
-from statsmodels.graphics.tsaplots import plot_acf
-from statsmodels.stats.diagnostic import acorr_ljungbox
-from statsmodels.tsa.arima.model import ARIMA
 from statsmodels.tsa.stattools import adfuller, coint
 from statsmodels.tsa.vector_ar.var_model import VAR
 from statsmodels.tsa.vector_ar.vecm import coint_johansen
 from tqdm import tqdm
 
-warnings.filterwarnings("ignore")
-
 
 __all__ = [
-    "load_and_prepare_data",
-    "fit_best_arima",
-    "fit_garch",
-    "predict_next_return",
-    "get_prediction",
-    "get_corr",
-    "run_cadf_test",
-    "run_hurst_test",
-    "run_coint_test",
     "run_kalman_filter",
-    "ArimaGarchModel",
     "KalmanFilterModel",
-    "OrnsteinUhlenbeck",
     "remove_correlated_assets",
     "check_stationarity",
     "remove_stationary_assets",
```
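With the module-level `warnings.filterwarnings("ignore")` gone, the `DeprecationWarning`s introduced throughout this release now actually reach callers. A minimal sketch, standard library only, of how a downstream project might surface or silence them while migrating:

```python
import warnings

# During migration: make every deprecation visible, each time it fires.
warnings.simplefilter("always", DeprecationWarning)

# Once call sites are updated: silence only this category again.
# warnings.simplefilter("ignore", DeprecationWarning)
```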
```diff
@@ -70,458 +45,54 @@ __all__ = [
 # *******************************************
 
 
-def load_and_prepare_data(df):
-    """
-    Prepares financial time series data for analysis.
-
-    This function takes a pandas DataFrame containing financial data,
-    calculates logarithmic returns, and the first difference
-    of these logarithmic returns. It handles missing values
-    by filling them with zeros.
-
-    Args:
-        df (pd.DataFrame): DataFrame containing at least
-        a `Close` column with closing prices of a financial asset.
-
-    Returns:
-        pd.DataFrame: DataFrame with additional
-        columns for logarithmic returns (`log_return`)
-        and the first difference of logarithmic returns (`diff_log_return`),
-        with `NaN` values filled with `0`.
-    """
-    # Load data
-    data = df.copy()
-    # Calculate logarithmic returns
-    data["log_return"] = np.log(data["Close"] / data["Close"].shift(1))
-    # Differencing if necessary
-    data["diff_log_return"] = data["log_return"].diff()
-    # Drop NaN values
-    data.fillna(0, inplace=True)
-    return data
-
-
-def fit_best_arima(window_data: Union[pd.Series, np.ndarray]):
-    """
-    Identifies and fits the best `ARIMA` model
-    based on the Akaike Information Criterion `(AIC)`.
-
-    Iterates through different combinations of `p` and `q`
-    parameters (within specified ranges) for the ARIMA model,
-    fits them to the provided data, and selects the combination
-    with the lowest `AIC` value.
+def load_and_prepare_data(df):
+    warnings.warn("`load_and_prepare_data` is removed.", DeprecationWarning)
 
-    Args:
-        window_data (pd.Series or np.ndarray):
-        Time series data to fit the `ARIMA` model on.
 
-
-
-
-
-        window_data = window_data.values
-
-    window_data = window_data[~(np.isnan(window_data) | np.isinf(window_data))]
-    # Fit ARIMA model with best parameters
-    model = pm.auto_arima(
-        window_data,
-        start_p=1,
-        start_q=1,
-        max_p=6,
-        max_q=6,
-        seasonal=False,
-        stepwise=True,
+def fit_best_arima(window_data):
+    warnings.warn(
+        "`fit_best_arima` is deprecated, use `pmdarima.auto_arima` instead.",
+        DeprecationWarning,
     )
-    final_order = model.order
-    from arch.utility.exceptions import ConvergenceWarning as ArchWarning
-    from statsmodels.tools.sm_exceptions import ConvergenceWarning as StatsWarning
-
-    with warnings.catch_warnings():
-        warnings.filterwarnings("ignore", category=StatsWarning, module="statsmodels")
-        warnings.filterwarnings("ignore", category=ArchWarning, module="arch")
-        try:
-            best_arima_model = ARIMA(
-                window_data + 1e-5, order=final_order, missing="drop"
-            ).fit()
-            return best_arima_model
-        except np.linalg.LinAlgError:
-            # Catch specific linear algebra errors
-            print("LinAlgError occurred, skipping this data point.")
-            return None
-        except Exception as e:
-            # Catch any other unexpected errors and log them
-            print(f"An error occurred: {e}")
-            return None
-
-
-def fit_garch(window_data: Union[pd.Series, np.ndarray]):
-    """
-    Fits an `ARIMA` model to the data to get residuals,
-    then fits a `GARCH(1,1)` model on these residuals.
 
-    Utilizes the residuals from the best `ARIMA` model fit to
-    then model volatility using a `GARCH(1,1)` model.
 
-
-
-
-
-
-        tuple: A tuple containing the `ARIMA` result
-        object and the `GARCH` result object.
-    """
-    arima_result = fit_best_arima(window_data)
-    if arima_result is None:
-        return None, None
-    resid = np.asarray(arima_result.resid)
-    resid = resid[~(np.isnan(resid) | np.isinf(resid))]
-    garch_model = arch_model(resid, p=1, q=1, rescale=False)
-    garch_result = garch_model.fit(disp="off")
-    return arima_result, garch_result
+def fit_garch(window_data):
+    warnings.warn(
+        "`fit_garch` is deprecated, use `arch.arch_model` instead.",
+        DeprecationWarning,
+    )
 
 
 def predict_next_return(arima_result, garch_result):
-
-
-
-
-    with the next period volatility forecast from the `GARCH` model
-    to predict the next return value.
-
-    Args:
-        arima_result (ARIMA result object): The fitted `ARIMA` model result.
-        garch_result (ARCH result object): The fitted `GARCH` model result.
-
-    Returns:
-        float: The predicted next return, adjusted for predicted volatility.
-    """
-    if arima_result is None or garch_result is None:
-        return 0
-    # Predict next value with ARIMA
-    arima_pred = arima_result.forecast(steps=1)
-    # Predict next volatility with GARCH
-    garch_pred = garch_result.forecast(horizon=1)
-    next_volatility = garch_pred.variance.iloc[-1, 0]
-
-    # Combine predictions (return + volatility)
-    if not isinstance(arima_pred, np.ndarray):
-        pred = arima_pred.values[0]
-    else:
-        pred = arima_pred[0]
-    return pred + next_volatility
-
-
-def get_prediction(window_data: Union[pd.Series, np.ndarray]):
-    """
-    Orchestrator function to get the next period's return prediction.
+    warnings.warn(
+        "`predict_next_return` is deprecated.",
+        DeprecationWarning,
+    )
 
-    This function ties together the process of fitting
-    both `ARIMA` and `GARCH` models on the provided data
-    and then predicting the next period's return using these models.
 
-
-
-
-
-
-        float: Predicted next return value.
-    """
-    arima_result, garch_result = fit_garch(window_data)
-    prediction = predict_next_return(arima_result, garch_result)
-    return prediction
+def get_prediction(window_data):
+    warnings.warn(
+        "`get_prediction` is deprecated, ",
+        DeprecationWarning,
+    )
 
 
 class ArimaGarchModel:
-    """
-    This class implements a time serie model
-    that combines `ARIMA (AutoRegressive Integrated Moving Average)`
-    and `GARCH (Generalized Autoregressive Conditional Heteroskedasticity)` models
-    to predict future returns based on historical price data.
-
-    The model is implemented in the following steps:
-    1. Data Preparation: Load and prepare the historical price data.
-    2. Modeling: Fit the ARIMA model to the data and then fit the GARCH model to the residuals.
-    3. Prediction: Predict the next return using the ARIMA model and the next volatility using the GARCH model.
-    4. Trading Strategy: Execute the trading strategy based on the predictions.
-    5. Vectorized Backtesting: Backtest the trading strategy using the historical data.
-
-    Exemple:
-        >>> import yfinance as yf
-        >>> from bbstrader.tseries import ArimaGarchModel
-        >>> from bbstrader.tseries import load_and_prepare_data
-
-        >>> if __name__ == '__main__':
-        >>>     # ARCH SPY Vectorize Backtest
-        >>>     k = 252
-        >>>     data = yf.download("SPY", start="2010-01-02", end="2015-12-31")
-        >>>     arch = ArimaGarchModel("SPY", data, k=k)
-        >>>     df = load_and_prepare_data(data)
-        >>>     arch.show_arima_garch_results(df['diff_log_return'].values[-k:])
-        >>>     arch.backtest_strategy()
-    """
-
     def __init__(self, symbol, data, k: int = 252):
-
-
-
-        Args:
-            symbol (str): The ticker symbol for the financial instrument.
-            data (pd.DataFrame): `The raw dataset containing at least the 'Close' prices`.
-            k (int): The window size for rolling prediction in backtesting.
-        """
-        self.symbol = symbol
-        self.data = self.load_and_prepare_data(data)
-        self.k = k
-
-    # Step 1: Data Preparation
-    def load_and_prepare_data(self, df):
-        """
-        Prepares the dataset by calculating logarithmic returns
-        and differencing if necessary.
-
-        Args:
-            df (pd.DataFrame): `The raw dataset containing at least the 'Close' prices`.
-
-        Returns:
-            pd.DataFrame: The dataset with additional columns
-            for log returns and differenced log returns.
-        """
-        return load_and_prepare_data(df)
-
-    # Step 2: Modeling (ARIMA + GARCH)
-    def fit_best_arima(self, window_data):
-        """
-        Fits the ARIMA model to the provided window of data,
-        selecting the best model based on AIC.
-
-        Args:
-            window_data (np.array): The dataset for a specific window period.
-
-        Returns:
-            ARIMA model: The best fitted ARIMA model based on AIC.
-        """
-        return fit_best_arima(window_data)
-
-    def fit_garch(self, window_data):
-        """
-        Fits the GARCH model to the residuals of the best ARIMA model.
-
-        Args:
-            window_data (np.array): The dataset for a specific window period.
-
-        Returns:
-            tuple: Contains the ARIMA result and GARCH result.
-        """
-        return fit_garch(window_data)
-
-    def show_arima_garch_results(self, window_data, acf=True, test_resid=True):
-        """
-        Displays the ARIMA and GARCH model results, including plotting
-        ACF of residuals and conducting , Box-Pierce and Ljung-Box tests.
-
-        Args:
-            window_data (np.array): The dataset for a specific window period.
-            acf (bool, optional): If True, plot the ACF of residuals. Defaults to True.
-
-            test_resid (bool, optional):
-            If True, conduct Box-Pierce and Ljung-Box tests on residuals. Defaults to True.
-        """
-        arima_result = self.fit_best_arima(window_data)
-        resid = np.asarray(arima_result.resid)
-        resid = resid[~(np.isnan(resid) | np.isinf(resid))]
-        garch_model = arch_model(resid, p=1, q=1, rescale=False)
-        garch_result = garch_model.fit(disp="off")
-        residuals = garch_result.resid
-
-        # TODO : Plot the ACF of the residuals
-        if acf:
-            fig = plt.figure(figsize=(12, 8))
-            # Plot the ACF of ARIMA residuals
-            ax1 = fig.add_subplot(211, ylabel="ACF")
-            plot_acf(resid, alpha=0.05, ax=ax1, title="ACF of ARIMA Residuals")
-            ax1.set_xlabel("Lags")
-            ax1.grid(True)
-
-            # Plot the ACF of GARCH residuals on the same axes
-            ax2 = fig.add_subplot(212, ylabel="ACF")
-            plot_acf(residuals, alpha=0.05, ax=ax2, title="ACF of GARCH Residuals")
-            ax2.set_xlabel("Lags")
-            ax2.grid(True)
-
-            # Plot the figure
-            plt.tight_layout()
-            plt.show()
-
-        # TODO : Conduct Box-Pierce and Ljung-Box Tests of the residuals
-        if test_resid:
-            print(arima_result.summary())
-            print(garch_result.summary())
-            bp_test = acorr_ljungbox(resid, return_df=True)
-            print("Box-Pierce and Ljung-Box Tests Results for ARIMA:\n", bp_test)
-
-    # Step 3: Prediction
-    def predict_next_return(self, arima_result, garch_result):
-        """
-        Predicts the next return using the ARIMA model
-        and the next volatility using the GARCH model.
-
-        Args:
-            arima_result (ARIMA model): The ARIMA model result.
-            garch_result (GARCH model): The GARCH model result.
-
-        Returns:
-            float: The predicted next return.
-        """
-        return predict_next_return(arima_result, garch_result)
-
-    def get_prediction(self, window_data):
-        """
-        Generates a prediction for the next return based on a window of data.
-
-        Args:
-            window_data (np.array): The dataset for a specific window period.
-
-        Returns:
-            float: The predicted next return.
-        """
-        return get_prediction(window_data)
-
-    def calculate_signals(self, window_data):
-        """
-        Calculates the trading signal based on the prediction.
-
-        Args:
-            window_data (np.array): The dataset for a specific window period.
-
-        Returns:
-            str: The trading signal ('LONG', 'SHORT', or None).
-        """
-        prediction = self.get_prediction(window_data)
-        if prediction > 0:
-            signal = "LONG"
-        elif prediction < 0:
-            signal = "SHORT"
-        else:
-            signal = None
-        return signal
-
-    # Step 4: Trading Strategy
-
-    def execute_trading_strategy(self, predictions):
-        """
-        Executes the trading strategy based on a list
-        of predictions, determining positions to take.
-
-        Args:
-            predictions (list): A list of predicted returns.
-
-        Returns:
-            list: A list of positions (1 for 'LONG', -1 for 'SHORT', 0 for 'HOLD').
-        """
-        positions = []  # Long if 1, Short if -1
-        previous_position = 0  # Initial position
-        for prediction in predictions:
-            if prediction > 0:
-                current_position = 1  # Long
-            elif prediction < 0:
-                current_position = -1  # Short
-            else:
-                current_position = previous_position  # Hold previous position
-            positions.append(current_position)
-            previous_position = current_position
-
-        return positions
-
-    # Step 5: Vectorized Backtesting
-    def generate_predictions(self):
-        """
-        Generator that yields predictions one by one.
-        """
-        data = self.data
-        window_size = self.k
-        for i in range(window_size, len(data)):
-            print(
-                f"Processing window {i - window_size + 1}/{len(data) - window_size}..."
-            )
-            window_data = data["diff_log_return"].iloc[i - window_size : i]
-            next_return = self.get_prediction(window_data)
-            yield next_return
-
-    def backtest_strategy(self):
-        """
-        Performs a backtest of the strategy over
-        the entire dataset, plotting cumulative returns.
-        """
-        data = self.data
-        window_size = self.k
-        print(
-            f"Starting backtesting for {self.symbol}\n"
-            f"Window size {window_size}.\n"
-            f"Total iterations: {len(data) - window_size}.\n"
+        warnings.warn(
+            "`ArimaGarchModel` is deprecated, use `pmdarima.auto_arima` and `arch.arch_model` instead.",
+            DeprecationWarning,
         )
-        predictions_generator = self.generate_predictions()
-
-        positions = self.execute_trading_strategy(predictions_generator)
-
-        strategy_returns = (
-            np.array(positions[:-1]) * data["log_return"].iloc[window_size + 1 :].values
-        )
-        buy_and_hold = data["log_return"].iloc[window_size + 1 :].values
-        buy_and_hold_returns = np.cumsum(buy_and_hold)
-        cumulative_returns = np.cumsum(strategy_returns)
-        dates = data.index[window_size + 1 :]
-        self.plot_cumulative_returns(cumulative_returns, buy_and_hold_returns, dates)
-
-        print("\nBacktesting completed !!")
-
-    # Function to plot the cumulative returns
-    def plot_cumulative_returns(self, strategy_returns, buy_and_hold_returns, dates):
-        """
-        Plots the cumulative returns of the ARIMA+GARCH strategy against
-        a buy-and-hold strategy.
-
-        Args:
-            strategy_returns (np.array): Cumulative returns from the strategy.
-            buy_and_hold_returns (np.array): Cumulative returns from a buy-and-hold strategy.
-            dates (pd.Index): The dates corresponding to the returns.
-        """
-        plt.figure(figsize=(14, 7))
-        plt.plot(dates, strategy_returns, label="ARIMA+GARCH ", color="blue")
-        plt.plot(dates, buy_and_hold_returns, label="Buy & Hold", color="red")
-        plt.xlabel("Time")
-        plt.ylabel("Cumulative Returns")
-        plt.title(f"ARIMA+GARCH Strategy vs. Buy & Hold on ({self.symbol})")
-        plt.legend()
-        plt.grid(True)
-        plt.show()
 
 
 # *********************************************
 # STATS TEST (Cointegration , Mean Reverting)*
 # *********************************************
 def get_corr(tickers: Union[List[str], Tuple[str, ...]], start: str, end: str) -> None:
-
-
-
-
-    Args:
-        tickers (Union[List[str] , Tuple[str, ...]]):
-        A list or tuple of valid stock tickers (e.g., ['AAPL', 'MSFT', 'GOOG']).
-        start (str): The start date for the historical data in 'YYYY-MM-DD' format.
-        end (str): The end date for the historical data in 'YYYY-MM-DD' format.
-
-    Example:
-        >>> from bbstrader.tseries import get_corr
-        >>> get_corr(['AAPL', 'MSFT', 'GOOG'], '2023-01-01', '2023-12-31')
-    """
-    # Download historical data
-    data = yf.download(tickers, start=start, end=end, multi_level_index=False, auto_adjust=True)
-    data = data["Adj Close"] if "Adj Close" in data.columns else data["Close"]
-
-    # Calculate correlation matrix
-    correlation_matrix = data.corr()
-
-    # Display the matrix
-    print(correlation_matrix)
+    warnings.warn(
+        "`get_corr` is deprecated, use pandas DataFrame's `corr` method instead.",
+        DeprecationWarning,
+    )
 
 
 def plot_price_series(df: pd.DataFrame, ts1: str, ts2: str):
```
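The removed ARIMA+GARCH pipeline (`fit_best_arima`, `fit_garch`, `predict_next_return`, `get_prediction`) is now a set of warning-only stubs pointing at `pmdarima` and `arch`. A hedged migration sketch, not part of the package: the search space mirrors the removed `pm.auto_arima(...)` call, and the final sum reproduces the removed `pred + next_volatility` combination (a mean forecast plus a variance forecast) rather than a recommended construction.

```python
import numpy as np
import pmdarima as pm
from arch import arch_model


def arima_garch_forecast(returns: np.ndarray) -> float:
    """Rough stand-in for the removed get_prediction() pipeline."""
    returns = returns[~(np.isnan(returns) | np.isinf(returns))]
    # Same search space as the removed fit_best_arima().
    model = pm.auto_arima(
        returns, start_p=1, start_q=1, max_p=6, max_q=6,
        seasonal=False, stepwise=True,
    )
    mean_forecast = float(np.asarray(model.predict(n_periods=1))[0])
    # GARCH(1,1) on the ARIMA residuals, as the removed fit_garch() did.
    garch = arch_model(np.asarray(model.resid()), p=1, q=1, rescale=False)
    garch_result = garch.fit(disp="off")
    next_variance = garch_result.forecast(horizon=1).variance.iloc[-1, 0]
    return mean_forecast + next_variance
```

`get_corr` likewise reduces to plain pandas, reusing the download arguments the removed body used:

```python
import yfinance as yf

prices = yf.download(
    ["AAPL", "MSFT", "GOOG"], start="2023-01-01", end="2023-12-31",
    multi_level_index=False, auto_adjust=True,
)["Close"]
print(prices.corr())
```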
```diff
@@ -722,119 +293,22 @@ def run_cadf_test(
     pprint.pprint(cadf)
 
 
-def _hurst(ts):
-    """
-    Returns the Hurst Exponent of the time series vector ts,
-    """
-    # Create the range of lag values
-    lags = range(2, 100)
-
-    # Calculate the array of the variances of the lagged differences
-    tau = [np.sqrt(np.std(np.subtract(ts[lag:], ts[:-lag]))) for lag in lags]
-
-    # Use a linear fit to estimate the Hurst Exponent
-    poly = np.polyfit(np.log(lags), np.log(tau), 1)
-
-    # Return the Hurst exponent from the polyfit output
-    return poly[0] * 2.0
-
-
-# Function to calculate Hurst Exponent
-
-
-def hurst(time_series):
-    H, c, data_range = compute_Hc(time_series, kind="price", simplified=True)
-    return H
-
-
 def run_hurst_test(symbol: str, start: str, end: str):
-
-
-
-    Mean-Reverting, and Trending).
-
-    The Hurst Exponent is used to determine the long-term memory of a time series.
-
-    Args:
-        symbol (str): A valid stock ticker symbol (e.g., 'AAPL').
-        start (str): The start date for the historical data in 'YYYY-MM-DD' format.
-        end (str): The end date for the historical data in 'YYYY-MM-DD' format.
-
-    Example:
-        >>> from bbstrader.tseries import run_hurst_test
-
-        >>> run_hurst_test('AAPL', '2023-01-01', '2023-12-31')
-    """
-    data = yf.download(
-        symbol,
-        start=start,
-        end=end,
-        progress=False,
-        multi_level_index=False,
-        auto_adjust=True,
+    warnings.warn(
+        "`run_hurst_test` is deprecated, use `hurst.compute_Hc` instead.",
+        DeprecationWarning,
     )
 
-    # Create a Geometric Brownian Motion, Mean-Reverting, and Trending Series
-    gbm = np.log(np.cumsum(np.random.randn(100000)) + 1000)
-    mr = np.log(np.random.randn(100000) + 1000)
-    tr = np.log(np.cumsum(np.random.randn(100000) + 1) + 1000)
-
-    # Output the Hurst Exponent for each of the series
-    print(f"\nHurst(GBM): {_hurst(gbm)}")
-    print(f"Hurst(MR): {_hurst(mr)}")
-    print(f"Hurst(TR): {_hurst(tr)}")
-    print(f"Hurst({symbol}): {hurst(data['Close'])}\n")
-
 
 def test_cointegration(ticker1, ticker2, start, end):
-
-
-
-
-        end=end,
-        progress=False,
-        multi_level_index=False,
-        auto_adjust=True,
-    )["Close"].dropna()
-
-    # Perform Johansen cointegration test
-    result = coint_johansen(stock_data_pair, det_order=0, k_ar_diff=1)
-
-    # Get the cointegration rank
-    traces_stats = result.lr1
-    print(f"\nTraces Stats: \n{traces_stats}")
-
-    # Get the critical values for 95% confidence level
-    critical_values = result.cvt
-    print(f"\nCritical Values: \n{critical_values}")
-
-    # Compare the cointegration rank with critical values
-    if traces_stats[0] > critical_values[:, 1].all():
-        print(f"\n{ticker1} and {ticker2} are cointegrated.\n")
-    else:
-        print(f"\nNo cointegration found for {ticker1} and {ticker2}.\n")
+    warnings.warn(
+        "`test_cointegration` is deprecated, see statsmodels.tsa.stattools.coint instead.",
+        DeprecationWarning,
+    )
 
 
 def run_coint_test(tickers: List[str], start: str, end: str) -> None:
-
-    Performs pairwise cointegration tests on a list of stock tickers over a specified date range.
-
-    For each unique pair of tickers, the function downloads historical adjusted closing prices and
-    tests for cointegration.
-
-    Args:
-        tickers (List[str]): A list of valid stock ticker symbols (e.g., ['AAPL', 'MSFT', 'GOOG']).
-        start (str): The start date for the historical data in 'YYYY-MM-DD' format.
-        end (str): The end date for the historical data in 'YYYY-MM-DD' format.
-
-    Example:
-        >>> from bbstrader.tseries import run_coint_test
-
-        >>> run_coint_test(['AAPL', 'MSFT', 'GOOG'], '2023-01-01', '2023-12-31')
-    """
-    # Loop through ticker combinations
-    for ticker1, ticker2 in combinations(tickers, 2):
-        test_cointegration(ticker1, ticker2, start, end)
+    test_cointegration()
 
 
 # *********************************
```
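The replacements these warnings name are essentially one-liners. A sketch mirroring the exact calls the removed helpers made: `compute_Hc(..., kind="price", simplified=True)` and `coint_johansen(..., det_order=0, k_ar_diff=1)` both appear in the removed bodies, while the Engle-Granger `coint` call is the one the `test_cointegration` warning suggests.

```python
import yfinance as yf
from hurst import compute_Hc
from statsmodels.tsa.stattools import coint
from statsmodels.tsa.vector_ar.vecm import coint_johansen

kwargs = dict(start="2023-01-01", end="2023-12-31",
              progress=False, multi_level_index=False, auto_adjust=True)

# run_hurst_test() replacement: Hurst exponent of a single price series.
close = yf.download("AAPL", **kwargs)["Close"]
H, c, _ = compute_Hc(close, kind="price", simplified=True)
print(f"Hurst(AAPL): {H:.3f}")

# test_cointegration() replacement, Engle-Granger version.
pair = yf.download(["AAPL", "MSFT"], **kwargs)["Close"].dropna()
t_stat, p_value, _ = coint(pair["AAPL"], pair["MSFT"])
print(f"Engle-Granger p-value: {p_value:.4f}")

# ...or the Johansen trace test the removed body actually ran.
johansen = coint_johansen(pair, det_order=0, k_ar_diff=1)
print(johansen.lr1)  # trace statistics
print(johansen.cvt)  # critical values (90/95/99%)
```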
```diff
@@ -1086,158 +560,12 @@ class KalmanFilterModel:
         return None
 
 
-# ******************************************
-# ORNSTEIN UHLENBECK PROCESS *
-# ******************************************
-
-
 class OrnsteinUhlenbeck:
-    """
-    The Ornstein-Uhlenbeck process is a mathematical model
-    used to describe the behavior of a mean-reverting stochastic process.
-    We use it to model the price dynamics of an asset that tends
-    to revert to a long-term mean.
-
-    We Estimate the drift (θ), volatility (σ), and long-term mean (μ)
-    based on historical price data; then we Simulate the OU process
-    using the estimated parameters.
-
-    https://en.wikipedia.org/wiki/Ornstein%E2%80%93Uhlenbeck_process
-    """
-
     def __init__(self, prices: np.ndarray, returns: bool = True, timeframe: str = "D1"):
-
-
-
-        Args:
-            prices (np.ndarray) : Historical close prices.
-
-            retrurns (bool) : Use it to indicate weither
-            you want to simulate the returns or your raw data
-
-            timeframe (str) : The time frame for the Historical prices
-            (1m, 5m, 15m, 30m, 1h, 4h, D1)
-        """
-        self.prices = prices
-        if returns:
-            series = pd.Series(self.prices)
-            self.returns = series.pct_change().dropna().values
-        else:
-            self.returns = self.prices
-
-        time_frame_mapping = {
-            "1m": 1 / (24 * 60),  # 1 minute intervals
-            "5m": 5 / (24 * 60),  # 5 minute intervals
-            "15m": 15 / (24 * 60),  # 15 minute intervals
-            "30m": 30 / (24 * 60),  # 30 minute intervals
-            "1h": 1 / 24,  # 1 hour intervals
-            "4h": 4 / 24,  # 4 hour intervals
-            "D1": 1,  # Daily intervals
-        }
-        if timeframe not in time_frame_mapping:
-            raise ValueError("Unsupported time frame")
-        self.tf = time_frame_mapping[timeframe]
-
-        params = self.estimate_parameters()
-        self.mu_hat = params[0]  # Mean (μ)
-        self.theta_hat = params[1]  # Drift (θ)
-        self.sigma_hat = params[2]  # Volatility (σ)
-        print(f"Estimated μ: {self.mu_hat}")
-        print(f"Estimated θ: {self.theta_hat}")
-        print(f"Estimated σ: {self.sigma_hat}")
-
-    def ornstein_uhlenbeck(self, mu, theta, sigma, dt, X0, n):
-        """
-        Simulates the Ornstein-Uhlenbeck process.
-
-        Args:
-            mu (float): Estimated long-term mean.
-            theta (float): Estimated drift.
-            sigma (float): Estimated volatility.
-            dt (float): Time step.
-            X0 (float): Initial value.
-            n (int): Number of time steps.
-
-        Returns:
-            np.ndarray : Simulated process.
-        """
-        x = np.zeros(n)
-        x[0] = X0
-        for t in range(1, n):
-            dW = np.random.normal(loc=0, scale=np.sqrt(dt))
-            # O-U process differential equation
-            x[t] = x[t - 1] + (theta * (mu - x[t - 1]) * dt) + (sigma * dW)
-            # dW is a Wiener process
-            # (theta * (mu - x[t-1]) * dt) represents the mean-reverting tendency
-            # (sigma * dW) represents the random volatility
-        return x
-
-    def estimate_parameters(self):
-        """
-        Estimates the mean-reverting parameters (μ, θ, σ)
-        using the negative log-likelihood.
-
-        Returns:
-            Tuple: Estimated μ, θ, and σ.
-        """
-        initial_guess = [0, 0.1, np.std(self.returns)]
-        result = minimize(self._neg_log_likelihood, initial_guess, args=(self.returns,))
-        mu, theta, sigma = result.x
-        return mu, theta, sigma
-
-    def _neg_log_likelihood(self, params, returns):
-        """
-        Calculates the negative
-        log-likelihood for parameter estimation.
-
-        Args:
-            params (list): List of parameters [mu, theta, sigma].
-            returns (np.ndarray): Historical returns.
-
-        Returns:
-            float: Negative log-likelihood.
-        """
-        mu, theta, sigma = params
-        dt = self.tf
-        n = len(returns)
-        ou_simulated = self.ornstein_uhlenbeck(mu, theta, sigma, dt, 0, n + 1)
-        residuals = ou_simulated[1 : n + 1] - returns
-        neg_ll = 0.5 * np.sum(residuals**2) / sigma**2 + 0.5 * n * np.log(
-            2 * np.pi * sigma**2
+        warnings.warn(
+            "`OrnsteinUhlenbeck` is deprecated, use `statsmodels.tsa` instead.",
+            DeprecationWarning,
         )
-        return neg_ll
-
-    def simulate_process(self, returns=None, n=100, p=None):
-        """
-        Simulates the OU process multiple times .
-
-        Args:
-            returns (np.ndarray): Historical returns.
-            n (int): Number of simulations to perform.
-            p (int): Number of time steps.
-
-        Returns:
-            np.ndarray: 2D array representing simulated processes.
-        """
-        if returns is None:
-            returns = self.returns
-        if p is not None:
-            T = p
-        else:
-            T = len(returns)
-        dt = self.tf
-
-        dW_matrix = np.random.normal(loc=0, scale=np.sqrt(dt), size=(n, T))
-        simulations_matrix = np.zeros((n, T))
-        simulations_matrix[:, 0] = returns[-1]
-
-        for t in range(1, T):
-            simulations_matrix[:, t] = (
-                simulations_matrix[:, t - 1]
-                + self.theta_hat * (self.mu_hat - simulations_matrix[:, t - 1]) * dt
-                + self.sigma_hat * dW_matrix[:, t]
-            )
-        return simulations_matrix
 
 
 def remove_correlated_assets(df: pd.DataFrame, cutoff=0.99):
```