bbstrader 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of bbstrader might be problematic.
- bbstrader/__ini__.py +17 -0
- bbstrader/btengine/__init__.py +50 -0
- bbstrader/btengine/backtest.py +900 -0
- bbstrader/btengine/data.py +374 -0
- bbstrader/btengine/event.py +201 -0
- bbstrader/btengine/execution.py +83 -0
- bbstrader/btengine/performance.py +309 -0
- bbstrader/btengine/portfolio.py +326 -0
- bbstrader/btengine/strategy.py +31 -0
- bbstrader/metatrader/__init__.py +6 -0
- bbstrader/metatrader/account.py +1038 -0
- bbstrader/metatrader/rates.py +226 -0
- bbstrader/metatrader/risk.py +626 -0
- bbstrader/metatrader/trade.py +1296 -0
- bbstrader/metatrader/utils.py +669 -0
- bbstrader/models/__init__.py +6 -0
- bbstrader/models/risk.py +349 -0
- bbstrader/strategies.py +681 -0
- bbstrader/trading/__init__.py +4 -0
- bbstrader/trading/execution.py +965 -0
- bbstrader/trading/run.py +131 -0
- bbstrader/trading/utils.py +153 -0
- bbstrader/tseries.py +592 -0
- bbstrader-0.0.1.dist-info/LICENSE +21 -0
- bbstrader-0.0.1.dist-info/METADATA +132 -0
- bbstrader-0.0.1.dist-info/RECORD +28 -0
- bbstrader-0.0.1.dist-info/WHEEL +5 -0
- bbstrader-0.0.1.dist-info/top_level.txt +1 -0
bbstrader/tseries.py
ADDED
@@ -0,0 +1,592 @@
"""
The `tseries` module is designed for conducting
advanced time series analysis in financial markets.
It leverages statistical models and algorithms to perform
tasks such as cointegration testing, volatility modeling,
and filter-based estimation to assist in trading strategy development,
market analysis, and financial data exploration.
"""

import numpy as np
import pandas as pd
import yfinance as yf
from arch import arch_model
from statsmodels.tsa.arima.model import ARIMA
import pmdarima as pm
import matplotlib.pyplot as plt
import statsmodels.api as sm
import statsmodels.tsa.stattools as ts
from numpy import cumsum, log, polyfit, sqrt, std, subtract
from numpy.random import randn
from hurst import compute_Hc
from filterpy.kalman import KalmanFilter
from statsmodels.tsa.vector_ar.vecm import coint_johansen
from itertools import combinations
from typing import Union, List, Tuple
import pprint
import warnings

warnings.filterwarnings("ignore")

# *******************************************
#          ARIMA AND GARCH MODELS           *
# *******************************************

__all__ = [
    "load_and_prepare_data",
    "fit_best_arima",
    "fit_garch",
    "predict_next_return",
    "get_prediction",
    "get_corr",
    "run_cadf_test",
    "run_hurst_test",
    "run_coint_test",
    "run_kalman_filter"
]


def load_and_prepare_data(df: pd.DataFrame):
    """
    Prepares financial time series data for analysis.

    This function takes a pandas DataFrame containing financial data,
    calculates logarithmic returns, and the first difference
    of these logarithmic returns. It handles missing values
    by filling them with zeros.

    Args:
        df (pd.DataFrame): DataFrame containing at least
            a `Close` column with closing prices of a financial asset.

    Returns:
        pd.DataFrame: DataFrame with additional
            columns for logarithmic returns (`log_return`)
            and the first difference of logarithmic returns (`diff_log_return`),
            with `NaN` values filled with `0`.
    """
    # Work on a copy so the caller's DataFrame is left untouched
    data = df.copy()
    # Calculate logarithmic returns
    data['log_return'] = np.log(data['Close'] / data['Close'].shift(1))
    # First difference of the log returns
    data['diff_log_return'] = data['log_return'].diff()
    # Fill the NaN values created by shifting and differencing with 0
    data.fillna(0, inplace=True)
    return data
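
For illustration, a minimal usage sketch (not part of the package; the ticker and dates below are placeholder assumptions):

# Hedged usage sketch -- 'SPY' and the dates are placeholder assumptions.
import yfinance as yf
from bbstrader.tseries import load_and_prepare_data

raw = yf.download('SPY', start='2023-01-01', end='2023-12-31')
prepared = load_and_prepare_data(raw)
print(prepared[['Close', 'log_return', 'diff_log_return']].head())
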

def fit_best_arima(window_data: pd.Series | np.ndarray):
    """
    Identifies and fits the best `ARIMA` model
    based on the Akaike Information Criterion `(AIC)`.

    Uses `pmdarima`'s stepwise `auto_arima` search over the `p` and `q`
    parameters (within the specified ranges) to find the order with the
    lowest `AIC`, then refits that order with `statsmodels`' `ARIMA`.

    Args:
        window_data (pd.Series or np.ndarray):
            Time series data to fit the `ARIMA` model on.

    Returns:
        ARIMA result object: The fitted `ARIMA` model with the lowest `AIC`.
    """
    model = pm.auto_arima(
        window_data,
        start_p=1,
        start_q=1,
        max_p=6,
        max_q=6,
        seasonal=False,
        stepwise=True
    )
    final_order = model.order
    from statsmodels.tools.sm_exceptions import ConvergenceWarning
    warnings.filterwarnings("ignore", category=ConvergenceWarning)
    best_arima_model = ARIMA(
        window_data, order=final_order, missing='drop').fit()
    return best_arima_model

def fit_garch(window_data: pd.Series | np.ndarray):
    """
    Fits an `ARIMA` model to the data to get residuals,
    then fits a `GARCH(1,1)` model on these residuals.

    Utilizes the residuals from the best `ARIMA` model fit
    to model volatility with a `GARCH(1,1)` model.

    Args:
        window_data (pd.Series or np.ndarray):
            Time series data for which to fit the `ARIMA` and `GARCH` models.

    Returns:
        tuple: A tuple containing the `ARIMA` result
            object and the `GARCH` result object.
    """
    arima_result = fit_best_arima(window_data)
    resid = np.asarray(arima_result.resid)
    # Drop NaN/inf residuals before fitting the GARCH model
    resid = resid[~(np.isnan(resid) | np.isinf(resid))]
    garch_model = arch_model(resid, p=1, q=1, rescale=False)
    garch_result = garch_model.fit(disp='off')
    return arima_result, garch_result
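
As a quick diagnostic (illustrative only; the synthetic return series below is an assumption, not package code), the standardized GARCH residuals should look roughly like white noise:

# Hedged diagnostic sketch: synthetic returns stand in for real data.
import numpy as np
from bbstrader.tseries import fit_garch

returns = np.random.randn(500) * 0.01      # placeholder return series
arima_res, garch_res = fit_garch(returns)
# Standardized residuals: raw residuals over conditional volatility
std_resid = garch_res.resid / garch_res.conditional_volatility
print("Std-resid mean:", std_resid.mean(), "variance:", std_resid.var())
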

def predict_next_return(arima_result, garch_result):
    """
    Predicts the next return value using fitted `ARIMA` and `GARCH` models.

    Combines the next-period mean forecast from the `ARIMA` model
    with the next-period variance forecast from the `GARCH` model
    to predict the next return value.

    Args:
        arima_result (ARIMA result object): The fitted `ARIMA` model result.
        garch_result (ARCH result object): The fitted `GARCH` model result.

    Returns:
        float: The predicted next return, adjusted for predicted volatility.
    """
    # Predict the next value with ARIMA
    arima_pred = arima_result.forecast(steps=1)
    # Predict the next variance with GARCH
    garch_pred = garch_result.forecast(horizon=1)
    next_volatility = garch_pred.variance.iloc[-1, 0]

    # Combine the mean forecast with the variance forecast
    # (np.asarray handles both Series and ndarray forecast outputs)
    next_return = np.asarray(arima_pred)[0] + next_volatility
    return next_return


def get_prediction(window_data: pd.Series | np.ndarray):
    """
    Orchestrator function to get the next period's return prediction.

    This function ties together the process of fitting
    both `ARIMA` and `GARCH` models on the provided data
    and then predicting the next period's return using these models.

    Args:
        window_data (pd.Series or np.ndarray):
            Time series data to fit the models and predict the next return.

    Returns:
        float: Predicted next return value.
    """
    arima_result, garch_result = fit_garch(window_data)
    prediction = predict_next_return(arima_result, garch_result)
    return prediction
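
A rolling one-step-ahead loop is the typical way to use `get_prediction`; the sketch below is illustrative (the ticker, dates, and 252-day window are assumptions) and slow, since `auto_arima` refits on every step:

# Hedged rolling-forecast sketch; window size and ticker are assumptions.
import yfinance as yf
from bbstrader.tseries import load_and_prepare_data, get_prediction

data = load_and_prepare_data(
    yf.download('SPY', start='2022-01-01', end='2023-12-31'))
returns = data['log_return'].values
window_size = 252                      # placeholder: one trading year
predictions = []
for t in range(window_size, len(returns)):
    predictions.append(get_prediction(returns[t - window_size:t]))
print(f"Produced {len(predictions)} one-step-ahead forecasts")
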

# *********************************************
# STATS TESTS (Cointegration, Mean Reversion) *
# *********************************************
def get_corr(tickers: List[str] | Tuple[str, ...], start: str, end: str) -> None:
    """
    Calculates and prints the correlation matrix of the adjusted closing prices
    for a given list of stock tickers within a specified date range.

    Args:
        tickers (List[str] | Tuple[str, ...]):
            A list or tuple of valid stock tickers (e.g., ['AAPL', 'MSFT', 'GOOG']).
        start (str): The start date for the historical data in 'YYYY-MM-DD' format.
        end (str): The end date for the historical data in 'YYYY-MM-DD' format.

    Example:
        >>> from bbstrader.tseries import get_corr
        >>> get_corr(['AAPL', 'MSFT', 'GOOG'], '2023-01-01', '2023-12-31')
    """
    # Download historical data
    data = yf.download(tickers, start=start, end=end)['Adj Close']

    # Calculate the correlation matrix
    correlation_matrix = data.corr()

    # Display the matrix
    print(correlation_matrix)

def plot_price_series(df: pd.DataFrame, ts1: str, ts2: str):
    """
    Plot both time series on the same line graph for
    the specified date range.

    Args:
        df (pd.DataFrame):
            The DataFrame containing prices for each series
        ts1 (str): The first time series column name
        ts2 (str): The second time series column name
    """
    fig, ax = plt.subplots()
    ax.plot(df.index, df[ts1], label=ts1)
    ax.plot(df.index, df[ts2], label=ts2)

    fig.autofmt_xdate()
    plt.xlabel('Month/Year')
    plt.ylabel('Price ($)')
    plt.title(f'{ts1} and {ts2} Daily Prices')
    plt.legend()
    plt.show()

def plot_scatter_series(df: pd.DataFrame, ts1: str, ts2: str, results):
    """
    Plot a scatter plot of both time series using
    the provided DataFrame, with the fitted OLS regression line.

    Args:
        df (pd.DataFrame):
            The DataFrame containing prices for each series
        ts1 (str): The first time series column name
        ts2 (str): The second time series column name
        results: A fitted statsmodels OLS results object whose
            fitted values are drawn as the regression line
    """
    plt.xlabel(f'{ts1} Price ($)')
    plt.ylabel(f'{ts2} Price ($)')
    plt.title(f'{ts1} and {ts2} Price Scatterplot')
    plt.scatter(df[ts1], df[ts2])

    # Plot the regression line
    plt.plot(df[ts1], results.fittedvalues,
             linestyle='--', color='red', linewidth=2,
             label='Regression Line'
             )
    plt.legend()
    plt.show()

def plot_residuals(df: pd.DataFrame):
    """
    Plot the residuals of the OLS procedure for both
    time series.

    Args:
        df (pd.DataFrame):
            The DataFrame containing prices for each series
            and a `res` column holding the OLS residuals
    """
    fig, ax = plt.subplots()
    ax.plot(df.index, df["res"], label="Residuals")

    fig.autofmt_xdate()
    plt.xlabel('Month/Year')
    plt.ylabel('Price ($)')
    plt.title('Residual Plot')
    plt.legend()
    plt.show()

def run_cadf_test(pair: List[str] | Tuple[str, ...], start: str, end: str) -> None:
    """
    Performs the Cointegration Augmented Dickey-Fuller (CADF) test on a pair of stock tickers
    over a specified date range to check for cointegration.

    The function downloads historical adjusted closing prices for the specified pair of stock tickers,
    calculates the optimal hedge ratio (beta) using Ordinary Least Squares (OLS) regression, plots the
    time series and their residuals, and finally performs the CADF test on the residuals.

    Args:
        pair (List[str] or Tuple[str, ...]):
            A list or tuple containing two valid stock tickers (e.g., ['AAPL', 'MSFT']).
        start (str): The start date for the historical data in 'YYYY-MM-DD' format.
        end (str): The end date for the historical data in 'YYYY-MM-DD' format.

    Example:
        >>> from bbstrader.tseries import run_cadf_test
        >>> run_cadf_test(['AAPL', 'MSFT'], '2023-01-01', '2023-12-31')
        Regression Metrics:
        Optimal Hedge Ratio (Beta): 2.2485845594120333
        Result Params:
        const   -74.418034
        AAPL      2.248585
        dtype: float64

        Regression Summary:
                                    OLS Regression Results
        ==============================================================================
        Dep. Variable:                   MSFT   R-squared:                       0.900
        Model:                            OLS   Adj. R-squared:                  0.900
        Method:                 Least Squares   F-statistic:                     2244.
        Date:                Sat, 20 Jul 2024   Prob (F-statistic):          2.95e-126
        Time:                        13:36:58   Log-Likelihood:                -996.45
        No. Observations:                 250   AIC:                             1997.
        Df Residuals:                     248   BIC:                             2004.
        Df Model:                           1
        Covariance Type:            nonrobust
        ==============================================================================
                         coef    std err          t      P>|t|      [0.025      0.975]
        ------------------------------------------------------------------------------
        const        -74.4180      8.191     -9.085      0.000     -90.551     -58.286
        AAPL           2.2486      0.047     47.369      0.000       2.155       2.342
        ==============================================================================
        Omnibus:                        4.923   Durbin-Watson:                   0.121
        Prob(Omnibus):                  0.085   Jarque-Bera (JB):                4.862
        Skew:                           0.342   Prob(JB):                       0.0879
        Kurtosis:                       2.993   Cond. No.                     1.71e+03
        ==============================================================================

        Notes:
        [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
        [2] The condition number is large, 1.71e+03. This might indicate that there are
        strong multicollinearity or other numerical problems.

        Cointegration TEST Results:
        (np.float64(-3.204126144947765),
         np.float64(0.019747080611767602),
         0,
         249,
         {'1%': np.float64(-3.4568881317725864),
          '10%': np.float64(-2.5729936189738876),
          '5%': np.float64(-2.8732185133016057)},
         np.float64(1364.3866758546171))
    """
    # Download historical data for the required stocks
    p0, p1 = pair[0], pair[1]
    _p0 = yf.download(p0, start=start, end=end)
    _p1 = yf.download(p1, start=start, end=end)
    df = pd.DataFrame(index=_p0.index)
    df[p0] = _p0["Adj Close"]
    df[p1] = _p1["Adj Close"]
    df = df.dropna()

    # Calculate the optimal hedge ratio "beta" using statsmodels OLS
    X = sm.add_constant(df[p0])
    y = df[p1]
    model = sm.OLS(y, X)
    results = model.fit()
    beta_hr = results.params[p0]

    # Plot the two price series
    plot_price_series(df, p0, p1)

    # Display a scatter plot of the two time series
    # with the OLS regression line
    plot_scatter_series(df, p0, p1, results)

    # Calculate and plot the residuals of the linear combination
    df["res"] = results.resid
    plot_residuals(df)

    # Display regression metrics
    print("\nRegression Metrics:")
    print(f"Optimal Hedge Ratio (Beta): {beta_hr}")
    print('Result Params: \n')
    print(results.params)
    print("\nRegression Summary:")
    print(results.summary())

    # Calculate and output the CADF test on the residuals
    print("\nCointegration TEST Results:")
    cadf = ts.adfuller(df["res"], autolag='AIC')
    pprint.pprint(cadf)
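
For reference, `adfuller` returns (test statistic, p-value, lags used, nobs, critical values, information criterion); a minimal hedged helper (not part of the module) for turning that tuple into a stationarity verdict might read:

# Hedged sketch: reject the unit-root null when the statistic is more
# negative than the chosen critical value.
import statsmodels.tsa.stattools as ts

def residuals_look_stationary(res, level: str = '5%') -> bool:
    stat, pvalue, _, _, crit, _ = ts.adfuller(res, autolag='AIC')
    return stat < crit[level]
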

def _hurst(ts):
    """
    Returns the Hurst Exponent of the time series vector ts.
    """
    # Create the range of lag values
    lags = range(2, 100)

    # Calculate the array of the variances of the lagged differences
    tau = [sqrt(std(subtract(ts[lag:], ts[:-lag]))) for lag in lags]

    # Use a linear fit to estimate the Hurst Exponent: the std of the
    # lagged differences scales as lag**H, so the slope of log(sqrt(std))
    # against log(lag) is H/2
    poly = polyfit(log(lags), log(tau), 1)

    # Return the Hurst exponent from the polyfit output
    return poly[0] * 2.0

def hurst(time_series):
    """Calculate the Hurst Exponent using the `hurst` package."""
    H, c, data_range = compute_Hc(time_series, kind='price', simplified=True)
    return H

def run_hurst_test(symbol: str, start: str, end: str):
    """
    Calculates and prints the Hurst Exponent for a given stock's adjusted closing prices
    within a specified date range, and for three generated series (Geometric Brownian Motion,
    Mean-Reverting, and Trending).

    The Hurst Exponent is used to determine the long-term memory of a time series.

    Args:
        symbol (str): A valid stock ticker symbol (e.g., 'AAPL').
        start (str): The start date for the historical data in 'YYYY-MM-DD' format.
        end (str): The end date for the historical data in 'YYYY-MM-DD' format.

    Example:
        >>> from bbstrader.tseries import run_hurst_test
        >>> run_hurst_test('AAPL', '2023-01-01', '2023-12-31')
    """
    data = yf.download(symbol, start=start, end=end)

    # Create a Geometric Brownian Motion, Mean-Reverting, and Trending Series
    gbm = log(cumsum(randn(100000)) + 1000)
    mr = log(randn(100000) + 1000)
    tr = log(cumsum(randn(100000) + 1) + 1000)

    # Output the Hurst Exponent for each of the series
    print(f"\nHurst(GBM): {_hurst(gbm)}")
    print(f"Hurst(MR): {_hurst(mr)}")
    print(f"Hurst(TR): {_hurst(tr)}")
    print(f"Hurst({symbol}): {hurst(data['Adj Close'])}\n")

def test_cointegration(ticker1, ticker2, start, end):
    """Run the Johansen cointegration test on one pair of tickers."""
    # Download historical data
    stock_data_pair = yf.download(
        [ticker1, ticker2], start=start, end=end
    )['Adj Close'].dropna()

    # Perform the Johansen cointegration test
    result = coint_johansen(stock_data_pair, det_order=0, k_ar_diff=1)

    # Trace statistics for H0: rank <= r
    traces_stats = result.lr1
    print(f"\nTraces Stats: \n{traces_stats}")

    # Critical values at the 90%/95%/99% confidence levels (columns 0/1/2)
    critical_values = result.cvt
    print(f"\nCritical Values: \n{critical_values}")

    # Compare the first trace statistic with its 95% critical value
    if traces_stats[0] > critical_values[0, 1]:
        print(f"\n{ticker1} and {ticker2} are cointegrated.\n")
    else:
        print(f"\nNo cointegration found for {ticker1} and {ticker2}.\n")

def run_coint_test(tickers: List[str], start: str, end: str) -> None:
    """
    Performs pairwise cointegration tests on a list of stock tickers over a specified date range.

    For each unique pair of tickers, the function downloads historical adjusted closing prices and
    tests for cointegration.

    Args:
        tickers (List[str]): A list of valid stock ticker symbols (e.g., ['AAPL', 'MSFT', 'GOOG']).
        start (str): The start date for the historical data in 'YYYY-MM-DD' format.
        end (str): The end date for the historical data in 'YYYY-MM-DD' format.

    Example:
        >>> from bbstrader.tseries import run_coint_test
        >>> run_coint_test(['AAPL', 'MSFT', 'GOOG'], '2023-01-01', '2023-12-31')
    """
    # Loop through all unique ticker combinations
    for ticker1, ticker2 in combinations(tickers, 2):
        test_cointegration(ticker1, ticker2, start, end)

# *********************************
#          KALMAN FILTER          *
# *********************************
def draw_date_coloured_scatterplot(etfs, prices):
    """
    Create a scatterplot of the two ETF prices, which is
    coloured by the date of the price to indicate the
    changing relationship between the sets of prices.
    """
    plen = len(prices)
    colour_map = plt.cm.YlOrRd
    colours = np.linspace(0.1, 1, plen)

    scatterplot = plt.scatter(
        prices[etfs[0]], prices[etfs[1]],
        s=30, c=colours, cmap=colour_map,
        edgecolor='k', alpha=0.8
    )

    colourbar = plt.colorbar(scatterplot)
    colourbar.ax.set_yticklabels(
        [str(p.date()) for p in prices[::plen // 9].index]
    )

    plt.xlabel(prices.columns[0])
    plt.ylabel(prices.columns[1])
    plt.show()

def calc_slope_intercept_kalman(etfs, prices):
    """
    Utilize the Kalman Filter from the filterpy library
    to calculate the slope and intercept of the regressed
    ETF prices.
    """
    delta = 1e-5
    trans_cov = delta / (1 - delta) * np.eye(2)

    kf = KalmanFilter(dim_x=2, dim_z=1)
    kf.x = np.zeros((2, 1))          # Initial state (slope, intercept)
    kf.P = np.ones((2, 2)) * 1000.   # Initial covariance,
    # large to represent high uncertainty
    kf.F = np.eye(2)                 # State transition matrix
    kf.Q = trans_cov                 # Process noise covariance
    kf.R = 1.                        # Scalar measurement noise covariance

    state_means, state_covs = [], []
    for time, z in enumerate(prices[etfs[1]].values):
        # Dynamically update the observation matrix H
        # to include the current independent variable
        kf.H = np.array([[prices[etfs[0]].iloc[time], 1.0]])
        kf.predict()
        kf.update(z)
        state_means.append(kf.x.copy())
        state_covs.append(kf.P.copy())

    return np.array(state_means), np.array(state_covs)
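
The filtered states can be turned into a dynamically hedged spread; the following sketch is illustrative only (the ETF pair and dates are placeholder assumptions, and it is not part of the module):

# Hedged sketch: build a dynamic spread from the filtered slope/intercept.
import pandas as pd
import yfinance as yf
from bbstrader.tseries import calc_slope_intercept_kalman

etfs = ['SPY', 'QQQ']                                    # placeholder pair
prices = pd.DataFrame({
    t: yf.download(t, start='2023-01-01', end='2023-12-31')['Adj Close']
    for t in etfs
}).dropna()

state_means, _ = calc_slope_intercept_kalman(etfs, prices)
slope = state_means[:, 0].flatten()
intercept = state_means[:, 1].flatten()
# Spread: observed dependent price minus its time-varying hedge estimate
spread = prices[etfs[1]].values - (slope * prices[etfs[0]].values + intercept)
print(pd.Series(spread, index=prices.index).describe())
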

def draw_slope_intercept_changes(prices, state_means):
    """
    Plot the slope and intercept of the regressed ETF prices
    between the two ETFs, with the changing values of the
    Kalman Filter over time.
    """
    print(f"First slope : {state_means[0, 0]}")
    print(f"First intercept : {state_means[0, 1]}")
    pd.DataFrame({
        'slope': state_means[:, 0].flatten(),
        'intercept': state_means[:, 1].flatten()
    }, index=prices.index
    ).plot(subplots=True)
    plt.show()

def run_kalman_filter(etfs: List[str] | Tuple[str, ...], start: str, end: str) -> None:
    """
    Applies a Kalman filter to a pair of ETF adjusted closing prices within a specified date range
    to estimate the slope and intercept over time.

    The function downloads historical adjusted closing prices for the specified pair of ETFs,
    visualizes their price relationship, calculates the Kalman filter estimates for the slope and
    intercept, and visualizes the changes in these estimates over time.

    Args:
        etfs (List[str] or Tuple[str, ...]):
            A list or tuple containing two valid ETF tickers (e.g., ['SPY', 'QQQ']).
        start (str): The start date for the historical data in 'YYYY-MM-DD' format.
        end (str): The end date for the historical data in 'YYYY-MM-DD' format.

    Example:
        >>> from bbstrader.tseries import run_kalman_filter
        >>> run_kalman_filter(['SPY', 'QQQ'], '2023-01-01', '2023-12-31')
    """
    etf_df1 = yf.download(etfs[0], start, end)
    etf_df2 = yf.download(etfs[1], start, end)

    prices = pd.DataFrame(index=etf_df1.index)
    prices[etfs[0]] = etf_df1["Adj Close"]
    prices[etfs[1]] = etf_df2["Adj Close"]

    draw_date_coloured_scatterplot(etfs, prices)
    state_means, state_covs = calc_slope_intercept_kalman(etfs, prices)
    draw_slope_intercept_changes(prices, state_means)
bbstrader-0.0.1.dist-info/LICENSE
ADDED
@@ -0,0 +1,21 @@
The MIT License (MIT)

Copyright (c) 2023-2024 Bertin Balouki SIMYELI

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.