bbstrader 0.1.6__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff shows the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.

bbstrader/tseries.py CHANGED
@@ -6,9 +6,10 @@ tasks such as cointegration testing, volatility modeling,
  and filter-based estimation to assist in trading strategy development,
  market analysis, and financial data exploration.
  """
-
  import numpy as np
  import pandas as pd
+ import pprint
+ import warnings
  import yfinance as yf
  from arch import arch_model
  from statsmodels.tsa.arima.model import ARIMA
@@ -19,17 +20,19 @@ import statsmodels.tsa.stattools as ts
  from numpy import cumsum, log, polyfit, sqrt, std, subtract
  from numpy.random import randn
  from hurst import compute_Hc
+ from scipy.optimize import minimize
  from filterpy.kalman import KalmanFilter
  from statsmodels.tsa.vector_ar.vecm import coint_johansen
+ from statsmodels.graphics.tsaplots import plot_acf
  from itertools import combinations
  from typing import Union, List, Tuple
- import pprint
- import warnings
+ from statsmodels.stats.diagnostic import acorr_ljungbox
+ from arch.utility.exceptions import ConvergenceWarning as ArchWarning
+ from statsmodels.tools.sm_exceptions import ConvergenceWarning as StatsWarning

  warnings.filterwarnings("ignore")
+ warnings.filterwarnings("ignore", category=StatsWarning, module='statsmodels')
+ warnings.filterwarnings("ignore", category=ArchWarning, module='arch')

- # *******************************************
- # ARIMA AND GARCH MODELS *
- # *******************************************

  __all__ = [
      "load_and_prepare_data",
@@ -41,9 +44,17 @@ __all__ = [
      "run_cadf_test",
      "run_hurst_test",
      "run_coint_test",
-     "run_kalman_filter"
+     "run_kalman_filter",
+     "ArimaGarchModel",
+     "KalmanFilterModel",
+     "OrnsteinUhlenbeckModel"
  ]

+ # *******************************************
+ # ARIMA AND GARCH MODELS *
+ # *******************************************
+
+
  def load_and_prepare_data(df: pd.DataFrame):
      """
      Prepares financial time series data for analysis.
@@ -74,7 +85,7 @@ def load_and_prepare_data(df: pd.DataFrame):
      return data


- def fit_best_arima(window_data: Union[pd.Series , np.ndarray]):
+ def fit_best_arima(window_data: Union[pd.Series, np.ndarray]):
      """
      Identifies and fits the best `ARIMA` model
      based on the Akaike Information Criterion `(AIC)`.
@@ -91,6 +102,11 @@ def fit_best_arima(window_data: Union[pd.Series , np.ndarray]):
      Returns:
          ARIMA result object: The fitted `ARIMA` model with the lowest `AIC`.
      """
+     if isinstance(window_data, pd.Series):
+         window_data = window_data.values
+
+     window_data = window_data[~(np.isnan(window_data) | np.isinf(window_data))]
+     # Fit ARIMA model with best parameters
      model = pm.auto_arima(
          window_data,
          start_p=1,
@@ -101,15 +117,21 @@ def fit_best_arima(window_data: Union[pd.Series , np.ndarray]):
          stepwise=True
      )
      final_order = model.order
-     import warnings
-     from statsmodels.tools.sm_exceptions import ConvergenceWarning
-     warnings.filterwarnings("ignore", category=ConvergenceWarning)
-     best_arima_model = ARIMA(
-         window_data, order=final_order, missing='drop').fit()
-     return best_arima_model
-
-
- def fit_garch(window_data: Union[pd.Series , np.ndarray]):
+     try:
+         best_arima_model = ARIMA(
+             window_data + 1e-5, order=final_order, missing='drop').fit()
+         return best_arima_model
+     except np.linalg.LinAlgError:
+         # Catch specific linear algebra errors
+         print("LinAlgError occurred, skipping this data point.")
+         return None
+     except Exception as e:
+         # Catch any other unexpected errors and log them
+         print(f"An error occurred: {e}")
+         return None
+
+
+ def fit_garch(window_data: Union[pd.Series, np.ndarray]):
      """
      Fits an `ARIMA` model to the data to get residuals,
      then fits a `GARCH(1,1)` model on these residuals.
@@ -126,6 +148,8 @@ def fit_garch(window_data: Union[pd.Series , np.ndarray]):
          object and the `GARCH` result object.
      """
      arima_result = fit_best_arima(window_data)
+     if arima_result is None:
+         return None, None
      resid = np.asarray(arima_result.resid)
      resid = resid[~(np.isnan(resid) | np.isinf(resid))]
      garch_model = arch_model(resid, p=1, q=1, rescale=False)
@@ -148,6 +172,8 @@ def predict_next_return(arima_result, garch_result):
      Returns:
          float: The predicted next return, adjusted for predicted volatility.
      """
+     if arima_result is None or garch_result is None:
+         return 0
      # Predict next value with ARIMA
      arima_pred = arima_result.forecast(steps=1)
      # Predict next volatility with GARCH
@@ -155,11 +181,14 @@
      next_volatility = garch_pred.variance.iloc[-1, 0]

      # Combine predictions (return + volatility)
-     next_return = arima_pred.values[0] + next_volatility
-     return next_return
+     if not isinstance(arima_pred, np.ndarray):
+         pred = arima_pred.values[0]
+     else:
+         pred = arima_pred[0]
+     return pred + next_volatility


- def get_prediction(window_data: Union[pd.Series , np.ndarray]):
+ def get_prediction(window_data: Union[pd.Series, np.ndarray]):
      """
      Orchestrator function to get the next period's return prediction.

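With this change, a failed ARIMA fit propagates as None through fit_garch and surfaces from predict_next_return as 0 instead of raising. A minimal sketch of the pipeline on synthetic data (the seed and window size are illustrative, not part of this diff):

import numpy as np
from bbstrader.tseries import fit_garch, predict_next_return

rng = np.random.default_rng(0)
window = rng.normal(0.0, 0.01, 252)  # one synthetic year of daily returns

arima_res, garch_res = fit_garch(window)              # (None, None) if the ARIMA fit failed
forecast = predict_next_return(arima_res, garch_res)  # 0 when either model is None
print(f"next-period forecast: {forecast}")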
@@ -179,10 +208,274 @@ def get_prediction(window_data: Union[pd.Series , np.ndarray]):
      return prediction


+ class ArimaGarchModel():
+     """
+     This class implements a time series model
+     that combines `ARIMA (AutoRegressive Integrated Moving Average)`
+     and `GARCH (Generalized Autoregressive Conditional Heteroskedasticity)` models
+     to predict future returns based on historical price data.
+
+     The model is implemented in the following steps:
+     1. Data Preparation: Load and prepare the historical price data.
+     2. Modeling: Fit the ARIMA model to the data and then fit the GARCH model to the residuals.
+     3. Prediction: Predict the next return using the ARIMA model and the next volatility using the GARCH model.
+     4. Trading Strategy: Execute the trading strategy based on the predictions.
+     5. Vectorized Backtesting: Backtest the trading strategy using the historical data.
+
+     Example:
+         >>> import yfinance as yf
+         >>> from bbstrader.strategies import ArimaGarchModel
+         >>> from bbstrader.tseries import load_and_prepare_data
+
+         >>> if __name__ == '__main__':
+         >>>     # ARCH SPY Vectorized Backtest
+         >>>     k = 252
+         >>>     data = yf.download("SPY", start="2004-01-02", end="2015-12-31")
+         >>>     arch = ArimaGarchModel("SPY", data, k=k)
+         >>>     df = load_and_prepare_data(data)
+         >>>     arch.show_arima_garch_results(df['diff_log_return'].values[-k:])
+         >>>     arch.backtest_strategy()
+     """
+
+     def __init__(self, symbol, data, k: int = 252):
+         """
+         Initializes the ArimaGarchModel class.
+
+         Args:
+             symbol (str): The ticker symbol for the financial instrument.
+             data (pd.DataFrame): `The raw dataset containing at least the 'Close' prices`.
+             k (int): The window size for rolling prediction in backtesting.
+         """
+         self.symbol = symbol
+         self.data = self.load_and_prepare_data(data)
+         self.k = k
+
+     # Step 1: Data Preparation
+     def load_and_prepare_data(self, df):
+         """
+         Prepares the dataset by calculating logarithmic returns
+         and differencing if necessary.
+
+         Args:
+             df (pd.DataFrame): `The raw dataset containing at least the 'Close' prices`.
+
+         Returns:
+             pd.DataFrame: The dataset with additional columns
+                 for log returns and differenced log returns.
+         """
+         return load_and_prepare_data(df)
+
+     # Step 2: Modeling (ARIMA + GARCH)
+     def fit_best_arima(self, window_data):
+         """
+         Fits the ARIMA model to the provided window of data,
+         selecting the best model based on AIC.
+
+         Args:
+             window_data (np.array): The dataset for a specific window period.
+
+         Returns:
+             ARIMA model: The best fitted ARIMA model based on AIC.
+         """
+         return fit_best_arima(window_data)
+
+     def fit_garch(self, window_data):
+         """
+         Fits the GARCH model to the residuals of the best ARIMA model.
+
+         Args:
+             window_data (np.array): The dataset for a specific window period.
+
+         Returns:
+             tuple: Contains the ARIMA result and GARCH result.
+         """
+         return fit_garch(window_data)
+
+     def show_arima_garch_results(self, window_data, acf=True, test_resid=True):
+         """
+         Displays the ARIMA and GARCH model results, including plotting
+         the ACF of residuals and conducting Box-Pierce and Ljung-Box tests.
+
+         Args:
+             window_data (np.array): The dataset for a specific window period.
+             acf (bool, optional): If True, plot the ACF of residuals. Defaults to True.
+             test_resid (bool, optional):
+                 If True, conduct Box-Pierce and Ljung-Box tests on residuals. Defaults to True.
+         """
+         arima_result = self.fit_best_arima(window_data)
+         resid = np.asarray(arima_result.resid)
+         resid = resid[~(np.isnan(resid) | np.isinf(resid))]
+         garch_model = arch_model(resid, p=1, q=1, rescale=False)
+         garch_result = garch_model.fit(disp='off')
+         residuals = garch_result.resid
+
+         # Plot the ACF of the residuals
+         if acf:
+             fig = plt.figure(figsize=(12, 8))
+             # Plot the ACF of ARIMA residuals
+             ax1 = fig.add_subplot(211, ylabel='ACF')
+             plot_acf(resid, alpha=0.05, ax=ax1, title='ACF of ARIMA Residuals')
+             ax1.set_xlabel('Lags')
+             ax1.grid(True)
+
+             # Plot the ACF of GARCH residuals on the same figure
+             ax2 = fig.add_subplot(212, ylabel='ACF')
+             plot_acf(residuals, alpha=0.05, ax=ax2,
+                      title='ACF of GARCH Residuals')
+             ax2.set_xlabel('Lags')
+             ax2.grid(True)
+
+             # Plot the figure
+             plt.tight_layout()
+             plt.show()
+
+         # Conduct Box-Pierce and Ljung-Box tests on the residuals
+         if test_resid:
+             print(arima_result.summary())
+             print(garch_result.summary())
+             bp_test = acorr_ljungbox(resid, return_df=True)
+             print("Box-Pierce and Ljung-Box Tests Results for ARIMA:\n", bp_test)
+
+     # Step 3: Prediction
+     def predict_next_return(self, arima_result, garch_result):
+         """
+         Predicts the next return using the ARIMA model
+         and the next volatility using the GARCH model.
+
+         Args:
+             arima_result (ARIMA model): The ARIMA model result.
+             garch_result (GARCH model): The GARCH model result.
+
+         Returns:
+             float: The predicted next return.
+         """
+         return predict_next_return(arima_result, garch_result)
+
+     def get_prediction(self, window_data):
+         """
+         Generates a prediction for the next return based on a window of data.
+
+         Args:
+             window_data (np.array): The dataset for a specific window period.
+
+         Returns:
+             float: The predicted next return.
+         """
+         return get_prediction(window_data)
+
+     def calculate_signals(self, window_data):
+         """
+         Calculates the trading signal based on the prediction.
+
+         Args:
+             window_data (np.array): The dataset for a specific window period.
+
+         Returns:
+             str: The trading signal ('LONG', 'SHORT', or None).
+         """
+         prediction = self.get_prediction(window_data)
+         if prediction > 0:
+             signal = "LONG"
+         elif prediction < 0:
+             signal = "SHORT"
+         else:
+             signal = None
+         return signal
+
+     # Step 4: Trading Strategy
+
+     def execute_trading_strategy(self, predictions):
+         """
+         Executes the trading strategy based on a list
+         of predictions, determining positions to take.
+
+         Args:
+             predictions (list): A list of predicted returns.
+
+         Returns:
+             list: A list of positions (1 for 'LONG', -1 for 'SHORT', 0 for 'HOLD').
+         """
+         positions = []  # Long if 1, Short if -1
+         previous_position = 0  # Initial position
+         for prediction in predictions:
+             if prediction > 0:
+                 current_position = 1  # Long
+             elif prediction < 0:
+                 current_position = -1  # Short
+             else:
+                 current_position = previous_position  # Hold previous position
+             positions.append(current_position)
+             previous_position = current_position
+
+         return positions
+
+     # Step 5: Vectorized Backtesting
+     def generate_predictions(self):
+         """
+         Generator that yields predictions one by one.
+         """
+         data = self.data
+         window_size = self.k
+         for i in range(window_size, len(data)):
+             print(
+                 f"Processing window {i - window_size + 1}/{len(data) - window_size}...")
+             window_data = data['diff_log_return'].iloc[i-window_size:i]
+             next_return = self.get_prediction(window_data)
+             yield next_return
+
+     def backtest_strategy(self):
+         """
+         Performs a backtest of the strategy over
+         the entire dataset, plotting cumulative returns.
+         """
+         data = self.data
+         window_size = self.k
+         print(
+             f"Starting backtesting for {self.symbol}\n"
+             f"Window size {window_size}.\n"
+             f"Total iterations: {len(data) - window_size}.\n")
+         predictions_generator = self.generate_predictions()
+
+         positions = self.execute_trading_strategy(predictions_generator)
+
+         strategy_returns = np.array(
+             positions[:-1]) * data['log_return'].iloc[window_size+1:].values
+         buy_and_hold = data['log_return'].iloc[window_size+1:].values
+         buy_and_hold_returns = np.cumsum(buy_and_hold)
+         cumulative_returns = np.cumsum(strategy_returns)
+         dates = data.index[window_size+1:]
+         self.plot_cumulative_returns(
+             cumulative_returns, buy_and_hold_returns, dates)
+
+         print("\nBacktesting completed!")
+
+     # Function to plot the cumulative returns
+     def plot_cumulative_returns(self, strategy_returns, buy_and_hold_returns, dates):
+         """
+         Plots the cumulative returns of the ARIMA+GARCH strategy against
+         a buy-and-hold strategy.
+
+         Args:
+             strategy_returns (np.array): Cumulative returns from the strategy.
+             buy_and_hold_returns (np.array): Cumulative returns from a buy-and-hold strategy.
+             dates (pd.Index): The dates corresponding to the returns.
+         """
+         plt.figure(figsize=(14, 7))
+         plt.plot(dates, strategy_returns, label='ARIMA+GARCH', color='blue')
+         plt.plot(dates, buy_and_hold_returns, label='Buy & Hold', color='red')
+         plt.xlabel('Time')
+         plt.ylabel('Cumulative Returns')
+         plt.title(f'ARIMA+GARCH Strategy vs. Buy & Hold on ({self.symbol})')
+         plt.legend()
+         plt.grid(True)
+         plt.show()
+
+
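The hold-previous-position branch in execute_trading_strategy is easiest to see with a toy walk-through; the prediction values below are invented for illustration:

preds = [0.002, -0.001, 0.0, 0.003]  # hypothetical predicted returns

positions, previous = [], 0
for p in preds:  # mirrors the loop in execute_trading_strategy
    current = 1 if p > 0 else -1 if p < 0 else previous
    positions.append(current)
    previous = current

print(positions)  # [1, -1, -1, 1] -- the 0.0 prediction holds the prior short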
  # *********************************************
  # STATS TEST (Cointegration , Mean Reverting)*
  # *********************************************
- def get_corr(tickers: Union[List[str] , Tuple[str, ...]], start: str, end: str) -> None:
+ def get_corr(tickers: Union[List[str], Tuple[str, ...]], start: str, end: str) -> None:
      """
      Calculates and prints the correlation matrix of the adjusted closing prices
      for a given list of stock tickers within a specified date range.
@@ -275,7 +568,7 @@ def plot_residuals(df: pd.DataFrame):
      plt.show()


- def run_cadf_test(pair: Union[List[str] , Tuple[str, ...]], start: str, end: str) -> None:
+ def run_cadf_test(pair: Union[List[str], Tuple[str, ...]], start: str, end: str) -> None:
      """
      Performs the Cointegration Augmented Dickey-Fuller (CADF) test on a pair of stock tickers
      over a specified date range to check for cointegration.
@@ -560,7 +853,9 @@ def draw_slope_intercept_changes(prices, state_means):
      plt.show()


- def run_kalman_filter(etfs: Union[List[str] , Tuple[str, ...]], start: str, end: str) -> None:
+ def run_kalman_filter(
+         etfs: Union[List[str], Tuple[str, ...]],
+         start: str, end: str) -> None:
      """
      Applies a Kalman filter to a pair of ETF adjusted closing prices within a specified date range
      to estimate the slope and intercept over time.
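The two signature cleanups above only reflow the Union[...] annotations; call sites are unchanged. For reference, both helpers are invoked positionally and return None, printing test output and drawing plots as side effects. A sketch (the tickers and dates are placeholders, not from this diff):

from bbstrader.tseries import run_cadf_test, run_kalman_filter

run_cadf_test(["EWA", "EWC"], start="2010-01-04", end="2015-12-31")
run_kalman_filter(("EWA", "EWC"), start="2010-01-04", end="2015-12-31")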
@@ -590,3 +885,298 @@ def run_kalman_filter(etfs: Union[List[str] , Tuple[str, ...]], start: str, end:
      draw_date_coloured_scatterplot(etfs, prices)
      state_means, state_covs = calc_slope_intercept_kalman(etfs, prices)
      draw_slope_intercept_changes(prices, state_means)
+
+
+ class KalmanFilterModel():
+     """
+     Implements a Kalman Filter model, a recursive algorithm used for estimating
+     the state of a linear dynamic system from a series of noisy measurements.
+     It is designed to process market data and estimate dynamic parameters such as
+     the slope and intercept of price relationships, along with the forecast error
+     and standard deviation of the predictions.
+
+     You can learn more here https://en.wikipedia.org/wiki/Kalman_filter
+     """
+
+     def __init__(self, tickers: list | tuple, **kwargs):
+         """
+         Initializes the Kalman Filter strategy.
+
+         Args:
+             tickers:
+                 A list or tuple of ticker symbols representing financial instruments.
+
+             kwargs: Keyword arguments for additional parameters,
+                 specifically `delta` and `vt`.
+         """
+         self.tickers = tickers
+         assert self.tickers is not None
+         self.latest_prices = np.array([-1.0, -1.0])
+         self.delta = kwargs.get("delta", 1e-4)
+         self.wt = self.delta/(1-self.delta) * np.eye(2)
+         self.vt = kwargs.get("vt", 1e-3)
+         self.theta = np.zeros(2)
+         self.P = np.zeros((2, 2))
+         self.R = None
+         self.kf = self._init_kalman()
+
+     def _init_kalman(self):
+         """
+         Initializes and returns a Kalman Filter configured
+         for the trading strategy. The filter is set up with initial
+         state and covariance, state transition matrix, process noise
+         and measurement noise covariances.
+         """
+         kf = KalmanFilter(dim_x=2, dim_z=1)
+         kf.x = np.zeros((2, 1))  # Initial state
+         kf.P = self.P  # Initial covariance
+         kf.F = np.eye(2)  # State transition matrix
+         kf.Q = self.wt  # Process noise covariance
+         kf.R = 1.  # Scalar measurement noise covariance
+
+         return kf
+
+     def calc_slope_intercep(self, prices: np.ndarray):
+         """
+         Calculates and returns the slope and intercept
+         of the relationship between the provided prices using the Kalman Filter.
+         This method updates the filter with the latest price and returns
+         the estimated slope and intercept.
+
+         Args:
+             prices: A numpy array of prices for two financial instruments.
+
+         Returns:
+             A tuple containing the slope and intercept of the relationship.
+         """
+         kf = self.kf
+         kf.H = np.array([[prices[1], 1.0]])
+         kf.predict()
+         kf.update(prices[0])
+         slope = kf.x.copy().flatten()[0]
+         intercept = kf.x.copy().flatten()[1]
+
+         return slope, intercept
+
+     def calculate_etqt(self, prices: np.ndarray):
+         """
+         Calculates the forecast error and standard deviation of the predictions
+         using the Kalman Filter.
+
+         Args:
+             prices: A numpy array of prices for two financial instruments.
+
+         Returns:
+             A tuple containing the forecast error and standard deviation of the predictions.
+         """
+
+         self.latest_prices[0] = prices[0]
+         self.latest_prices[1] = prices[1]
+
+         if all(self.latest_prices > -1.0):
+             slope, intercept = self.calc_slope_intercep(self.latest_prices)
+
+             self.theta[0] = slope
+             self.theta[1] = intercept
+
+             # Create the observation matrix of the latest prices
+             # of Y and the intercept value (1.0) as well as the
+             # scalar value of the latest price from X
+             F = np.asarray([self.latest_prices[0], 1.0]).reshape((1, 2))
+             y = self.latest_prices[1]
+
+             # The prior value of the states {\theta_t} is
+             # distributed as a multivariate Gaussian with
+             # mean a_t and variance-covariance {R_t}
+             if self.R is not None:
+                 self.R = self.C + self.wt
+             else:
+                 self.R = np.zeros((2, 2))
+
+             # Calculate the Kalman Filter update
+             # ---------------------------------
+             # Calculate prediction of new observation
+             # as well as forecast error of that prediction
+             yhat = F.dot(self.theta)
+             et = y - yhat
+
+             # {Q_t} is the variance of the prediction of
+             # observations and hence sqrt_Qt is the
+             # standard deviation of the predictions
+             Qt = F.dot(self.R).dot(F.T) + self.vt
+             sqrt_Qt = np.sqrt(Qt)
+
+             # The posterior value of the states {\theta_t} is
+             # distributed as a multivariate Gaussian with mean
+             # {m_t} and variance-covariance {C_t}
+             At = self.R.dot(F.T) / Qt
+             self.theta = self.theta + At.flatten() * et
+             self.C = self.R - At * F.dot(self.R)
+             return (et, sqrt_Qt)
+         else:
+             return None
+
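A minimal sketch of driving KalmanFilterModel bar by bar; the pair, the seed, and the linear price relationship below are assumptions for illustration only:

import numpy as np
from bbstrader.tseries import KalmanFilterModel

model = KalmanFilterModel(tickers=("EWA", "EWC"))  # placeholder pair

rng = np.random.default_rng(7)
x = 100.0 + np.cumsum(rng.normal(0.0, 0.5, 250))  # synthetic price series X
y = 0.8 * x + rng.normal(0.0, 0.5, 250)           # noisy linear function of X

for px, py in zip(x, y):
    result = model.calculate_etqt(np.array([px, py]))
    if result is not None:
        et, sqrt_Qt = result  # forecast error and standard deviation of the prediction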
+ # ******************************************
+ # ORNSTEIN UHLENBECK PROCESS *
+ # ******************************************
+
+
+ class OrnsteinUhlenbeck():
+     """
+     The Ornstein-Uhlenbeck process is a mathematical model
+     used to describe the behavior of a mean-reverting stochastic process.
+     We use it to model the price dynamics of an asset that tends
+     to revert to a long-term mean.
+
+     We estimate the drift (θ), volatility (σ), and long-term mean (μ)
+     based on historical price data; then we simulate the OU process
+     using the estimated parameters.
+
+     https://en.wikipedia.org/wiki/Ornstein%E2%80%93Uhlenbeck_process
+     """
+
+     def __init__(
+             self, prices: np.ndarray,
+             returns: bool = True, timeframe: str = "D1"
+     ):
+         """
+         Initializes the OrnsteinUhlenbeck instance.
+
+         Args:
+             prices (np.ndarray): Historical close prices.
+
+             returns (bool): Indicates whether you want to simulate
+                 the returns or your raw data.
+
+             timeframe (str): The time frame for the historical prices
+                 (1m, 5m, 15m, 30m, 1h, 4h, D1).
+         """
+         self.prices = prices
+         if returns:
+             series = pd.Series(self.prices)
+             self.returns = series.pct_change().dropna().values
+         else:
+             self.returns = self.prices
+
+         time_frame_mapping = {
+             '1m': 1 / (24 * 60),    # 1 minute intervals
+             '5m': 5 / (24 * 60),    # 5 minute intervals
+             '15m': 15 / (24 * 60),  # 15 minute intervals
+             '30m': 30 / (24 * 60),  # 30 minute intervals
+             '1h': 1 / 24,           # 1 hour intervals
+             '4h': 4 / 24,           # 4 hour intervals
+             'D1': 1,                # Daily intervals
+         }
+         if timeframe not in time_frame_mapping:
+             raise ValueError("Unsupported time frame")
+         self.tf = time_frame_mapping[timeframe]
+
+         params = self.estimate_parameters()
+         self.mu_hat = params[0]     # Mean (μ)
+         self.theta_hat = params[1]  # Drift (θ)
+         self.sigma_hat = params[2]  # Volatility (σ)
+         print(f'Estimated μ: {self.mu_hat}')
+         print(f'Estimated θ: {self.theta_hat}')
+         print(f'Estimated σ: {self.sigma_hat}')
+
+     def ornstein_uhlenbeck(self, mu, theta, sigma, dt, X0, n):
+         """
+         Simulates the Ornstein-Uhlenbeck process.
+
+         Args:
+             mu (float): Estimated long-term mean.
+             theta (float): Estimated drift.
+             sigma (float): Estimated volatility.
+             dt (float): Time step.
+             X0 (float): Initial value.
+             n (int): Number of time steps.
+
+         Returns:
+             np.ndarray: Simulated process.
+         """
+         x = np.zeros(n)
+         x[0] = X0
+         for t in range(1, n):
+             dW = np.random.normal(loc=0, scale=np.sqrt(dt))
+             # O-U process differential equation
+             x[t] = x[t-1] + (theta * (mu - x[t-1]) * dt) + (sigma * dW)
+             # dW is a Wiener process increment
+             # (theta * (mu - x[t-1]) * dt) represents the mean-reverting tendency
+             # (sigma * dW) represents the random volatility
+         return x
+
+     def estimate_parameters(self):
+         """
+         Estimates the mean-reverting parameters (μ, θ, σ)
+         using the negative log-likelihood.
+
+         Returns:
+             Tuple: Estimated μ, θ, and σ.
+         """
+         initial_guess = [0, 0.1, np.std(self.returns)]
+         result = minimize(
+             self._neg_log_likelihood, initial_guess, args=(self.returns,)
+         )
+         mu, theta, sigma = result.x
+         return mu, theta, sigma
+
+     def _neg_log_likelihood(self, params, returns):
+         """
+         Calculates the negative
+         log-likelihood for parameter estimation.
+
+         Args:
+             params (list): List of parameters [mu, theta, sigma].
+             returns (np.ndarray): Historical returns.
+
+         Returns:
+             float: Negative log-likelihood.
+         """
+         mu, theta, sigma = params
+         dt = self.tf
+         n = len(returns)
+         ou_simulated = self.ornstein_uhlenbeck(
+             mu, theta, sigma, dt, 0, n + 1
+         )
+         residuals = ou_simulated[1:n + 1] - returns
+         neg_ll = 0.5 * np.sum(
+             residuals**2
+         ) / sigma**2 + 0.5 * n * np.log(2 * np.pi * sigma**2)
+         return neg_ll
+
+     def simulate_process(self, rts=None, n=100, p=None):
+         """
+         Simulates the OU process multiple times.
+
+         Args:
+             rts (np.ndarray): Historical returns.
+             n (int): Number of simulations to perform.
+             p (int): Number of time steps.
+
+         Returns:
+             np.ndarray: 2D array representing simulated processes.
+         """
+         if rts is not None:
+             returns = rts
+         else:
+             returns = self.returns
+         if p is not None:
+             T = p
+         else:
+             T = len(returns)
+         dt = self.tf
+
+         dW_matrix = np.random.normal(
+             loc=0, scale=np.sqrt(dt), size=(n, T)
+         )
+         simulations_matrix = np.zeros((n, T))
+         simulations_matrix[:, 0] = returns[-1]
+
+         for t in range(1, T):
+             simulations_matrix[:, t] = (
+                 simulations_matrix[:, t-1] +
+                 self.theta_hat * (
+                     self.mu_hat - simulations_matrix[:, t-1]) * dt +
+                 self.sigma_hat * dW_matrix[:, t]
+             )
+         return simulations_matrix
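Two notes on the class added in this final hunk. First, the update inside ornstein_uhlenbeck and simulate_process is the Euler-Maruyama discretization of the OU stochastic differential equation, which is why dW is drawn with scale sqrt(dt):

dX_t = \theta(\mu - X_t)\,dt + \sigma\,dW_t
\quad\Longrightarrow\quad
x_t = x_{t-1} + \theta(\mu - x_{t-1})\,\Delta t + \sigma\sqrt{\Delta t}\,\varepsilon_t,
\qquad \varepsilon_t \sim \mathcal{N}(0, 1)

Second, a minimal end-to-end sketch (synthetic prices; the seed and sizes are illustrative). Note that __all__ above exports the name "OrnsteinUhlenbeckModel" while this hunk defines the class as OrnsteinUhlenbeck, so the importable name may differ:

import numpy as np
from bbstrader.tseries import OrnsteinUhlenbeck  # see the naming caveat above

rng = np.random.default_rng(42)
prices = 100.0 + np.cumsum(rng.normal(0.0, 0.5, 1000))  # synthetic close prices

ou = OrnsteinUhlenbeck(prices, returns=True, timeframe="D1")  # prints estimated μ, θ, σ
paths = ou.simulate_process(n=100, p=50)  # 100 simulated paths of 50 steps each
print(paths.shape)  # (100, 50)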