bbstrader 0.3.1__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

bbstrader/tseries.py CHANGED
@@ -1,36 +1,24 @@
  """
  The `tseries` module is a designed for conducting
- advanced time series analysis in financial markets.
- It leverages statistical models and algorithms to perform
- tasks such as cointegration testing, volatility modeling,
- and filter-based estimation to assist in trading strategy development,
- market analysis, and financial data exploration.
+ some simple time series analysis in financial markets.
  """

  import pprint
  import warnings
- from itertools import combinations
  from typing import List, Tuple, Union

  import matplotlib.pyplot as plt
  import numpy as np
  import pandas as pd
- import pmdarima as pm
  import seaborn as sns
  import statsmodels.api as sm
  import statsmodels.tsa.stattools as ts
  import yfinance as yf
- from arch import arch_model
  from filterpy.kalman import KalmanFilter
- from hurst import compute_Hc
  from pykalman import KalmanFilter as PyKalmanFilter
- from scipy.optimize import minimize
  from sklearn.linear_model import LogisticRegressionCV
  from sklearn.model_selection import GridSearchCV
  from sklearn.tree import DecisionTreeClassifier
- from statsmodels.graphics.tsaplots import plot_acf
- from statsmodels.stats.diagnostic import acorr_ljungbox
- from statsmodels.tsa.arima.model import ARIMA
  from statsmodels.tsa.stattools import adfuller, coint
  from statsmodels.tsa.vector_ar.var_model import VAR
  from statsmodels.tsa.vector_ar.vecm import coint_johansen
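The dropped imports (pmdarima, arch, hurst, scipy.optimize, and the statsmodels ARIMA/diagnostics helpers) are no longer needed because, as the hunks below show, every function that used them is now a stub whose body only emits a DeprecationWarning and implicitly returns None. A quick check of the new behavior, assuming bbstrader 0.3.2 is installed; note that the module itself calls warnings.filterwarnings("ignore") at import, so the filter must be reset before the warning is visible:

    import warnings

    from bbstrader.tseries import fit_best_arima

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")  # undo the module-level "ignore" filter
        result = fit_best_arima([0.1, -0.2, 0.05])  # stub no longer fits anything
        print(result)  # None: no model object is returned in 0.3.2
        print(caught[0].category.__name__, caught[0].message)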
@@ -40,19 +28,8 @@ warnings.filterwarnings("ignore")


  __all__ = [
-     "load_and_prepare_data",
-     "fit_best_arima",
-     "fit_garch",
-     "predict_next_return",
-     "get_prediction",
-     "get_corr",
-     "run_cadf_test",
-     "run_hurst_test",
-     "run_coint_test",
      "run_kalman_filter",
-     "ArimaGarchModel",
      "KalmanFilterModel",
-     "OrnsteinUhlenbeck",
      "remove_correlated_assets",
      "check_stationarity",
      "remove_stationary_assets",
@@ -70,458 +47,54 @@ __all__ = [
  # *******************************************


- def load_and_prepare_data(df: pd.DataFrame):
-     """
-     Prepares financial time series data for analysis.
-
-     This function takes a pandas DataFrame containing financial data,
-     calculates logarithmic returns, and the first difference
-     of these logarithmic returns. It handles missing values
-     by filling them with zeros.
-
-     Args:
-         df (pd.DataFrame): DataFrame containing at least
-             a `Close` column with closing prices of a financial asset.
-
-     Returns:
-         pd.DataFrame: DataFrame with additional
-             columns for logarithmic returns (`log_return`)
-             and the first difference of logarithmic returns (`diff_log_return`),
-             with `NaN` values filled with `0`.
-     """
-     # Load data
-     data = df.copy()
-     # Calculate logarithmic returns
-     data["log_return"] = np.log(data["Close"] / data["Close"].shift(1))
-     # Differencing if necessary
-     data["diff_log_return"] = data["log_return"].diff()
-     # Drop NaN values
-     data.fillna(0, inplace=True)
-     return data
-
-
- def fit_best_arima(window_data: Union[pd.Series, np.ndarray]):
-     """
-     Identifies and fits the best `ARIMA` model
-     based on the Akaike Information Criterion `(AIC)`.
-
-     Iterates through different combinations of `p` and `q`
-     parameters (within specified ranges) for the ARIMA model,
-     fits them to the provided data, and selects the combination
-     with the lowest `AIC` value.
+ def load_and_prepare_data(df):
+     warnings.warn("`load_and_prepare_data` is removed.", DeprecationWarning)

-     Args:
-         window_data (pd.Series or np.ndarray):
-             Time series data to fit the `ARIMA` model on.

-     Returns:
-         ARIMA result object: The fitted `ARIMA` model with the lowest `AIC`.
-     """
-     if isinstance(window_data, pd.Series):
-         window_data = window_data.values
-
-     window_data = window_data[~(np.isnan(window_data) | np.isinf(window_data))]
-     # Fit ARIMA model with best parameters
-     model = pm.auto_arima(
-         window_data,
-         start_p=1,
-         start_q=1,
-         max_p=6,
-         max_q=6,
-         seasonal=False,
-         stepwise=True,
+ def fit_best_arima(window_data):
+     warnings.warn(
+         "`fit_best_arima` is deprecated, use `pmdarima.auto_arima` instead.",
+         DeprecationWarning,
      )
-     final_order = model.order
-     from arch.utility.exceptions import ConvergenceWarning as ArchWarning
-     from statsmodels.tools.sm_exceptions import ConvergenceWarning as StatsWarning
-
-     with warnings.catch_warnings():
-         warnings.filterwarnings("ignore", category=StatsWarning, module="statsmodels")
-         warnings.filterwarnings("ignore", category=ArchWarning, module="arch")
-         try:
-             best_arima_model = ARIMA(
-                 window_data + 1e-5, order=final_order, missing="drop"
-             ).fit()
-             return best_arima_model
-         except np.linalg.LinAlgError:
-             # Catch specific linear algebra errors
-             print("LinAlgError occurred, skipping this data point.")
-             return None
-         except Exception as e:
-             # Catch any other unexpected errors and log them
-             print(f"An error occurred: {e}")
-             return None
-
-
- def fit_garch(window_data: Union[pd.Series, np.ndarray]):
-     """
-     Fits an `ARIMA` model to the data to get residuals,
-     then fits a `GARCH(1,1)` model on these residuals.
-
-     Utilizes the residuals from the best `ARIMA` model fit to
-     then model volatility using a `GARCH(1,1)` model.

-     Args:
-         window_data (pd.Series or np.ndarray):
-             Time series data for which to fit the `ARIMA` and `GARCH` models.

-     Returns:
-         tuple: A tuple containing the `ARIMA` result
-             object and the `GARCH` result object.
-     """
-     arima_result = fit_best_arima(window_data)
-     if arima_result is None:
-         return None, None
-     resid = np.asarray(arima_result.resid)
-     resid = resid[~(np.isnan(resid) | np.isinf(resid))]
-     garch_model = arch_model(resid, p=1, q=1, rescale=False)
-     garch_result = garch_model.fit(disp="off")
-     return arima_result, garch_result
+ def fit_garch(window_data):
+     warnings.warn(
+         "`fit_garch` is deprecated, use `arch.arch_model` instead.",
+         DeprecationWarning,
+     )


  def predict_next_return(arima_result, garch_result):
-     """
-     Predicts the next return value using fitted `ARIMA` and `GARCH` models.
-
-     Combines the next period forecast from the `ARIMA` model
-     with the next period volatility forecast from the `GARCH` model
-     to predict the next return value.
-
-     Args:
-         arima_result (ARIMA result object): The fitted `ARIMA` model result.
-         garch_result (ARCH result object): The fitted `GARCH` model result.
-
-     Returns:
-         float: The predicted next return, adjusted for predicted volatility.
-     """
-     if arima_result is None or garch_result is None:
-         return 0
-     # Predict next value with ARIMA
-     arima_pred = arima_result.forecast(steps=1)
-     # Predict next volatility with GARCH
-     garch_pred = garch_result.forecast(horizon=1)
-     next_volatility = garch_pred.variance.iloc[-1, 0]
-
-     # Combine predictions (return + volatility)
-     if not isinstance(arima_pred, np.ndarray):
-         pred = arima_pred.values[0]
-     else:
-         pred = arima_pred[0]
-     return pred + next_volatility
-
-
- def get_prediction(window_data: Union[pd.Series, np.ndarray]):
-     """
-     Orchestrator function to get the next period's return prediction.
-
-     This function ties together the process of fitting
-     both `ARIMA` and `GARCH` models on the provided data
-     and then predicting the next period's return using these models.
+     warnings.warn(
+         "`predict_next_return` is deprecated.",
+         DeprecationWarning,
+     )

-     Args:
-         window_data (Union[pd.Series , np.ndarray]):
-             Time series data to fit the models and predict the next return.

-     Returns
-         float: Predicted next return value.
-     """
-     arima_result, garch_result = fit_garch(window_data)
-     prediction = predict_next_return(arima_result, garch_result)
-     return prediction
+ def get_prediction(window_data):
+     warnings.warn(
+         "`get_prediction` is deprecated, ",
+         DeprecationWarning,
+     )


  class ArimaGarchModel:
-     """
-     This class implements a time serie model
-     that combines `ARIMA (AutoRegressive Integrated Moving Average)`
-     and `GARCH (Generalized Autoregressive Conditional Heteroskedasticity)` models
-     to predict future returns based on historical price data.
-
-     The model is implemented in the following steps:
-     1. Data Preparation: Load and prepare the historical price data.
-     2. Modeling: Fit the ARIMA model to the data and then fit the GARCH model to the residuals.
-     3. Prediction: Predict the next return using the ARIMA model and the next volatility using the GARCH model.
-     4. Trading Strategy: Execute the trading strategy based on the predictions.
-     5. Vectorized Backtesting: Backtest the trading strategy using the historical data.
-
-     Exemple:
-         >>> import yfinance as yf
-         >>> from bbstrader.tseries import ArimaGarchModel
-         >>> from bbstrader.tseries import load_and_prepare_data
-
-         >>> if __name__ == '__main__':
-         >>>     # ARCH SPY Vectorize Backtest
-         >>>     k = 252
-         >>>     data = yf.download("SPY", start="2010-01-02", end="2015-12-31")
-         >>>     arch = ArimaGarchModel("SPY", data, k=k)
-         >>>     df = load_and_prepare_data(data)
-         >>>     arch.show_arima_garch_results(df['diff_log_return'].values[-k:])
-         >>>     arch.backtest_strategy()
-     """
-
      def __init__(self, symbol, data, k: int = 252):
-         """
-         Initializes the ArimaGarchStrategy class.
-
-         Args:
-             symbol (str): The ticker symbol for the financial instrument.
-             data (pd.DataFrame): `The raw dataset containing at least the 'Close' prices`.
-             k (int): The window size for rolling prediction in backtesting.
-         """
-         self.symbol = symbol
-         self.data = self.load_and_prepare_data(data)
-         self.k = k
-
-     # Step 1: Data Preparation
-     def load_and_prepare_data(self, df):
-         """
-         Prepares the dataset by calculating logarithmic returns
-         and differencing if necessary.
-
-         Args:
-             df (pd.DataFrame): `The raw dataset containing at least the 'Close' prices`.
-
-         Returns:
-             pd.DataFrame: The dataset with additional columns
-                 for log returns and differenced log returns.
-         """
-         return load_and_prepare_data(df)
-
-     # Step 2: Modeling (ARIMA + GARCH)
-     def fit_best_arima(self, window_data):
-         """
-         Fits the ARIMA model to the provided window of data,
-         selecting the best model based on AIC.
-
-         Args:
-             window_data (np.array): The dataset for a specific window period.
-
-         Returns:
-             ARIMA model: The best fitted ARIMA model based on AIC.
-         """
-         return fit_best_arima(window_data)
-
-     def fit_garch(self, window_data):
-         """
-         Fits the GARCH model to the residuals of the best ARIMA model.
-
-         Args:
-             window_data (np.array): The dataset for a specific window period.
-
-         Returns:
-             tuple: Contains the ARIMA result and GARCH result.
-         """
-         return fit_garch(window_data)
-
-     def show_arima_garch_results(self, window_data, acf=True, test_resid=True):
-         """
-         Displays the ARIMA and GARCH model results, including plotting
-         ACF of residuals and conducting , Box-Pierce and Ljung-Box tests.
-
-         Args:
-             window_data (np.array): The dataset for a specific window period.
-             acf (bool, optional): If True, plot the ACF of residuals. Defaults to True.
-
-             test_resid (bool, optional):
-                 If True, conduct Box-Pierce and Ljung-Box tests on residuals. Defaults to True.
-         """
-         arima_result = self.fit_best_arima(window_data)
-         resid = np.asarray(arima_result.resid)
-         resid = resid[~(np.isnan(resid) | np.isinf(resid))]
-         garch_model = arch_model(resid, p=1, q=1, rescale=False)
-         garch_result = garch_model.fit(disp="off")
-         residuals = garch_result.resid
-
-         # TODO : Plot the ACF of the residuals
-         if acf:
-             fig = plt.figure(figsize=(12, 8))
-             # Plot the ACF of ARIMA residuals
-             ax1 = fig.add_subplot(211, ylabel="ACF")
-             plot_acf(resid, alpha=0.05, ax=ax1, title="ACF of ARIMA Residuals")
-             ax1.set_xlabel("Lags")
-             ax1.grid(True)
-
-             # Plot the ACF of GARCH residuals on the same axes
-             ax2 = fig.add_subplot(212, ylabel="ACF")
-             plot_acf(residuals, alpha=0.05, ax=ax2, title="ACF of GARCH Residuals")
-             ax2.set_xlabel("Lags")
-             ax2.grid(True)
-
-             # Plot the figure
-             plt.tight_layout()
-             plt.show()
-
-         # TODO : Conduct Box-Pierce and Ljung-Box Tests of the residuals
-         if test_resid:
-             print(arima_result.summary())
-             print(garch_result.summary())
-             bp_test = acorr_ljungbox(resid, return_df=True)
-             print("Box-Pierce and Ljung-Box Tests Results for ARIMA:\n", bp_test)
-
-     # Step 3: Prediction
-     def predict_next_return(self, arima_result, garch_result):
-         """
-         Predicts the next return using the ARIMA model
-         and the next volatility using the GARCH model.
-
-         Args:
-             arima_result (ARIMA model): The ARIMA model result.
-             garch_result (GARCH model): The GARCH model result.
-
-         Returns:
-             float: The predicted next return.
-         """
-         return predict_next_return(arima_result, garch_result)
-
-     def get_prediction(self, window_data):
-         """
-         Generates a prediction for the next return based on a window of data.
-
-         Args:
-             window_data (np.array): The dataset for a specific window period.
-
-         Returns:
-             float: The predicted next return.
-         """
-         return get_prediction(window_data)
-
-     def calculate_signals(self, window_data):
-         """
-         Calculates the trading signal based on the prediction.
-
-         Args:
-             window_data (np.array): The dataset for a specific window period.
-
-         Returns:
-             str: The trading signal ('LONG', 'SHORT', or None).
-         """
-         prediction = self.get_prediction(window_data)
-         if prediction > 0:
-             signal = "LONG"
-         elif prediction < 0:
-             signal = "SHORT"
-         else:
-             signal = None
-         return signal
-
-     # Step 4: Trading Strategy
-
-     def execute_trading_strategy(self, predictions):
-         """
-         Executes the trading strategy based on a list
-         of predictions, determining positions to take.
-
-         Args:
-             predictions (list): A list of predicted returns.
-
-         Returns:
-             list: A list of positions (1 for 'LONG', -1 for 'SHORT', 0 for 'HOLD').
-         """
-         positions = []  # Long if 1, Short if -1
-         previous_position = 0  # Initial position
-         for prediction in predictions:
-             if prediction > 0:
-                 current_position = 1  # Long
-             elif prediction < 0:
-                 current_position = -1  # Short
-             else:
-                 current_position = previous_position  # Hold previous position
-             positions.append(current_position)
-             previous_position = current_position
-
-         return positions
-
-     # Step 5: Vectorized Backtesting
-     def generate_predictions(self):
-         """
-         Generator that yields predictions one by one.
-         """
-         data = self.data
-         window_size = self.k
-         for i in range(window_size, len(data)):
-             print(
-                 f"Processing window {i - window_size + 1}/{len(data) - window_size}..."
-             )
-             window_data = data["diff_log_return"].iloc[i - window_size : i]
-             next_return = self.get_prediction(window_data)
-             yield next_return
-
-     def backtest_strategy(self):
-         """
-         Performs a backtest of the strategy over
-         the entire dataset, plotting cumulative returns.
-         """
-         data = self.data
-         window_size = self.k
-         print(
-             f"Starting backtesting for {self.symbol}\n"
-             f"Window size {window_size}.\n"
-             f"Total iterations: {len(data) - window_size}.\n"
+         warnings.warn(
+             "`ArimaGarchModel` is deprecated, use `pmdarima.auto_arima` and `arch.arch_model` instead.",
+             DeprecationWarning,
          )
-         predictions_generator = self.generate_predictions()
-
-         positions = self.execute_trading_strategy(predictions_generator)
-
-         strategy_returns = (
-             np.array(positions[:-1]) * data["log_return"].iloc[window_size + 1 :].values
-         )
-         buy_and_hold = data["log_return"].iloc[window_size + 1 :].values
-         buy_and_hold_returns = np.cumsum(buy_and_hold)
-         cumulative_returns = np.cumsum(strategy_returns)
-         dates = data.index[window_size + 1 :]
-         self.plot_cumulative_returns(cumulative_returns, buy_and_hold_returns, dates)
-
-         print("\nBacktesting completed !!")
-
-     # Function to plot the cumulative returns
-     def plot_cumulative_returns(self, strategy_returns, buy_and_hold_returns, dates):
-         """
-         Plots the cumulative returns of the ARIMA+GARCH strategy against
-         a buy-and-hold strategy.
-
-         Args:
-             strategy_returns (np.array): Cumulative returns from the strategy.
-             buy_and_hold_returns (np.array): Cumulative returns from a buy-and-hold strategy.
-             dates (pd.Index): The dates corresponding to the returns.
-         """
-         plt.figure(figsize=(14, 7))
-         plt.plot(dates, strategy_returns, label="ARIMA+GARCH ", color="blue")
-         plt.plot(dates, buy_and_hold_returns, label="Buy & Hold", color="red")
-         plt.xlabel("Time")
-         plt.ylabel("Cumulative Returns")
-         plt.title(f"ARIMA+GARCH Strategy vs. Buy & Hold on ({self.symbol})")
-         plt.legend()
-         plt.grid(True)
-         plt.show()


  # *********************************************
  # STATS TEST (Cointegration , Mean Reverting)*
  # *********************************************
  def get_corr(tickers: Union[List[str], Tuple[str, ...]], start: str, end: str) -> None:
-     """
-     Calculates and prints the correlation matrix of the adjusted closing prices
-     for a given list of stock tickers within a specified date range.
-
-     Args:
-         tickers (Union[List[str] , Tuple[str, ...]]):
-             A list or tuple of valid stock tickers (e.g., ['AAPL', 'MSFT', 'GOOG']).
-         start (str): The start date for the historical data in 'YYYY-MM-DD' format.
-         end (str): The end date for the historical data in 'YYYY-MM-DD' format.
-
-     Example:
-         >>> from bbstrader.tseries import get_corr
-         >>> get_corr(['AAPL', 'MSFT', 'GOOG'], '2023-01-01', '2023-12-31')
-     """
-     # Download historical data
-     data = yf.download(tickers, start=start, end=end, multi_level_index=False, auto_adjust=True)
-     data = data["Adj Close"] if "Adj Close" in data.columns else data["Close"]
-
-     # Calculate correlation matrix
-     correlation_matrix = data.corr()
-
-     # Display the matrix
-     print(correlation_matrix)
+     warnings.warn(
+         "`get_corr` is deprecated, use pandas DataFrame's `corr` method instead.",
+         DeprecationWarning,
+     )


  def plot_price_series(df: pd.DataFrame, ts1: str, ts2: str):
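The stubs in this hunk point migrators straight at the libraries the removed helpers wrapped: pmdarima for ARIMA order selection and arch for volatility. A minimal sketch of the equivalent workflow, assuming both packages are installed; the synthetic prices, seed, and search ranges are illustrative, not part of the package:

    import numpy as np
    import pandas as pd
    import pmdarima as pm
    from arch import arch_model

    # Synthetic close prices as a stand-in for a real data feed
    rng = np.random.default_rng(42)
    close = pd.Series(100 * np.exp(np.cumsum(rng.normal(0, 0.01, 500))))

    # Log returns, as the removed load_and_prepare_data computed them
    log_return = np.log(close / close.shift(1)).dropna()

    # Order selection by AIC, as fit_best_arima did internally
    arima = pm.auto_arima(log_return, start_p=1, start_q=1, max_p=6, max_q=6,
                          seasonal=False, stepwise=True)

    # GARCH(1,1) on the ARIMA residuals, as fit_garch did
    garch = arch_model(arima.resid(), p=1, q=1, rescale=False).fit(disp="off")

    # One-step-ahead conditional mean and variance, which predict_next_return combined
    next_mean = float(np.asarray(arima.predict(n_periods=1))[0])
    next_var = float(garch.forecast(horizon=1).variance.iloc[-1, 0])
    print(f"next return: {next_mean:.6f}, next variance: {next_var:.6f}")

The deprecated get_corr needs no replacement helper at all: given a DataFrame of closing prices, prices.corr() reproduces its output.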
@@ -722,119 +295,22 @@ def run_cadf_test(
      pprint.pprint(cadf)


- def _hurst(ts):
-     """
-     Returns the Hurst Exponent of the time series vector ts,
-     """
-     # Create the range of lag values
-     lags = range(2, 100)
-
-     # Calculate the array of the variances of the lagged differences
-     tau = [np.sqrt(np.std(np.subtract(ts[lag:], ts[:-lag]))) for lag in lags]
-
-     # Use a linear fit to estimate the Hurst Exponent
-     poly = np.polyfit(np.log(lags), np.log(tau), 1)
-
-     # Return the Hurst exponent from the polyfit output
-     return poly[0] * 2.0
-
-
- # Function to calculate Hurst Exponent
-
-
- def hurst(time_series):
-     H, c, data_range = compute_Hc(time_series, kind="price", simplified=True)
-     return H
-
-
  def run_hurst_test(symbol: str, start: str, end: str):
-     """
-     Calculates and prints the Hurst Exponent for a given stock's adjusted closing prices
-     within a specified date range, and for three generated series (Geometric Brownian Motion,
-     Mean-Reverting, and Trending).
-
-     The Hurst Exponent is used to determine the long-term memory of a time series.
-
-     Args:
-         symbol (str): A valid stock ticker symbol (e.g., 'AAPL').
-         start (str): The start date for the historical data in 'YYYY-MM-DD' format.
-         end (str): The end date for the historical data in 'YYYY-MM-DD' format.
-
-     Example:
-         >>> from bbstrader.tseries import run_hurst_test
-
-         >>> run_hurst_test('AAPL', '2023-01-01', '2023-12-31')
-     """
-     data = yf.download(
-         symbol,
-         start=start,
-         end=end,
-         progress=False,
-         multi_level_index=False,
-         auto_adjust=True,
+     warnings.warn(
+         "`run_hurst_test` is deprecated, use `hurst.compute_Hc` instead.",
+         DeprecationWarning,
      )

-     # Create a Geometric Brownian Motion, Mean-Reverting, and Trending Series
-     gbm = np.log(np.cumsum(np.random.randn(100000)) + 1000)
-     mr = np.log(np.random.randn(100000) + 1000)
-     tr = np.log(np.cumsum(np.random.randn(100000) + 1) + 1000)
-
-     # Output the Hurst Exponent for each of the series
-     print(f"\nHurst(GBM): {_hurst(gbm)}")
-     print(f"Hurst(MR): {_hurst(mr)}")
-     print(f"Hurst(TR): {_hurst(tr)}")
-     print(f"Hurst({symbol}): {hurst(data['Close'])}\n")
-

  def test_cointegration(ticker1, ticker2, start, end):
-     # Download historical data
-     stock_data_pair = yf.download(
-         [ticker1, ticker2],
-         start=start,
-         end=end,
-         progress=False,
-         multi_level_index=False,
-         auto_adjust=True,
-     )["Close"].dropna()
-
-     # Perform Johansen cointegration test
-     result = coint_johansen(stock_data_pair, det_order=0, k_ar_diff=1)
-
-     # Get the cointegration rank
-     traces_stats = result.lr1
-     print(f"\nTraces Stats: \n{traces_stats}")
-
-     # Get the critical values for 95% confidence level
-     critical_values = result.cvt
-     print(f"\nCritical Values: \n{critical_values}")
-
-     # Compare the cointegration rank with critical values
-     if traces_stats[0] > critical_values[:, 1].all():
-         print(f"\n{ticker1} and {ticker2} are cointegrated.\n")
-     else:
-         print(f"\nNo cointegration found for {ticker1} and {ticker2}.\n")
+     warnings.warn(
+         "`test_cointegration` is deprecated, see statsmodels.tsa.stattools.coint instead.",
+         DeprecationWarning,
+     )


  def run_coint_test(tickers: List[str], start: str, end: str) -> None:
-     """
-     Performs pairwise cointegration tests on a list of stock tickers over a specified date range.
-
-     For each unique pair of tickers, the function downloads historical adjusted closing prices and
-     tests for cointegration.
-
-     Args:
-         tickers (List[str]): A list of valid stock ticker symbols (e.g., ['AAPL', 'MSFT', 'GOOG']).
-         start (str): The start date for the historical data in 'YYYY-MM-DD' format.
-         end (str): The end date for the historical data in 'YYYY-MM-DD' format.
-
-     Example:
-         >>> from bbstrader.tseries import run_coint_test
-
-         >>> run_coint_test(['AAPL', 'MSFT', 'GOOG'], '2023-01-01', '2023-12-31')
-     """
-     # Loop through ticker combinations
-     for ticker1, ticker2 in combinations(tickers, 2):
-         test_cointegration(ticker1, ticker2, start, end)
+     test_cointegration()


  # *********************************
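This hunk's stubs name hurst.compute_Hc and statsmodels.tsa.stattools.coint as the replacements. Note also that the new run_coint_test body calls test_cointegration() with no arguments, so invoking it raises a TypeError. A sketch of both replacements on synthetic data; lengths, seed, and coefficients are illustrative:

    import numpy as np
    from hurst import compute_Hc
    from statsmodels.tsa.stattools import coint

    rng = np.random.default_rng(0)

    # Hurst exponent of a random-walk price series (H should be near 0.5)
    prices = 1000 + np.cumsum(rng.normal(0, 1, 2000))
    H, c, _ = compute_Hc(prices, kind="price", simplified=True)
    print(f"Hurst: {H:.3f}")

    # Engle-Granger test on a pair built to be cointegrated
    x = np.cumsum(rng.normal(0, 1, 2000))
    y = 0.8 * x + rng.normal(0, 1, 2000)  # y tracks x up to stationary noise
    t_stat, p_value, _ = coint(y, x)
    print(f"cointegration p-value: {p_value:.4f}")  # small value rejects "no cointegration"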
@@ -1086,158 +562,12 @@ class KalmanFilterModel:
          return None


- # ******************************************
- # ORNSTEIN UHLENBECK PROCESS *
- # ******************************************
-
-
  class OrnsteinUhlenbeck:
-     """
-     The Ornstein-Uhlenbeck process is a mathematical model
-     used to describe the behavior of a mean-reverting stochastic process.
-     We use it to model the price dynamics of an asset that tends
-     to revert to a long-term mean.
-
-     We Estimate the drift (θ), volatility (σ), and long-term mean (μ)
-     based on historical price data; then we Simulate the OU process
-     using the estimated parameters.
-
-     https://en.wikipedia.org/wiki/Ornstein%E2%80%93Uhlenbeck_process
-     """
-
      def __init__(self, prices: np.ndarray, returns: bool = True, timeframe: str = "D1"):
-         """
-         Initializes the OrnsteinUhlenbeck instance.
-
-         Args:
-             prices (np.ndarray) : Historical close prices.
-
-             retrurns (bool) : Use it to indicate weither
-                 you want to simulate the returns or your raw data
-
-             timeframe (str) : The time frame for the Historical prices
-                 (1m, 5m, 15m, 30m, 1h, 4h, D1)
-         """
-         self.prices = prices
-         if returns:
-             series = pd.Series(self.prices)
-             self.returns = series.pct_change().dropna().values
-         else:
-             self.returns = self.prices
-
-         time_frame_mapping = {
-             "1m": 1 / (24 * 60),  # 1 minute intervals
-             "5m": 5 / (24 * 60),  # 5 minute intervals
-             "15m": 15 / (24 * 60),  # 15 minute intervals
-             "30m": 30 / (24 * 60),  # 30 minute intervals
-             "1h": 1 / 24,  # 1 hour intervals
-             "4h": 4 / 24,  # 4 hour intervals
-             "D1": 1,  # Daily intervals
-         }
-         if timeframe not in time_frame_mapping:
-             raise ValueError("Unsupported time frame")
-         self.tf = time_frame_mapping[timeframe]
-
-         params = self.estimate_parameters()
-         self.mu_hat = params[0]  # Mean (μ)
-         self.theta_hat = params[1]  # Drift (θ)
-         self.sigma_hat = params[2]  # Volatility (σ)
-         print(f"Estimated μ: {self.mu_hat}")
-         print(f"Estimated θ: {self.theta_hat}")
-         print(f"Estimated σ: {self.sigma_hat}")
-
-     def ornstein_uhlenbeck(self, mu, theta, sigma, dt, X0, n):
-         """
-         Simulates the Ornstein-Uhlenbeck process.
-
-         Args:
-             mu (float): Estimated long-term mean.
-             theta (float): Estimated drift.
-             sigma (float): Estimated volatility.
-             dt (float): Time step.
-             X0 (float): Initial value.
-             n (int): Number of time steps.
-
-         Returns:
-             np.ndarray : Simulated process.
-         """
-         x = np.zeros(n)
-         x[0] = X0
-         for t in range(1, n):
-             dW = np.random.normal(loc=0, scale=np.sqrt(dt))
-             # O-U process differential equation
-             x[t] = x[t - 1] + (theta * (mu - x[t - 1]) * dt) + (sigma * dW)
-             # dW is a Wiener process
-             # (theta * (mu - x[t-1]) * dt) represents the mean-reverting tendency
-             # (sigma * dW) represents the random volatility
-         return x
-
-     def estimate_parameters(self):
-         """
-         Estimates the mean-reverting parameters (μ, θ, σ)
-         using the negative log-likelihood.
-
-         Returns:
-             Tuple: Estimated μ, θ, and σ.
-         """
-         initial_guess = [0, 0.1, np.std(self.returns)]
-         result = minimize(self._neg_log_likelihood, initial_guess, args=(self.returns,))
-         mu, theta, sigma = result.x
-         return mu, theta, sigma
-
-     def _neg_log_likelihood(self, params, returns):
-         """
-         Calculates the negative
-         log-likelihood for parameter estimation.
-
-         Args:
-             params (list): List of parameters [mu, theta, sigma].
-             returns (np.ndarray): Historical returns.
-
-         Returns:
-             float: Negative log-likelihood.
-         """
-         mu, theta, sigma = params
-         dt = self.tf
-         n = len(returns)
-         ou_simulated = self.ornstein_uhlenbeck(mu, theta, sigma, dt, 0, n + 1)
-         residuals = ou_simulated[1 : n + 1] - returns
-         neg_ll = 0.5 * np.sum(residuals**2) / sigma**2 + 0.5 * n * np.log(
-             2 * np.pi * sigma**2
+         warnings.warn(
+             "`OrnsteinUhlenbeck` is deprecated, use `statsmodels.tsa` instead.",
+             DeprecationWarning,
          )
-         return neg_ll
-
-     def simulate_process(self, returns=None, n=100, p=None):
-         """
-         Simulates the OU process multiple times .
-
-         Args:
-             returns (np.ndarray): Historical returns.
-             n (int): Number of simulations to perform.
-             p (int): Number of time steps.
-
-         Returns:
-             np.ndarray: 2D array representing simulated processes.
-         """
-         if returns is None:
-             returns = self.returns
-         if p is not None:
-             T = p
-         else:
-             T = len(returns)
-         dt = self.tf
-
-         dW_matrix = np.random.normal(loc=0, scale=np.sqrt(dt), size=(n, T))
-         simulations_matrix = np.zeros((n, T))
-         simulations_matrix[:, 0] = returns[-1]
-
-         for t in range(1, T):
-             simulations_matrix[:, t] = (
-                 simulations_matrix[:, t - 1]
-                 + self.theta_hat * (self.mu_hat - simulations_matrix[:, t - 1]) * dt
-                 + self.sigma_hat * dW_matrix[:, t]
-             )
-         return simulations_matrix


  def remove_correlated_assets(df: pd.DataFrame, cutoff=0.99):
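The OrnsteinUhlenbeck stub points only at statsmodels.tsa in general, so any migration is a judgment call; the removed class estimated (μ, θ, σ) by minimizing a custom likelihood with scipy.optimize.minimize. One conventional alternative, not the class's own estimator: an OU process sampled at a fixed step dt is exactly an AR(1), X_t = c + φX_{t-1} + ε with φ = e^(-θ·dt), so the parameters can be recovered from an AR(1) fit. A sketch under that assumption (the simulated path and its parameters are illustrative):

    import numpy as np
    from statsmodels.tsa.ar_model import AutoReg

    # Simulate an OU path so the recovery can be sanity-checked
    rng = np.random.default_rng(1)
    mu, theta, sigma, dt = 0.0, 2.0, 0.5, 1.0 / 252
    x = np.zeros(5000)
    for t in range(1, len(x)):
        x[t] = (x[t - 1] + theta * (mu - x[t - 1]) * dt
                + sigma * np.sqrt(dt) * rng.normal())

    # A discretely sampled OU process is AR(1): X_t = c + phi * X_{t-1} + eps
    res = AutoReg(x, lags=1, trend="c").fit()
    c, phi = res.params
    theta_hat = -np.log(phi) / dt   # mean-reversion speed
    mu_hat = c / (1 - phi)          # long-run mean
    sigma_hat = np.sqrt(res.sigma2 * 2 * theta_hat / (1 - phi**2))  # diffusion scale
    print(f"mu={mu_hat:.4f}  theta={theta_hat:.2f}  sigma={sigma_hat:.3f}")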