bbstrader 0.1.7__py3-none-any.whl → 0.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of bbstrader might be problematic; see the registry listing for more details.

bbstrader/tseries.py CHANGED
@@ -8,6 +8,8 @@ market analysis, and financial data exploration.
8
8
  """
9
9
  import numpy as np
10
10
  import pandas as pd
11
+ import pprint
12
+ import warnings
11
13
  import yfinance as yf
12
14
  from arch import arch_model
13
15
  from statsmodels.tsa.arima.model import ARIMA
@@ -25,13 +27,12 @@ from statsmodels.graphics.tsaplots import plot_acf
25
27
  from itertools import combinations
26
28
  from typing import Union, List, Tuple
27
29
  from statsmodels.stats.diagnostic import acorr_ljungbox
28
- import pprint
29
- import warnings
30
+ from arch.utility.exceptions import ConvergenceWarning as ArchWarning
31
+ from statsmodels.tools.sm_exceptions import ConvergenceWarning as StatsWarning
30
32
  warnings.filterwarnings("ignore")
33
+ warnings.filterwarnings("ignore", category=StatsWarning, module='statsmodels')
34
+ warnings.filterwarnings("ignore", category=ArchWarning, module='arch')
31
35
 
32
- # *******************************************
33
- # ARIMA AND GARCH MODELS *
34
- # *******************************************
35
36
 
36
37
  __all__ = [
37
38
  "load_and_prepare_data",
@@ -49,6 +50,10 @@ __all__ = [
49
50
  "OrnsteinUhlenbeckModel"
50
51
  ]
51
52
 
53
+ # *******************************************
54
+ # ARIMA AND GARCH MODELS *
55
+ # *******************************************
56
+
52
57
 
53
58
  def load_and_prepare_data(df: pd.DataFrame):
54
59
  """
@@ -99,7 +104,7 @@ def fit_best_arima(window_data: Union[pd.Series, np.ndarray]):
99
104
  """
100
105
  if isinstance(window_data, pd.Series):
101
106
  window_data = window_data.values
102
-
107
+
103
108
  window_data = window_data[~(np.isnan(window_data) | np.isinf(window_data))]
104
109
  # Fit ARIMA model with best parameters
105
110
  model = pm.auto_arima(
@@ -112,10 +117,6 @@ def fit_best_arima(window_data: Union[pd.Series, np.ndarray]):
112
117
  stepwise=True
113
118
  )
114
119
  final_order = model.order
115
- from arch.utility.exceptions import ConvergenceWarning as ArchConvergenceWarning
116
- from statsmodels.tools.sm_exceptions import ConvergenceWarning as StatsConvergenceWarning
117
- warnings.filterwarnings("ignore", category=StatsConvergenceWarning)
118
- warnings.filterwarnings("ignore", category=ArchConvergenceWarning)
119
120
  try:
120
121
  best_arima_model = ARIMA(
121
122
  window_data + 1e-5, order=final_order, missing='drop').fit()
@@ -183,9 +184,10 @@ def predict_next_return(arima_result, garch_result):
183
184
  if not isinstance(arima_pred, np.ndarray):
184
185
  pred = arima_pred.values[0]
185
186
  else:
186
- pred = arima_pred[0]
187
+ pred = arima_pred[0]
187
188
  return pred + next_volatility
188
189
 
190
+
189
191
  def get_prediction(window_data: Union[pd.Series, np.ndarray]):
190
192
  """
191
193
  Orchestrator function to get the next period's return prediction.
@@ -206,156 +208,420 @@ def get_prediction(window_data: Union[pd.Series, np.ndarray]):
206
208
  return prediction
207
209
 
208
210
 
209
- # *********************************************
210
- # STATS TEST (Cointegration , Mean Reverting)*
211
- # *********************************************
212
- def get_corr(tickers: Union[List[str], Tuple[str, ...]], start: str, end: str) -> None:
211
+ class ArimaGarchModel():
213
212
  """
214
- Calculates and prints the correlation matrix of the adjusted closing prices
215
- for a given list of stock tickers within a specified date range.
213
+ This class implements a time series model
214
+ that combines `ARIMA (AutoRegressive Integrated Moving Average)`
215
+ and `GARCH (Generalized Autoregressive Conditional Heteroskedasticity)` models
216
+ to predict future returns based on historical price data.
216
217
 
217
- Args:
218
- tickers (Union[List[str] , Tuple[str, ...]]):
219
- A list or tuple of valid stock tickers (e.g., ['AAPL', 'MSFT', 'GOOG']).
220
- start (str): The start date for the historical data in 'YYYY-MM-DD' format.
221
- end (str): The end date for the historical data in 'YYYY-MM-DD' format.
218
+ The model is implemented in the following steps:
219
+ 1. Data Preparation: Load and prepare the historical price data.
220
+ 2. Modeling: Fit the ARIMA model to the data and then fit the GARCH model to the residuals.
221
+ 3. Prediction: Predict the next return using the ARIMA model and the next volatility using the GARCH model.
222
+ 4. Trading Strategy: Execute the trading strategy based on the predictions.
223
+ 5. Vectorized Backtesting: Backtest the trading strategy using the historical data.
222
224
 
223
- Example:
224
- >>> from bbstrader.tseries import get_corr
225
- >>> get_corr(['AAPL', 'MSFT', 'GOOG'], '2023-01-01', '2023-12-31')
226
- """
227
- # Download historical data
228
- data = yf.download(tickers, start=start, end=end)['Adj Close']
225
+ Example:
226
+ >>> import yfinance as yf
227
+ >>> from bbstrader.strategies import ArimaGarchModel
228
+ >>> from bbstrader.tseries import load_and_prepare_data
229
229
 
230
- # Calculate correlation matrix
231
- correlation_matrix = data.corr()
230
+ >>> if __name__ == '__main__':
231
+ >>> # ARCH SPY Vectorized Backtest
232
+ >>> k = 252
233
+ >>> data = yf.download("SPY", start="2004-01-02", end="2015-12-31")
234
+ >>> arch = ArimaGarchModel("SPY", data, k=k)
235
+ >>> df = load_and_prepare_data(data)
236
+ >>> arch.show_arima_garch_results(df['diff_log_return'].values[-k:])
237
+ >>> arch.backtest_strategy()
238
+ """
232
239
 
233
- # Display the matrix
234
- print(correlation_matrix)
240
+ def __init__(self, symbol, data, k: int = 252):
241
+ """
242
+ Initializes the ArimaGarchModel class.
235
243
 
244
+ Args:
245
+ symbol (str): The ticker symbol for the financial instrument.
246
+ data (pd.DataFrame): `The raw dataset containing at least the 'Close' prices`.
247
+ k (int): The window size for rolling prediction in backtesting.
248
+ """
249
+ self.symbol = symbol
250
+ self.data = self.load_and_prepare_data(data)
251
+ self.k = k
236
252
 
237
- def plot_price_series(df: pd.DataFrame, ts1: str, ts2: str):
238
- """
239
- Plot both time series on the same line graph for
240
- the specified date range.
253
+ # Step 1: Data Preparation
254
+ def load_and_prepare_data(self, df):
255
+ """
256
+ Prepares the dataset by calculating logarithmic returns
257
+ and differencing if necessary.
241
258
 
242
- Args:
243
- df (pd.DataFrame):
244
- The DataFrame containing prices for each series
245
- ts1 (str): The first time series column name
246
- ts2 (str): The second time series column name
247
- """
248
- fig, ax = plt.subplots()
249
- ax.plot(df.index, df[ts1], label=ts1)
250
- ax.plot(df.index, df[ts2], label=ts2)
259
+ Args:
260
+ df (pd.DataFrame): `The raw dataset containing at least the 'Close' prices`.
251
261
 
252
- fig.autofmt_xdate()
253
- plt.xlabel('Month/Year')
254
- plt.ylabel('Price ($)')
255
- plt.title(f'{ts1} and {ts2} Daily Prices ')
256
- plt.legend()
257
- plt.show()
262
+ Returns:
263
+ pd.DataFrame: The dataset with additional columns
264
+ for log returns and differenced log returns.
265
+ """
266
+ return load_and_prepare_data(df)
258
267
 
268
+ # Step 2: Modeling (ARIMA + GARCH)
269
+ def fit_best_arima(self, window_data):
270
+ """
271
+ Fits the ARIMA model to the provided window of data,
272
+ selecting the best model based on AIC.
259
273
 
260
- def plot_scatter_series(df: pd.DataFrame, ts1: str, ts2: str):
261
- """
262
- Plot a scatter plot of both time series for
263
- via the provided DataFrame.
274
+ Args:
275
+ window_data (np.array): The dataset for a specific window period.
264
276
 
265
- Args:
266
- df (pd.DataFrame):
267
- The DataFrame containing prices for each series
268
- ts1 (str): The first time series column name
269
- ts2 (str): The second time series column name
270
- """
271
- plt.xlabel(f'{ts1} Price ($)')
272
- plt.ylabel(f'{ts2} Price ($)')
273
- plt.title(f'{ts1} and {ts2} Price Scatterplot')
274
- plt.scatter(df[ts1], df[ts2])
277
+ Returns:
278
+ ARIMA model: The best fitted ARIMA model based on AIC.
279
+ """
280
+ return fit_best_arima(window_data)
275
281
 
276
- # Plot the regression line
277
- plt.plot(df[ts1], results.fittedvalues,
278
- linestyle='--', color='red', linewidth=2,
279
- label='Regression Line'
280
- )
281
- plt.legend()
282
- plt.show()
282
+ def fit_garch(self, window_data):
283
+ """
284
+ Fits the GARCH model to the residuals of the best ARIMA model.
283
285
 
286
+ Args:
287
+ window_data (np.array): The dataset for a specific window period.
284
288
 
285
- def plot_residuals(df: pd.DataFrame):
286
- """
287
- Plot the residuals of OLS procedure for both
288
- time series.
289
+ Returns:
290
+ tuple: Contains the ARIMA result and GARCH result.
291
+ """
292
+ return fit_garch(window_data)
289
293
 
290
- Args:
291
- df (pd.DataFrame):
292
- The DataFrame containing prices for each series
293
- """
294
- fig, ax = plt.subplots()
295
- ax.plot(df.index, df["res"], label="Residuals")
294
+ def show_arima_garch_results(self, window_data, acf=True, test_resid=True):
295
+ """
296
+ Displays the ARIMA and GARCH model results, including plotting
297
+ ACF of residuals and conducting Box-Pierce and Ljung-Box tests.
296
298
 
297
- fig.autofmt_xdate()
298
- plt.xlabel('Month/Year')
299
- plt.ylabel('Price ($)')
300
- plt.title('Residual Plot')
301
- plt.legend()
302
- plt.show()
299
+ Args:
300
+ window_data (np.array): The dataset for a specific window period.
301
+ acf (bool, optional): If True, plot the ACF of residuals. Defaults to True.
303
302
 
303
+ test_resid (bool, optional):
304
+ If True, conduct Box-Pierce and Ljung-Box tests on residuals. Defaults to True.
305
+ """
306
+ arima_result = self.fit_best_arima(window_data)
307
+ resid = np.asarray(arima_result.resid)
308
+ resid = resid[~(np.isnan(resid) | np.isinf(resid))]
309
+ garch_model = arch_model(resid, p=1, q=1, rescale=False)
310
+ garch_result = garch_model.fit(disp='off')
311
+ residuals = garch_result.resid
304
312
 
305
- def run_cadf_test(pair: Union[List[str], Tuple[str, ...]], start: str, end: str) -> None:
306
- """
307
- Performs the Cointegration Augmented Dickey-Fuller (CADF) test on a pair of stock tickers
308
- over a specified date range to check for cointegration.
313
+ # Plot the ACF of the residuals
314
+ if acf:
315
+ fig = plt.figure(figsize=(12, 8))
316
+ # Plot the ACF of ARIMA residuals
317
+ ax1 = fig.add_subplot(211, ylabel='ACF')
318
+ plot_acf(resid, alpha=0.05, ax=ax1, title='ACF of ARIMA Residuals')
319
+ ax1.set_xlabel('Lags')
320
+ ax1.grid(True)
309
321
 
310
- The function downloads historical adjusted closing prices for the specified pair of stock tickers,
311
- calculates the optimal hedge ratio (beta) using Ordinary Least Squares (OLS) regression, plots the
312
- time series and their residuals, and finally performs the CADF test on the residuals.
322
+ # Plot the ACF of GARCH residuals on the same axes
323
+ ax2 = fig.add_subplot(212, ylabel='ACF')
324
+ plot_acf(residuals, alpha=0.05, ax=ax2,
325
+ title='ACF of GARCH Residuals')
326
+ ax2.set_xlabel('Lags')
327
+ ax2.grid(True)
313
328
 
314
- Args:
315
- pair (List[str] or Tuple[str, ...]):
316
- A list or tuple containing two valid stock tickers (e.g., ['AAPL', 'MSFT']).
317
- start (str): The start date for the historical data in 'YYYY-MM-DD' format.
318
- end (str): The end date for the historical data in 'YYYY-MM-DD' format.
329
+ # Plot the figure
330
+ plt.tight_layout()
331
+ plt.show()
319
332
 
320
- Example:
321
- >>> from bbstrader.tseries import run_cadf_test
322
- >>> run_cadf_test(['AAPL', 'MSFT'], '2023-01-01', '2023-12-31')
323
- >>> Regression Metrics:
324
- >>> Optimal Hedge Ratio (Beta): 2.2485845594120333
325
- >>> Result Parmas:
333
+ # Conduct Box-Pierce and Ljung-Box tests on the residuals
334
+ if test_resid:
335
+ print(arima_result.summary())
336
+ print(garch_result.summary())
337
+ bp_test = acorr_ljungbox(resid, return_df=True)
338
+ print("Box-Pierce and Ljung-Box Tests Results for ARIMA:\n", bp_test)
326
339
 
327
- >>> const -74.418034
328
- >>> AAPL 2.248585
329
- >>> dtype: float64
340
+ # Step 3: Prediction
341
+ def predict_next_return(self, arima_result, garch_result):
342
+ """
343
+ Predicts the next return using the ARIMA model
344
+ and the next volatility using the GARCH model.
330
345
 
331
- >>> Regression Summary:
332
- >>> OLS Regression Results
333
- >>> ==============================================================================
334
- >>> Dep. Variable: MSFT R-squared: 0.900
335
- >>> Model: OLS Adj. R-squared: 0.900
336
- >>> Method: Least Squares F-statistic: 2244.
337
- >>> Date: Sat, 20 Jul 2024 Prob (F-statistic): 2.95e-126
338
- >>> Time: 13:36:58 Log-Likelihood: -996.45
339
- >>> No. Observations: 250 AIC: 1997.
340
- >>> Df Residuals: 248 BIC: 2004.
341
- >>> Df Model: 1
342
- >>> Covariance Type: nonrobust
343
- >>> ==============================================================================
344
- >>> coef std err t P>|t| [0.025 0.975]
345
- >>> ------------------------------------------------------------------------------
346
- >>> const -74.4180 8.191 -9.085 0.000 -90.551 -58.286
347
- >>> AAPL 2.2486 0.047 47.369 0.000 2.155 2.342
348
- >>> ==============================================================================
349
- >>> Omnibus: 4.923 Durbin-Watson: 0.121
350
- >>> Prob(Omnibus): 0.085 Jarque-Bera (JB): 4.862
351
- >>> Skew: 0.342 Prob(JB): 0.0879
352
- >>> Kurtosis: 2.993 Cond. No. 1.71e+03
353
- >>> ==============================================================================
346
+ Args:
347
+ arima_result (ARIMA model): The ARIMA model result.
348
+ garch_result (GARCH model): The GARCH model result.
354
349
 
355
- >>> Notes:
356
- >>> [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
357
- >>> [2] The condition number is large, 1.71e+03. This might indicate that there are
358
- >>> strong multicollinearity or other numerical problems.
350
+ Returns:
351
+ float: The predicted next return.
352
+ """
353
+ return predict_next_return(arima_result, garch_result)
354
+
355
+ def get_prediction(self, window_data):
356
+ """
357
+ Generates a prediction for the next return based on a window of data.
358
+
359
+ Args:
360
+ window_data (np.array): The dataset for a specific window period.
361
+
362
+ Returns:
363
+ float: The predicted next return.
364
+ """
365
+ return get_prediction(window_data)
366
+
367
+ def calculate_signals(self, window_data):
368
+ """
369
+ Calculates the trading signal based on the prediction.
370
+
371
+ Args:
372
+ window_data (np.array): The dataset for a specific window period.
373
+
374
+ Returns:
375
+ str: The trading signal ('LONG', 'SHORT', or None).
376
+ """
377
+ prediction = self.get_prediction(window_data)
378
+ if prediction > 0:
379
+ signal = "LONG"
380
+ elif prediction < 0:
381
+ signal = "SHORT"
382
+ else:
383
+ signal = None
384
+ return signal
385
+
386
+ # Step 4: Trading Strategy
387
+
388
+ def execute_trading_strategy(self, predictions):
389
+ """
390
+ Executes the trading strategy based on a list
391
+ of predictions, determining positions to take.
392
+
393
+ Args:
394
+ predictions (list): A list of predicted returns.
395
+
396
+ Returns:
397
+ list: A list of positions (1 for long, -1 for short; the previous position is held when the prediction is 0).
398
+ """
399
+ positions = [] # Long if 1, Short if -1
400
+ previous_position = 0 # Initial position
401
+ for prediction in predictions:
402
+ if prediction > 0:
403
+ current_position = 1 # Long
404
+ elif prediction < 0:
405
+ current_position = -1 # Short
406
+ else:
407
+ current_position = previous_position # Hold previous position
408
+ positions.append(current_position)
409
+ previous_position = current_position
410
+
411
+ return positions
412
+
413
+ # Step 5: Vectorized Backtesting
414
+ def generate_predictions(self):
415
+ """
416
+ Generator that yields predictions one by one.
417
+ """
418
+ data = self.data
419
+ window_size = self.k
420
+ for i in range(window_size, len(data)):
421
+ print(
422
+ f"Processing window {i - window_size + 1}/{len(data) - window_size}...")
423
+ window_data = data['diff_log_return'].iloc[i-window_size:i]
424
+ next_return = self.get_prediction(window_data)
425
+ yield next_return
426
+
427
+ def backtest_strategy(self):
428
+ """
429
+ Performs a backtest of the strategy over
430
+ the entire dataset, plotting cumulative returns.
431
+ """
432
+ data = self.data
433
+ window_size = self.k
434
+ print(
435
+ f"Starting backtesting for {self.symbol}\n"
436
+ f"Window size {window_size}.\n"
437
+ f"Total iterations: {len(data) - window_size}.\n")
438
+ predictions_generator = self.generate_predictions()
439
+
440
+ positions = self.execute_trading_strategy(predictions_generator)
441
+
442
+ strategy_returns = np.array(
443
+ positions[:-1]) * data['log_return'].iloc[window_size+1:].values
444
+ buy_and_hold = data['log_return'].iloc[window_size+1:].values
445
+ buy_and_hold_returns = np.cumsum(buy_and_hold)
446
+ cumulative_returns = np.cumsum(strategy_returns)
447
+ dates = data.index[window_size+1:]
448
+ self.plot_cumulative_returns(
449
+ cumulative_returns, buy_and_hold_returns, dates)
450
+
451
+ print("\nBacktesting completed !!")
452
+
453
+ # Function to plot the cumulative returns
454
+ def plot_cumulative_returns(self, strategy_returns, buy_and_hold_returns, dates):
455
+ """
456
+ Plots the cumulative returns of the ARIMA+GARCH strategy against
457
+ a buy-and-hold strategy.
458
+
459
+ Args:
460
+ strategy_returns (np.array): Cumulative returns from the strategy.
461
+ buy_and_hold_returns (np.array): Cumulative returns from a buy-and-hold strategy.
462
+ dates (pd.Index): The dates corresponding to the returns.
463
+ """
464
+ plt.figure(figsize=(14, 7))
465
+ plt.plot(dates, strategy_returns, label='ARIMA+GARCH ', color='blue')
466
+ plt.plot(dates, buy_and_hold_returns, label='Buy & Hold', color='red')
467
+ plt.xlabel('Time')
468
+ plt.ylabel('Cumulative Returns')
469
+ plt.title(f'ARIMA+GARCH Strategy vs. Buy & Hold on ({self.symbol})')
470
+ plt.legend()
471
+ plt.grid(True)
472
+ plt.show()
473
+
474
+
475
+ # *********************************************
476
+ # STATS TEST (Cointegration, Mean Reverting) *
477
+ # *********************************************
478
+ def get_corr(tickers: Union[List[str], Tuple[str, ...]], start: str, end: str) -> None:
479
+ """
480
+ Calculates and prints the correlation matrix of the adjusted closing prices
481
+ for a given list of stock tickers within a specified date range.
482
+
483
+ Args:
484
+ tickers (Union[List[str] , Tuple[str, ...]]):
485
+ A list or tuple of valid stock tickers (e.g., ['AAPL', 'MSFT', 'GOOG']).
486
+ start (str): The start date for the historical data in 'YYYY-MM-DD' format.
487
+ end (str): The end date for the historical data in 'YYYY-MM-DD' format.
488
+
489
+ Example:
490
+ >>> from bbstrader.tseries import get_corr
491
+ >>> get_corr(['AAPL', 'MSFT', 'GOOG'], '2023-01-01', '2023-12-31')
492
+ """
493
+ # Download historical data
494
+ data = yf.download(tickers, start=start, end=end)['Adj Close']
495
+
496
+ # Calculate correlation matrix
497
+ correlation_matrix = data.corr()
498
+
499
+ # Display the matrix
500
+ print(correlation_matrix)
501
+
502
+
503
+ def plot_price_series(df: pd.DataFrame, ts1: str, ts2: str):
504
+ """
505
+ Plot both time series on the same line graph for
506
+ the specified date range.
507
+
508
+ Args:
509
+ df (pd.DataFrame):
510
+ The DataFrame containing prices for each series
511
+ ts1 (str): The first time series column name
512
+ ts2 (str): The second time series column name
513
+ """
514
+ fig, ax = plt.subplots()
515
+ ax.plot(df.index, df[ts1], label=ts1)
516
+ ax.plot(df.index, df[ts2], label=ts2)
517
+
518
+ fig.autofmt_xdate()
519
+ plt.xlabel('Month/Year')
520
+ plt.ylabel('Price ($)')
521
+ plt.title(f'{ts1} and {ts2} Daily Prices ')
522
+ plt.legend()
523
+ plt.show()
524
+
525
+
526
+ def plot_scatter_series(df: pd.DataFrame, ts1: str, ts2: str):
527
+ """
528
+ Plot a scatter plot of both time series
529
+ via the provided DataFrame.
530
+
531
+ Args:
532
+ df (pd.DataFrame):
533
+ The DataFrame containing prices for each series
534
+ ts1 (str): The first time series column name
535
+ ts2 (str): The second time series column name
536
+ """
537
+ plt.xlabel(f'{ts1} Price ($)')
538
+ plt.ylabel(f'{ts2} Price ($)')
539
+ plt.title(f'{ts1} and {ts2} Price Scatterplot')
540
+ plt.scatter(df[ts1], df[ts2])
541
+
542
+ # Plot the regression line
543
+ plt.plot(df[ts1], results.fittedvalues,
544
+ linestyle='--', color='red', linewidth=2,
545
+ label='Regression Line'
546
+ )
547
+ plt.legend()
548
+ plt.show()
549
+
550
+
551
+ def plot_residuals(df: pd.DataFrame):
552
+ """
553
+ Plot the residuals of OLS procedure for both
554
+ time series.
555
+
556
+ Args:
557
+ df (pd.DataFrame):
558
+ The DataFrame containing prices for each series
559
+ """
560
+ fig, ax = plt.subplots()
561
+ ax.plot(df.index, df["res"], label="Residuals")
562
+
563
+ fig.autofmt_xdate()
564
+ plt.xlabel('Month/Year')
565
+ plt.ylabel('Price ($)')
566
+ plt.title('Residual Plot')
567
+ plt.legend()
568
+ plt.show()
569
+
570
+
571
+ def run_cadf_test(pair: Union[List[str], Tuple[str, ...]], start: str, end: str) -> None:
572
+ """
573
+ Performs the Cointegration Augmented Dickey-Fuller (CADF) test on a pair of stock tickers
574
+ over a specified date range to check for cointegration.
575
+
576
+ The function downloads historical adjusted closing prices for the specified pair of stock tickers,
577
+ calculates the optimal hedge ratio (beta) using Ordinary Least Squares (OLS) regression, plots the
578
+ time series and their residuals, and finally performs the CADF test on the residuals.
579
+
580
+ Args:
581
+ pair (List[str] or Tuple[str, ...]):
582
+ A list or tuple containing two valid stock tickers (e.g., ['AAPL', 'MSFT']).
583
+ start (str): The start date for the historical data in 'YYYY-MM-DD' format.
584
+ end (str): The end date for the historical data in 'YYYY-MM-DD' format.
585
+
586
+ Example:
587
+ >>> from bbstrader.tseries import run_cadf_test
588
+ >>> run_cadf_test(['AAPL', 'MSFT'], '2023-01-01', '2023-12-31')
589
+ >>> Regression Metrics:
590
+ >>> Optimal Hedge Ratio (Beta): 2.2485845594120333
591
+ >>> Result Params:
592
+
593
+ >>> const -74.418034
594
+ >>> AAPL 2.248585
595
+ >>> dtype: float64
596
+
597
+ >>> Regression Summary:
598
+ >>> OLS Regression Results
599
+ >>> ==============================================================================
600
+ >>> Dep. Variable: MSFT R-squared: 0.900
601
+ >>> Model: OLS Adj. R-squared: 0.900
602
+ >>> Method: Least Squares F-statistic: 2244.
603
+ >>> Date: Sat, 20 Jul 2024 Prob (F-statistic): 2.95e-126
604
+ >>> Time: 13:36:58 Log-Likelihood: -996.45
605
+ >>> No. Observations: 250 AIC: 1997.
606
+ >>> Df Residuals: 248 BIC: 2004.
607
+ >>> Df Model: 1
608
+ >>> Covariance Type: nonrobust
609
+ >>> ==============================================================================
610
+ >>> coef std err t P>|t| [0.025 0.975]
611
+ >>> ------------------------------------------------------------------------------
612
+ >>> const -74.4180 8.191 -9.085 0.000 -90.551 -58.286
613
+ >>> AAPL 2.2486 0.047 47.369 0.000 2.155 2.342
614
+ >>> ==============================================================================
615
+ >>> Omnibus: 4.923 Durbin-Watson: 0.121
616
+ >>> Prob(Omnibus): 0.085 Jarque-Bera (JB): 4.862
617
+ >>> Skew: 0.342 Prob(JB): 0.0879
618
+ >>> Kurtosis: 2.993 Cond. No. 1.71e+03
619
+ >>> ==============================================================================
620
+
621
+ >>> Notes:
622
+ >>> [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
623
+ >>> [2] The condition number is large, 1.71e+03. This might indicate that there are
624
+ >>> strong multicollinearity or other numerical problems.
359
625
 
360
626
  >>> Cointegration TEST Results:
361
627
  >>> (np.float64(-3.204126144947765),
@@ -621,268 +887,138 @@ def run_kalman_filter(
621
887
  draw_slope_intercept_changes(prices, state_means)
622
888
 
623
889
 
624
- class ArimaGarchModel():
890
+ class KalmanFilterModel():
625
891
  """
626
- This class implements a time serie model
627
- that combines `ARIMA (AutoRegressive Integrated Moving Average)`
628
- and `GARCH (Generalized Autoregressive Conditional Heteroskedasticity)` models
629
- to predict future returns based on historical price data.
630
-
631
- The model is implemented in the following steps:
632
- 1. Data Preparation: Load and prepare the historical price data.
633
- 2. Modeling: Fit the ARIMA model to the data and then fit the GARCH model to the residuals.
634
- 3. Prediction: Predict the next return using the ARIMA model and the next volatility using the GARCH model.
635
- 4. Trading Strategy: Execute the trading strategy based on the predictions.
636
- 5. Vectorized Backtesting: Backtest the trading strategy using the historical data.
637
-
638
- Exemple:
639
- >>> import yfinance as yf
640
- >>> from bbstrader.strategies import ArimaGarchModel
641
- >>> from bbstrader.tseries import load_and_prepare_data
892
+ Implements a Kalman Filter model, a recursive algorithm used for estimating
893
+ the state of a linear dynamic system from a series of noisy measurements.
894
+ It's designed to process market data, estimate dynamic parameters such as
895
+ the slope and intercept of price relationships,
896
+ and the forecast error and standard deviation of its predictions.
642
897
 
643
- >>> if __name__ == '__main__':
644
- >>> # ARCH SPY Vectorize Backtest
645
- >>> k = 252
646
- >>> data = yf.download("SPY", start="2004-01-02", end="2015-12-31")
647
- >>> arch = ArimaGarchModel("SPY", data, k=k)
648
- >>> df = load_and_prepare_data(data)
649
- >>> arch.show_arima_garch_results(df['diff_log_return'].values[-k:])
650
- >>> arch.backtest_strategy()
898
+ You can learn more here: https://en.wikipedia.org/wiki/Kalman_filter
651
899
  """
652
900
 
653
- def __init__(self, symbol, data, k: int = 252):
654
- """
655
- Initializes the ArimaGarchStrategy class.
656
-
657
- Args:
658
- symbol (str): The ticker symbol for the financial instrument.
659
- data (pd.DataFrame): `The raw dataset containing at least the 'Close' prices`.
660
- k (int): The window size for rolling prediction in backtesting.
661
- """
662
- self.symbol = symbol
663
- self.data = self.load_and_prepare_data(data)
664
- self.k = k
665
-
666
- # Step 1: Data Preparation
667
- def load_and_prepare_data(self, df):
668
- """
669
- Prepares the dataset by calculating logarithmic returns
670
- and differencing if necessary.
671
-
672
- Args:
673
- df (pd.DataFrame): `The raw dataset containing at least the 'Close' prices`.
674
-
675
- Returns:
676
- pd.DataFrame: The dataset with additional columns
677
- for log returns and differenced log returns.
678
- """
679
- return load_and_prepare_data(df)
680
-
681
- # Step 2: Modeling (ARIMA + GARCH)
682
- def fit_best_arima(self, window_data):
683
- """
684
- Fits the ARIMA model to the provided window of data,
685
- selecting the best model based on AIC.
686
-
687
- Args:
688
- window_data (np.array): The dataset for a specific window period.
689
-
690
- Returns:
691
- ARIMA model: The best fitted ARIMA model based on AIC.
692
- """
693
- return fit_best_arima(window_data)
694
-
695
- def fit_garch(self, window_data):
696
- """
697
- Fits the GARCH model to the residuals of the best ARIMA model.
698
-
699
- Args:
700
- window_data (np.array): The dataset for a specific window period.
701
-
702
- Returns:
703
- tuple: Contains the ARIMA result and GARCH result.
704
- """
705
- return fit_garch(window_data)
706
-
707
- def show_arima_garch_results(self, window_data, acf=True, test_resid=True):
901
+ def __init__(self, tickers: list | tuple, **kwargs):
708
902
  """
709
- Displays the ARIMA and GARCH model results, including plotting
710
- ACF of residuals and conducting , Box-Pierce and Ljung-Box tests.
903
+ Initializes the Kalman Filter strategy.
711
904
 
712
905
  Args:
713
- window_data (np.array): The dataset for a specific window period.
714
- acf (bool, optional): If True, plot the ACF of residuals. Defaults to True.
715
-
716
- test_resid (bool, optional):
717
- If True, conduct Box-Pierce and Ljung-Box tests on residuals. Defaults to True.
718
- """
719
- arima_result = self.fit_best_arima(window_data)
720
- resid = np.asarray(arima_result.resid)
721
- resid = resid[~(np.isnan(resid) | np.isinf(resid))]
722
- garch_model = arch_model(resid, p=1, q=1, rescale=False)
723
- garch_result = garch_model.fit(disp='off')
724
- residuals = garch_result.resid
725
-
726
- # TODO : Plot the ACF of the residuals
727
- if acf:
728
- fig = plt.figure(figsize=(12, 8))
729
- # Plot the ACF of ARIMA residuals
730
- ax1 = fig.add_subplot(211, ylabel='ACF')
731
- plot_acf(resid, alpha=0.05, ax=ax1, title='ACF of ARIMA Residuals')
732
- ax1.set_xlabel('Lags')
733
- ax1.grid(True)
734
-
735
- # Plot the ACF of GARCH residuals on the same axes
736
- ax2 = fig.add_subplot(212, ylabel='ACF')
737
- plot_acf(residuals, alpha=0.05, ax=ax2,
738
- title='ACF of GARCH Residuals')
739
- ax2.set_xlabel('Lags')
740
- ax2.grid(True)
741
-
742
- # Plot the figure
743
- plt.tight_layout()
744
- plt.show()
745
-
746
- # TODO : Conduct Box-Pierce and Ljung-Box Tests of the residuals
747
- if test_resid:
748
- print(arima_result.summary())
749
- print(garch_result.summary())
750
- bp_test = acorr_ljungbox(resid, return_df=True)
751
- print("Box-Pierce and Ljung-Box Tests Results for ARIMA:\n", bp_test)
906
+ tickers :
907
+ A list or tuple of ticker symbols representing financial instruments.
752
908
 
753
- # Step 3: Prediction
754
- def predict_next_return(self, arima_result, garch_result):
909
+ kwargs : Keyword arguments for additional parameters,
910
+ specifically `delta` and `vt`
755
911
  """
756
- Predicts the next return using the ARIMA model
757
- and the next volatility using the GARCH model.
758
-
759
- Args:
760
- arima_result (ARIMA model): The ARIMA model result.
761
- garch_result (GARCH model): The GARCH model result.
912
+ self.tickers = tickers
913
+ assert self.tickers is not None
914
+ self.latest_prices = np.array([-1.0, -1.0])
915
+ self.delta = kwargs.get("delta", 1e-4)
916
+ self.wt = self.delta/(1-self.delta) * np.eye(2)
917
+ self.vt = kwargs.get("vt", 1e-3)
918
+ self.theta = np.zeros(2)
919
+ self.P = np.zeros((2, 2))
920
+ self.R = None
921
+ self.kf = self._init_kalman()
762
922
 
763
- Returns:
764
- float: The predicted next return.
923
+ def _init_kalman(self):
765
924
  """
766
- return predict_next_return(arima_result, garch_result)
767
-
768
- def get_prediction(self, window_data):
925
+ Initializes and returns a Kalman Filter configured
926
+ for the trading strategy. The filter is set up with initial
927
+ state and covariance, state transition matrix, process noise
928
+ and measurement noise covariances.
769
929
  """
770
- Generates a prediction for the next return based on a window of data.
771
-
772
- Args:
773
- window_data (np.array): The dataset for a specific window period.
930
+ kf = KalmanFilter(dim_x=2, dim_z=1)
931
+ kf.x = np.zeros((2, 1)) # Initial state
932
+ kf.P = self.P # Initial covariance
933
+ kf.F = np.eye(2) # State transition matrix
934
+ kf.Q = self.wt # Process noise covariance
935
+ kf.R = 1. # Scalar measurement noise covariance
774
936
 
775
- Returns:
776
- float: The predicted next return.
777
- """
778
- return get_prediction(window_data)
937
+ return kf
779
938
 
780
- def calculate_signals(self, window_data):
939
+ def calc_slope_intercep(self, prices: np.ndarray):
781
940
  """
782
- Calculates the trading signal based on the prediction.
941
+ Calculates and returns the slope and intercept
942
+ of the relationship between the provided prices using the Kalman Filter.
943
+ This method updates the filter with the latest price and returns
944
+ the estimated slope and intercept.
783
945
 
784
946
  Args:
785
- window_data (np.array): The dataset for a specific window period.
947
+ prices : A numpy array of prices for two financial instruments.
786
948
 
787
- Returns:
788
- str: The trading signal ('LONG', 'SHORT', or None).
789
- """
790
- prediction = self.get_prediction(window_data)
791
- if prediction > 0:
792
- signal = "LONG"
793
- elif prediction < 0:
794
- signal = "SHORT"
795
- else:
796
- signal = None
797
- return signal
949
+ Returns:
950
+ A tuple containing the slope and intercept of the relationship
951
+ """
952
+ kf = self.kf
953
+ kf.H = np.array([[prices[1], 1.0]])
954
+ kf.predict()
955
+ kf.update(prices[0])
956
+ slope = kf.x.copy().flatten()[0]
957
+ intercept = kf.x.copy().flatten()[1]
798
958
 
799
- # Step 4: Trading Strategy
959
+ return slope, intercept
800
960
 
801
- def execute_trading_strategy(self, predictions):
961
+ def calculate_etqt(self, prices: np.ndarray):
802
962
  """
803
- Executes the trading strategy based on a list
804
- of predictions, determining positions to take.
963
+ Calculates the forecast error and standard deviation of the predictions
964
+ using the Kalman Filter.
805
965
 
806
966
  Args:
807
- predictions (list): A list of predicted returns.
967
+ prices : A numpy array of prices for two financial instruments.
808
968
 
809
969
  Returns:
810
- list: A list of positions (1 for 'LONG', -1 for 'SHORT', 0 for 'HOLD').
970
+ A tuple containing the forecast error and standard deviation of the predictions.
811
971
  """
812
- positions = [] # Long if 1, Short if -1
813
- previous_position = 0 # Initial position
814
- for prediction in predictions:
815
- if prediction > 0:
816
- current_position = 1 # Long
817
- elif prediction < 0:
818
- current_position = -1 # Short
819
- else:
820
- current_position = previous_position # Hold previous position
821
- positions.append(current_position)
822
- previous_position = current_position
823
972
 
824
- return positions
973
+ self.latest_prices[0] = prices[0]
974
+ self.latest_prices[1] = prices[1]
825
975
 
826
- # Step 5: Vectorized Backtesting
827
- def generate_predictions(self):
828
- """
829
- Generator that yields predictions one by one.
830
- """
831
- data = self.data
832
- window_size = self.k
833
- for i in range(window_size, len(data)):
834
- print(
835
- f"Processing window {i - window_size + 1}/{len(data) - window_size}...")
836
- window_data = data['diff_log_return'].iloc[i-window_size:i]
837
- next_return = self.get_prediction(window_data)
838
- yield next_return
976
+ if all(self.latest_prices > -1.0):
977
+ slope, intercept = self.calc_slope_intercep(self.latest_prices)
839
978
 
840
- def backtest_strategy(self):
841
- """
842
- Performs a backtest of the strategy over
843
- the entire dataset, plotting cumulative returns.
844
- """
845
- data = self.data
846
- window_size = self.k
847
- print(
848
- f"Starting backtesting for {self.symbol}\n"
849
- f"Window size {window_size}.\n"
850
- f"Total iterations: {len(data) - window_size}.\n")
851
- predictions_generator = self.generate_predictions()
979
+ self.theta[0] = slope
980
+ self.theta[1] = intercept
852
981
 
853
- positions = self.execute_trading_strategy(predictions_generator)
982
+ # Create the observation matrix of the latest prices
983
+ # of Y and the intercept value (1.0) as well as the
984
+ # scalar value of the latest price from X
985
+ F = np.asarray([self.latest_prices[0], 1.0]).reshape((1, 2))
986
+ y = self.latest_prices[1]
854
987
 
855
- strategy_returns = np.array(
856
- positions[:-1]) * data['log_return'].iloc[window_size+1:].values
857
- buy_and_hold = data['log_return'].iloc[window_size+1:].values
858
- buy_and_hold_returns = np.cumsum(buy_and_hold)
859
- cumulative_returns = np.cumsum(strategy_returns)
860
- dates = data.index[window_size+1:]
861
- self.plot_cumulative_returns(
862
- cumulative_returns, buy_and_hold_returns, dates)
988
+ # The prior value of the states {\theta_t} is
989
+ # distributed as a multivariate Gaussian with
990
+ # mean a_t and variance-covariance {R_t}
991
+ if self.R is not None:
992
+ self.R = self.C + self.wt
993
+ else:
994
+ self.R = np.zeros((2, 2))
863
995
 
864
- print("\nBacktesting completed !!")
996
+ # Calculate the Kalman Filter update
997
+ # ---------------------------------
998
+ # Calculate prediction of new observation
999
+ # as well as forecast error of that prediction
1000
+ yhat = F.dot(self.theta)
1001
+ et = y - yhat
865
1002
 
866
- # Function to plot the cumulative returns
867
- def plot_cumulative_returns(self, strategy_returns, buy_and_hold_returns, dates):
868
- """
869
- Plots the cumulative returns of the ARIMA+GARCH strategy against
870
- a buy-and-hold strategy.
1003
+ # {Q_t} is the variance of the prediction of
1004
+ # observations and hence sqrt_Qt is the
1005
+ # standard deviation of the predictions
1006
+ Qt = F.dot(self.R).dot(F.T) + self.vt
1007
+ sqrt_Qt = np.sqrt(Qt)
871
1008
 
872
- Args:
873
- strategy_returns (np.array): Cumulative returns from the strategy.
874
- buy_and_hold_returns (np.array): Cumulative returns from a buy-and-hold strategy.
875
- dates (pd.Index): The dates corresponding to the returns.
876
- """
877
- plt.figure(figsize=(14, 7))
878
- plt.plot(dates, strategy_returns, label='ARIMA+GARCH ', color='blue')
879
- plt.plot(dates, buy_and_hold_returns, label='Buy & Hold', color='red')
880
- plt.xlabel('Time')
881
- plt.ylabel('Cumulative Returns')
882
- plt.title(f'ARIMA+GARCH Strategy vs. Buy & Hold on ({self.symbol})')
883
- plt.legend()
884
- plt.grid(True)
885
- plt.show()
1009
+ # The posterior value of the states {\theta_t} is
1010
+ # distributed as a multivariate Gaussian with mean
1011
+ # {m_t} and variance-covariance {C_t}
1012
+ At = self.R.dot(F.T) / Qt
1013
+ self.theta = self.theta + At.flatten() * et
1014
+ self.C = self.R - At * F.dot(self.R)
1015
+ return (et, sqrt_Qt)
1016
+ else:
1017
+ return None
1018
+
1019
+ # ******************************************
1020
+ # ORNSTEIN UHLENBECK PROCESS *
1021
+ # ******************************************
886
1022
 
887
1023
 
888
1024
  class OrnsteinUhlenbeck():
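
The commented recursion in `calculate_etqt` above corresponds to the standard Kalman update for a dynamic linear regression. As a sketch in equation form, using the names from the code (theta for the state, W for `self.wt`, v for `self.vt`, F for the 1x2 observation row):

```latex
R_t = C_{t-1} + W                                        % prior state covariance (self.R)
\hat{y}_t = F_t \theta_t, \qquad e_t = y_t - \hat{y}_t   % prediction and forecast error
Q_t = F_t R_t F_t^{\top} + v                             % prediction variance; \sqrt{Q_t} is returned
A_t = R_t F_t^{\top} Q_t^{-1}                            % Kalman gain
\theta_t \leftarrow \theta_t + A_t e_t, \qquad C_t = R_t - A_t F_t R_t   % posterior mean and covariance
```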
@@ -1044,133 +1180,3 @@ class OrnsteinUhlenbeck():
1044
1180
  self.sigma_hat * dW_matrix[:, t]
1045
1181
  )
1046
1182
  return simulations_matrix
1047
-
1048
-
1049
- class KalmanFilterModel():
1050
- """
1051
- Implements a Kalman Filter model a recursive algorithm used for estimating
1052
- the state of a linear dynamic system from a series of noisy measurements.
1053
- It's designed to process market data, estimate dynamic parameters such as
1054
- the slope and intercept of price relationships,
1055
- forecast error and standard deviation of the predictions
1056
-
1057
- You can learn more here https://en.wikipedia.org/wiki/Kalman_filter
1058
- """
1059
-
1060
- def __init__(self, tickers: list | tuple, **kwargs):
1061
- """
1062
- Initializes the Kalman Filter strategy.
1063
-
1064
- Args:
1065
- tickers :
1066
- A list or tuple of ticker symbols representing financial instruments.
1067
-
1068
- kwargs : Keyword arguments for additional parameters,
1069
- specifically `delta` and `vt`
1070
- """
1071
- self.tickers = tickers
1072
- assert self.tickers is not None
1073
- self.latest_prices = np.array([-1.0, -1.0])
1074
- self.delta = kwargs.get("delta", 1e-4)
1075
- self.wt = self.delta/(1-self.delta) * np.eye(2)
1076
- self.vt = kwargs.get("vt", 1e-3)
1077
- self.theta = np.zeros(2)
1078
- self.P = np.zeros((2, 2))
1079
- self.R = None
1080
- self.kf = self._init_kalman()
1081
-
1082
- def _init_kalman(self):
1083
- """
1084
- Initializes and returns a Kalman Filter configured
1085
- for the trading strategy. The filter is set up with initial
1086
- state and covariance, state transition matrix, process noise
1087
- and measurement noise covariances.
1088
- """
1089
- kf = KalmanFilter(dim_x=2, dim_z=1)
1090
- kf.x = np.zeros((2, 1)) # Initial state
1091
- kf.P = self.P # Initial covariance
1092
- kf.F = np.eye(2) # State transition matrix
1093
- kf.Q = self.wt # Process noise covariance
1094
- kf.R = 1. # Scalar measurement noise covariance
1095
-
1096
- return kf
1097
-
1098
- def calc_slope_intercep(self, prices: np.ndarray):
1099
- """
1100
- Calculates and returns the slope and intercept
1101
- of the relationship between the provided prices using the Kalman Filter.
1102
- This method updates the filter with the latest price and returns
1103
- the estimated slope and intercept.
1104
-
1105
- Args:
1106
- prices : A numpy array of prices for two financial instruments.
1107
-
1108
- Returns:
1109
- A tuple containing the slope and intercept of the relationship
1110
- """
1111
- kf = self.kf
1112
- kf.H = np.array([[prices[1], 1.0]])
1113
- kf.predict()
1114
- kf.update(prices[0])
1115
- slope = kf.x.copy().flatten()[0]
1116
- intercept = kf.x.copy().flatten()[1]
1117
-
1118
- return slope, intercept
1119
-
1120
- def calculate_etqt(self, prices: np.ndarray):
1121
- """
1122
- Calculates the forecast error and standard deviation of the predictions
1123
- using the Kalman Filter.
1124
-
1125
- Args:
1126
- prices : A numpy array of prices for two financial instruments.
1127
-
1128
- Returns:
1129
- A tuple containing the forecast error and standard deviation of the predictions.
1130
- """
1131
-
1132
- self.latest_prices[0] = prices[0]
1133
- self.latest_prices[1] = prices[1]
1134
-
1135
- if all(self.latest_prices > -1.0):
1136
- slope, intercept = self.calc_slope_intercep(self.latest_prices)
1137
-
1138
- self.theta[0] = slope
1139
- self.theta[1] = intercept
1140
-
1141
- # Create the observation matrix of the latest prices
1142
- # of Y and the intercept value (1.0) as well as the
1143
- # scalar value of the latest price from X
1144
- F = np.asarray([self.latest_prices[0], 1.0]).reshape((1, 2))
1145
- y = self.latest_prices[1]
1146
-
1147
- # The prior value of the states {\theta_t} is
1148
- # distributed as a multivariate Gaussian with
1149
- # mean a_t and variance-covariance {R_t}
1150
- if self.R is not None:
1151
- self.R = self.C + self.wt
1152
- else:
1153
- self.R = np.zeros((2, 2))
1154
-
1155
- # Calculate the Kalman Filter update
1156
- # ---------------------------------
1157
- # Calculate prediction of new observation
1158
- # as well as forecast error of that prediction
1159
- yhat = F.dot(self.theta)
1160
- et = y - yhat
1161
-
1162
- # {Q_t} is the variance of the prediction of
1163
- # observations and hence sqrt_Qt is the
1164
- # standard deviation of the predictions
1165
- Qt = F.dot(self.R).dot(F.T) + self.vt
1166
- sqrt_Qt = np.sqrt(Qt)
1167
-
1168
- # The posterior value of the states {\theta_t} is
1169
- # distributed as a multivariate Gaussian with mean
1170
- # {m_t} and variance-covariance {C_t}
1171
- At = self.R.dot(F.T) / Qt
1172
- self.theta = self.theta + At.flatten() * et
1173
- self.C = self.R - At * F.dot(self.R)
1174
- return (et, sqrt_Qt)
1175
- else:
1176
- return None
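
For reference, a minimal usage sketch of the `KalmanFilterModel` class added in this release. The import path and the placeholder price series are assumptions for illustration; `calculate_etqt` returns the forecast error and the predicted standard deviation once valid prices for the pair have been supplied:

```python
import numpy as np

from bbstrader.tseries import KalmanFilterModel  # assumed import path, per this diff

# Placeholder prices for a pair of instruments; real usage would stream market data.
prices_x = [100.0, 101.2, 100.8, 102.5]
prices_y = [210.0, 212.1, 211.4, 215.0]

kf_model = KalmanFilterModel(("X", "Y"), delta=1e-4, vt=1e-3)
for x, y in zip(prices_x, prices_y):
    result = kf_model.calculate_etqt(np.array([x, y]))
    if result is not None:
        et, sqrt_qt = result  # forecast error e_t and sqrt(Q_t), both returned as small arrays
        # A pairs-trading rule might, for example, act when abs(et) exceeds sqrt_qt.
```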