bbstrader 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of bbstrader might be problematic. Click here for more details.

bbstrader/tseries.py CHANGED
@@ -8,33 +8,34 @@ market analysis, and financial data exploration.
8
8
  """
9
9
  import pprint
10
10
  import warnings
11
+ from itertools import combinations
12
+ from typing import List, Tuple, Union
13
+
14
+ import matplotlib.pyplot as plt
11
15
  import numpy as np
12
16
  import pandas as pd
13
- from tqdm import tqdm
14
- import yfinance as yf
15
17
  import pmdarima as pm
16
18
  import seaborn as sns
17
19
  import statsmodels.api as sm
18
- import matplotlib.pyplot as plt
19
20
  import statsmodels.tsa.stattools as ts
20
- from hurst import compute_Hc
21
+ import yfinance as yf
21
22
  from arch import arch_model
22
- from scipy.optimize import minimize
23
23
  from filterpy.kalman import KalmanFilter
24
+ from hurst import compute_Hc
24
25
  from pykalman import KalmanFilter as PyKalmanFilter
25
- from statsmodels.tsa.vector_ar.vecm import coint_johansen
26
+ from scipy.optimize import minimize
27
+ from sklearn.linear_model import LogisticRegressionCV
28
+ from sklearn.model_selection import GridSearchCV
29
+ from sklearn.tree import DecisionTreeClassifier
26
30
  from statsmodels.graphics.tsaplots import plot_acf
27
- from statsmodels.tsa.stattools import adfuller, coint
31
+ from statsmodels.stats.diagnostic import acorr_ljungbox
28
32
  from statsmodels.tsa.arima.model import ARIMA
33
+ from statsmodels.tsa.stattools import adfuller, coint
29
34
  from statsmodels.tsa.vector_ar.var_model import VAR
30
- from sklearn.model_selection import GridSearchCV
31
- from sklearn.tree import DecisionTreeClassifier
32
- from sklearn.linear_model import LogisticRegressionCV
33
- from statsmodels.stats.diagnostic import acorr_ljungbox
34
- from itertools import combinations
35
- from typing import Union, List, Tuple
36
- warnings.filterwarnings("ignore")
35
+ from statsmodels.tsa.vector_ar.vecm import coint_johansen
36
+ from tqdm import tqdm
37
37
 
38
+ warnings.filterwarnings("ignore")
38
39
 
39
40
 
40
41
  __all__ = [
@@ -50,8 +51,17 @@ __all__ = [
50
51
  "run_kalman_filter",
51
52
  "ArimaGarchModel",
52
53
  "KalmanFilterModel",
53
- "OrnsteinUhlenbeckModel",
54
-
54
+ "OrnsteinUhlenbeck",
55
+ "remove_correlated_assets",
56
+ "check_stationarity",
57
+ "remove_stationary_assets",
58
+ "select_assets",
59
+ "compute_pair_metrics",
60
+ "find_cointegrated_pairs",
61
+ "analyze_cointegrated_pairs",
62
+ "select_candidate_pairs",
63
+ "KFSmoother",
64
+ "KFHedgeRatio",
55
65
  ]
56
66
 
57
67
  # *******************************************
@@ -124,7 +134,8 @@ def fit_best_arima(window_data: Union[pd.Series, np.ndarray]):
124
134
  from arch.utility.exceptions import ConvergenceWarning as ArchWarning
125
135
  from statsmodels.tools.sm_exceptions import ConvergenceWarning as StatsWarning
126
136
  with warnings.catch_warnings():
127
- warnings.filterwarnings("ignore", category=StatsWarning, module='statsmodels')
137
+ warnings.filterwarnings(
138
+ "ignore", category=StatsWarning, module='statsmodels')
128
139
  warnings.filterwarnings("ignore", category=ArchWarning, module='arch')
129
140
  try:
130
141
  best_arima_model = ARIMA(
@@ -500,7 +511,8 @@ def get_corr(tickers: Union[List[str], Tuple[str, ...]], start: str, end: str) -
500
511
  >>> get_corr(['AAPL', 'MSFT', 'GOOG'], '2023-01-01', '2023-12-31')
501
512
  """
502
513
  # Download historical data
503
- data = yf.download(tickers, start=start, end=end, multi_level_index=False)['Adj Close']
514
+ data = yf.download(tickers, start=start, end=end,
515
+ multi_level_index=False)['Adj Close']
504
516
 
505
517
  # Calculate correlation matrix
506
518
  correlation_matrix = data.corr()
@@ -644,8 +656,10 @@ def run_cadf_test(pair: Union[List[str], Tuple[str, ...]], start: str, end: str)
644
656
  """
645
657
  # Download historical data for required stocks
646
658
  p0, p1 = pair[0], pair[1]
647
- _p0 = yf.download(p0, start=start, end=end, progress=False, multi_level_index=False)
648
- _p1 = yf.download(p1, start=start, end=end, progress=False, multi_level_index=False)
659
+ _p0 = yf.download(p0, start=start, end=end,
660
+ progress=False, multi_level_index=False)
661
+ _p1 = yf.download(p1, start=start, end=end,
662
+ progress=False, multi_level_index=False)
649
663
  df = pd.DataFrame(index=_p0.index)
650
664
  df[p0] = _p0["Adj Close"]
651
665
  df[p1] = _p1["Adj Close"]
@@ -674,7 +688,7 @@ def run_cadf_test(pair: Union[List[str], Tuple[str, ...]], start: str, end: str)
674
688
  # Display regression metrics
675
689
  print("\nRegression Metrics:")
676
690
  print(f"Optimal Hedge Ratio (Beta): {beta_hr}")
677
- print(f'Result Parmas: \n')
691
+ print('Result Parmas: \n')
678
692
  print(results.params)
679
693
  print("\nRegression Summary:")
680
694
  print(results.summary())
@@ -727,7 +741,8 @@ def run_hurst_test(symbol: str, start: str, end: str):
727
741
 
728
742
  >>> run_hurst_test('AAPL', '2023-01-01', '2023-12-31')
729
743
  """
730
- data = yf.download(symbol, start=start, end=end, progress=False, multi_level_index=False)
744
+ data = yf.download(symbol, start=start, end=end,
745
+ progress=False, multi_level_index=False)
731
746
 
732
747
  # Create a Geometric Brownian Motion, Mean-Reverting, and Trending Series
733
748
  gbm = np.log(np.cumsum(np.random.randn(100000))+1000)
@@ -884,8 +899,10 @@ def run_kalman_filter(
884
899
 
885
900
  >>> run_kalman_filter(['SPY', 'QQQ'], '2023-01-01', '2023-12-31')
886
901
  """
887
- etf_df1 = yf.download(etfs[0], start, end, progress=False, multi_level_index=False)
888
- etf_df2 = yf.download(etfs[1], start, end, progress=False, multi_level_index=False)
902
+ etf_df1 = yf.download(etfs[0], start, end,
903
+ progress=False, multi_level_index=False)
904
+ etf_df2 = yf.download(etfs[1], start, end,
905
+ progress=False, multi_level_index=False)
889
906
 
890
907
  prices = pd.DataFrame(index=etf_df1.index)
891
908
  prices[etfs[0]] = etf_df1["Adj Close"]
@@ -921,12 +938,12 @@ class KalmanFilterModel():
921
938
  self.tickers = tickers
922
939
  assert self.tickers is not None
923
940
 
924
- self.R = None
925
- self.theta = np.zeros(2)
926
- self.P = np.zeros((2, 2))
927
- self.delta = kwargs.get("delta", 1e-4)
928
- self.vt = kwargs.get("vt", 1e-3)
929
- self.wt = self.delta/(1-self.delta) * np.eye(2)
941
+ self.R = None
942
+ self.theta = np.zeros(2)
943
+ self.P = np.zeros((2, 2))
944
+ self.delta = kwargs.get("delta", 1e-4)
945
+ self.vt = kwargs.get("vt", 1e-3)
946
+ self.wt = self.delta/(1-self.delta) * np.eye(2)
930
947
  self.latest_prices = np.array([-1.0, -1.0])
931
948
  self.kf = self._init_kalman()
932
949
 
@@ -947,6 +964,7 @@ class KalmanFilterModel():
947
964
  return kf
948
965
 
949
966
  Array = np.ndarray
967
+
950
968
  def calc_slope_intercep(self, prices: Array) -> Tuple:
951
969
  """
952
970
  Calculates and returns the slope and intercept
@@ -967,7 +985,7 @@ class KalmanFilterModel():
967
985
  intercept = self.kf.x.copy().flatten()[1]
968
986
 
969
987
  return slope, intercept
970
-
988
+
971
989
  def calculate_etqt(self, prices: Array) -> Tuple:
972
990
  """
973
991
  Calculates the ``forecast error`` and ``standard deviation`` of the predictions
@@ -1166,7 +1184,7 @@ class OrnsteinUhlenbeck():
1166
1184
  Returns:
1167
1185
  np.ndarray: 2D array representing simulated processes.
1168
1186
  """
1169
- if returns is None:
1187
+ if returns is None:
1170
1188
  returns = self.returns
1171
1189
  if p is not None:
1172
1190
  T = p
@@ -1200,11 +1218,11 @@ def remove_correlated_assets(df: pd.DataFrame, cutoff=.99):
1200
1218
  and rows represent observations (e.g., time-series data).
1201
1219
  cutoff (float, optional, default=0.99): The correlation threshold.
1202
1220
  Columns with absolute correlation greater than this value will be considered for removal.
1203
-
1221
+
1204
1222
  Returns:
1205
1223
  pd.DataFrame: A DataFrame with less correlated assets.
1206
1224
  The columns that are highly correlated (above the cutoff) are removed.
1207
-
1225
+
1208
1226
  References
1209
1227
  ----------
1210
1228
  Stefan Jansen (2020). Machine Learning for Algorithmic Trading - Second Edition.
@@ -1243,12 +1261,12 @@ def check_stationarity(df: pd.DataFrame):
1243
1261
 
1244
1262
  Args:
1245
1263
  df (pd.DataFrame): A DataFrame where each column represents a time series of an asset.
1246
-
1264
+
1247
1265
  Returns:
1248
1266
  pd.DataFrame: A DataFrame containing the ADF p-values for each asset,
1249
1267
  - ticker Asset name (column name from df).
1250
1268
  - adf p-value from the ADF test, indicating the probability of the null hypothesis (data is non-stationary).
1251
-
1269
+
1252
1270
  References
1253
1271
  ----------
1254
1272
  Stefan Jansen (2020). Machine Learning for Algorithmic Trading - Second Edition.
@@ -1278,7 +1296,7 @@ def remove_stationary_assets(df: pd.DataFrame, pval=.05):
1278
1296
  df (pd.DataFrame): A DataFrame where each column represents a time series of an asset.
1279
1297
  pval (float, optional, default=0.05): The significance level to determine stationarity.
1280
1298
  Columns with an ADF test p-value below this threshold are considered stationary and removed.
1281
-
1299
+
1282
1300
  Returns:
1283
1301
  pd.DataFrame: A DataFrame containing only the non-stationary assets.
1284
1302
 
@@ -1286,7 +1304,7 @@ def remove_stationary_assets(df: pd.DataFrame, pval=.05):
1286
1304
  ----------
1287
1305
  Stefan Jansen (2020). Machine Learning for Algorithmic Trading - Second Edition.
1288
1306
  chapter 9, Time-Series Models for Volatility Forecasts and Statistical Arbitrage.
1289
-
1307
+
1290
1308
  Example:
1291
1309
  >>> df = pd.DataFrame({
1292
1310
  ... 'AAPL': [100, 101, 102, 103, 104],
@@ -1312,7 +1330,7 @@ def select_assets(df: pd.DataFrame, n=100, start=None, end=None, rolling_window=
1312
1330
  start (str, optional): Start date for filtering the data. Default is the earliest date in the DataFrame.
1313
1331
  end (str, optional): End date for filtering the data. Default is the latest date in the DataFrame.
1314
1332
  rolling_window (int, optional): Rolling window for calculating the average trading volume. Default is None.
1315
-
1333
+
1316
1334
  Returns:
1317
1335
  pd.DataFrame: A DataFrame of selected assets with filtered, cleaned data, indexed by date.
1318
1336
 
@@ -1323,25 +1341,27 @@ def select_assets(df: pd.DataFrame, n=100, start=None, end=None, rolling_window=
1323
1341
  """
1324
1342
  required_columns = {'close', 'volume'}
1325
1343
  if not required_columns.issubset(df.columns):
1326
- raise ValueError(f"Input DataFrame must contain {required_columns}, but got {df.columns.tolist()}.")
1327
-
1344
+ raise ValueError(
1345
+ f"Input DataFrame must contain {required_columns}, but got {df.columns.tolist()}.")
1346
+
1328
1347
  if not isinstance(df.index, pd.MultiIndex) or 'ticker' not in df.index.names or 'date' not in df.index.names:
1329
- raise ValueError("Index must be a MultiIndex with levels ['ticker', 'date'].")
1330
-
1348
+ raise ValueError(
1349
+ "Index must be a MultiIndex with levels ['ticker', 'date'].")
1350
+
1331
1351
  df = df.copy()
1332
1352
  idx = pd.IndexSlice
1333
1353
  start = start or df.index.get_level_values('date').min()
1334
1354
  end = end or df.index.get_level_values('date').max()
1335
1355
  df = (df
1336
- .loc[lambda df: ~df.index.duplicated()]
1337
- .sort_index()
1338
- .loc[idx[:, f'{start}':f'{end}'], :]
1339
- .assign(dv=lambda df: df.close.mul(df.volume)))
1340
-
1356
+ .loc[lambda df: ~df.index.duplicated()]
1357
+ .sort_index()
1358
+ .loc[idx[:, f'{start}':f'{end}'], :]
1359
+ .assign(dv=lambda df: df.close.mul(df.volume)))
1360
+
1341
1361
  if rolling_window is None:
1342
1362
  most_traded = (df.groupby(level='ticker')
1343
- .dv.mean()
1344
- .nlargest(n=n).index)
1363
+ .dv.mean()
1364
+ .nlargest(n=n).index)
1345
1365
  else:
1346
1366
  # Calculate the rolling average of dollar volume
1347
1367
  df['dv_rolling_avg'] = (
@@ -1358,9 +1378,9 @@ def select_assets(df: pd.DataFrame, n=100, start=None, end=None, rolling_window=
1358
1378
  .index
1359
1379
  )
1360
1380
  df = (df.loc[idx[most_traded, :], 'close']
1361
- .unstack('ticker')
1362
- .ffill(limit=5)
1363
- .dropna(axis=1))
1381
+ .unstack('ticker')
1382
+ .ffill(limit=5)
1383
+ .dropna(axis=1))
1364
1384
  df = remove_correlated_assets(df)
1365
1385
  df = remove_stationary_assets(df)
1366
1386
  return df.sort_index()
@@ -1377,7 +1397,7 @@ def compute_pair_metrics(security: pd.Series, candidates: pd.DataFrame):
1377
1397
  The name of the Series should correspond to the security's identifier (e.g., ticker symbol).
1378
1398
  candidates (pd.DataFrame): A DataFrame where each column represents a time-series of prices
1379
1399
  for candidate securities to be evaluated against the target security.
1380
-
1400
+
1381
1401
  Returns:
1382
1402
  pd.DataFrame: A DataFrame combining:
1383
1403
  Drift: Estimated drift of spreads between the target security and each candidate.
@@ -1388,7 +1408,7 @@ def compute_pair_metrics(security: pd.Series, candidates: pd.DataFrame):
1388
1408
  Cointegration metrics:
1389
1409
  Engle-Granger test statistics (``t1``, ``t2``) and p-values (``p1``, ``p2``).
1390
1410
  Johansen test trace statistics (``trace0``, ``trace1``) and selected lag order (``k_ar_diff``).
1391
-
1411
+
1392
1412
  References
1393
1413
  ----------
1394
1414
  Stefan Jansen (2020). Machine Learning for Algorithmic Trading - Second Edition.
@@ -1401,38 +1421,39 @@ def compute_pair_metrics(security: pd.Series, candidates: pd.DataFrame):
1401
1421
  n, m = spreads.shape
1402
1422
  X = np.ones(shape=(n, 2))
1403
1423
  X[:, 1] = np.arange(1, n + 1)
1404
-
1424
+
1405
1425
  # compute drift
1406
1426
  drift = ((np.linalg.inv(X.T @ X) @ X.T @ spreads).iloc[1]
1407
1427
  .to_frame('drift'))
1408
-
1428
+
1409
1429
  # compute volatility
1410
1430
  vol = spreads.std().to_frame('vol')
1411
-
1431
+
1412
1432
  # returns correlation
1413
1433
  corr_ret = (candidates.pct_change()
1414
1434
  .corrwith(security.pct_change())
1415
1435
  .to_frame('corr_ret'))
1416
-
1436
+
1417
1437
  # normalized price series correlation
1418
1438
  corr = candidates.corrwith(security).to_frame('corr')
1419
1439
  metrics = drift.join(vol).join(corr).join(corr_ret).assign(n=n)
1420
-
1440
+
1421
1441
  tests = []
1422
1442
  # run cointegration tests
1423
1443
  for candidate, prices in tqdm(candidates.items()):
1424
1444
  df = pd.DataFrame({'s1': security, 's2': prices})
1425
1445
  var = VAR(df.values)
1426
- lags = var.select_order() # select VAR order
1446
+ lags = var.select_order() # select VAR order
1427
1447
  k_ar_diff = lags.selected_orders['aic']
1428
1448
  # Johansen Test with constant Term and estd. lag order
1429
1449
  cj0 = coint_johansen(df, det_order=0, k_ar_diff=k_ar_diff)
1430
1450
  # Engle-Granger Tests
1431
1451
  t1, p1 = coint(security, prices, trend='c')[:2]
1432
1452
  t2, p2 = coint(prices, security, trend='c')[:2]
1433
- tests.append([ticker, candidate, t1, p1, t2, p2,
1453
+ tests.append([ticker, candidate, t1, p1, t2, p2,
1434
1454
  k_ar_diff, *cj0.lr1])
1435
- columns = ['s1', 's2', 't1', 'p1', 't2', 'p2', 'k_ar_diff', 'trace0', 'trace1']
1455
+ columns = ['s1', 's2', 't1', 'p1', 't2',
1456
+ 'p2', 'k_ar_diff', 'trace0', 'trace1']
1436
1457
  tests = pd.DataFrame(tests, columns=columns).set_index('s2')
1437
1458
  return metrics.join(tests)
1438
1459
 
@@ -1443,9 +1464,8 @@ __CRITICAL_VALUES = {
1443
1464
  }
1444
1465
 
1445
1466
 
1446
- def find_cointegrated_pairs(securities: pd.DataFrame, candidates: pd.DataFrame,
1467
+ def find_cointegrated_pairs(securities: pd.DataFrame, candidates: pd.DataFrame,
1447
1468
  n=None, start=None, stop=None, coint=False):
1448
-
1449
1469
  """
1450
1470
  Identifies cointegrated pairs between a target set of securities and candidate securities
1451
1471
  based on econometric tests. The function evaluates statistical relationships,
@@ -1508,8 +1528,9 @@ def find_cointegrated_pairs(securities: pd.DataFrame, candidates: pd.DataFrame,
1508
1528
  >>> | Security1| Candidate1| -3.5 | 0.01 | 1 | 1 | 1 |
1509
1529
  >>> | Security2| Candidate2| -2.9 | 0.04 | 1 | 1 | 1 |
1510
1530
  """
1511
- trace0_cv = __CRITICAL_VALUES[0][.95] # critical value for 0 cointegration relationships
1512
- trace1_cv = __CRITICAL_VALUES[1][.95] # critical value for 1 cointegration relationship
1531
+ trace0_cv = __CRITICAL_VALUES[0][.95] # critical value for 0 cointegration relationships
1532
+ # critical value for 1 cointegration relationship
1533
+ trace1_cv = __CRITICAL_VALUES[1][.95]
1513
1534
  spreads = []
1514
1535
  if start is not None and stop is not None:
1515
1536
  securities = securities.loc[str(start): str(stop), :]
@@ -1526,7 +1547,7 @@ def find_cointegrated_pairs(securities: pd.DataFrame, candidates: pd.DataFrame,
1526
1547
  spreads['t'] = spreads[['t1', 't2']].min(axis=1)
1527
1548
  spreads['p'] = spreads[['p1', 'p2']].min(axis=1)
1528
1549
  spreads['joh_sig'] = ((spreads.trace0 > trace0_cv) &
1529
- (spreads.trace1 > trace1_cv)).astype(int)
1550
+ (spreads.trace1 > trace1_cv)).astype(int)
1530
1551
  spreads['eg_sig'] = (spreads.p < .05).astype(int)
1531
1552
  spreads['s1_dep'] = spreads.p1 < spreads.p2
1532
1553
  spreads['coint'] = (spreads.joh_sig & spreads.eg_sig).astype(int)
@@ -1534,23 +1555,23 @@ def find_cointegrated_pairs(securities: pd.DataFrame, candidates: pd.DataFrame,
1534
1555
  if coint:
1535
1556
  if n is not None:
1536
1557
  top_pairs = (spreads.query('coint == 1')
1537
- .sort_values('t', ascending=False)
1538
- .head(n))
1558
+ .sort_values('t', ascending=False)
1559
+ .head(n))
1539
1560
  else:
1540
1561
  top_pairs = (spreads.query('coint == 1')
1541
1562
  .sort_values('t', ascending=False))
1542
1563
  else:
1543
1564
  if n is not None:
1544
1565
  top_pairs = (spreads
1545
- .sort_values('t', ascending=False)
1546
- .head(n))
1566
+ .sort_values('t', ascending=False)
1567
+ .head(n))
1547
1568
  else:
1548
1569
  top_pairs = (spreads
1549
- .sort_values('t', ascending=False))
1570
+ .sort_values('t', ascending=False))
1550
1571
  return top_pairs
1551
1572
 
1552
1573
 
1553
- def analyze_cointegrated_pairs(spreads: pd.DataFrame, plot_coint=True, crosstab=False,
1574
+ def analyze_cointegrated_pairs(spreads: pd.DataFrame, plot_coint=True, crosstab=False,
1554
1575
  heuristics=False, log_reg=False, decis_tree=False):
1555
1576
  """
1556
1577
  Analyzes cointegrated pairs by visualizing, summarizing, and applying predictive models.
@@ -1569,12 +1590,12 @@ def analyze_cointegrated_pairs(spreads: pd.DataFrame, plot_coint=True, crosstab=
1569
1590
  If True, fits a logistic regression model to predict cointegration and evaluates its performance.
1570
1591
  decis_tree (bool, optional):
1571
1592
  If True, fits a decision tree model to predict cointegration and evaluates its performance.
1572
-
1593
+
1573
1594
  References
1574
1595
  ----------
1575
1596
  Stefan Jansen (2020). Machine Learning for Algorithmic Trading - Second Edition.
1576
1597
  chapter 9, Time-Series Models for Volatility Forecasts and Statistical Arbitrage.
1577
-
1598
+
1578
1599
  Example:
1579
1600
  >>> import pandas as pd
1580
1601
  >>> from bbstrader.tseries import find_cointegrated_pairs, analyze_cointegrated_pairs
@@ -1595,14 +1616,14 @@ def analyze_cointegrated_pairs(spreads: pd.DataFrame, plot_coint=True, crosstab=
1595
1616
  if plot_coint:
1596
1617
  trace0_cv = __CRITICAL_VALUES[0][.95]
1597
1618
  spreads = spreads.reset_index()
1598
- sns.scatterplot(x=np.log1p(spreads.t.abs()),
1599
- y=np.log1p(spreads.trace1),
1600
- hue='coint', data=spreads[spreads.trace0>trace0_cv]);
1619
+ sns.scatterplot(x=np.log1p(spreads.t.abs()),
1620
+ y=np.log1p(spreads.trace1),
1621
+ hue='coint', data=spreads[spreads.trace0 > trace0_cv])
1601
1622
  fig, axes = plt.subplots(ncols=4, figsize=(20, 5))
1602
1623
  for i, heuristic in enumerate(['drift', 'vol', 'corr', 'corr_ret']):
1603
1624
  sns.boxplot(x='coint', y=heuristic, data=spreads, ax=axes[i])
1604
- fig.tight_layout();
1605
-
1625
+ fig.tight_layout()
1626
+
1606
1627
  if heuristics:
1607
1628
  spreads = spreads.reset_index()
1608
1629
  h = spreads.groupby(spreads.coint)[
@@ -1612,13 +1633,13 @@ def analyze_cointegrated_pairs(spreads: pd.DataFrame, plot_coint=True, crosstab=
1612
1633
  if log_reg:
1613
1634
  y = spreads.coint
1614
1635
  X = spreads[['drift', 'vol', 'corr', 'corr_ret']]
1615
- log_reg = LogisticRegressionCV(Cs=np.logspace(-10, 10, 21),
1616
- class_weight='balanced',
1617
- scoring='roc_auc')
1636
+ log_reg = LogisticRegressionCV(Cs=np.logspace(-10, 10, 21),
1637
+ class_weight='balanced',
1638
+ scoring='roc_auc')
1618
1639
  log_reg.fit(X=X, y=y)
1619
1640
  Cs = log_reg.Cs_
1620
1641
  scores = pd.DataFrame(log_reg.scores_[True], columns=Cs).mean()
1621
- scores.plot(logx=True);
1642
+ scores.plot(logx=True)
1622
1643
  res = f'C:{np.log10(scores.idxmax()):.2f}, AUC: {scores.max():.2%}'
1623
1644
  print(res)
1624
1645
  print(log_reg.coef_)
@@ -1626,9 +1647,10 @@ def analyze_cointegrated_pairs(spreads: pd.DataFrame, plot_coint=True, crosstab=
1626
1647
  if decis_tree:
1627
1648
  model = DecisionTreeClassifier(class_weight='balanced')
1628
1649
  decision_tree = GridSearchCV(model,
1629
- param_grid={'max_depth': list(range(1, 10))},
1630
- cv=5,
1631
- scoring='roc_auc')
1650
+ param_grid={
1651
+ 'max_depth': list(range(1, 10))},
1652
+ cv=5,
1653
+ scoring='roc_auc')
1632
1654
  y = spreads.coint
1633
1655
  X = spreads[['drift', 'vol', 'corr', 'corr_ret']]
1634
1656
  decision_tree.fit(X, y)
@@ -1655,7 +1677,7 @@ def select_candidate_pairs(pairs: pd.DataFrame, period=False):
1655
1677
 
1656
1678
  Returns:
1657
1679
  list[dict]: A list of dictionaries, each containing the keys 'x' and 'y' (and optionally 'period') representing the selected pairs.
1658
-
1680
+
1659
1681
  References
1660
1682
  ----------
1661
1683
  Stefan Jansen (2020). Machine Learning for Algorithmic Trading - Second Edition.
@@ -1663,8 +1685,10 @@ def select_candidate_pairs(pairs: pd.DataFrame, period=False):
1663
1685
  """
1664
1686
  candidates = pairs.query('coint == 1').copy()
1665
1687
  candidates = candidates.reset_index()
1666
- candidates['y'] = candidates.apply(lambda x: x['s1'] if x.s1_dep else x['s2'], axis=1)
1667
- candidates['x'] = candidates.apply(lambda x: x['s2'] if x.s1_dep else x['s1'], axis=1)
1688
+ candidates['y'] = candidates.apply(
1689
+ lambda x: x['s1'] if x.s1_dep else x['s2'], axis=1)
1690
+ candidates['x'] = candidates.apply(
1691
+ lambda x: x['s2'] if x.s1_dep else x['s1'], axis=1)
1668
1692
  if period:
1669
1693
  return candidates[['x', 'y', 'period']].to_dict(orient='records')
1670
1694
  return candidates[['x', 'y']].to_dict(orient='records')
@@ -1682,7 +1706,7 @@ def KFSmoother(prices: pd.Series | np.ndarray) -> pd.Series | np.ndarray:
1682
1706
  pd.Series or np.ndarray
1683
1707
  The smoothed time series data. If the input is a pandas Series, the output will also be a pandas Series with the same index.
1684
1708
  If the input is a numpy array, the output will be a numpy array.
1685
-
1709
+
1686
1710
  References
1687
1711
  ----------
1688
1712
  Stefan Jansen (2020). Machine Learning for Algorithmic Trading - Second Edition.
@@ -1701,10 +1725,11 @@ def KFSmoother(prices: pd.Series | np.ndarray) -> pd.Series | np.ndarray:
1701
1725
  2020-01-07 00:00:00+00:00 60.02240894
1702
1726
  2020-01-08 00:00:00+00:00 63.15057948
1703
1727
  dtype: float64
1704
-
1728
+
1705
1729
  """
1706
1730
  if not isinstance(prices, (np.ndarray, pd.Series)):
1707
- raise ValueError("Input must be either a numpy array or a pandas Series.")
1731
+ raise ValueError(
1732
+ "Input must be either a numpy array or a pandas Series.")
1708
1733
  kf = PyKalmanFilter(
1709
1734
  transition_matrices=np.eye(1),
1710
1735
  observation_matrices=np.eye(1),
@@ -1729,7 +1754,7 @@ def KFHedgeRatio(x: pd.Series | np.ndarray, y: pd.Series | np.ndarray) -> np.nda
1729
1754
  The independent variable, which can be either a pandas Series or a numpy array.
1730
1755
  y : pd.Series or np.ndarray
1731
1756
  The dependent variable, which can be either a pandas Series or a numpy array.
1732
-
1757
+
1733
1758
  Returns:
1734
1759
  np.ndarray
1735
1760
  The estimated hedge ratio as a numpy array.
@@ -1744,8 +1769,9 @@ def KFHedgeRatio(x: pd.Series | np.ndarray, y: pd.Series | np.ndarray) -> np.nda
1744
1769
  """
1745
1770
  if (not isinstance(x, (np.ndarray, pd.Series))
1746
1771
  or not isinstance(y, (np.ndarray, pd.Series))):
1747
- raise ValueError("Both x and y must be either a numpy array or a pandas Series.")
1748
-
1772
+ raise ValueError(
1773
+ "Both x and y must be either a numpy array or a pandas Series.")
1774
+
1749
1775
  delta = 1e-3
1750
1776
  trans_cov = delta / (1 - delta) * np.eye(2)
1751
1777
  obs_mat = np.expand_dims(np.vstack([[x], [np.ones(len(x))]]).T, axis=1)
@@ -1761,6 +1787,6 @@ def KFHedgeRatio(x: pd.Series | np.ndarray, y: pd.Series | np.ndarray) -> np.nda
1761
1787
  )
1762
1788
  y = y.values if isinstance(y, pd.Series) else y
1763
1789
  state_means, _ = kf.filter(y)
1764
- # Indexing with [:, 0] in state_means[:, 0] extracts only the first state variable of
1790
+ # Indexing with [:, 0] in state_means[:, 0] extracts only the first state variable of
1765
1791
  # each Kalman Filter estimate, which is the estimated hedge ratio.
1766
1792
  return -state_means[:, 0]
@@ -1,6 +1,6 @@
1
1
  The MIT License (MIT)
2
2
 
3
- Copyright (c) 2023-2024 Bertin Balouki SIMYELI
3
+ Copyright (c) 2023-2025 Bertin Balouki SIMYELI
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: bbstrader
3
- Version: 0.2.0
3
+ Version: 0.2.1
4
4
  Summary: Simplified Investment & Trading Toolkit
5
5
  Home-page: https://github.com/bbalouki/bbstrader
6
6
  Download-URL: https://pypi.org/project/bbstrader/
@@ -49,6 +49,7 @@ Requires-Dist: python-telegram-bot
49
49
  Requires-Dist: pyportfolioopt
50
50
  Requires-Dist: eodhd
51
51
  Requires-Dist: financetoolkit
52
+ Requires-Dist: tables
52
53
  Requires-Dist: lightgbm
53
54
  Requires-Dist: alphalens-reloaded
54
55
  Provides-Extra: mt5
@@ -0,0 +1,37 @@
1
+ bbstrader/__ini__.py,sha256=c2TwxnJi0PtvigdAbAS2wsTAd7CnvwRHUrYr-s4wEsI,567
2
+ bbstrader/config.py,sha256=gepL2m_Ishu8BvuGpFOpSTPEosaAZmoqBK4DfdBceBs,3779
3
+ bbstrader/tseries.py,sha256=WidlFyQ6y76MUGPfSsOpdfhFSeuBRoijb4Ktr3N99f0,70439
4
+ bbstrader/btengine/__init__.py,sha256=FL0kC0NcsnlTH-yuTv4lu6AexY1wZKN1AQ9rv9MZagQ,3009
5
+ bbstrader/btengine/backtest.py,sha256=KN1We1ye7js3rL4Pd9dAj-LY3aB-PcDLUI7f6IRYd2g,14559
6
+ bbstrader/btengine/data.py,sha256=eIax5Fvw4wk60Rsr-TJELM_DLjvvI4D6-qTrnnbWMgw,26736
7
+ bbstrader/btengine/event.py,sha256=hquVFmNMauEARrw1HBrFxwGkKj8qbcKeFGRal2K5Z3o,8766
8
+ bbstrader/btengine/execution.py,sha256=bkc5t6Zx1hTNfycL8msgGsVk23jXko7oKoLndWGrrqg,10232
9
+ bbstrader/btengine/performance.py,sha256=Cm1o4tUI7iuai0dHiGhkkm0Hww4ikCg_q1p8JYfhR2o,10666
10
+ bbstrader/btengine/portfolio.py,sha256=auqOU3j0xc3CNicu6pBk-JCvdmYZUN8xke3HXUbco2o,16096
11
+ bbstrader/btengine/strategy.py,sha256=bdHA-US8SqdRWn4BpQwC-A3AvMdt7rYbStJ1qkbGRWQ,30709
12
+ bbstrader/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
+ bbstrader/core/data.py,sha256=3_odj9jxokyKU15j1aHTlgLQDjW75tKqGpCUfkpYq2Q,452
14
+ bbstrader/core/utils.py,sha256=vgW1I1zHar_ywBt3AaRMneTd5c38gTXoY2vGBVL92kY,1446
15
+ bbstrader/ibkr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
+ bbstrader/ibkr/utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
+ bbstrader/metatrader/__init__.py,sha256=rrL_EtecsOCD2Cwhmpgta5CjSGT0K6vzSBiQoyCLe3M,283
18
+ bbstrader/metatrader/account.py,sha256=v0nozj0bPDv4lMC3ayeMPdD6OJ4N9RY6BFLjmPOWW1Q,55999
19
+ bbstrader/metatrader/rates.py,sha256=wR5UZuE2R0pFr2Mgi_448R0B0o_iLAAInAiNB-aUwm0,20821
20
+ bbstrader/metatrader/risk.py,sha256=-mOmbv7jlGiNWL84j4ZYhDETDKgNO3o5--6Azka0z1U,26510
21
+ bbstrader/metatrader/trade.py,sha256=PVU6t5Q22N844BJCBW3gyDnK4zLHvk3TrLiLyk_iFI4,71168
22
+ bbstrader/metatrader/utils.py,sha256=Q4rNYEfrPXNvTrTbzyY_Vo5hXGq6lbdg_Rvr0CVtYWk,17646
23
+ bbstrader/models/__init__.py,sha256=SnGBMQ-zcUIpms3oNeqg7EVDFpg-7OPjNAD8kvi_Q84,508
24
+ bbstrader/models/factors.py,sha256=Qhq9KO8MzOlVLH0qDcU-1-xvw_fMNCZG1Jd-oki7XTI,12917
25
+ bbstrader/models/ml.py,sha256=nVVdLjSui4yKcErbH86cjbyqbJjRzb3nqf21-tO-Nkc,48458
26
+ bbstrader/models/optimization.py,sha256=1FJC9Q0P6lG2BQUtRG905fvPV0KJiqGSWFSAZ8fsQZI,6615
27
+ bbstrader/models/portfolio.py,sha256=2w8RbdfWqeFerRt5WtaVDuplHlYo16V1MK8DvlcSXpc,8540
28
+ bbstrader/models/risk.py,sha256=qo2bENemDtlPuE9yUIIxE2SgaxxO7nX94CzuICJ1tEM,15608
29
+ bbstrader/trading/__init__.py,sha256=2VoxbzfP1XBLVuxJtjRhjEBCtnv9HqwQzfMV4B5mM7M,468
30
+ bbstrader/trading/execution.py,sha256=zgoVHz7RkA8s1fuyJG5iS6UPeOZJoY3y353xGriq1FE,28635
31
+ bbstrader/trading/scripts.py,sha256=OCIy1dO5jD5ZZ3nyhR4EuW8cJ90IK6g75yXpr5xFYJk,1906
32
+ bbstrader/trading/strategies.py,sha256=wP874dGSzJl-owSYdBZC_J1-8nQQ9HF01AhbbOKc4Bc,36420
33
+ bbstrader-0.2.1.dist-info/LICENSE,sha256=P3PBO9RuYPzl6-PkjysTNnwmwMB64ph36Bz9DBj8MS4,1115
34
+ bbstrader-0.2.1.dist-info/METADATA,sha256=6QafbqF7ko4y2LdqlypyPz6uMccgeIysiiSYWaN3zWg,10123
35
+ bbstrader-0.2.1.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
36
+ bbstrader-0.2.1.dist-info/top_level.txt,sha256=Wwj322jZmxGZ6gD_TdaPiPLjED5ReObm5omerwlmZIg,10
37
+ bbstrader-0.2.1.dist-info/RECORD,,
@@ -1,36 +0,0 @@
1
- bbstrader/__ini__.py,sha256=LV8eBeUebDf3-tRkOh761hRlEZWp4vXebT-archG1XE,481
2
- bbstrader/config.py,sha256=a9KCINIbdm8rgybS84zxJmUznNF9gmEfY6sfRKf_GN4,3774
3
- bbstrader/tseries.py,sha256=4F3XiNz_27MJvgYRQWJbopFzOI0LUnW3eq6R-uMOIfk,69942
4
- bbstrader/btengine/__init__.py,sha256=OaXZTjgDwqWrjPq-CNE4kJkmriKXt9t5pIghW1MDTeo,2911
5
- bbstrader/btengine/backtest.py,sha256=A3S84jpGTE_zhguOEGoGu6H_4ws4Iq5sf0n7TZaUYfQ,14615
6
- bbstrader/btengine/data.py,sha256=iNd2_V_gAblzbgMP5-prT17nZ-WBq6uoX3vK8zsI5LM,26798
7
- bbstrader/btengine/event.py,sha256=zF_ST4tcjV5uJJVV1IbRXQgCLbca2R2fmE7A2MaIno4,8748
8
- bbstrader/btengine/execution.py,sha256=Fs6Hk64DxEOEVzAjsQ3CIVvYifWLLgkDjOixSh_Ghsc,10282
9
- bbstrader/btengine/performance.py,sha256=WTYzB50lUD5aShPIEebbQPlaC2NVW6VfxdgGHjcIIAw,10707
10
- bbstrader/btengine/portfolio.py,sha256=wCRmGxaZvihUPlXIlZp9cQo9fqPP-Tk5oALjknMfnos,16055
11
- bbstrader/btengine/strategy.py,sha256=PL890aPkXevWnhaobJd78LDgTODZ8mHQOnGBk0bLbWI,30473
12
- bbstrader/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
- bbstrader/core/data.py,sha256=jLJlj3kkprCcDAjN3ij0pThNqkxOhhnuer_VyFLvYfk,488
14
- bbstrader/core/utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
- bbstrader/ibkr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
- bbstrader/metatrader/__init__.py,sha256=OLVOB_EieEb1P72I8V4Vem8kQWJ__D_L3c_wfwqY-9k,211
17
- bbstrader/metatrader/account.py,sha256=tkcAEgFIYZwtFRQc07lXQQFWRqglzR4H4LjX2BDRvj8,56371
18
- bbstrader/metatrader/rates.py,sha256=CY6CuyXKo68h4ww7UV2P3xH8tHyzvhCNihCsaYS74is,21001
19
- bbstrader/metatrader/risk.py,sha256=uLarOF-g9-RBdJuKSmIfT5WrPn47bmrvMxP21pQg4xo,26793
20
- bbstrader/metatrader/trade.py,sha256=ugUr39FnVmqpj4f0FzO10MudWGDL71qT1_T9j0Sx_Ns,71012
21
- bbstrader/metatrader/utils.py,sha256=BTaZun4DKWpCxBBzY0SLQqqz7n_7F_R1F59APfyaa3E,17666
22
- bbstrader/models/__init__.py,sha256=uqV7O-7lZzvw1fHL1_LM3zYtHp_ExsrjQt3n6zmG_MU,436
23
- bbstrader/models/factors.py,sha256=3xF3LPgRR8pTLi2wD6k9F-4F9GlZJ4_otAvSlYycGvg,12915
24
- bbstrader/models/ml.py,sha256=T8apBfRESBnbskIuR16YfZXW9rgdpN5_CL9yr5r8MLw,47262
25
- bbstrader/models/optimization.py,sha256=VbbjdHP4gcmCTPYp74L0aftQumGAIru1keXxheDSiF4,6685
26
- bbstrader/models/portfolio.py,sha256=KA9X0xbwHG3rUaIihqFnepMs0iM2MHsJiko867jpB3c,8556
27
- bbstrader/models/risk.py,sha256=8WcsBp3wdtFEq_ERX9_q9oD34--ZKgueT2hpA7aTOSg,15550
28
- bbstrader/trading/__init__.py,sha256=3CCzV5rQbH8NthjDJhD0_2FABvpiCmkeC9cVeoW7bi4,438
29
- bbstrader/trading/execution.py,sha256=-nUk9BhbsTjCotBOWKZHnfG1IA6UuvDFnCGNF3WR8Z8,27721
30
- bbstrader/trading/scripts.py,sha256=rQmnG_4F_MuUEc96RXpAQT4kXrC-FkscsgHKgDAR_-Y,1902
31
- bbstrader/trading/strategies.py,sha256=QAdK28Ff013bWxgoX2A8uPJYMYLTO5-vU7mDhF_UPF8,36468
32
- bbstrader-0.2.0.dist-info/LICENSE,sha256=1EudjwwP2oTJy8Vh0e-Kzv8VZZU95y-t6c3DYhR51uc,1115
33
- bbstrader-0.2.0.dist-info/METADATA,sha256=PdRDfLx_MrgHwsyWgfu3HaSAlDqExiIT5OS0iSK9qnk,10100
34
- bbstrader-0.2.0.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
35
- bbstrader-0.2.0.dist-info/top_level.txt,sha256=Wwj322jZmxGZ6gD_TdaPiPLjED5ReObm5omerwlmZIg,10
36
- bbstrader-0.2.0.dist-info/RECORD,,