bbstrader 0.2.92__py3-none-any.whl → 0.2.94__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of bbstrader might be problematic. Click here for more details.
- bbstrader/__ini__.py +20 -20
- bbstrader/__main__.py +50 -50
- bbstrader/btengine/__init__.py +54 -54
- bbstrader/btengine/data.py +11 -9
- bbstrader/btengine/scripts.py +157 -157
- bbstrader/compat.py +19 -19
- bbstrader/config.py +137 -137
- bbstrader/core/data.py +22 -22
- bbstrader/core/utils.py +146 -146
- bbstrader/metatrader/__init__.py +6 -6
- bbstrader/metatrader/account.py +1516 -1516
- bbstrader/metatrader/copier.py +750 -735
- bbstrader/metatrader/rates.py +584 -584
- bbstrader/metatrader/risk.py +749 -748
- bbstrader/metatrader/scripts.py +81 -81
- bbstrader/metatrader/trade.py +1836 -1826
- bbstrader/metatrader/utils.py +645 -645
- bbstrader/models/__init__.py +10 -10
- bbstrader/models/factors.py +312 -312
- bbstrader/models/ml.py +1272 -1265
- bbstrader/models/optimization.py +182 -182
- bbstrader/models/portfolio.py +223 -223
- bbstrader/models/risk.py +398 -398
- bbstrader/trading/__init__.py +11 -11
- bbstrader/trading/execution.py +846 -842
- bbstrader/trading/script.py +155 -155
- bbstrader/trading/scripts.py +69 -69
- bbstrader/trading/strategies.py +860 -860
- bbstrader/tseries.py +1842 -1842
- {bbstrader-0.2.92.dist-info → bbstrader-0.2.94.dist-info}/LICENSE +21 -21
- {bbstrader-0.2.92.dist-info → bbstrader-0.2.94.dist-info}/METADATA +188 -187
- bbstrader-0.2.94.dist-info/RECORD +44 -0
- {bbstrader-0.2.92.dist-info → bbstrader-0.2.94.dist-info}/WHEEL +1 -1
- bbstrader-0.2.92.dist-info/RECORD +0 -44
- {bbstrader-0.2.92.dist-info → bbstrader-0.2.94.dist-info}/entry_points.txt +0 -0
- {bbstrader-0.2.92.dist-info → bbstrader-0.2.94.dist-info}/top_level.txt +0 -0
bbstrader/models/__init__.py
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
"""
|
|
2
|
-
The `models` module provides a foundational framework for implementing various quantitative finance models.
|
|
3
|
-
|
|
4
|
-
It is designed to be a versatile base module for different types of models used in financial analysis and trading.
|
|
5
|
-
"""
|
|
6
|
-
from bbstrader.models.risk import * # noqa: F403
|
|
7
|
-
from bbstrader.models.optimization import * # noqa: F403
|
|
8
|
-
from bbstrader.models.portfolio import * # noqa: F403
|
|
9
|
-
from bbstrader.models.factors import * # noqa: F403
|
|
10
|
-
from bbstrader.models.ml import * # noqa: F403
|
|
1
|
+
"""
|
|
2
|
+
The `models` module provides a foundational framework for implementing various quantitative finance models.
|
|
3
|
+
|
|
4
|
+
It is designed to be a versatile base module for different types of models used in financial analysis and trading.
|
|
5
|
+
"""
|
|
6
|
+
from bbstrader.models.risk import * # noqa: F403
|
|
7
|
+
from bbstrader.models.optimization import * # noqa: F403
|
|
8
|
+
from bbstrader.models.portfolio import * # noqa: F403
|
|
9
|
+
from bbstrader.models.factors import * # noqa: F403
|
|
10
|
+
from bbstrader.models.ml import * # noqa: F403
|
bbstrader/models/factors.py
CHANGED
|
@@ -1,312 +1,312 @@
|
|
|
1
|
-
from datetime import datetime
|
|
2
|
-
from typing import Dict, List
|
|
3
|
-
|
|
4
|
-
import pandas as pd
|
|
5
|
-
import yfinance as yf
|
|
6
|
-
|
|
7
|
-
from bbstrader.btengine.data import EODHDataHandler, FMPDataHandler
|
|
8
|
-
from bbstrader.metatrader.rates import download_historical_data
|
|
9
|
-
from bbstrader.tseries import (
|
|
10
|
-
find_cointegrated_pairs,
|
|
11
|
-
select_assets,
|
|
12
|
-
select_candidate_pairs,
|
|
13
|
-
)
|
|
14
|
-
|
|
15
|
-
__all__ = [
|
|
16
|
-
"search_coint_candidate_pairs",
|
|
17
|
-
]
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
def search_coint_candidate_pairs(
|
|
21
|
-
securities: pd.DataFrame | List[str] = None,
|
|
22
|
-
candidates: pd.DataFrame | List[str] = None,
|
|
23
|
-
start: str = None,
|
|
24
|
-
end: str = None,
|
|
25
|
-
period_search: bool = False,
|
|
26
|
-
select: bool = True,
|
|
27
|
-
source: str = None,
|
|
28
|
-
universe: int = 100,
|
|
29
|
-
window: int = 2,
|
|
30
|
-
rolling_window: int = None,
|
|
31
|
-
npairs: int = 10,
|
|
32
|
-
tf: str = "D1",
|
|
33
|
-
path: str = None,
|
|
34
|
-
**kwargs,
|
|
35
|
-
) -> List[Dict[str, str]] | pd.DataFrame:
|
|
36
|
-
"""
|
|
37
|
-
Searches for candidate pairs of securities based on cointegration analysis.
|
|
38
|
-
|
|
39
|
-
This function either processes preloaded securities and candidates data
|
|
40
|
-
(as pandas DataFrames) or downloads historical data from a specified
|
|
41
|
-
source (e.g., Yahoo Finance, MetaTrader 5, Financial Modeling Prep, or EODHD).
|
|
42
|
-
It then selects the top `npairs` based on cointegration.
|
|
43
|
-
|
|
44
|
-
Args:
|
|
45
|
-
securities (pd.DataFrame | List[str], optional):
|
|
46
|
-
A DataFrame or list of tickers representing the securities for analysis.
|
|
47
|
-
If using a DataFrame, it should include a MultiIndex with levels
|
|
48
|
-
['ticker', 'date'].
|
|
49
|
-
candidates (pd.DataFrame | List[str], optional):
|
|
50
|
-
A DataFrame or list of tickers representing the candidate securities
|
|
51
|
-
for pair selection.
|
|
52
|
-
start (str, optional):
|
|
53
|
-
The start date for data retrieval in 'YYYY-MM-DD' format. Ignored
|
|
54
|
-
if both `securities` and `candidates` are DataFrames.
|
|
55
|
-
end (str, optional):
|
|
56
|
-
The end date for data retrieval in 'YYYY-MM-DD' format. Ignored
|
|
57
|
-
if both `securities` and `candidates` are DataFrames.
|
|
58
|
-
period_search (bool, optional):
|
|
59
|
-
If True, the function will perform a periodic search for cointegrated from 3 years
|
|
60
|
-
to the end date by taking 2 yerars rolling window. So you need to have at least 3 years of data
|
|
61
|
-
or set the `window` parameter to 3. Defaults to False.
|
|
62
|
-
select (bool, optional):
|
|
63
|
-
If True, the function will select the top cointegrated pairs based on the
|
|
64
|
-
cointegration test results in form of List[dict].
|
|
65
|
-
If False, the function will return all cointegrated pairs in form of DataFrame.
|
|
66
|
-
This can be useful for further analysis or visualization.
|
|
67
|
-
source (str, optional):
|
|
68
|
-
The data source for historical data retrieval. Must be one of
|
|
69
|
-
['yf', 'mt5', 'fmp', 'eodhd']. Required if `securities` and
|
|
70
|
-
`candidates` are lists of tickers.
|
|
71
|
-
universe (int, optional):
|
|
72
|
-
The maximum number of assets to retain for analysis. Defaults to 100.
|
|
73
|
-
window (int, optional):
|
|
74
|
-
The number of years of historical data to retrieve if `start` and `end`
|
|
75
|
-
are not specified. Defaults to 2 years.
|
|
76
|
-
rolling_window (int, optional):
|
|
77
|
-
The size of the rolling window (in days) used for asset selection.
|
|
78
|
-
Defaults to None.
|
|
79
|
-
npairs (int, optional):
|
|
80
|
-
The number of top cointegrated pairs to select. Defaults to 10.
|
|
81
|
-
tf (str, optional):
|
|
82
|
-
The timeframe for MetaTrader 5 data retrieval. Defaults to 'D1'.
|
|
83
|
-
path (str, optional):
|
|
84
|
-
The path to MetaTrader 5 historical data files. Required if `source='mt5'`.
|
|
85
|
-
**kwargs:
|
|
86
|
-
Additional parameters for data retrieval (e.g., API keys, date ranges
|
|
87
|
-
for specific sources), see ``bbstrader.btengine.data.FMPDataHandler`` or
|
|
88
|
-
``bbstrader.btengine.data.EODHDataHandler`` for more details.
|
|
89
|
-
|
|
90
|
-
Returns:
|
|
91
|
-
List[dict]: A list containing the selected top cointegrated pairs if `select=True`.
|
|
92
|
-
pd.DataFrame: A DataFrame containing all cointegrated pairs if `select=False`.
|
|
93
|
-
|
|
94
|
-
Raises:
|
|
95
|
-
ValueError: If the inputs are invalid or if the `source` is not one of
|
|
96
|
-
the supported sources.
|
|
97
|
-
|
|
98
|
-
Examples:
|
|
99
|
-
Using preloaded DataFrames:
|
|
100
|
-
>>> securities = pd.read_csv('securities.csv', index_col=['ticker', 'date'])
|
|
101
|
-
>>> candidates = pd.read_csv('candidates.csv', index_col=['ticker', 'date'])
|
|
102
|
-
>>> pairs = search_candidate_pairs(securities=securities, candidates=candidates)
|
|
103
|
-
|
|
104
|
-
Using a data source (Yahoo Finance):
|
|
105
|
-
>>> securities = ['SPY', 'IWM', 'XLF', 'HYG', 'XLE', 'LQD', 'GDX', 'FXI', 'EWZ', ...]
|
|
106
|
-
>>> candidates = ['AAPL', 'AMZN', 'NVDA', 'MSFT', 'GOOGL', 'AMD', 'BAC', 'NFLX', ...]
|
|
107
|
-
|
|
108
|
-
>>> pairs = search_candidate_pairs(
|
|
109
|
-
... securities=securities,
|
|
110
|
-
... candidates=candidates,
|
|
111
|
-
... start='2022-12-12',
|
|
112
|
-
... end='2024-12-10',
|
|
113
|
-
... source='yf',
|
|
114
|
-
... npairs=10
|
|
115
|
-
... )
|
|
116
|
-
>>> [
|
|
117
|
-
... {'x': 'LQD', 'y': 'TMO'},
|
|
118
|
-
... {'x': 'IEF', 'y': 'COP'},
|
|
119
|
-
... {'x': 'WMT', 'y': 'IWM'},
|
|
120
|
-
... {'x': 'MDT', 'y': 'OIH'},
|
|
121
|
-
... {'x': 'EWZ', 'y': 'CMCSA'},
|
|
122
|
-
... {'x': 'VLO', 'y': 'XOP'},
|
|
123
|
-
... {'x': 'SHY', 'y': 'F'},
|
|
124
|
-
... {'x': 'ABT', 'y': 'LQD'},
|
|
125
|
-
... {'x': 'PFE', 'y': 'USO'},
|
|
126
|
-
... {'x': 'LQD', 'y': 'MDT'}
|
|
127
|
-
... ]
|
|
128
|
-
|
|
129
|
-
Using MetaTrader 5:
|
|
130
|
-
>>> securities = ['EURUSD', 'GBPUSD']
|
|
131
|
-
>>> candidates = ['USDJPY', 'AUDUSD']
|
|
132
|
-
>>> pairs = search_candidate_pairs(
|
|
133
|
-
... securities=securities,
|
|
134
|
-
... candidates=candidates,
|
|
135
|
-
... source='mt5',
|
|
136
|
-
... tf='H1',
|
|
137
|
-
... path='/path/to/terminal64.exe',
|
|
138
|
-
... )
|
|
139
|
-
|
|
140
|
-
Notes:
|
|
141
|
-
- If `securities` and `candidates` are DataFrames, the function assumes
|
|
142
|
-
the data is already preprocessed and indexed by ['ticker', 'date'].
|
|
143
|
-
- When using `source='fmp'` or `source='eodhd'`, API keys and other
|
|
144
|
-
required parameters should be passed via `kwargs`.
|
|
145
|
-
|
|
146
|
-
"""
|
|
147
|
-
|
|
148
|
-
def _download_and_process_data(source, tickers, start, end, tf, path, **kwargs):
|
|
149
|
-
"""Download and process data for a list of tickers from the specified source."""
|
|
150
|
-
data_list = []
|
|
151
|
-
for ticker in tickers:
|
|
152
|
-
try:
|
|
153
|
-
if source == "yf":
|
|
154
|
-
data = yf.download(
|
|
155
|
-
ticker,
|
|
156
|
-
start=start,
|
|
157
|
-
end=end,
|
|
158
|
-
progress=False,
|
|
159
|
-
multi_level_index=False,
|
|
160
|
-
)
|
|
161
|
-
data = data.drop(columns=["Adj Close"], axis=1)
|
|
162
|
-
elif source == "mt5":
|
|
163
|
-
start, end = pd.Timestamp(start), pd.Timestamp(end)
|
|
164
|
-
data = download_historical_data(
|
|
165
|
-
symbol=ticker,
|
|
166
|
-
timeframe=tf,
|
|
167
|
-
date_from=start,
|
|
168
|
-
date_to=end,
|
|
169
|
-
**{"path": path},
|
|
170
|
-
)
|
|
171
|
-
data = data.drop(columns=["adj_close"], axis=1)
|
|
172
|
-
elif source in ["fmp", "eodhd"]:
|
|
173
|
-
handler_class = (
|
|
174
|
-
FMPDataHandler if source == "fmp" else EODHDataHandler
|
|
175
|
-
)
|
|
176
|
-
handler = handler_class(events=None, symbol_list=[ticker], **kwargs)
|
|
177
|
-
data = handler.data[ticker]
|
|
178
|
-
else:
|
|
179
|
-
raise ValueError(f"Invalid source: {source}")
|
|
180
|
-
|
|
181
|
-
data = data.reset_index()
|
|
182
|
-
data = data.rename(columns=str.lower)
|
|
183
|
-
data["ticker"] = ticker
|
|
184
|
-
data_list.append(data)
|
|
185
|
-
|
|
186
|
-
except Exception as e:
|
|
187
|
-
print(f"No Data found for {ticker}: {e}")
|
|
188
|
-
continue
|
|
189
|
-
|
|
190
|
-
return pd.concat(data_list)
|
|
191
|
-
|
|
192
|
-
def _handle_date_range(start, end, window):
|
|
193
|
-
"""Handle start and end date generation."""
|
|
194
|
-
if start is None or end is None:
|
|
195
|
-
end = pd.Timestamp(datetime.now()).strftime("%Y-%m-%d")
|
|
196
|
-
start = (
|
|
197
|
-
pd.Timestamp(datetime.now())
|
|
198
|
-
- pd.DateOffset(years=window)
|
|
199
|
-
+ pd.DateOffset(days=1)
|
|
200
|
-
).strftime("%Y-%m-%d")
|
|
201
|
-
return start, end
|
|
202
|
-
|
|
203
|
-
def _period_search(start, end, securities, candidates, npairs=npairs):
|
|
204
|
-
if window < 3 or (pd.Timestamp(end) - pd.Timestamp(start)).days / 365 < 3:
|
|
205
|
-
raise ValueError(
|
|
206
|
-
"The date range must be at least two (2) years for period search."
|
|
207
|
-
)
|
|
208
|
-
top_pairs = []
|
|
209
|
-
p_start = pd.Timestamp(end) - pd.DateOffset(years=1)
|
|
210
|
-
periods = pd.date_range(start=p_start, end=pd.Timestamp(end), freq="BQE")
|
|
211
|
-
npairs = max(round(npairs / 2), 1)
|
|
212
|
-
for period in periods:
|
|
213
|
-
s_start = period - pd.DateOffset(years=2) + pd.DateOffset(days=1)
|
|
214
|
-
print(f"Searching for pairs in period: {s_start} - {period}")
|
|
215
|
-
pairs = find_cointegrated_pairs(
|
|
216
|
-
securities,
|
|
217
|
-
candidates,
|
|
218
|
-
n=npairs,
|
|
219
|
-
start=str(s_start),
|
|
220
|
-
stop=str(period),
|
|
221
|
-
coint=True,
|
|
222
|
-
)
|
|
223
|
-
pairs["period"] = period
|
|
224
|
-
top_pairs.append(pairs)
|
|
225
|
-
top_pairs = pd.concat(top_pairs)
|
|
226
|
-
if len(top_pairs.columns) <= 1:
|
|
227
|
-
raise ValueError(
|
|
228
|
-
"No pairs found in the specified period."
|
|
229
|
-
"Please adjust the date range or increase the number of pairs."
|
|
230
|
-
)
|
|
231
|
-
return top_pairs.head(npairs * 2)
|
|
232
|
-
|
|
233
|
-
def _process_asset_data(securities, candidates, universe, rolling_window):
|
|
234
|
-
"""Process and select assets from the data."""
|
|
235
|
-
securities = select_assets(
|
|
236
|
-
securities, n=universe, rolling_window=rolling_window
|
|
237
|
-
)
|
|
238
|
-
candidates = select_assets(
|
|
239
|
-
candidates, n=universe, rolling_window=rolling_window
|
|
240
|
-
)
|
|
241
|
-
return securities, candidates
|
|
242
|
-
|
|
243
|
-
if (
|
|
244
|
-
securities is not None
|
|
245
|
-
and candidates is not None
|
|
246
|
-
and isinstance(securities, pd.DataFrame)
|
|
247
|
-
and isinstance(candidates, pd.DataFrame)
|
|
248
|
-
):
|
|
249
|
-
if isinstance(securities.index, pd.MultiIndex) and isinstance(
|
|
250
|
-
candidates.index, pd.MultiIndex
|
|
251
|
-
):
|
|
252
|
-
securities, candidates = _process_asset_data(
|
|
253
|
-
securities, candidates, universe, rolling_window
|
|
254
|
-
)
|
|
255
|
-
if period_search:
|
|
256
|
-
start = securities.index.get_level_values("date").min()
|
|
257
|
-
end = securities.index.get_level_values("date").max()
|
|
258
|
-
top_pairs = _period_search(start, end, securities, candidates)
|
|
259
|
-
else:
|
|
260
|
-
top_pairs = find_cointegrated_pairs(
|
|
261
|
-
securities, candidates, n=npairs, coint=True
|
|
262
|
-
)
|
|
263
|
-
if select:
|
|
264
|
-
return select_candidate_pairs(
|
|
265
|
-
top_pairs, period=True if period_search else False
|
|
266
|
-
)
|
|
267
|
-
else:
|
|
268
|
-
return top_pairs
|
|
269
|
-
|
|
270
|
-
elif source is not None:
|
|
271
|
-
if source not in ["yf", "mt5", "fmp", "eodhd"]:
|
|
272
|
-
raise ValueError("source must be either 'yf', 'mt5', 'fmp', or 'eodhd'")
|
|
273
|
-
if not isinstance(securities, list) or not isinstance(candidates, list):
|
|
274
|
-
raise ValueError("securities and candidates must be a list of tickers")
|
|
275
|
-
|
|
276
|
-
start, end = _handle_date_range(start, end, window)
|
|
277
|
-
if source in ["fmp", "eodhd"]:
|
|
278
|
-
kwargs[f"{source}_start"] = kwargs.get(f"{source}_start") or start
|
|
279
|
-
kwargs[f"{source}_end"] = kwargs.get(f"{source}_end") or end
|
|
280
|
-
|
|
281
|
-
securities_data = _download_and_process_data(
|
|
282
|
-
source, securities, start, end, tf, path, **kwargs
|
|
283
|
-
)
|
|
284
|
-
candidates_data = _download_and_process_data(
|
|
285
|
-
source, candidates, start, end, tf, path, **kwargs
|
|
286
|
-
)
|
|
287
|
-
securities_data = securities_data.set_index(["ticker", "date"])
|
|
288
|
-
candidates_data = candidates_data.set_index(["ticker", "date"])
|
|
289
|
-
securities_data, candidates_data = _process_asset_data(
|
|
290
|
-
securities_data, candidates_data, universe, rolling_window
|
|
291
|
-
)
|
|
292
|
-
if period_search:
|
|
293
|
-
top_pairs = _period_search(
|
|
294
|
-
start, end, securities_data, candidates_data
|
|
295
|
-
).head(npairs)
|
|
296
|
-
else:
|
|
297
|
-
top_pairs = find_cointegrated_pairs(
|
|
298
|
-
securities_data, candidates_data, n=npairs, coint=True
|
|
299
|
-
)
|
|
300
|
-
if select:
|
|
301
|
-
return select_candidate_pairs(
|
|
302
|
-
top_pairs, period=True if period_search else False
|
|
303
|
-
)
|
|
304
|
-
else:
|
|
305
|
-
return top_pairs
|
|
306
|
-
|
|
307
|
-
else:
|
|
308
|
-
msg = (
|
|
309
|
-
"Invalid input. Either provide securities"
|
|
310
|
-
"and candidates as DataFrames or specify a data source."
|
|
311
|
-
)
|
|
312
|
-
raise ValueError(msg)
|
|
1
|
+
from datetime import datetime
|
|
2
|
+
from typing import Dict, List
|
|
3
|
+
|
|
4
|
+
import pandas as pd
|
|
5
|
+
import yfinance as yf
|
|
6
|
+
|
|
7
|
+
from bbstrader.btengine.data import EODHDataHandler, FMPDataHandler
|
|
8
|
+
from bbstrader.metatrader.rates import download_historical_data
|
|
9
|
+
from bbstrader.tseries import (
|
|
10
|
+
find_cointegrated_pairs,
|
|
11
|
+
select_assets,
|
|
12
|
+
select_candidate_pairs,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
__all__ = [
|
|
16
|
+
"search_coint_candidate_pairs",
|
|
17
|
+
]
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def search_coint_candidate_pairs(
|
|
21
|
+
securities: pd.DataFrame | List[str] = None,
|
|
22
|
+
candidates: pd.DataFrame | List[str] = None,
|
|
23
|
+
start: str = None,
|
|
24
|
+
end: str = None,
|
|
25
|
+
period_search: bool = False,
|
|
26
|
+
select: bool = True,
|
|
27
|
+
source: str = None,
|
|
28
|
+
universe: int = 100,
|
|
29
|
+
window: int = 2,
|
|
30
|
+
rolling_window: int = None,
|
|
31
|
+
npairs: int = 10,
|
|
32
|
+
tf: str = "D1",
|
|
33
|
+
path: str = None,
|
|
34
|
+
**kwargs,
|
|
35
|
+
) -> List[Dict[str, str]] | pd.DataFrame:
|
|
36
|
+
"""
|
|
37
|
+
Searches for candidate pairs of securities based on cointegration analysis.
|
|
38
|
+
|
|
39
|
+
This function either processes preloaded securities and candidates data
|
|
40
|
+
(as pandas DataFrames) or downloads historical data from a specified
|
|
41
|
+
source (e.g., Yahoo Finance, MetaTrader 5, Financial Modeling Prep, or EODHD).
|
|
42
|
+
It then selects the top `npairs` based on cointegration.
|
|
43
|
+
|
|
44
|
+
Args:
|
|
45
|
+
securities (pd.DataFrame | List[str], optional):
|
|
46
|
+
A DataFrame or list of tickers representing the securities for analysis.
|
|
47
|
+
If using a DataFrame, it should include a MultiIndex with levels
|
|
48
|
+
['ticker', 'date'].
|
|
49
|
+
candidates (pd.DataFrame | List[str], optional):
|
|
50
|
+
A DataFrame or list of tickers representing the candidate securities
|
|
51
|
+
for pair selection.
|
|
52
|
+
start (str, optional):
|
|
53
|
+
The start date for data retrieval in 'YYYY-MM-DD' format. Ignored
|
|
54
|
+
if both `securities` and `candidates` are DataFrames.
|
|
55
|
+
end (str, optional):
|
|
56
|
+
The end date for data retrieval in 'YYYY-MM-DD' format. Ignored
|
|
57
|
+
if both `securities` and `candidates` are DataFrames.
|
|
58
|
+
period_search (bool, optional):
|
|
59
|
+
If True, the function will perform a periodic search for cointegrated from 3 years
|
|
60
|
+
to the end date by taking 2 yerars rolling window. So you need to have at least 3 years of data
|
|
61
|
+
or set the `window` parameter to 3. Defaults to False.
|
|
62
|
+
select (bool, optional):
|
|
63
|
+
If True, the function will select the top cointegrated pairs based on the
|
|
64
|
+
cointegration test results in form of List[dict].
|
|
65
|
+
If False, the function will return all cointegrated pairs in form of DataFrame.
|
|
66
|
+
This can be useful for further analysis or visualization.
|
|
67
|
+
source (str, optional):
|
|
68
|
+
The data source for historical data retrieval. Must be one of
|
|
69
|
+
['yf', 'mt5', 'fmp', 'eodhd']. Required if `securities` and
|
|
70
|
+
`candidates` are lists of tickers.
|
|
71
|
+
universe (int, optional):
|
|
72
|
+
The maximum number of assets to retain for analysis. Defaults to 100.
|
|
73
|
+
window (int, optional):
|
|
74
|
+
The number of years of historical data to retrieve if `start` and `end`
|
|
75
|
+
are not specified. Defaults to 2 years.
|
|
76
|
+
rolling_window (int, optional):
|
|
77
|
+
The size of the rolling window (in days) used for asset selection.
|
|
78
|
+
Defaults to None.
|
|
79
|
+
npairs (int, optional):
|
|
80
|
+
The number of top cointegrated pairs to select. Defaults to 10.
|
|
81
|
+
tf (str, optional):
|
|
82
|
+
The timeframe for MetaTrader 5 data retrieval. Defaults to 'D1'.
|
|
83
|
+
path (str, optional):
|
|
84
|
+
The path to MetaTrader 5 historical data files. Required if `source='mt5'`.
|
|
85
|
+
**kwargs:
|
|
86
|
+
Additional parameters for data retrieval (e.g., API keys, date ranges
|
|
87
|
+
for specific sources), see ``bbstrader.btengine.data.FMPDataHandler`` or
|
|
88
|
+
``bbstrader.btengine.data.EODHDataHandler`` for more details.
|
|
89
|
+
|
|
90
|
+
Returns:
|
|
91
|
+
List[dict]: A list containing the selected top cointegrated pairs if `select=True`.
|
|
92
|
+
pd.DataFrame: A DataFrame containing all cointegrated pairs if `select=False`.
|
|
93
|
+
|
|
94
|
+
Raises:
|
|
95
|
+
ValueError: If the inputs are invalid or if the `source` is not one of
|
|
96
|
+
the supported sources.
|
|
97
|
+
|
|
98
|
+
Examples:
|
|
99
|
+
Using preloaded DataFrames:
|
|
100
|
+
>>> securities = pd.read_csv('securities.csv', index_col=['ticker', 'date'])
|
|
101
|
+
>>> candidates = pd.read_csv('candidates.csv', index_col=['ticker', 'date'])
|
|
102
|
+
>>> pairs = search_candidate_pairs(securities=securities, candidates=candidates)
|
|
103
|
+
|
|
104
|
+
Using a data source (Yahoo Finance):
|
|
105
|
+
>>> securities = ['SPY', 'IWM', 'XLF', 'HYG', 'XLE', 'LQD', 'GDX', 'FXI', 'EWZ', ...]
|
|
106
|
+
>>> candidates = ['AAPL', 'AMZN', 'NVDA', 'MSFT', 'GOOGL', 'AMD', 'BAC', 'NFLX', ...]
|
|
107
|
+
|
|
108
|
+
>>> pairs = search_candidate_pairs(
|
|
109
|
+
... securities=securities,
|
|
110
|
+
... candidates=candidates,
|
|
111
|
+
... start='2022-12-12',
|
|
112
|
+
... end='2024-12-10',
|
|
113
|
+
... source='yf',
|
|
114
|
+
... npairs=10
|
|
115
|
+
... )
|
|
116
|
+
>>> [
|
|
117
|
+
... {'x': 'LQD', 'y': 'TMO'},
|
|
118
|
+
... {'x': 'IEF', 'y': 'COP'},
|
|
119
|
+
... {'x': 'WMT', 'y': 'IWM'},
|
|
120
|
+
... {'x': 'MDT', 'y': 'OIH'},
|
|
121
|
+
... {'x': 'EWZ', 'y': 'CMCSA'},
|
|
122
|
+
... {'x': 'VLO', 'y': 'XOP'},
|
|
123
|
+
... {'x': 'SHY', 'y': 'F'},
|
|
124
|
+
... {'x': 'ABT', 'y': 'LQD'},
|
|
125
|
+
... {'x': 'PFE', 'y': 'USO'},
|
|
126
|
+
... {'x': 'LQD', 'y': 'MDT'}
|
|
127
|
+
... ]
|
|
128
|
+
|
|
129
|
+
Using MetaTrader 5:
|
|
130
|
+
>>> securities = ['EURUSD', 'GBPUSD']
|
|
131
|
+
>>> candidates = ['USDJPY', 'AUDUSD']
|
|
132
|
+
>>> pairs = search_candidate_pairs(
|
|
133
|
+
... securities=securities,
|
|
134
|
+
... candidates=candidates,
|
|
135
|
+
... source='mt5',
|
|
136
|
+
... tf='H1',
|
|
137
|
+
... path='/path/to/terminal64.exe',
|
|
138
|
+
... )
|
|
139
|
+
|
|
140
|
+
Notes:
|
|
141
|
+
- If `securities` and `candidates` are DataFrames, the function assumes
|
|
142
|
+
the data is already preprocessed and indexed by ['ticker', 'date'].
|
|
143
|
+
- When using `source='fmp'` or `source='eodhd'`, API keys and other
|
|
144
|
+
required parameters should be passed via `kwargs`.
|
|
145
|
+
|
|
146
|
+
"""
|
|
147
|
+
|
|
148
|
+
def _download_and_process_data(source, tickers, start, end, tf, path, **kwargs):
|
|
149
|
+
"""Download and process data for a list of tickers from the specified source."""
|
|
150
|
+
data_list = []
|
|
151
|
+
for ticker in tickers:
|
|
152
|
+
try:
|
|
153
|
+
if source == "yf":
|
|
154
|
+
data = yf.download(
|
|
155
|
+
ticker,
|
|
156
|
+
start=start,
|
|
157
|
+
end=end,
|
|
158
|
+
progress=False,
|
|
159
|
+
multi_level_index=False,
|
|
160
|
+
)
|
|
161
|
+
data = data.drop(columns=["Adj Close"], axis=1)
|
|
162
|
+
elif source == "mt5":
|
|
163
|
+
start, end = pd.Timestamp(start), pd.Timestamp(end)
|
|
164
|
+
data = download_historical_data(
|
|
165
|
+
symbol=ticker,
|
|
166
|
+
timeframe=tf,
|
|
167
|
+
date_from=start,
|
|
168
|
+
date_to=end,
|
|
169
|
+
**{"path": path},
|
|
170
|
+
)
|
|
171
|
+
data = data.drop(columns=["adj_close"], axis=1)
|
|
172
|
+
elif source in ["fmp", "eodhd"]:
|
|
173
|
+
handler_class = (
|
|
174
|
+
FMPDataHandler if source == "fmp" else EODHDataHandler
|
|
175
|
+
)
|
|
176
|
+
handler = handler_class(events=None, symbol_list=[ticker], **kwargs)
|
|
177
|
+
data = handler.data[ticker]
|
|
178
|
+
else:
|
|
179
|
+
raise ValueError(f"Invalid source: {source}")
|
|
180
|
+
|
|
181
|
+
data = data.reset_index()
|
|
182
|
+
data = data.rename(columns=str.lower)
|
|
183
|
+
data["ticker"] = ticker
|
|
184
|
+
data_list.append(data)
|
|
185
|
+
|
|
186
|
+
except Exception as e:
|
|
187
|
+
print(f"No Data found for {ticker}: {e}")
|
|
188
|
+
continue
|
|
189
|
+
|
|
190
|
+
return pd.concat(data_list)
|
|
191
|
+
|
|
192
|
+
def _handle_date_range(start, end, window):
|
|
193
|
+
"""Handle start and end date generation."""
|
|
194
|
+
if start is None or end is None:
|
|
195
|
+
end = pd.Timestamp(datetime.now()).strftime("%Y-%m-%d")
|
|
196
|
+
start = (
|
|
197
|
+
pd.Timestamp(datetime.now())
|
|
198
|
+
- pd.DateOffset(years=window)
|
|
199
|
+
+ pd.DateOffset(days=1)
|
|
200
|
+
).strftime("%Y-%m-%d")
|
|
201
|
+
return start, end
|
|
202
|
+
|
|
203
|
+
def _period_search(start, end, securities, candidates, npairs=npairs):
|
|
204
|
+
if window < 3 or (pd.Timestamp(end) - pd.Timestamp(start)).days / 365 < 3:
|
|
205
|
+
raise ValueError(
|
|
206
|
+
"The date range must be at least two (2) years for period search."
|
|
207
|
+
)
|
|
208
|
+
top_pairs = []
|
|
209
|
+
p_start = pd.Timestamp(end) - pd.DateOffset(years=1)
|
|
210
|
+
periods = pd.date_range(start=p_start, end=pd.Timestamp(end), freq="BQE")
|
|
211
|
+
npairs = max(round(npairs / 2), 1)
|
|
212
|
+
for period in periods:
|
|
213
|
+
s_start = period - pd.DateOffset(years=2) + pd.DateOffset(days=1)
|
|
214
|
+
print(f"Searching for pairs in period: {s_start} - {period}")
|
|
215
|
+
pairs = find_cointegrated_pairs(
|
|
216
|
+
securities,
|
|
217
|
+
candidates,
|
|
218
|
+
n=npairs,
|
|
219
|
+
start=str(s_start),
|
|
220
|
+
stop=str(period),
|
|
221
|
+
coint=True,
|
|
222
|
+
)
|
|
223
|
+
pairs["period"] = period
|
|
224
|
+
top_pairs.append(pairs)
|
|
225
|
+
top_pairs = pd.concat(top_pairs)
|
|
226
|
+
if len(top_pairs.columns) <= 1:
|
|
227
|
+
raise ValueError(
|
|
228
|
+
"No pairs found in the specified period."
|
|
229
|
+
"Please adjust the date range or increase the number of pairs."
|
|
230
|
+
)
|
|
231
|
+
return top_pairs.head(npairs * 2)
|
|
232
|
+
|
|
233
|
+
def _process_asset_data(securities, candidates, universe, rolling_window):
|
|
234
|
+
"""Process and select assets from the data."""
|
|
235
|
+
securities = select_assets(
|
|
236
|
+
securities, n=universe, rolling_window=rolling_window
|
|
237
|
+
)
|
|
238
|
+
candidates = select_assets(
|
|
239
|
+
candidates, n=universe, rolling_window=rolling_window
|
|
240
|
+
)
|
|
241
|
+
return securities, candidates
|
|
242
|
+
|
|
243
|
+
if (
|
|
244
|
+
securities is not None
|
|
245
|
+
and candidates is not None
|
|
246
|
+
and isinstance(securities, pd.DataFrame)
|
|
247
|
+
and isinstance(candidates, pd.DataFrame)
|
|
248
|
+
):
|
|
249
|
+
if isinstance(securities.index, pd.MultiIndex) and isinstance(
|
|
250
|
+
candidates.index, pd.MultiIndex
|
|
251
|
+
):
|
|
252
|
+
securities, candidates = _process_asset_data(
|
|
253
|
+
securities, candidates, universe, rolling_window
|
|
254
|
+
)
|
|
255
|
+
if period_search:
|
|
256
|
+
start = securities.index.get_level_values("date").min()
|
|
257
|
+
end = securities.index.get_level_values("date").max()
|
|
258
|
+
top_pairs = _period_search(start, end, securities, candidates)
|
|
259
|
+
else:
|
|
260
|
+
top_pairs = find_cointegrated_pairs(
|
|
261
|
+
securities, candidates, n=npairs, coint=True
|
|
262
|
+
)
|
|
263
|
+
if select:
|
|
264
|
+
return select_candidate_pairs(
|
|
265
|
+
top_pairs, period=True if period_search else False
|
|
266
|
+
)
|
|
267
|
+
else:
|
|
268
|
+
return top_pairs
|
|
269
|
+
|
|
270
|
+
elif source is not None:
|
|
271
|
+
if source not in ["yf", "mt5", "fmp", "eodhd"]:
|
|
272
|
+
raise ValueError("source must be either 'yf', 'mt5', 'fmp', or 'eodhd'")
|
|
273
|
+
if not isinstance(securities, list) or not isinstance(candidates, list):
|
|
274
|
+
raise ValueError("securities and candidates must be a list of tickers")
|
|
275
|
+
|
|
276
|
+
start, end = _handle_date_range(start, end, window)
|
|
277
|
+
if source in ["fmp", "eodhd"]:
|
|
278
|
+
kwargs[f"{source}_start"] = kwargs.get(f"{source}_start") or start
|
|
279
|
+
kwargs[f"{source}_end"] = kwargs.get(f"{source}_end") or end
|
|
280
|
+
|
|
281
|
+
securities_data = _download_and_process_data(
|
|
282
|
+
source, securities, start, end, tf, path, **kwargs
|
|
283
|
+
)
|
|
284
|
+
candidates_data = _download_and_process_data(
|
|
285
|
+
source, candidates, start, end, tf, path, **kwargs
|
|
286
|
+
)
|
|
287
|
+
securities_data = securities_data.set_index(["ticker", "date"])
|
|
288
|
+
candidates_data = candidates_data.set_index(["ticker", "date"])
|
|
289
|
+
securities_data, candidates_data = _process_asset_data(
|
|
290
|
+
securities_data, candidates_data, universe, rolling_window
|
|
291
|
+
)
|
|
292
|
+
if period_search:
|
|
293
|
+
top_pairs = _period_search(
|
|
294
|
+
start, end, securities_data, candidates_data
|
|
295
|
+
).head(npairs)
|
|
296
|
+
else:
|
|
297
|
+
top_pairs = find_cointegrated_pairs(
|
|
298
|
+
securities_data, candidates_data, n=npairs, coint=True
|
|
299
|
+
)
|
|
300
|
+
if select:
|
|
301
|
+
return select_candidate_pairs(
|
|
302
|
+
top_pairs, period=True if period_search else False
|
|
303
|
+
)
|
|
304
|
+
else:
|
|
305
|
+
return top_pairs
|
|
306
|
+
|
|
307
|
+
else:
|
|
308
|
+
msg = (
|
|
309
|
+
"Invalid input. Either provide securities"
|
|
310
|
+
"and candidates as DataFrames or specify a data source."
|
|
311
|
+
)
|
|
312
|
+
raise ValueError(msg)
|