BackcastPro 0.0.1-py3-none-any.whl → 0.0.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of BackcastPro might be problematic.
- BackcastPro/__init__.py +90 -0
- BackcastPro/_plotting.py +785 -0
- BackcastPro/_stats.py +212 -0
- BackcastPro/_util.py +337 -0
- BackcastPro/backtesting.py +1763 -0
- BackcastPro/lib.py +646 -0
- BackcastPro/test/__init__.py +29 -0
- BackcastPro/test/__main__.py +7 -0
- BackcastPro/test/_test.py +1174 -0
- backcastpro-0.0.2.dist-info/METADATA +53 -0
- backcastpro-0.0.2.dist-info/RECORD +13 -0
- BackcastPro/example.py +0 -2
- backcastpro-0.0.1.dist-info/METADATA +0 -18
- backcastpro-0.0.1.dist-info/RECORD +0 -6
- {backcastpro-0.0.1.dist-info → backcastpro-0.0.2.dist-info}/WHEEL +0 -0
- {backcastpro-0.0.1.dist-info → backcastpro-0.0.2.dist-info}/top_level.txt +0 -0
BackcastPro/_stats.py
ADDED
@@ -0,0 +1,212 @@

from __future__ import annotations

from typing import TYPE_CHECKING, List, Union, cast

import numpy as np
import pandas as pd

from ._util import _data_period, _indicator_warmup_nbars

if TYPE_CHECKING:
    from .backtesting import Strategy, Trade


def compute_drawdown_duration_peaks(dd: pd.Series):
    iloc = np.unique(np.r_[(dd == 0).values.nonzero()[0], len(dd) - 1])
    iloc = pd.Series(iloc, index=dd.index[iloc])
    df = iloc.to_frame('iloc').assign(prev=iloc.shift())
    df = df[df['iloc'] > df['prev'] + 1].astype(np.int64)

    # If no drawdown since no trade, avoid below for pandas sake and return nan series
    if not len(df):
        return (dd.replace(0, np.nan),) * 2

    df['duration'] = df['iloc'].map(dd.index.__getitem__) - df['prev'].map(dd.index.__getitem__)
    df['peak_dd'] = df.apply(lambda row: dd.iloc[row['prev']:row['iloc'] + 1].max(), axis=1)
    df = df.reindex(dd.index)
    return df['duration'], df['peak_dd']


def geometric_mean(returns: pd.Series) -> float:
    returns = returns.fillna(0) + 1
    if np.any(returns <= 0):
        return 0
    return np.exp(np.log(returns).sum() / (len(returns) or np.nan)) - 1


def compute_stats(
        trades: Union[List['Trade'], pd.DataFrame],
        equity: np.ndarray,
        ohlc_data: pd.DataFrame,
        strategy_instance: Strategy | None,
        risk_free_rate: float = 0,
) -> pd.Series:
    assert -1 < risk_free_rate < 1

    index = ohlc_data.index
    dd = 1 - equity / np.maximum.accumulate(equity)
    dd_dur, dd_peaks = compute_drawdown_duration_peaks(pd.Series(dd, index=index))

    equity_df = pd.DataFrame({
        'Equity': equity,
        'DrawdownPct': dd,
        'DrawdownDuration': dd_dur},
        index=index)

    if isinstance(trades, pd.DataFrame):
        trades_df: pd.DataFrame = trades
        commissions = None  # Not shown
    else:
        # Came straight from Backtest.run()
        trades_df = pd.DataFrame({
            'Size': [t.size for t in trades],
            'EntryBar': [t.entry_bar for t in trades],
            'ExitBar': [t.exit_bar for t in trades],
            'EntryPrice': [t.entry_price for t in trades],
            'ExitPrice': [t.exit_price for t in trades],
            'SL': [t.sl for t in trades],
            'TP': [t.tp for t in trades],
            'PnL': [t.pl for t in trades],
            'Commission': [t._commissions for t in trades],
            'ReturnPct': [t.pl_pct for t in trades],
            'EntryTime': [t.entry_time for t in trades],
            'ExitTime': [t.exit_time for t in trades],
        })
        trades_df['Duration'] = trades_df['ExitTime'] - trades_df['EntryTime']
        trades_df['Tag'] = [t.tag for t in trades]

        # Add indicator values
        if len(trades_df) and strategy_instance:
            for ind in strategy_instance._indicators:
                ind = np.atleast_2d(ind)
                for i, values in enumerate(ind):  # multi-d indicators
                    suffix = f'_{i}' if len(ind) > 1 else ''
                    trades_df[f'Entry_{ind.name}{suffix}'] = values[trades_df['EntryBar'].values]
                    trades_df[f'Exit_{ind.name}{suffix}'] = values[trades_df['ExitBar'].values]

        commissions = sum(t._commissions for t in trades)
    del trades

    pl = trades_df['PnL']
    returns = trades_df['ReturnPct']
    durations = trades_df['Duration']

    def _round_timedelta(value, _period=_data_period(index)):
        if not isinstance(value, pd.Timedelta):
            return value
        resolution = getattr(_period, 'resolution_string', None) or _period.resolution
        return value.ceil(resolution)

    s = pd.Series(dtype=object)
    s.loc['Start'] = index[0]
    s.loc['End'] = index[-1]
    s.loc['Duration'] = s.End - s.Start

    have_position = np.repeat(0, len(index))
    for t in trades_df.itertuples(index=False):
        have_position[t.EntryBar:t.ExitBar + 1] = 1

    s.loc['Exposure Time [%]'] = have_position.mean() * 100  # In "n bars" time, not index time
    s.loc['Equity Final [$]'] = equity[-1]
    s.loc['Equity Peak [$]'] = equity.max()
    if commissions:
        s.loc['Commissions [$]'] = commissions
    s.loc['Return [%]'] = (equity[-1] - equity[0]) / equity[0] * 100
    first_trading_bar = _indicator_warmup_nbars(strategy_instance)
    c = ohlc_data.Close.values
    s.loc['Buy & Hold Return [%]'] = (c[-1] - c[first_trading_bar]) / c[first_trading_bar] * 100  # long-only return

    gmean_day_return: float = 0
    day_returns = np.array(np.nan)
    annual_trading_days = np.nan
    is_datetime_index = isinstance(index, pd.DatetimeIndex)
    if is_datetime_index:
        freq_days = cast(pd.Timedelta, _data_period(index)).days
        have_weekends = index.dayofweek.to_series().between(5, 6).mean() > 2 / 7 * .6
        annual_trading_days = (
            52 if freq_days == 7 else
            12 if freq_days == 31 else
            1 if freq_days == 365 else
            (365 if have_weekends else 252))
        freq = {7: 'W', 31: 'ME', 365: 'YE'}.get(freq_days, 'D')
        day_returns = equity_df['Equity'].resample(freq).last().dropna().pct_change()
        gmean_day_return = geometric_mean(day_returns)

    # Annualized return and risk metrics are computed based on the (mostly correct)
    # assumption that the returns are compounded. See: https://dx.doi.org/10.2139/ssrn.3054517
    # Our annualized return matches `empyrical.annual_return(day_returns)` whereas
    # our risk doesn't; they use the simpler approach below.
    annualized_return = (1 + gmean_day_return)**annual_trading_days - 1
    s.loc['Return (Ann.) [%]'] = annualized_return * 100
    s.loc['Volatility (Ann.) [%]'] = np.sqrt((day_returns.var(ddof=int(bool(day_returns.shape))) + (1 + gmean_day_return)**2)**annual_trading_days - (1 + gmean_day_return)**(2 * annual_trading_days)) * 100  # noqa: E501
    # s.loc['Return (Ann.) [%]'] = gmean_day_return * annual_trading_days * 100
    # s.loc['Risk (Ann.) [%]'] = day_returns.std(ddof=1) * np.sqrt(annual_trading_days) * 100
    if is_datetime_index:
        time_in_years = (s.loc['Duration'].days + s.loc['Duration'].seconds / 86400) / annual_trading_days
        s.loc['CAGR [%]'] = ((s.loc['Equity Final [$]'] / equity[0])**(1 / time_in_years) - 1) * 100 if time_in_years else np.nan  # noqa: E501

    # Our Sharpe mismatches `empyrical.sharpe_ratio()` because they use arithmetic mean return
    # and simple standard deviation
    s.loc['Sharpe Ratio'] = (s.loc['Return (Ann.) [%]'] - risk_free_rate * 100) / (s.loc['Volatility (Ann.) [%]'] or np.nan)  # noqa: E501
    # Our Sortino mismatches `empyrical.sortino_ratio()` because they use arithmetic mean return
    with np.errstate(divide='ignore'):
        s.loc['Sortino Ratio'] = (annualized_return - risk_free_rate) / (np.sqrt(np.mean(day_returns.clip(-np.inf, 0)**2)) * np.sqrt(annual_trading_days))  # noqa: E501
    max_dd = -np.nan_to_num(dd.max())
    s.loc['Calmar Ratio'] = annualized_return / (-max_dd or np.nan)
    equity_log_returns = np.log(equity[1:] / equity[:-1])
    market_log_returns = np.log(c[1:] / c[:-1])
    beta = np.nan
    if len(equity_log_returns) > 1 and len(market_log_returns) > 1:
        # len == 0 on dummy call `stats_keys = compute_stats(...)` pre optimization
        cov_matrix = np.cov(equity_log_returns, market_log_returns)
        beta = cov_matrix[0, 1] / cov_matrix[1, 1]
    # Jensen CAPM Alpha: can be strongly positive when beta is negative and B&H Return is large
    s.loc['Alpha [%]'] = s.loc['Return [%]'] - risk_free_rate * 100 - beta * (s.loc['Buy & Hold Return [%]'] - risk_free_rate * 100)  # noqa: E501
    s.loc['Beta'] = beta
    s.loc['Max. Drawdown [%]'] = max_dd * 100
    s.loc['Avg. Drawdown [%]'] = -dd_peaks.mean() * 100
    s.loc['Max. Drawdown Duration'] = _round_timedelta(dd_dur.max())
    s.loc['Avg. Drawdown Duration'] = _round_timedelta(dd_dur.mean())
    s.loc['# Trades'] = n_trades = len(trades_df)
    win_rate = np.nan if not n_trades else (pl > 0).mean()
    s.loc['Win Rate [%]'] = win_rate * 100
    s.loc['Best Trade [%]'] = returns.max() * 100
    s.loc['Worst Trade [%]'] = returns.min() * 100
    mean_return = geometric_mean(returns)
    s.loc['Avg. Trade [%]'] = mean_return * 100
    s.loc['Max. Trade Duration'] = _round_timedelta(durations.max())
    s.loc['Avg. Trade Duration'] = _round_timedelta(durations.mean())
    s.loc['Profit Factor'] = returns[returns > 0].sum() / (abs(returns[returns < 0].sum()) or np.nan)  # noqa: E501
    s.loc['Expectancy [%]'] = returns.mean() * 100
    s.loc['SQN'] = np.sqrt(n_trades) * pl.mean() / (pl.std() or np.nan)
    s.loc['Kelly Criterion'] = win_rate - (1 - win_rate) / (pl[pl > 0].mean() / -pl[pl < 0].mean())

    s.loc['_strategy'] = strategy_instance
    s.loc['_equity_curve'] = equity_df
    s.loc['_trades'] = trades_df

    s = _Stats(s)
    return s


class _Stats(pd.Series):
    def __repr__(self):
        with pd.option_context(
                'display.max_colwidth', 20,  # Prevent expansion due to _equity and _trades dfs
                'display.max_rows', len(self),  # Reveal self whole
                'display.precision', 5,  # Enough for my eyes at least
                # 'format.na_rep', '--',  # TODO: Enable once it works
        ):
            return super().__repr__()


def dummy_stats():
    from .backtesting import Trade, _Broker
    index = pd.DatetimeIndex(['2025'])
    data = pd.DataFrame({col: [np.nan] for col in ('Close',)}, index=index)
    trade = Trade(_Broker(data=data, cash=10000, spread=.01, commission=.01, margin=.1,
                          trade_on_close=True, hedging=True, exclusive_orders=False, index=index),
                  1, 1, 0, None)
    trade._replace(exit_price=1, exit_bar=0)
    trade._commissions = np.nan
    return compute_stats([trade], np.r_[[np.nan]], data, None, 0)
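Note on the annualization above: `compute_stats` compounds the per-period geometric mean rather than scaling an arithmetic mean (see the SSRN reference in the code comments). A minimal standalone sketch of that relationship, using only NumPy/pandas; the sample returns and the 252-day year are illustrative assumptions, not values from the package:

    import numpy as np
    import pandas as pd

    def geometric_mean(returns: pd.Series) -> float:
        # Same approach as _stats.py: compound (1 + r) terms,
        # bailing out if any compounding factor is non-positive
        returns = returns.fillna(0) + 1
        if np.any(returns <= 0):
            return 0
        return np.exp(np.log(returns).sum() / (len(returns) or np.nan)) - 1

    day_returns = pd.Series([.01, -.005, .002, .015, -.01])  # made-up daily returns
    annual_trading_days = 252  # assumed; _stats.py infers this from the data index

    g = geometric_mean(day_returns)
    annualized_return = (1 + g)**annual_trading_days - 1
    # Volatility of compounded returns, mirroring the formula in compute_stats()
    annualized_vol = np.sqrt(
        (day_returns.var(ddof=1) + (1 + g)**2)**annual_trading_days
        - (1 + g)**(2 * annual_trading_days))
    print(f'annual return {annualized_return:.2%}, volatility {annualized_vol:.2%}')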
BackcastPro/_util.py
ADDED
@@ -0,0 +1,337 @@

from __future__ import annotations

import os
import sys
import warnings
from contextlib import contextmanager
from functools import partial
from itertools import chain
from multiprocessing import resource_tracker as _mprt
from multiprocessing import shared_memory as _mpshm
from numbers import Number
from threading import Lock
from typing import Dict, List, Optional, Sequence, Union, cast

import numpy as np
import pandas as pd

try:
    from tqdm.auto import tqdm as _tqdm
    _tqdm = partial(_tqdm, leave=False)
except ImportError:
    def _tqdm(seq, **_):
        return seq


def try_(lazy_func, default=None, exception=Exception):
    try:
        return lazy_func()
    except exception:
        return default


@contextmanager
def patch(obj, attr, newvalue):
    had_attr = hasattr(obj, attr)
    orig_value = getattr(obj, attr, None)
    setattr(obj, attr, newvalue)
    try:
        yield
    finally:
        if had_attr:
            setattr(obj, attr, orig_value)
        else:
            delattr(obj, attr)


def _as_str(value) -> str:
    if isinstance(value, (Number, str)):
        return str(value)
    if isinstance(value, pd.DataFrame):
        return 'df'
    name = str(getattr(value, 'name', '') or '')
    if name in ('Open', 'High', 'Low', 'Close', 'Volume'):
        return name[:1]
    if callable(value):
        name = getattr(value, '__name__', value.__class__.__name__).replace('<lambda>', 'λ')
    if len(name) > 10:
        name = name[:9] + '…'
    return name


def _as_list(value) -> List:
    if isinstance(value, Sequence) and not isinstance(value, str):
        return list(value)
    return [value]


def _batch(seq):
    # XXX: Replace with itertools.batched
    n = np.clip(int(len(seq) // (os.cpu_count() or 1)), 1, 300)
    for i in range(0, len(seq), n):
        yield seq[i:i + n]


def _data_period(index) -> Union[pd.Timedelta, Number]:
    """Return data index period as pd.Timedelta"""
    values = pd.Series(index[-100:])
    return values.diff().dropna().median()


def _strategy_indicators(strategy):
    return {attr: indicator
            for attr, indicator in strategy.__dict__.items()
            if isinstance(indicator, _Indicator)}.items()


def _indicator_warmup_nbars(strategy):
    if strategy is None:
        return 0
    nbars = max((np.isnan(indicator.astype(float)).argmin(axis=-1).max()
                 for _, indicator in _strategy_indicators(strategy)
                 if not indicator._opts['scatter']), default=0)
    return nbars


class _Array(np.ndarray):
    """
    ndarray extended to supply .name and other arbitrary properties
    in ._opts dict.
    """
    def __new__(cls, array, *, name=None, **kwargs):
        obj = np.asarray(array).view(cls)
        obj.name = name or array.name
        obj._opts = kwargs
        return obj

    def __array_finalize__(self, obj):
        if obj is not None:
            self.name = getattr(obj, 'name', '')
            self._opts = getattr(obj, '_opts', {})

    # Make sure properties name and _opts are carried over
    # when (un-)pickling.
    def __reduce__(self):
        value = super().__reduce__()
        return value[:2] + (value[2] + (self.__dict__,),)

    def __setstate__(self, state):
        self.__dict__.update(state[-1])
        super().__setstate__(state[:-1])

    def __bool__(self):
        try:
            return bool(self[-1])
        except IndexError:
            return super().__bool__()

    def __float__(self):
        try:
            return float(self[-1])
        except IndexError:
            return super().__float__()

    def to_series(self):
        warnings.warn("`.to_series()` is deprecated. For pd.Series conversion, use accessor `.s`")
        return self.s

    @property
    def s(self) -> pd.Series:
        values = np.atleast_2d(self)
        index = self._opts['index'][:values.shape[1]]
        return pd.Series(values[0], index=index, name=self.name)

    @property
    def df(self) -> pd.DataFrame:
        values = np.atleast_2d(np.asarray(self))
        index = self._opts['index'][:values.shape[1]]
        df = pd.DataFrame(values.T, index=index, columns=[self.name] * len(values))
        return df


class _Indicator(_Array):
    pass


class _Data:
    """
    A data array accessor. Provides access to OHLCV "columns"
    as a standard `pd.DataFrame` would, except it's not a DataFrame
    and the returned "series" are _not_ `pd.Series` but `np.ndarray`
    for performance reasons.
    """
    def __init__(self, df: pd.DataFrame):
        self.__df = df
        self.__len = len(df)  # Current length
        self.__pip: Optional[float] = None
        self.__cache: Dict[str, _Array] = {}
        self.__arrays: Dict[str, _Array] = {}
        self._update()

    def __getitem__(self, item):
        return self.__get_array(item)

    def __getattr__(self, item):
        try:
            return self.__get_array(item)
        except KeyError:
            raise AttributeError(f"Column '{item}' not in data") from None

    def _set_length(self, length):
        self.__len = length
        self.__cache.clear()

    def _update(self):
        index = self.__df.index.copy()
        self.__arrays = {col: _Array(arr, index=index)
                         for col, arr in self.__df.items()}
        # Leave index as Series because pd.Timestamp nicer API to work with
        self.__arrays['__index'] = index

    def __repr__(self):
        i = min(self.__len, len(self.__df)) - 1
        index = self.__arrays['__index'][i]
        items = ', '.join(f'{k}={v}' for k, v in self.__df.iloc[i].items())
        return f'<Data i={i} ({index}) {items}>'

    def __len__(self):
        return self.__len

    @property
    def df(self) -> pd.DataFrame:
        return (self.__df.iloc[:self.__len]
                if self.__len < len(self.__df)
                else self.__df)

    @property
    def pip(self) -> float:
        if self.__pip is None:
            self.__pip = float(10**-np.median([len(s.partition('.')[-1])
                                               for s in self.__arrays['Close'].astype(str)]))
        return self.__pip

    def __get_array(self, key) -> _Array:
        arr = self.__cache.get(key)
        if arr is None:
            arr = self.__cache[key] = cast(_Array, self.__arrays[key][:self.__len])
        return arr

    @property
    def Open(self) -> _Array:
        return self.__get_array('Open')

    @property
    def High(self) -> _Array:
        return self.__get_array('High')

    @property
    def Low(self) -> _Array:
        return self.__get_array('Low')

    @property
    def Close(self) -> _Array:
        return self.__get_array('Close')

    @property
    def Volume(self) -> _Array:
        return self.__get_array('Volume')

    @property
    def index(self) -> pd.DatetimeIndex:
        return self.__get_array('__index')

    # Make pickling in Backtest.optimize() work with our catch-all __getattr__
    def __getstate__(self):
        return self.__dict__

    def __setstate__(self, state):
        self.__dict__ = state


if sys.version_info >= (3, 13):
    SharedMemory = _mpshm.SharedMemory
else:
    class SharedMemory(_mpshm.SharedMemory):
        # From https://github.com/python/cpython/issues/82300#issuecomment-2169035092
        __lock = Lock()

        def __init__(self, *args, track: bool = True, **kwargs):
            self._track = track
            if track:
                return super().__init__(*args, **kwargs)
            with self.__lock:
                with patch(_mprt, 'register', lambda *a, **kw: None):
                    super().__init__(*args, **kwargs)

        def unlink(self):
            if _mpshm._USE_POSIX and self._name:
                _mpshm._posixshmem.shm_unlink(self._name)
                if self._track:
                    _mprt.unregister(self._name, "shared_memory")


class SharedMemoryManager:
    """
    A simple shared memory contextmanager based on
    https://docs.python.org/3/library/multiprocessing.shared_memory.html#multiprocessing.shared_memory.SharedMemory
    """
    def __init__(self, create=False) -> None:
        self._shms: list[SharedMemory] = []
        self.__create = create

    def SharedMemory(self, *, name=None, create=False, size=0, track=True):
        shm = SharedMemory(name=name, create=create, size=size, track=track)
        shm._create = create
        # Essential to keep refs on Windows
        # https://stackoverflow.com/questions/74193377/filenotfounderror-when-passing-a-shared-memory-to-a-new-process#comment130999060_74194875  # noqa: E501
        self._shms.append(shm)
        return shm

    def __enter__(self):
        return self

    def __exit__(self, *args, **kwargs):
        for shm in self._shms:
            try:
                shm.close()
                if shm._create:
                    shm.unlink()
            except Exception:
                warnings.warn(f'Failed to unlink shared memory {shm.name!r}',
                              category=ResourceWarning, stacklevel=2)
                raise

    def arr2shm(self, vals):
        """Array to shared memory. Returns (shm_name, shape, dtype) used for restore."""
        assert vals.ndim == 1, (vals.ndim, vals.shape, vals)
        shm = self.SharedMemory(size=vals.nbytes, create=True)
        # np.array can't handle pandas' tz-aware datetimes
        # https://github.com/numpy/numpy/issues/18279
        buf = np.ndarray(vals.shape, dtype=vals.dtype.base, buffer=shm.buf)
        has_tz = getattr(vals.dtype, 'tz', None)
        buf[:] = vals.tz_localize(None) if has_tz else vals  # Copy into shared memory
        return shm.name, vals.shape, vals.dtype

    def df2shm(self, df):
        return tuple((
            (column, *self.arr2shm(values))
            for column, values in chain([(self._DF_INDEX_COL, df.index)], df.items())
        ))

    @staticmethod
    def shm2s(shm, shape, dtype) -> pd.Series:
        arr = np.ndarray(shape, dtype=dtype.base, buffer=shm.buf)
        arr.setflags(write=False)
        return pd.Series(arr, dtype=dtype)

    _DF_INDEX_COL = '__bt_index'

    @staticmethod
    def shm2df(data_shm):
        shm = [SharedMemory(name=name, create=False, track=False) for _, name, _, _ in data_shm]
        df = pd.DataFrame({
            col: SharedMemoryManager.shm2s(shm, shape, dtype)
            for shm, (col, _, shape, dtype) in zip(shm, data_shm)})
        df.set_index(SharedMemoryManager._DF_INDEX_COL, drop=True, inplace=True)
        df.index.name = None
        return df, shm