BackcastPro 0.0.2__py3-none-any.whl → 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of BackcastPro might be problematic. Click here for more details.

BackcastPro/_stats.py CHANGED
@@ -1,212 +1,169 @@
1
- from __future__ import annotations
2
-
3
- from typing import TYPE_CHECKING, List, Union, cast
4
-
5
- import numpy as np
6
- import pandas as pd
7
-
8
- from ._util import _data_period, _indicator_warmup_nbars
9
-
10
- if TYPE_CHECKING:
11
- from .backtesting import Strategy, Trade
12
-
13
-
14
- def compute_drawdown_duration_peaks(dd: pd.Series):
15
- iloc = np.unique(np.r_[(dd == 0).values.nonzero()[0], len(dd) - 1])
16
- iloc = pd.Series(iloc, index=dd.index[iloc])
17
- df = iloc.to_frame('iloc').assign(prev=iloc.shift())
18
- df = df[df['iloc'] > df['prev'] + 1].astype(np.int64)
19
-
20
- # If no drawdown since no trade, avoid below for pandas sake and return nan series
21
- if not len(df):
22
- return (dd.replace(0, np.nan),) * 2
23
-
24
- df['duration'] = df['iloc'].map(dd.index.__getitem__) - df['prev'].map(dd.index.__getitem__)
25
- df['peak_dd'] = df.apply(lambda row: dd.iloc[row['prev']:row['iloc'] + 1].max(), axis=1)
26
- df = df.reindex(dd.index)
27
- return df['duration'], df['peak_dd']
28
-
29
-
30
- def geometric_mean(returns: pd.Series) -> float:
31
- returns = returns.fillna(0) + 1
32
- if np.any(returns <= 0):
33
- return 0
34
- return np.exp(np.log(returns).sum() / (len(returns) or np.nan)) - 1
35
-
36
-
37
- def compute_stats(
38
- trades: Union[List['Trade'], pd.DataFrame],
39
- equity: np.ndarray,
40
- ohlc_data: pd.DataFrame,
41
- strategy_instance: Strategy | None,
42
- risk_free_rate: float = 0,
43
- ) -> pd.Series:
44
- assert -1 < risk_free_rate < 1
45
-
46
- index = ohlc_data.index
47
- dd = 1 - equity / np.maximum.accumulate(equity)
48
- dd_dur, dd_peaks = compute_drawdown_duration_peaks(pd.Series(dd, index=index))
49
-
50
- equity_df = pd.DataFrame({
51
- 'Equity': equity,
52
- 'DrawdownPct': dd,
53
- 'DrawdownDuration': dd_dur},
54
- index=index)
55
-
56
- if isinstance(trades, pd.DataFrame):
57
- trades_df: pd.DataFrame = trades
58
- commissions = None # Not shown
59
- else:
60
- # Came straight from Backtest.run()
61
- trades_df = pd.DataFrame({
62
- 'Size': [t.size for t in trades],
63
- 'EntryBar': [t.entry_bar for t in trades],
64
- 'ExitBar': [t.exit_bar for t in trades],
65
- 'EntryPrice': [t.entry_price for t in trades],
66
- 'ExitPrice': [t.exit_price for t in trades],
67
- 'SL': [t.sl for t in trades],
68
- 'TP': [t.tp for t in trades],
69
- 'PnL': [t.pl for t in trades],
70
- 'Commission': [t._commissions for t in trades],
71
- 'ReturnPct': [t.pl_pct for t in trades],
72
- 'EntryTime': [t.entry_time for t in trades],
73
- 'ExitTime': [t.exit_time for t in trades],
74
- })
75
- trades_df['Duration'] = trades_df['ExitTime'] - trades_df['EntryTime']
76
- trades_df['Tag'] = [t.tag for t in trades]
77
-
78
- # Add indicator values
79
- if len(trades_df) and strategy_instance:
80
- for ind in strategy_instance._indicators:
81
- ind = np.atleast_2d(ind)
82
- for i, values in enumerate(ind): # multi-d indicators
83
- suffix = f'_{i}' if len(ind) > 1 else ''
84
- trades_df[f'Entry_{ind.name}{suffix}'] = values[trades_df['EntryBar'].values]
85
- trades_df[f'Exit_{ind.name}{suffix}'] = values[trades_df['ExitBar'].values]
86
-
87
- commissions = sum(t._commissions for t in trades)
88
- del trades
89
-
90
- pl = trades_df['PnL']
91
- returns = trades_df['ReturnPct']
92
- durations = trades_df['Duration']
93
-
94
- def _round_timedelta(value, _period=_data_period(index)):
95
- if not isinstance(value, pd.Timedelta):
96
- return value
97
- resolution = getattr(_period, 'resolution_string', None) or _period.resolution
98
- return value.ceil(resolution)
99
-
100
- s = pd.Series(dtype=object)
101
- s.loc['Start'] = index[0]
102
- s.loc['End'] = index[-1]
103
- s.loc['Duration'] = s.End - s.Start
104
-
105
- have_position = np.repeat(0, len(index))
106
- for t in trades_df.itertuples(index=False):
107
- have_position[t.EntryBar:t.ExitBar + 1] = 1
108
-
109
- s.loc['Exposure Time [%]'] = have_position.mean() * 100 # In "n bars" time, not index time
110
- s.loc['Equity Final [$]'] = equity[-1]
111
- s.loc['Equity Peak [$]'] = equity.max()
112
- if commissions:
113
- s.loc['Commissions [$]'] = commissions
114
- s.loc['Return [%]'] = (equity[-1] - equity[0]) / equity[0] * 100
115
- first_trading_bar = _indicator_warmup_nbars(strategy_instance)
116
- c = ohlc_data.Close.values
117
- s.loc['Buy & Hold Return [%]'] = (c[-1] - c[first_trading_bar]) / c[first_trading_bar] * 100 # long-only return
118
-
119
- gmean_day_return: float = 0
120
- day_returns = np.array(np.nan)
121
- annual_trading_days = np.nan
122
- is_datetime_index = isinstance(index, pd.DatetimeIndex)
123
- if is_datetime_index:
124
- freq_days = cast(pd.Timedelta, _data_period(index)).days
125
- have_weekends = index.dayofweek.to_series().between(5, 6).mean() > 2 / 7 * .6
126
- annual_trading_days = (
127
- 52 if freq_days == 7 else
128
- 12 if freq_days == 31 else
129
- 1 if freq_days == 365 else
130
- (365 if have_weekends else 252))
131
- freq = {7: 'W', 31: 'ME', 365: 'YE'}.get(freq_days, 'D')
132
- day_returns = equity_df['Equity'].resample(freq).last().dropna().pct_change()
133
- gmean_day_return = geometric_mean(day_returns)
134
-
135
- # Annualized return and risk metrics are computed based on the (mostly correct)
136
- # assumption that the returns are compounded. See: https://dx.doi.org/10.2139/ssrn.3054517
137
- # Our annualized return matches `empyrical.annual_return(day_returns)` whereas
138
- # our risk doesn't; they use the simpler approach below.
139
- annualized_return = (1 + gmean_day_return)**annual_trading_days - 1
140
- s.loc['Return (Ann.) [%]'] = annualized_return * 100
141
- s.loc['Volatility (Ann.) [%]'] = np.sqrt((day_returns.var(ddof=int(bool(day_returns.shape))) + (1 + gmean_day_return)**2)**annual_trading_days - (1 + gmean_day_return)**(2 * annual_trading_days)) * 100 # noqa: E501
142
- # s.loc['Return (Ann.) [%]'] = gmean_day_return * annual_trading_days * 100
143
- # s.loc['Risk (Ann.) [%]'] = day_returns.std(ddof=1) * np.sqrt(annual_trading_days) * 100
144
- if is_datetime_index:
145
- time_in_years = (s.loc['Duration'].days + s.loc['Duration'].seconds / 86400) / annual_trading_days
146
- s.loc['CAGR [%]'] = ((s.loc['Equity Final [$]'] / equity[0])**(1 / time_in_years) - 1) * 100 if time_in_years else np.nan # noqa: E501
147
-
148
- # Our Sharpe mismatches `empyrical.sharpe_ratio()` because they use arithmetic mean return
149
- # and simple standard deviation
150
- s.loc['Sharpe Ratio'] = (s.loc['Return (Ann.) [%]'] - risk_free_rate * 100) / (s.loc['Volatility (Ann.) [%]'] or np.nan) # noqa: E501
151
- # Our Sortino mismatches `empyrical.sortino_ratio()` because they use arithmetic mean return
152
- with np.errstate(divide='ignore'):
153
- s.loc['Sortino Ratio'] = (annualized_return - risk_free_rate) / (np.sqrt(np.mean(day_returns.clip(-np.inf, 0)**2)) * np.sqrt(annual_trading_days)) # noqa: E501
154
- max_dd = -np.nan_to_num(dd.max())
155
- s.loc['Calmar Ratio'] = annualized_return / (-max_dd or np.nan)
156
- equity_log_returns = np.log(equity[1:] / equity[:-1])
157
- market_log_returns = np.log(c[1:] / c[:-1])
158
- beta = np.nan
159
- if len(equity_log_returns) > 1 and len(market_log_returns) > 1:
160
- # len == 0 on dummy call `stats_keys = compute_stats(...)` pre optimization
161
- cov_matrix = np.cov(equity_log_returns, market_log_returns)
162
- beta = cov_matrix[0, 1] / cov_matrix[1, 1]
163
- # Jensen CAPM Alpha: can be strongly positive when beta is negative and B&H Return is large
164
- s.loc['Alpha [%]'] = s.loc['Return [%]'] - risk_free_rate * 100 - beta * (s.loc['Buy & Hold Return [%]'] - risk_free_rate * 100) # noqa: E501
165
- s.loc['Beta'] = beta
166
- s.loc['Max. Drawdown [%]'] = max_dd * 100
167
- s.loc['Avg. Drawdown [%]'] = -dd_peaks.mean() * 100
168
- s.loc['Max. Drawdown Duration'] = _round_timedelta(dd_dur.max())
169
- s.loc['Avg. Drawdown Duration'] = _round_timedelta(dd_dur.mean())
170
- s.loc['# Trades'] = n_trades = len(trades_df)
171
- win_rate = np.nan if not n_trades else (pl > 0).mean()
172
- s.loc['Win Rate [%]'] = win_rate * 100
173
- s.loc['Best Trade [%]'] = returns.max() * 100
174
- s.loc['Worst Trade [%]'] = returns.min() * 100
175
- mean_return = geometric_mean(returns)
176
- s.loc['Avg. Trade [%]'] = mean_return * 100
177
- s.loc['Max. Trade Duration'] = _round_timedelta(durations.max())
178
- s.loc['Avg. Trade Duration'] = _round_timedelta(durations.mean())
179
- s.loc['Profit Factor'] = returns[returns > 0].sum() / (abs(returns[returns < 0].sum()) or np.nan) # noqa: E501
180
- s.loc['Expectancy [%]'] = returns.mean() * 100
181
- s.loc['SQN'] = np.sqrt(n_trades) * pl.mean() / (pl.std() or np.nan)
182
- s.loc['Kelly Criterion'] = win_rate - (1 - win_rate) / (pl[pl > 0].mean() / -pl[pl < 0].mean())
183
-
184
- s.loc['_strategy'] = strategy_instance
185
- s.loc['_equity_curve'] = equity_df
186
- s.loc['_trades'] = trades_df
187
-
188
- s = _Stats(s)
189
- return s
190
-
191
-
192
- class _Stats(pd.Series):
193
- def __repr__(self):
194
- with pd.option_context(
195
- 'display.max_colwidth', 20, # Prevent expansion due to _equity and _trades dfs
196
- 'display.max_rows', len(self), # Reveal self whole
197
- 'display.precision', 5, # Enough for my eyes at least
198
- # 'format.na_rep', '--', # TODO: Enable once it works
199
- ):
200
- return super().__repr__()
201
-
202
-
203
- def dummy_stats():
204
- from .backtesting import Trade, _Broker
205
- index = pd.DatetimeIndex(['2025'])
206
- data = pd.DataFrame({col: [np.nan] for col in ('Close',)}, index=index)
207
- trade = Trade(_Broker(data=data, cash=10000, spread=.01, commission=.01, margin=.1,
208
- trade_on_close=True, hedging=True, exclusive_orders=False, index=index),
209
- 1, 1, 0, None)
210
- trade._replace(exit_price=1, exit_bar=0)
211
- trade._commissions = np.nan
212
- return compute_stats([trade], np.r_[[np.nan]], data, None, 0)
1
+ from __future__ import annotations
2
+
3
+ from numbers import Number
4
+ from typing import TYPE_CHECKING, List, Union, cast
5
+
6
+ import numpy as np
7
+ import pandas as pd
8
+
9
+ if TYPE_CHECKING:
10
+ from .strategy import Strategy
11
+ from .trade import Trade
12
+
13
def compute_drawdown_duration_peaks(dd: pd.Series):
    """Compute the duration and peak of every drawdown episode in `dd`.

    An episode is a run of bars delimited by zero entries of `dd`; the last
    bar of the series also closes an episode that is still open at the end.

    Args:
        dd: Drawdown series in which a value of 0 marks a bar with no drawdown.

    Returns:
        A ``(duration, peak_dd)`` tuple of series indexed like `dd`. Both are
        NaN everywhere except on the closing bar of an episode, where
        ``duration`` is the index difference between the closing and the
        opening bar and ``peak_dd`` is the maximum of `dd` over the episode.
        When there is no episode at all, both elements are `dd` with its
        zeros replaced by NaN.
    """
    # An empty series has no last bar to use as the closing sentinel below.
    if not len(dd):
        return (dd.replace(0, np.nan),) * 2

    # Positions of all zero-drawdown bars, plus the final bar as a sentinel.
    zero_positions = (dd == 0).values.nonzero()[0]
    iloc = np.unique(np.r_[zero_positions, len(dd) - 1])
    iloc = pd.Series(iloc, index=dd.index[iloc])
    df = iloc.to_frame('iloc').assign(prev=iloc.shift())
    # Keep only delimiters with at least one bar strictly between them.
    df = df[df['iloc'] > df['prev'] + 1].astype(np.int64)

    # If there are no trades and hence no drawdown, skip the steps below
    # for pandas' sake and return NaN series.
    if not len(df):
        return (dd.replace(0, np.nan),) * 2

    df['duration'] = df['iloc'].map(dd.index.__getitem__) - df['prev'].map(dd.index.__getitem__)
    df['peak_dd'] = df.apply(lambda row: dd.iloc[row['prev']:row['iloc'] + 1].max(), axis=1)
    df = df.reindex(dd.index)
    return df['duration'], df['peak_dd']
27
+
28
+
29
def geometric_mean(returns: pd.Series) -> float:
    """Return the geometric mean of a series of fractional returns.

    Missing returns are treated as 0 (no change for that period).

    Args:
        returns: Per-period returns as fractions (0.01 means +1 %). Any
            one-dimensional array-like is accepted.

    Returns:
        The per-period compounded mean return. ``0.0`` if any period loses
        100 % or more (the logarithm is undefined there), and NaN when
        `returns` is empty.
    """
    growth = pd.Series(returns, dtype=float).fillna(0) + 1
    n_periods = len(growth)
    if not n_periods:
        return float('nan')
    if (growth <= 0).any():
        return 0.0
    # The mean of log growth factors, exponentiated, is the geometric mean.
    return float(np.exp(np.log(growth).sum() / n_periods) - 1)
34
+
35
+ def _data_period(index) -> Union[pd.Timedelta, Number]:
36
+ """Return data index period as pd.Timedelta"""
37
+ values = pd.Series(index[-100:])
38
+ return values.diff().dropna().median()
39
+
40
def compute_stats(
        trades: Union[List['Trade'], pd.DataFrame],
        equity: np.ndarray,
        ohlc_data: pd.DataFrame,
        strategy_instance: 'Strategy | None',
        risk_free_rate: float = 0,
) -> pd.Series:
    """Summarize a backtest as a series of performance statistics.

    Args:
        trades: Either the closed trade objects (read for size, entry/exit
            bar, price and time, SL, TP, PnL, commissions, return and tag),
            or an already built trades data frame with at least the columns
            ``PnL``, ``ReturnPct``, ``Duration``, ``EntryBar``, ``ExitBar``.
        equity: Equity value per bar, aligned with ``ohlc_data.index``.
        ohlc_data: Price data; only its index is used here.
        strategy_instance: Stored verbatim under the ``_strategy`` key.
        risk_free_rate: Risk-free rate as a fraction, strictly between
            -1 and 1, used for the Sharpe and Sortino ratios.

    Returns:
        An object-dtype series keyed by statistic name. It additionally
        carries ``_strategy``, ``_equity_curve`` (equity, drawdown and
        drawdown duration per bar) and ``_trades`` (the trades data frame).
        ``Commissions [$]`` is present only when trade objects were given
        and their commissions sum to a truthy value; ``CAGR [%]`` only for
        a datetime index.
    """
    assert -1 < risk_free_rate < 1

    index = ohlc_data.index
    dd = 1 - equity / np.maximum.accumulate(equity)
    dd_dur, dd_peaks = compute_drawdown_duration_peaks(pd.Series(dd, index=index))

    equity_df = pd.DataFrame({
        'Equity': equity,
        'DrawdownPct': dd,
        'DrawdownDuration': dd_dur},
        index=index)

    if isinstance(trades, pd.DataFrame):
        trades_df: pd.DataFrame = trades
        commissions = None  # Not shown
    else:
        # Came straight from Backtest.run()
        trades_df = pd.DataFrame({
            'Size': [t.size for t in trades],
            'EntryBar': [t.entry_bar for t in trades],
            'ExitBar': [t.exit_bar for t in trades],
            'EntryPrice': [t.entry_price for t in trades],
            'ExitPrice': [t.exit_price for t in trades],
            'SL': [t.sl for t in trades],
            'TP': [t.tp for t in trades],
            'PnL': [t.pl for t in trades],
            'Commission': [t._commissions for t in trades],
            'ReturnPct': [t.pl_pct for t in trades],
            'EntryTime': [t.entry_time for t in trades],
            'ExitTime': [t.exit_time for t in trades],
        })
        trades_df['Duration'] = trades_df['ExitTime'] - trades_df['EntryTime']
        trades_df['Tag'] = [t.tag for t in trades]

        commissions = sum(t._commissions for t in trades)
    del trades

    pl = trades_df['PnL']
    returns = trades_df['ReturnPct']
    durations = trades_df['Duration']

    # Typical bar spacing; computed once and shared by the rounding helper
    # and the annualization logic below.
    period = _data_period(index)

    def _round_timedelta(value, _period=period):
        # Round durations up to the resolution of the data; anything that is
        # not a Timedelta (e.g. NaN or a plain number) passes through.
        if not isinstance(value, pd.Timedelta):
            return value
        resolution = getattr(_period, 'resolution_string', None) or _period.resolution
        return value.ceil(resolution)

    s = pd.Series(dtype=object)
    s.loc['Start'] = index[0]
    s.loc['End'] = index[-1]
    s.loc['Duration'] = s.End - s.Start

    # Mark every bar on which at least one trade was open.
    have_position = np.repeat(0, len(index))
    for t in trades_df.itertuples(index=False):
        have_position[t.EntryBar:t.ExitBar + 1] = 1

    s.loc['Exposure Time [%]'] = have_position.mean() * 100  # In "n bars" time, not index time
    s.loc['Equity Final [$]'] = equity[-1]
    s.loc['Equity Peak [$]'] = equity.max()
    if commissions:
        s.loc['Commissions [$]'] = commissions
    s.loc['Return [%]'] = (equity[-1] - equity[0]) / equity[0] * 100

    gmean_day_return: float = 0
    day_returns = np.array(np.nan)
    annual_trading_days = np.nan
    is_datetime_index = isinstance(index, pd.DatetimeIndex)
    if is_datetime_index:
        freq_days = cast(pd.Timedelta, period).days
        have_weekends = index.dayofweek.to_series().between(5, 6).mean() > 2 / 7 * .6
        annual_trading_days = (
            52 if freq_days == 7 else
            12 if freq_days == 31 else
            1 if freq_days == 365 else
            (365 if have_weekends else 252))
        freq = {7: 'W', 31: 'ME', 365: 'YE'}.get(freq_days, 'D')
        day_returns = equity_df['Equity'].resample(freq).last().dropna().pct_change()
        gmean_day_return = geometric_mean(day_returns)

    # Annualized return and risk metrics are computed based on the (mostly correct)
    # assumption that the returns are compounded. See: https://dx.doi.org/10.2139/ssrn.3054517
    # Our annualized return matches `empyrical.annual_return(day_returns)` whereas
    # our risk doesn't; they use the simpler arithmetic approach, i.e.
    # `gmean_day_return * annual_trading_days` for return and
    # `day_returns.std(ddof=1) * np.sqrt(annual_trading_days)` for risk.
    growth = 1 + gmean_day_return
    annualized_return = growth**annual_trading_days - 1
    s.loc['Return (Ann.) [%]'] = annualized_return * 100
    # ddof is 1 for a real returns series and 0 for the 0-d NaN placeholder.
    variance = day_returns.var(ddof=int(bool(day_returns.shape)))
    s.loc['Volatility (Ann.) [%]'] = np.sqrt(
        (variance + growth**2)**annual_trading_days
        - growth**(2 * annual_trading_days)) * 100
    if is_datetime_index:
        time_in_years = (s.loc['Duration'].days + s.loc['Duration'].seconds / 86400) / annual_trading_days
        s.loc['CAGR [%]'] = (
            ((s.loc['Equity Final [$]'] / equity[0])**(1 / time_in_years) - 1) * 100
            if time_in_years else np.nan)

    # Our Sharpe mismatches `empyrical.sharpe_ratio()` because they use arithmetic mean return
    # and simple standard deviation
    s.loc['Sharpe Ratio'] = (
        (s.loc['Return (Ann.) [%]'] - risk_free_rate * 100)
        / (s.loc['Volatility (Ann.) [%]'] or np.nan))
    # Our Sortino mismatches `empyrical.sortino_ratio()` because they use arithmetic mean return
    with np.errstate(divide='ignore'):
        downside_deviation = np.sqrt(np.mean(day_returns.clip(-np.inf, 0)**2))
        s.loc['Sortino Ratio'] = (
            (annualized_return - risk_free_rate)
            / (downside_deviation * np.sqrt(annual_trading_days)))
    max_dd = -np.nan_to_num(dd.max())
    s.loc['Calmar Ratio'] = annualized_return / (-max_dd or np.nan)
    s.loc['Max. Drawdown [%]'] = max_dd * 100
    s.loc['Avg. Drawdown [%]'] = -dd_peaks.mean() * 100
    s.loc['Max. Drawdown Duration'] = _round_timedelta(dd_dur.max())
    s.loc['Avg. Drawdown Duration'] = _round_timedelta(dd_dur.mean())
    s.loc['# Trades'] = n_trades = len(trades_df)
    win_rate = np.nan if not n_trades else (pl > 0).mean()
    s.loc['Win Rate [%]'] = win_rate * 100
    s.loc['Best Trade [%]'] = returns.max() * 100
    s.loc['Worst Trade [%]'] = returns.min() * 100
    mean_return = geometric_mean(returns)
    s.loc['Avg. Trade [%]'] = mean_return * 100
    s.loc['Max. Trade Duration'] = _round_timedelta(durations.max())
    s.loc['Avg. Trade Duration'] = _round_timedelta(durations.mean())
    # `or np.nan` turns a zero denominator into NaN instead of dividing by zero.
    s.loc['Profit Factor'] = returns[returns > 0].sum() / (abs(returns[returns < 0].sum()) or np.nan)
    s.loc['Expectancy [%]'] = returns.mean() * 100
    s.loc['SQN'] = np.sqrt(n_trades) * pl.mean() / (pl.std() or np.nan)
    s.loc['Kelly Criterion'] = win_rate - (1 - win_rate) / (pl[pl > 0].mean() / -pl[pl < 0].mean())

    s.loc['_strategy'] = strategy_instance
    s.loc['_equity_curve'] = equity_df
    s.loc['_trades'] = trades_df

    return s