plotguy 1.2.13__tar.gz → 2.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: plotguy
3
- Version: 1.2.13
3
+ Version: 2.0.0
4
4
  Summary: Plotguy
5
5
  Home-page: https://pypi.org/project/plotguy/
6
6
  Author: Plotguy Team
@@ -8,11 +8,13 @@ Author-email: plotguy.info@gmail.com
8
8
  Classifier: Programming Language :: Python :: 3
9
9
  Classifier: Operating System :: OS Independent
10
10
  Description-Content-Type: text/markdown
11
- Requires-Dist: pandas
12
- Requires-Dist: numpy
11
+ Requires-Dist: pandas==2.3.3
12
+ Requires-Dist: numpy==2.4.2
13
13
  Requires-Dist: hkfdb
14
- Requires-Dist: pyarrow
14
+ Requires-Dist: pyarrow==23.0.0
15
+ Requires-Dist: fastparquet
15
16
  Requires-Dist: polars==0.18.15
17
+ Requires-Dist: plotly==5.18.0
16
18
  Requires-Dist: lxml
17
19
  Requires-Dist: dash==2.9.3
18
20
  Requires-Dist: dash_bootstrap_components
@@ -0,0 +1,388 @@
1
+ import warnings
2
+ warnings.simplefilter("ignore", UserWarning)
3
+
4
+ import datetime
5
+ import os
6
+ import sys
7
+ import copy
8
+ import multiprocessing as mp
9
+ import time
10
+ import zlib
11
+ import requests
12
+ import pandas as pd
13
+ import polars as pl
14
+ import numpy as np
15
+
16
+ from .equity_curves import *
17
+ from .signals import *
18
+ from .aggregate import *
19
+ from .components import *
20
+ import itertools
21
+
22
def get_all_para_combination(para_dict, backtest_attribute, df_dict, sec_profile, manager_list):
    """Expand ``para_dict`` into every parameter combination to backtest.

    ``backtest_attribute`` is updated in place with ``risk_free_rate``,
    ``intraday`` and ``summary_mode`` before it is merged into each
    combination.  ``summary_mode`` is only defaulted to ``True`` for
    non-daily frequencies when the caller has not set it; for the ``'1D'``
    frequency it is always forced to ``False``.

    Args:
        para_dict: Mapping of parameter name to an iterable of candidate values.
        backtest_attribute: Shared settings; must contain ``start_date``,
            ``end_date`` and ``freq``, and (directly or through ``para_dict``)
            a ``code`` entry.
        df_dict: Mapping of ``code`` to the source data for that code.
        sec_profile: Object stored unchanged under ``'sec_profile'``.
        manager_list: Shared result list stored under ``'manager_list'``.

    Returns:
        dict mapping a CRC32 reference code to its parameter-combination dict.
    """
    risk_free_rate = get_risk_free_rate(backtest_attribute['start_date'], backtest_attribute['end_date'])

    freq = backtest_attribute['freq']

    intraday = freq != '1D'
    if intraday:
        if 'summary_mode' not in backtest_attribute:
            backtest_attribute['summary_mode'] = True
        backtest_attribute['intraday'] = True
    else:
        backtest_attribute['summary_mode'] = False
        backtest_attribute['intraday'] = False

    backtest_attribute['risk_free_rate'] = risk_free_rate

    para_keys = list(para_dict.keys())
    para_keys_str = '|'.join(para_keys)

    # Materialise the Cartesian product exactly once: building it twice wastes
    # memory and silently yields nothing the second time if any value in
    # para_dict is a one-shot iterator.
    all_combinations = list(itertools.product(*para_dict.values()))

    print('number of combination:', len(all_combinations))

    all_para_combination = {}

    for combination in all_combinations:
        para_combination = dict(zip(para_keys, combination))
        para_combination.update(backtest_attribute)
        para_combination['manager_list'] = manager_list

        # The reference code is the CRC32 of every value (including
        # str(manager_list)) concatenated without a separator.  The hash input
        # is intentionally left exactly as it was so that reference codes of
        # previously stored results remain valid.
        concatenated_values = ''.join(map(str, para_combination.values())).encode('utf-8')
        ref_code = zlib.crc32(concatenated_values)

        # These entries are attached after hashing, so they do not influence
        # the reference code.
        code = para_combination['code']
        para_combination['df'] = df_dict[code]
        para_combination['sec_profile'] = sec_profile
        para_combination['para_keys_str'] = para_keys_str

        all_para_combination[ref_code] = para_combination

    return all_para_combination
68
+
69
def generate_or_read_backtest_result(read_only, mp_mode, number_of_core, manager_list,
                                     all_para_combination, backtest):
    """Run the backtests (or reuse the stored results) and return the result table.

    When ``read_only`` is true and ``backtest_result.parquet`` exists in the
    working directory, the stored table is loaded and nothing is executed.
    Otherwise ``backtest`` is called once per ``(ref_code, para_combination)``
    item, the rows collected in ``manager_list`` are indexed by ``ref_code``,
    merged with any previously stored table and written back to
    ``backtest_result.parquet``.

    Args:
        read_only: Reuse the stored parquet file when it exists.
        mp_mode: Run the backtests in a ``multiprocessing`` pool.
        number_of_core: Number of worker processes when ``mp_mode`` is true.
        manager_list: Shared list that ``backtest`` appends result dicts to.
        all_para_combination: Mapping of ref_code to parameter combination.
        backtest: Callable taking one ``(ref_code, para_combination)`` tuple.

    Returns:
        DataFrame indexed by ``ref_code``, restricted to the reference codes
        present in ``all_para_combination``.
    """
    result_path = 'backtest_result.parquet'

    if read_only and os.path.isfile(result_path):
        backtest_result_df = pd.read_parquet(result_path)

    else:
        t1 = datetime.datetime.now()
        if mp_mode:
            # The context manager terminates the workers if a backtest raises;
            # on success the pool is closed and joined so no process is leaked.
            with mp.Pool(processes=number_of_core) as pool:
                pool.map(backtest, all_para_combination.items())
                pool.close()
                pool.join()
        else:
            for para_combination_item in all_para_combination.items():
                backtest(para_combination_item)

        backtest_result_df = pd.DataFrame(list(manager_list))
        backtest_result_df = backtest_result_df.set_index('ref_code')

        if os.path.isfile(result_path):
            old_backtest_result_df = pd.read_parquet(result_path)
            backtest_result_df = pd.concat([backtest_result_df, old_backtest_result_df])
            # Fresh rows come first in the concat, so keep='first' lets a
            # re-run supersede the stale stored row for the same ref_code
            # (keep='last' would have kept the old statistics even though the
            # equity-curve file was just overwritten).
            backtest_result_df = backtest_result_df[~backtest_result_df.index.duplicated(keep='first')]

        backtest_result_df.to_parquet(result_path)

        # total_seconds() does not wrap around after 24 hours like .seconds does.
        elapsed_seconds = int((datetime.datetime.now() - t1).total_seconds())
        print('backtest time used:', elapsed_seconds, 'seconds')

    backtest_result_df = backtest_result_df[backtest_result_df.index.isin(all_para_combination.keys())]

    return backtest_result_df
101
+
102
def get_source_data_path(data_folder, code, freq):
    """Return the path of the ``<code>_<freq>.parquet`` file inside ``data_folder``."""
    parquet_name = ''.join((code, '_', freq, '.parquet'))
    return os.path.join(data_folder, parquet_name)
105
+
106
+
107
def _annualized_return(return_on_capital, holding_period_day):
    """Annualise a total return earned over ``holding_period_day`` calendar days.

    The sign of the growth factor is applied after exponentiation so that a
    loss of more than 100 % yields a finite negative number instead of NaN.
    A period of zero days cannot be annualised; the raw return is returned.
    """
    if holding_period_day <= 0:
        return return_on_capital
    growth = 1 + return_on_capital
    return np.sign(growth) * np.abs(growth) ** (365 / holding_period_day) - 1


def save_backtest_result(df, para_combination_item):
    """Compute the performance statistics of one backtest and store its equity curve.

    The function derives buy-and-hold ("price") statistics and strategy
    ("equity") statistics, writes the equity curve to
    ``<equity_curve_folder>/<ref_code>.parquet`` and appends the parameter
    combination, enriched with all statistics, to its ``manager_list``.

    Args:
        df: Backtest output indexed by datetime.  Must contain the columns
            ``commission``, ``action``, ``trd_side``, ``equity_value`` and the
            column named by ``para_combination['default_market_price']``.
        para_combination_item: ``(ref_code, para_combination)`` tuple.  The
            ``df``, ``sec_profile`` and ``manager_list`` entries are removed
            from ``para_combination`` before it is appended to the result list.

    Raises:
        ValueError: For intraday data when ``default_market_price`` is neither
            ``'close'`` nor ``'open'`` (no daily aggregation rule is known).
    """
    ref_code, para_combination = para_combination_item

    equity_curve_folder = para_combination['equity_curve_folder']
    manager_list = para_combination['manager_list']
    risk_free_rate = para_combination['risk_free_rate']
    default_market_price = para_combination['default_market_price']
    intraday = para_combination['intraday']

    total_commission = df['commission'].sum()

    df = df[[default_market_price, 'action', 'trd_side', 'equity_value']]
    df = df.rename(columns={default_market_price: 'price'})

    eqiuty_curve_save_path = os.path.join('', equity_curve_folder, f'{ref_code}.parquet')

    ### for intraday ###
    if intraday:
        # Daily bars take the last traded price for 'close' and the first for 'open'.
        agg_price = {'close': 'last', 'open': 'first'}.get(default_market_price)
        if agg_price is None:
            raise ValueError(
                f"default_market_price must be 'close' or 'open' for intraday data, got {default_market_price!r}")
        df_daily = df.resample('D').agg({'price': agg_price})
    else:
        # NOTE: df_daily aliases df here, so the drawdown columns added below
        # are also visible on df (and end up in the non-summary parquet file).
        df_daily = df

    ########### price result ###############
    df_daily['running_max'] = df_daily['price'].cummax()
    df_daily['dd_dollar'] = df_daily['running_max'] - df_daily['price']
    df_daily['dd_pct'] = df_daily['dd_dollar'] / df_daily['running_max'] * 100
    price_mdd_dollar = df_daily['dd_dollar'].max()
    price_mdd_pct = df_daily['dd_pct'].max()

    price_pct_series = df_daily['price'].pct_change().dropna()
    price_net_profit = df_daily.at[df_daily.index[-1], 'price'] - df_daily.at[df_daily.index[0], 'price']
    holding_period_day = (df_daily.index[-1].date() - df_daily.index[0].date()).days

    price_return_on_capital = price_net_profit / df_daily.at[df_daily.index[0], 'price']
    price_annualized_return = _annualized_return(price_return_on_capital, holding_period_day)
    # np.sqrt instead of math.sqrt: this module never imports math itself.
    price_annualized_std = price_pct_series.std() * np.sqrt(365)
    price_annualized_sr = (
        (price_annualized_return - risk_free_rate / 100) / price_annualized_std
        if price_annualized_std != 0 else 0)
    price_net_profit_to_mdd = price_net_profit / price_mdd_dollar if price_mdd_dollar != 0 else 0

    price_return_on_capital = round(100 * price_return_on_capital, 2)
    price_annualized_return = round(100 * price_annualized_return, 2)
    price_annualized_std = round(100 * price_annualized_std, 2)
    price_annualized_sr = round(price_annualized_sr, 2)
    price_net_profit_to_mdd = round(100 * price_net_profit_to_mdd, 2)

    # Rows that open a position; each one counts as one trade.
    df_count = df[(df['trd_side'] == 'BUY') | (df['trd_side'] == 'SELL_SHORT')].copy()
    has_trade = len(df_count) > 0

    if not has_trade:
        num_of_trade = 0
        num_of_win = 0
        win_rate = 0
        yearly_stats_string = ''

        cov_return = 0
        cov_count = 0
        total_commission = 0

    else:
        ########### by year count, win rate and return ###############

        num_of_trade = len(df_count)
        # Append the last row with any trade side so the final trade's P&L is measured too.
        df_count = pd.concat([df_count, df[df['trd_side'] != ''].tail(1)])
        df_count['realized_pnl'] = df_count['equity_value'] - df_count['equity_value'].shift(1)
        df_count['win_trade'] = df_count['realized_pnl'] >= 0

        num_of_win = df_count['win_trade'].sum()
        win_rate = round(100 * num_of_win / num_of_trade, 2)

        yearly_stats = df_count.groupby(df_count.index.year).agg(
            year_pnl=('realized_pnl', 'sum'),
            year_win_count=('win_trade', 'sum'),
            year_trade_count=('realized_pnl', 'count'),
            year_start_equity_value=('equity_value', 'first'))
        yearly_stats['year_return'] = 100 * (yearly_stats['year_pnl'] / yearly_stats['year_start_equity_value'])
        yearly_stats['year_win_rate'] = 100 * (yearly_stats['year_win_count'] / yearly_stats['year_trade_count'])

        mean_win_rate = yearly_stats['year_win_rate'].mean()
        mean_trade_count = yearly_stats['year_trade_count'].mean()
        cov_return = yearly_stats['year_win_rate'].std() / mean_win_rate if mean_win_rate != 0 else 0
        cov_count = yearly_stats['year_trade_count'].std() / mean_trade_count if mean_trade_count != 0 else 0

        # Column-wise Series.map replaces the deprecated DataFrame.applymap.
        yearly_stats = yearly_stats.apply(lambda column: column.map(lambda x: f'{x:.2f}'))
        formatted_rows = yearly_stats.apply(
            lambda row: f"{row.name},{'year_trade_count'}:{row['year_trade_count']},{'year_win_rate'}:{row['year_win_rate']},{'year_return'}:{row['year_return']}",
            axis=1)

        yearly_stats_string = "|".join(formatted_rows)

    ######## resample to daily #############
    if 'summary_mode' in para_combination:
        if not para_combination['summary_mode']:
            eqiuty_curve_non_summary_save_path = os.path.join('', equity_curve_folder, f'{ref_code}_non-summary-intraday.parquet')
            df.to_parquet(eqiuty_curve_non_summary_save_path)

    ########### equity value result ###############

    ### for intraday ###
    if intraday:
        # Without any trade df_count is empty and would give an all-NaN equity
        # curve, so the full frame is the source of the daily equity instead.
        equity_source = df_count if has_trade else df
        df = equity_source.resample('D').agg({'equity_value': 'last'})
        df = pd.concat([df_daily, df], axis=1)
        df = df[df['price'].notna()].copy()
        df['equity_value'] = df['equity_value'].ffill()
        df['equity_value'] = df['equity_value'].bfill()

        df['equity_value'] = df['equity_value'].astype(np.int32)
    ######################

    df['running_max'] = df['equity_value'].cummax()
    df['dd_dollar'] = df['running_max'] - df['equity_value']
    df['dd_pct'] = df['dd_dollar'] / df['running_max'] * 100
    equity_mdd_dollar = df['dd_dollar'].max()
    equity_mdd_pct = df['dd_pct'].max()

    holding_period_day = (df.index[-1].date() - df.index[0].date()).days
    equity_pct_series = df['equity_value'].pct_change().dropna()
    equity_net_profit = df.at[df.index[-1], 'equity_value'] - df.at[df.index[0], 'equity_value']

    equity_return_on_capital = equity_net_profit / df.at[df.index[0], 'equity_value']
    equity_annualized_return = _annualized_return(equity_return_on_capital, holding_period_day)
    equity_annualized_std = equity_pct_series.std() * np.sqrt(365)
    equity_annualized_sr = (
        (equity_annualized_return - risk_free_rate / 100) / equity_annualized_std
        if equity_annualized_std != 0 else 0)
    equity_net_profit_to_mdd = equity_net_profit / equity_mdd_dollar if equity_mdd_dollar != 0 else 0

    equity_return_on_capital = round(100 * equity_return_on_capital, 2)
    equity_annualized_return = round(100 * equity_annualized_return, 2)
    equity_annualized_std = round(100 * equity_annualized_std, 2)
    equity_annualized_sr = round(equity_annualized_sr, 2)
    equity_net_profit_to_mdd = round(100 * equity_net_profit_to_mdd, 2)

    return_on_capital_diff = equity_annualized_return - price_annualized_return

    if intraday:
        df = df[['price', 'equity_value']]
    else:
        df = df[['price', 'action', 'trd_side', 'equity_value']]
    df.to_parquet(eqiuty_curve_save_path)
    print('backtest is runnung...', eqiuty_curve_save_path)

    ####################################
    # Drop the heavy / non-serialisable entries before the combination is
    # stored as a result row.
    for transient_key in ('df', 'sec_profile', 'manager_list'):
        para_combination.pop(transient_key, None)

    cov_return = round(cov_return, 2)
    cov_count = round(cov_count, 2)
    total_commission = int(round(total_commission))

    backtest_result_dict = {
        'ref_code': ref_code,
        'num_of_trade': num_of_trade,

        'equity_net_profit': equity_net_profit,
        'equity_return_on_capital': equity_return_on_capital,
        'equity_annualized_return': equity_annualized_return,
        'equity_annualized_std': equity_annualized_std,
        'equity_annualized_sr': equity_annualized_sr,
        'equity_net_profit_to_mdd': equity_net_profit_to_mdd,
        'equity_mdd_dollar': equity_mdd_dollar,
        'equity_mdd_pct': equity_mdd_pct,

        'price_net_profit': price_net_profit,
        'price_return_on_capital': price_return_on_capital,
        'price_annualized_return': price_annualized_return,
        'price_annualized_std': price_annualized_std,
        'price_annualized_sr': price_annualized_sr,
        'price_net_profit_to_mdd': price_net_profit_to_mdd,
        'price_mdd_dollar': price_mdd_dollar,
        'price_mdd_pct': price_mdd_pct,

        'return_on_capital_diff': return_on_capital_diff,

        'num_of_win': num_of_win,
        'win_rate': win_rate,
        'yearly_stats_string': yearly_stats_string,
        'cov_return': cov_return,
        'cov_count': cov_count,
        'total_commission': total_commission
    }

    para_combination.update(backtest_result_dict)
    manager_list.append(para_combination)
306
+
307
+
308
+ ########################################################################################################
309
+ ########################################################################################################
310
+ ########################################################################################################
311
+
312
def get_risk_free_rate(start_date, end_date):
    """Return the risk-free rate, in percent, for the backtest period.

    When both dates fall in the current calendar year the latest rate is
    fetched with ``get_latest_fed_fund_rate``; otherwise
    ``get_geometric_mean_of_yearly_rate`` is used for the covered years.
    If the lookup fails for any reason, or yields NaN, a default of 2 % is
    returned so the backtest can still run.

    Args:
        start_date: Date string, either ``YYYY-MM-DD`` or ``YYYYMMDD``.
        end_date: Date string, either ``YYYY-MM-DD`` or ``YYYYMMDD``.

    Raises:
        ValueError: If a date string matches neither supported format.
    """

    def _year_of(date_string):
        # Each date is parsed with its own format, so the two arguments do
        # not have to use the same notation.
        date_format = '%Y-%m-%d' if '-' in date_string else '%Y%m%d'
        return datetime.datetime.strptime(date_string, date_format).year

    start_date_year = _year_of(start_date)
    end_date_year = _year_of(end_date)

    try:
        if end_date_year == start_date_year and end_date_year == datetime.datetime.now().year:
            risk_free_rate = get_latest_fed_fund_rate()
        else:
            risk_free_rate = get_geometric_mean_of_yearly_rate(start_date_year, end_date_year)
        # NaN is the only value that differs from itself; treat it as a failed
        # lookup instead of letting it poison every Sharpe ratio downstream.
        if risk_free_rate != risk_free_rate:
            raise ValueError('risk free rate lookup returned NaN')
    except Exception:
        # Best-effort fallback.  Unlike a bare ``except:`` this does not
        # swallow KeyboardInterrupt or SystemExit.
        risk_free_rate = 2  # if network error, set rate to 2 %
        print('Network error. Risk free rate: {:.2f} %'.format(risk_free_rate))

    return risk_free_rate
331
+
332
+
333
+ # def plot_signal_analysis(py_filename, output_folder, start_date, end_date, para_dict, signal_settings):
334
+ # app = signals.Signals(py_filename, output_folder, start_date, end_date, para_dict, generate_backtest_output_path,
335
+ # signal_settings)
336
+ #
337
+ # return app
338
+
339
+
340
def plot(mode, backtest_result_df = None, number_of_curves=20):
    """Create the dashboard application for the requested ``mode``.

    Args:
        mode: ``'equity_curves'`` builds ``equity_curves.Plot`` from
            ``backtest_result_df`` and ``number_of_curves``; ``'aggregate'``
            builds ``aggregate.Aggregate``.
        backtest_result_df: Result table, only used by ``'equity_curves'``.
        number_of_curves: Passed to ``equity_curves.Plot``.

    Returns:
        The object created for the requested mode.

    Raises:
        ValueError: If ``mode`` is not one of the supported values.
    """
    if mode == 'equity_curves':
        return equity_curves.Plot(backtest_result_df, number_of_curves)

    if mode == 'aggregate':
        return aggregate.Aggregate()

    # Previously an unknown mode fell through to ``return app`` and failed
    # with an UnboundLocalError that did not mention the actual problem.
    raise ValueError(f"unknown plot mode {mode!r}; expected 'equity_curves' or 'aggregate'")
349
+
350
+
351
def get_latest_fed_fund_rate():
    """Scrape the latest FEDFUNDS observation from the FRED series page.

    Returns:
        The rate in percent, rounded to two decimals.

    Raises:
        requests.RequestException: On a network failure, timeout or HTTP
            error status.
        ValueError: If the observation value cannot be found on the page or
            is not a number.
    """
    url = "https://fred.stlouisfed.org/series/FEDFUNDS"
    # Without a timeout a stalled connection would block the backtest forever.
    page = requests.get(url, timeout=30)
    page.raise_for_status()
    soup = BeautifulSoup(page.content, "html.parser")

    observation = soup.find("span", class_="series-meta-observation-value")
    if observation is None:
        raise ValueError('observation value not found on the FEDFUNDS page')

    fed_funds_rate = observation.text
    print("Latest Federal Funds Rate:", fed_funds_rate, '%')
    fed_funds_rate = round(float(fed_funds_rate), 2)
    return fed_funds_rate
361
+
362
+
363
def get_geometric_mean_of_yearly_rate(start_year, end_year):  # backtest period
    """Return the geometric mean of the yearly average DTB3 rate, in percent.

    The DTB3 series is downloaded from FRED as CSV, non-numeric observations
    are discarded, the remaining values are averaged per calendar year
    (rounded to three decimals) and the geometric mean of the yearly averages
    from ``start_year`` to ``end_year`` (both inclusive) is returned, rounded
    to two decimals.  The result is NaN when no year in the range has data.

    Raises:
        requests.RequestException: On a network failure, timeout or HTTP
            error status.
    """
    import io

    url = "https://fred.stlouisfed.org/graph/fredgraph.csv?id=DTB3"
    # Without a timeout a stalled connection would block the backtest forever.
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    # A real CSV parser copes with a missing trailing newline and with CRLF
    # line endings, which the manual split("\n")[:-1] did not.
    df = pd.read_csv(io.StringIO(response.text), dtype=str)
    df.columns = ["date", "risk_free_rate"]
    df["date"] = pd.to_datetime(df["date"])
    df["risk_free_rate"] = pd.to_numeric(df["risk_free_rate"], errors='coerce')
    df = df.dropna(subset=['risk_free_rate'])

    # Grouping by calendar year gives the same yearly means as an annual
    # resample without depending on the deprecated "A" frequency alias.
    yearly_rate = df.groupby(df["date"].dt.year)["risk_free_rate"].mean().round(3)

    # keep only the years between start_year and end_year
    yearly_rate = yearly_rate[(yearly_rate.index >= start_year) & (yearly_rate.index <= end_year)]

    fed_fund_rate_geometric_mean = np.exp(np.log(yearly_rate).mean())
    fed_fund_rate_geometric_mean = round(fed_fund_rate_geometric_mean, 2)
    print("Federal Funds Rate Geometric mean from {} to {}: {} %".format(start_year, end_year,
                                                                         fed_fund_rate_geometric_mean))

    return fed_fund_rate_geometric_mean
388
+