plotguy 1.2.10__tar.gz → 2.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
plotguy-2.0.2/PKG-INFO ADDED
@@ -0,0 +1,33 @@
1
+ Metadata-Version: 2.4
2
+ Name: plotguy
3
+ Version: 2.0.2
4
+ Summary: Plotguy
5
+ Home-page: https://pypi.org/project/plotguy/
6
+ Author: Plotguy Team
7
+ Author-email: plotguy.info@gmail.com
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Operating System :: OS Independent
10
+ Description-Content-Type: text/markdown
11
+ Requires-Dist: pandas==2.3.3
12
+ Requires-Dist: steptools
13
+ Requires-Dist: numpy==2.4.2
14
+ Requires-Dist: hkfdb
15
+ Requires-Dist: pyarrow==23.0.0
16
+ Requires-Dist: fastparquet
17
+ Requires-Dist: polars==0.18.15
18
+ Requires-Dist: plotly==5.18.0
19
+ Requires-Dist: lxml
20
+ Requires-Dist: dash==2.9.3
21
+ Requires-Dist: dash_bootstrap_components
22
+ Requires-Dist: dash_daq
23
+ Requires-Dist: dash_dangerously_set_inner_html
24
+ Dynamic: author
25
+ Dynamic: author-email
26
+ Dynamic: classifier
27
+ Dynamic: description
28
+ Dynamic: description-content-type
29
+ Dynamic: home-page
30
+ Dynamic: requires-dist
31
+ Dynamic: summary
32
+
33
+ # Plotguy
@@ -0,0 +1,387 @@
1
+ import warnings
2
+ warnings.simplefilter("ignore", UserWarning)
3
+
4
+ import datetime
5
+ import os
6
+ import sys
7
+ import copy
8
+ import multiprocessing as mp
9
+ import time
10
+ import zlib
11
+ import requests
12
+ import pandas as pd
13
+ import numpy as np
14
+
15
+ from .equity_curves import *
16
+ from .signals import *
17
+ from .aggregate import *
18
+ from .components import *
19
+ import itertools
20
+
21
def get_all_para_combination(para_dict, backtest_attribute, df_dict, sec_profile, manager_list):
    """Expand a parameter grid into one settings dict per combination.

    Args:
        para_dict: Mapping of parameter name -> iterable of candidate values.
            Keys must be strings (they are joined with ``'|'``).
        backtest_attribute: Dict of run-wide settings. ``'start_date'``,
            ``'end_date'`` and ``'freq'`` are read here. The dict is modified
            in place: ``'summary_mode'``, ``'intraday'`` and
            ``'risk_free_rate'`` are written into it.
        df_dict: Mapping of security code -> source data; looked up with each
            combination's ``'code'`` entry.
        sec_profile: Stored unchanged under ``'sec_profile'`` in every
            combination.
        manager_list: Shared result list, stored under ``'manager_list'`` in
            every combination.

    Returns:
        Dict mapping ``ref_code`` (CRC32 of the stringified combination
        values) to the combination dict.

    Raises:
        KeyError: If a required key is missing from ``backtest_attribute``,
            if a combination has no ``'code'`` entry, or if that code is not
            in ``df_dict``.
    """
    risk_free_rate = get_risk_free_rate(backtest_attribute['start_date'], backtest_attribute['end_date'])

    # Any frequency other than daily bars is treated as intraday.
    intraday = bool(backtest_attribute['freq'] != '1D')
    if intraday:
        # Respect a caller-supplied summary_mode; default it to True otherwise.
        backtest_attribute.setdefault('summary_mode', True)
    else:
        backtest_attribute['summary_mode'] = False
    backtest_attribute['intraday'] = intraday
    backtest_attribute['risk_free_rate'] = risk_free_rate

    para_keys = list(para_dict)
    para_keys_str = '|'.join(para_keys)

    # Materialise the Cartesian product exactly once so that the printed count
    # and the returned combinations always agree, even when a parameter's
    # values come from a one-shot iterable.
    all_combinations = list(itertools.product(*para_dict.values()))
    print('number of combination:', len(all_combinations))

    all_para_combination = {}
    for combination in all_combinations:
        para_combination = dict(zip(para_keys, combination))
        para_combination.update(backtest_attribute)
        para_combination['manager_list'] = manager_list

        # The reference code is derived from the values present at this point
        # only; the entries added below are deliberately not part of it.
        concatenated_values = ''.join(map(str, para_combination.values())).encode('utf-8')
        ref_code = zlib.crc32(concatenated_values)

        para_combination['df'] = df_dict[para_combination['code']]
        para_combination['sec_profile'] = sec_profile
        para_combination['para_keys_str'] = para_keys_str

        # NOTE(review): two combinations whose CRC32 collide would silently
        # overwrite each other here.
        all_para_combination[ref_code] = para_combination

    return all_para_combination
67
+
68
def generate_or_read_backtest_result(read_only, mp_mode, number_of_core, manager_list,
                                     all_para_combination, backtest):
    """Run the backtests (or reuse stored results) and return their summary.

    Args:
        read_only: When true and ``backtest_result.parquet`` exists in the
            working directory, no backtest is run and the stored results are
            loaded instead.
        mp_mode: When true, backtests run in a ``multiprocessing`` pool;
            otherwise they run one after another in this process.
        number_of_core: Number of worker processes for the pool.
        manager_list: List-like that ``backtest`` fills with one result dict
            per combination; each dict must contain ``'ref_code'``.
        all_para_combination: Mapping of ref_code -> parameter combination.
        backtest: Callable invoked with each ``(ref_code, para_combination)``
            item.

    Returns:
        DataFrame indexed by ``ref_code``, restricted to the ref codes present
        in ``all_para_combination``.
    """
    result_path = 'backtest_result.parquet'

    if read_only and os.path.isfile(result_path):
        backtest_result_df = pd.read_parquet(result_path)
    else:
        t1 = datetime.datetime.now()
        if mp_mode:
            # The context manager releases the worker processes even when a
            # backtest raises; map() has already collected every result by the
            # time the block exits.
            with mp.Pool(processes=number_of_core) as pool:
                pool.map(backtest, all_para_combination.items())
        else:
            for para_combination_item in all_para_combination.items():
                backtest(para_combination_item)

        backtest_result_df = pd.DataFrame(list(manager_list))
        backtest_result_df = backtest_result_df.set_index('ref_code')

        if os.path.isfile(result_path):
            old_backtest_result_df = pd.read_parquet(result_path)
            backtest_result_df = pd.concat([backtest_result_df, old_backtest_result_df])
            # Fresh rows come first in the concat, so keep='first' lets a
            # re-run replace the stored row for the same ref_code instead of
            # being shadowed by the stale one.
            backtest_result_df = backtest_result_df[~backtest_result_df.index.duplicated(keep='first')]

        backtest_result_df.to_parquet(result_path)

        # total_seconds() does not wrap around after 24 hours the way
        # timedelta.seconds does.
        elapsed_seconds = int((datetime.datetime.now() - t1).total_seconds())
        print('backtest time used:', elapsed_seconds, 'seconds')

    # The stored file may hold results of earlier runs with other parameters.
    backtest_result_df = backtest_result_df[backtest_result_df.index.isin(all_para_combination.keys())]

    return backtest_result_df
100
+
101
def get_source_data_path(data_folder, code, freq):
    """Return the path of the ``<code>_<freq>.parquet`` file inside ``data_folder``."""
    file_name = ''.join((code, '_', freq, '.parquet'))
    return os.path.join(data_folder, file_name)
104
+
105
+
106
def save_backtest_result(df, para_combination_item):
    """Summarise one backtest run, save its equity curve and record the stats.

    Args:
        df: Backtest output frame. The columns ``'commission'``, ``'action'``,
            ``'trd_side'``, ``'equity_value'`` and the column named by
            ``para_combination['default_market_price']`` are read. The index
            must be datetime-like (``resample``, ``.index.year`` and
            ``.date()`` are used on it).
        para_combination_item: ``(ref_code, para_combination)`` pair. The keys
            ``'equity_curve_folder'``, ``'manager_list'``,
            ``'risk_free_rate'``, ``'default_market_price'`` and
            ``'intraday'`` are required; ``'summary_mode'`` is optional.

    Returns:
        None. Side effects: writes ``<ref_code>.parquet`` (and optionally
        ``<ref_code>_non-summary-intraday.parquet``) into the equity curve
        folder, prints a progress line, removes ``'df'``, ``'sec_profile'``
        and ``'manager_list'`` from ``para_combination``, merges the computed
        statistics into it and appends it to ``manager_list``.

    NOTE(review): the nesting of the sections below was reconstructed from a
    listing without indentation; confirm the block boundaries (in particular
    which statements belong to the ``elif len(df_count) > 0`` branch and to
    the ``if intraday`` blocks) against the packaged module.
    """

    ref_code, para_combination = para_combination_item

    equity_curve_folder = para_combination['equity_curve_folder']
    manager_list = para_combination['manager_list']
    risk_free_rate = para_combination['risk_free_rate']
    default_market_price = para_combination['default_market_price']
    intraday = para_combination['intraday']

    # Summed before the column selection below drops 'commission'.
    total_commission = df['commission'].sum()

    df = df[[default_market_price, 'action', 'trd_side', 'equity_value']]
    df = df.rename(columns={default_market_price: 'price'})

    eqiuty_curve_save_path = os.path.join('', equity_curve_folder, f'{ref_code}.parquet')
    ##############################################################

    # NOTE(review): agg_price stays unassigned when default_market_price is
    # neither 'close' nor 'open'; the intraday resample below would then fail.
    if default_market_price == 'close':
        agg_price = 'last'
    elif default_market_price == 'open':
        agg_price = 'first'
    ### for intraday ###
    if intraday:
        df_daily = df.resample('D').agg({'price': agg_price})
    else:
        # Same object as df: the drawdown columns added below land on df too.
        df_daily = df

    ########### price result ###############
    df_daily['running_max'] = df_daily['price'].cummax()
    df_daily['dd_dollar'] = df_daily['running_max'] - df_daily['price']
    df_daily['dd_pct'] = df_daily['dd_dollar'] / df_daily['running_max'] * 100
    price_mdd_dollar = df_daily['dd_dollar'].max()
    price_mdd_pct = df_daily['dd_pct'].max()

    price_pct_series = df_daily['price'].pct_change().dropna()
    price_net_profit = df_daily.at[df_daily.index[-1], 'price'] - df_daily.at[df_daily.index[0], 'price']
    # NOTE(review): this is 0 when first and last rows share a date, which
    # makes the 365 / holding_period_day exponent below divide by zero.
    holding_period_day = (df_daily.index[-1].date() - df_daily.index[0].date()).days

    price_return_on_capital = price_net_profit / df_daily.at[df_daily.index[0], 'price']
    # sign(x) * abs(x) is just x, so this is (1 + return) ** (365 / days) - 1.
    price_annualized_return = (np.sign(1 + price_return_on_capital) * np.abs(1 + price_return_on_capital)) ** (
            365 / holding_period_day) - 1
    # NOTE(review): `math` is not among this module's explicit imports;
    # presumably it arrives through one of the star-imports — confirm.
    price_annualized_std = price_pct_series.std() * math.sqrt(365)
    # risk_free_rate is divided by 100 here, i.e. it is held as a percentage.
    price_annualized_sr = (
            price_annualized_return - risk_free_rate / 100) / price_annualized_std if price_annualized_std != 0 else 0
    price_net_profit_to_mdd = price_net_profit / price_mdd_dollar if price_mdd_dollar != 0 else 0

    # Convert ratios to percentages rounded to two decimals.
    price_return_on_capital = round(100 * price_return_on_capital, 2)
    price_annualized_return = round(100 * price_annualized_return, 2)
    price_annualized_std = round(100 * price_annualized_std, 2)
    price_annualized_sr = round(price_annualized_sr, 2)
    price_net_profit_to_mdd = round(100 * price_net_profit_to_mdd, 2)

    # Rows that open a position; each one counts as a trade.
    df_count = df[(df['trd_side'] == 'BUY') | (df['trd_side'] == 'SELL_SHORT')].copy()

    if len(df_count) == 0:
        # NOTE(review): this branch does not assign equity_net_profit,
        # equity_mdd_dollar or equity_mdd_pct; those names are only assigned
        # by the equity section further down.
        num_of_trade = 0
        equity_return_on_capital = 0
        equity_annualized_return = 0
        equity_annualized_std = 0
        equity_annualized_sr = 0
        equity_net_profit_to_mdd = np.inf

        num_of_win = 0
        num_of_trade = 0
        win_rate = 0
        yearly_stats_string = ''

        cov_return = 0
        cov_count = 0
        total_commission = 0

    elif len(df_count) > 0:

        ########### by year count, win rate and return ###############

        num_of_trade = len(df_count)
        # Append the last row with a non-empty trd_side so that the final
        # opening row also gets a following equity value to diff against.
        df_count = pd.concat([df_count, df[df['trd_side'] != ''].tail(1)])
        df_count['realized_pnl'] = df_count['equity_value'] - df_count['equity_value'].shift(1)
        df_count['win_trade'] = df_count['realized_pnl'] >= 0

        num_of_win = df_count['win_trade'].sum()
        win_rate = round(100 * num_of_win / num_of_trade, 2)

        yearly_stats = df_count.groupby(df_count.index.year).agg(
            year_pnl=('realized_pnl', 'sum'),
            year_win_count=('win_trade', 'sum'),
            year_trade_count=('realized_pnl', 'count'),
            year_start_equity_value=('equity_value', 'first'))
        yearly_stats['year_return'] = 100 * (yearly_stats['year_pnl'] / yearly_stats['year_start_equity_value'])
        yearly_stats['year_win_rate'] = 100 * (yearly_stats['year_win_count'] / yearly_stats['year_trade_count'])

        # Coefficients of variation (std / mean) across years. Note that
        # cov_return is computed from the yearly win rate, not the yearly return.
        cov_return = yearly_stats['year_win_rate'].std() / yearly_stats['year_win_rate'].mean() if yearly_stats['year_win_rate'].mean() != 0 else 0
        cov_count = yearly_stats['year_trade_count'].std() / yearly_stats['year_trade_count'].mean() if yearly_stats['year_trade_count'].mean() != 0 else 0

        # NOTE(review): DataFrame.applymap is deprecated in recent pandas in
        # favour of DataFrame.map.
        yearly_stats = yearly_stats.applymap(lambda x: f'{x:.2f}')
        # One "<year>,year_trade_count:..,year_win_rate:..,year_return:.." string per year.
        formatted_rows = yearly_stats.apply(
            lambda
                row: f"{row.name},{'year_trade_count'}:{row['year_trade_count']},{'year_win_rate'}:{row['year_win_rate']},{'year_return'}:{row['year_return']}",
            axis=1)

        yearly_stats_string = "|".join(formatted_rows)

    ######## resample to daily #############
    # When summary mode is explicitly off, also keep the un-resampled frame.
    if 'summary_mode' in para_combination:
        if not para_combination['summary_mode']:
            eqiuty_curve_non_summary_save_path = os.path.join('', equity_curve_folder, f'{ref_code}_non-summary-intraday.parquet')
            df.to_parquet(eqiuty_curve_non_summary_save_path)

    ########### equity value result ###############

    ### for intraday ###
    if intraday:
        # Daily equity comes from the trade rows only; days without a trade
        # row are filled from neighbouring days.
        df = df_count.resample('D').agg({'equity_value': 'last'})
        df = pd.concat([df_daily, df], axis=1)
        df = df[df['price'].notna()]
        df['equity_value'] = df['equity_value'].ffill()
        df['equity_value'] = df['equity_value'].bfill()

    df['equity_value'] = df['equity_value'].astype(np.int32)
    ######################

    df['running_max'] = df['equity_value'].cummax()
    df['dd_dollar'] = df['running_max'] - df['equity_value']
    df['dd_pct'] = df['dd_dollar'] / df['running_max'] * 100
    equity_mdd_dollar = df['dd_dollar'].max()
    equity_mdd_pct = df['dd_pct'].max()

    holding_period_day = (df.index[-1].date() - df.index[0].date()).days
    equity_pct_series = df['equity_value'].pct_change().dropna()
    equity_net_profit = df.at[df.index[-1], 'equity_value'] - df.at[df.index[0], 'equity_value']

    # Same formulas as the price section above, applied to the equity curve.
    equity_return_on_capital = equity_net_profit / df.at[df.index[0], 'equity_value']
    equity_annualized_return = (np.sign(1 + equity_return_on_capital) * np.abs(1 + equity_return_on_capital)) ** (
            365 / holding_period_day) - 1
    equity_annualized_std = equity_pct_series.std() * math.sqrt(365)
    equity_annualized_sr = (
            equity_annualized_return - risk_free_rate / 100) / equity_annualized_std if equity_annualized_std != 0 else 0
    equity_net_profit_to_mdd = equity_net_profit / equity_mdd_dollar if equity_mdd_dollar != 0 else 0

    equity_return_on_capital = round(100 * equity_return_on_capital, 2)
    equity_annualized_return = round(100 * equity_annualized_return, 2)
    equity_annualized_std = round(100 * equity_annualized_std, 2)
    equity_annualized_sr = round(equity_annualized_sr, 2)
    equity_net_profit_to_mdd = round(100 * equity_net_profit_to_mdd, 2)

    # Despite the name, this is the difference of the two annualized returns.
    return_on_capital_diff = equity_annualized_return - price_annualized_return

    # Drop the helper drawdown columns before saving the curve.
    if intraday:
        df = df[['price', 'equity_value']]
    elif not intraday:
        df = df[['price', 'action', 'trd_side', 'equity_value']]
    df.to_parquet(eqiuty_curve_save_path)
    print('backtest is runnung...', eqiuty_curve_save_path)

    ####################################
    # Remove the entries that are not part of the result record before the
    # dict is appended to manager_list.
    del para_combination['df']
    del para_combination['sec_profile']
    del para_combination['manager_list']

    cov_return = round(cov_return, 2)
    cov_count = round(cov_count, 2)
    total_commission = int(round(total_commission))

    backtest_result_dict = {
        'ref_code': ref_code,
        'num_of_trade': num_of_trade,

        'equity_net_profit': equity_net_profit,
        'equity_return_on_capital': equity_return_on_capital,
        'equity_annualized_return': equity_annualized_return,
        'equity_annualized_std': equity_annualized_std,
        'equity_annualized_sr': equity_annualized_sr,
        'equity_net_profit_to_mdd': equity_net_profit_to_mdd,
        'equity_mdd_dollar': equity_mdd_dollar,
        'equity_mdd_pct': equity_mdd_pct,

        'price_net_profit' : price_net_profit,
        'price_return_on_capital': price_return_on_capital,
        'price_annualized_return': price_annualized_return,
        'price_annualized_std': price_annualized_std,
        'price_annualized_sr': price_annualized_sr,
        'price_net_profit_to_mdd': price_net_profit_to_mdd,
        'price_mdd_dollar': price_mdd_dollar,
        'price_mdd_pct': price_mdd_pct,

        'return_on_capital_diff': return_on_capital_diff,

        'num_of_win': num_of_win,
        # NOTE(review): 'num_of_trade' already appears above; the repeated key
        # is redundant (same value).
        'num_of_trade': num_of_trade,
        'win_rate': win_rate,
        'yearly_stats_string': yearly_stats_string,
        'cov_return': cov_return,
        'cov_count': cov_count,
        'total_commission': total_commission
    }

    para_combination.update(backtest_result_dict)
    manager_list.append(para_combination)
305
+
306
+
307
+ ########################################################################################################
308
+ ########################################################################################################
309
+ ########################################################################################################
310
+
311
def get_risk_free_rate(start_date, end_date):
    """Return the risk-free rate, in percent, for a backtest period.

    The latest Fed funds rate is used when the whole period lies in the
    current calendar year; otherwise the geometric mean of the yearly rates
    between the start and end year is used. If the lookup fails for any
    reason, or yields NaN, the rate falls back to 2 %.

    Args:
        start_date: Period start as ``'YYYY-MM-DD'`` or ``'YYYYMMDD'``.
        end_date: Period end as ``'YYYY-MM-DD'`` or ``'YYYYMMDD'``. It no
            longer has to use the same format as ``start_date``.

    Returns:
        The risk-free rate as a percentage (e.g. ``2`` means 2 %).

    Raises:
        ValueError: If either date matches neither accepted format.
    """
    def _year_of(date_str):
        # Choose the format per date instead of assuming both dates share one.
        date_format = '%Y-%m-%d' if '-' in date_str else '%Y%m%d'
        return datetime.datetime.strptime(date_str, date_format).year

    start_date_year = _year_of(start_date)
    end_date_year = _year_of(end_date)

    try:
        if end_date_year == start_date_year and end_date_year == datetime.datetime.now().year:
            risk_free_rate = get_latest_fed_fund_rate()
        else:
            risk_free_rate = get_geometric_mean_of_yearly_rate(start_date_year, end_date_year)
        # An empty year range produces NaN, which would poison every Sharpe
        # ratio computed from it; treat it like a failed lookup.
        if np.isnan(risk_free_rate):
            raise ValueError('risk free rate lookup returned NaN')
    except Exception:
        # Deliberately best-effort, but no longer a bare ``except``:
        # KeyboardInterrupt and SystemExit now propagate.
        risk_free_rate = 2  # if network error, set rate to 2 %
        print('Network error. Risk free rate: {:.2f} %'.format(risk_free_rate))

    return risk_free_rate
330
+
331
+
332
+ # def plot_signal_analysis(py_filename, output_folder, start_date, end_date, para_dict, signal_settings):
333
+ # app = signals.Signals(py_filename, output_folder, start_date, end_date, para_dict, generate_backtest_output_path,
334
+ # signal_settings)
335
+ #
336
+ # return app
337
+
338
+
339
def plot(mode, backtest_result_df=None, number_of_curves=20):
    """Build the plotting app for the requested mode.

    Args:
        mode: ``'equity_curves'`` or ``'aggregate'``.
        backtest_result_df: Passed to ``equity_curves.Plot``; only used in
            ``'equity_curves'`` mode.
        number_of_curves: Passed to ``equity_curves.Plot``; only used in
            ``'equity_curves'`` mode.

    Returns:
        The object created by ``equity_curves.Plot`` or
        ``aggregate.Aggregate``.

    Raises:
        ValueError: If ``mode`` is not one of the supported values
            (previously this surfaced as an ``UnboundLocalError``).
    """
    if mode == 'equity_curves':
        return equity_curves.Plot(backtest_result_df, number_of_curves)

    if mode == 'aggregate':
        return aggregate.Aggregate()

    raise ValueError(
        "unknown plot mode {!r}; expected 'equity_curves' or 'aggregate'".format(mode))
348
+
349
+
350
def get_latest_fed_fund_rate(timeout=10):
    """Scrape the latest Federal Funds rate from the FRED series page.

    Args:
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The rate as a percentage, rounded to two decimals.

    Raises:
        requests.RequestException: On connection failure, timeout or a
            non-success HTTP status.
        ValueError: If the observation value is missing from the page or is
            not numeric.
    """
    url = "https://fred.stlouisfed.org/series/FEDFUNDS"
    # Without a timeout a stalled connection would block the backtest forever.
    page = requests.get(url, timeout=timeout)
    page.raise_for_status()

    # NOTE(review): BeautifulSoup is not among this module's explicit imports;
    # presumably it arrives through one of the star-imports — confirm.
    soup = BeautifulSoup(page.content, "html.parser")

    observation = soup.find("span", class_="series-meta-observation-value")
    if observation is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError("Federal Funds rate not found on the FRED page")

    fed_funds_rate = observation.text
    print("Latest Federal Funds Rate:", fed_funds_rate, '%')
    fed_funds_rate = round(float(fed_funds_rate), 2)
    return fed_funds_rate
360
+
361
+
362
def get_geometric_mean_of_yearly_rate(start_year, end_year, timeout=10):  # backtest period
    """Return the geometric mean of the yearly average DTB3 rate.

    The daily series is downloaded from FRED as CSV, averaged per calendar
    year, restricted to ``start_year``..``end_year`` (both inclusive) and
    combined with a geometric mean.

    Args:
        start_year: First calendar year of the backtest period.
        end_year: Last calendar year of the backtest period.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The geometric mean as a percentage, rounded to two decimals. The
        result is NaN when no observation falls inside the requested years.

    Raises:
        requests.RequestException: On connection failure, timeout or a
            non-success HTTP status.
    """
    url = "https://fred.stlouisfed.org/graph/fredgraph.csv?id=DTB3"
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()

    # splitlines() copes with "\r\n" endings and does not drop the last data
    # row when the body lacks a trailing newline.
    rows = [line.split(",") for line in response.text.splitlines() if line.strip()]
    # rows[0] is the CSV header; the columns are renamed to fixed names.
    df = pd.DataFrame(rows[1:], columns=["date", "risk_free_rate"])
    df["date"] = pd.to_datetime(df["date"])
    # Missing observations are not numeric; coerce them to NaN and drop them.
    df["risk_free_rate"] = pd.to_numeric(df["risk_free_rate"], errors='coerce')
    df = df.dropna(subset=['risk_free_rate'])

    # Group by calendar year directly, which avoids the deprecated "A"
    # resample alias and yields the same yearly means.
    risk_free_rate_history_yearly = df.groupby(df["date"].dt.year)["risk_free_rate"].mean().round(3)

    # keep only the years between start_year and end_year
    in_period = ((risk_free_rate_history_yearly.index >= start_year)
                 & (risk_free_rate_history_yearly.index <= end_year))
    risk_free_rate_history_yearly = risk_free_rate_history_yearly[in_period]

    # Geometric mean computed as exp(mean(log(x))).
    fed_fund_rate_geometric_mean = np.exp(np.log(risk_free_rate_history_yearly).mean())
    fed_fund_rate_geometric_mean = round(fed_fund_rate_geometric_mean, 2)
    print("Federal Funds Rate Geometric mean from {} to {}: {} %".format(start_year, end_year,
                                                                         fed_fund_rate_geometric_mean))

    return fed_fund_rate_geometric_mean
387
+